feat: Managed to achieve a loss of 0.285
This commit is contained in:
@ -3,12 +3,11 @@ import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# Hyperparameters
# NOTE(review): the diff residue showed stale duplicate assignments
# (num_embed 384, num_layers 8, dropout 0.2) that were immediately
# overwritten; only the effective values are kept here.
batch_size = 64      # sequences per training batch
block_size = 256     # maximum context length in tokens
num_embed = 512      # embedding dimension (increased from 384)
num_heads = 8        # attention heads per transformer block
num_layers = 12      # transformer blocks (increased from 8)
dropout = 0.3        # dropout probability (increased from 0.2)
|
||||
|
||||
|
||||
class Head(nn.Module):
|
||||
@ -131,7 +130,6 @@ class GPT(nn.Module):
|
||||
|
||||
|
||||
def encode(s, string_to_int):
    """Convert a string into a list of integer token ids.

    Each character is looked up in *string_to_int*; characters not present
    in the mapping fall back to the id of the special "<unk>" token.
    The "<unk>" lookup happens per unknown-capable access, so an empty
    input never requires "<unk>" to exist in the mapping.
    """
    ids = []
    for ch in s:
        # dict.get with the "<unk>" id as default handles unseen characters
        ids.append(string_to_int.get(ch, string_to_int["<unk>"]))
    return ids
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user