feat: Managed to achieve a loss of 0.285
This commit is contained in:
@ -3,12 +3,11 @@ import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
# Hyperparameters
# NOTE(review): the diff residue showed stale duplicate assignments
# (num_embed 384, num_layers 8, dropout 0.2) that were immediately
# overwritten; only the effective values are kept here.
batch_size = 64      # sequences per training batch
block_size = 256     # maximum context length in tokens
num_embed = 512      # embedding dimension (increased from 384)
num_heads = 8        # attention heads per transformer block
num_layers = 12      # transformer blocks (increased from 8)
dropout = 0.3        # dropout probability (increased from 0.2)
|
||||
|
||||
|
||||
class Head(nn.Module):
|
||||
@ -131,7 +130,6 @@ class GPT(nn.Module):
|
||||
|
||||
|
||||
def encode(s, string_to_int):
    """Convert a string into a list of integer token ids.

    Each character is looked up in *string_to_int*; characters not present
    in the mapping fall back to the id of the special "<unk>" token.
    The "<unk>" lookup happens per unknown-capable access, so an empty
    input never requires "<unk>" to exist in the mapping.
    """
    ids = []
    for ch in s:
        # dict.get with the "<unk>" id as default handles unseen characters
        ids.append(string_to_int.get(ch, string_to_int["<unk>"]))
    return ids
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user