Updated the model capacity

2025-06-30 18:08:11 -04:00
parent 159be1eb82
commit 6366f72716
6 changed files with 95 additions and 10058 deletions
--- a/config.py
+++ b/config.py
@@ -8,4 +8,4 @@ NUM_LAYERS = 6
 BATCH_SIZE = 16
 LEARNING_RATE = 3e-4
 DEVICE = "cuda"  # fallback handled in trainer
-MAX_TOKENS = 100_000  # Used to cap input corpus size
+MAX_TOKENS = 500_000  # Used to cap input corpus size