Updated the model capacity

This commit is contained in:
2025-06-30 18:08:11 -04:00
parent 159be1eb82
commit 6366f72716
6 changed files with 95 additions and 10058 deletions

View File

@@ -8,4 +8,4 @@ NUM_LAYERS = 6
BATCH_SIZE = 16
LEARNING_RATE = 3e-4
DEVICE = "cuda" # fallback handled in trainer
-MAX_TOKENS = 100_000 # Used to cap input corpus size
+MAX_TOKENS = 500_000 # Used to cap input corpus size