@ -8,4 +8,4 @@ NUM_LAYERS = 6
# Configuration constants.
BATCH_SIZE = 16
LEARNING_RATE = 3e-4
DEVICE = "cuda"  # fallback handled in trainer

# Used to cap input corpus size.
# Defined exactly once: an earlier `MAX_TOKENS = 100_000` line was
# immediately overridden by this value, so only 500_000 ever took effect
# (and a repeated key is a hard error if this file is parsed as TOML).
MAX_TOKENS = 500_000
The note is not visible to the blocked user.