# config.py
# Training/model hyperparameters as plain module-level constants.
# NOTE(review): the original file had been collapsed onto one line starting
# with "#", which made every assignment part of a comment — the module
# defined nothing. Restored to one assignment per line.

VOCAB_SIZE = 50000      # tokenizer vocabulary size — presumably; confirm against tokenizer
CONTEXT_SIZE = 128      # sequence length in tokens per example — TODO confirm in trainer
EMBED_DIM = 256         # model embedding width
NUM_HEADS = 8           # attention heads (EMBED_DIM / NUM_HEADS = 32 per head, assuming a transformer)
NUM_LAYERS = 6          # number of model layers
BATCH_SIZE = 16         # examples per training batch
LEARNING_RATE = 3e-4    # optimizer step size
DEVICE = "cuda"  # fallback handled in trainer
MAX_TOKENS = 100_000  # Used to cap input corpus size