Files
Lyra/configs/model_250M.yaml
2025-09-29 22:40:17 -04:00

24 lines
542 B
YAML

# Lyra 250M model configuration
# GPT-style decoder-only transformer.
#
# Every setting below is nested under the top-level `model` key; the
# indentation is significant (a bare `model:` with flush-left children
# would parse as `model: null` plus unrelated top-level keys).
model:
  name: "lyra-250M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048  # Context window length
  n_embd: 1024  # Embedding dimension
  n_layer: 16  # Number of transformer layers
  n_head: 16  # Number of attention heads (n_embd / n_head = 64 per head)
  n_inner: 4096  # FFN inner dimension (4 * n_embd)

  # Regularization
  # NOTE(review): presumably dropout probabilities for the embedding,
  # residual and attention paths (per the *_pdrop names) - confirm
  # against the model code.
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

  # Total parameters: ~250M
  # NOTE(review): rough estimate only - 12 * n_embd^2 * n_layer is about
  # 201M for the transformer blocks, plus about 51M for the token
  # embedding table; this assumes a standard GPT-2 layout with tied
  # output embeddings. TODO confirm against the actual model.