---
# Lyra 125M Model Configuration
# GPT-style decoder-only transformer
#
# NOTE(review): the nesting under `model:` was reconstructed from a copy whose
# indentation had been flattened — confirm against the schema the loader expects.

model:
  name: "lyra-125M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257  # Will be updated after tokenizer training
  n_positions: 1024  # Context window
  n_embd: 768  # Embedding dimension
  n_layer: 12  # Number of transformer layers
  n_head: 12  # Number of attention heads
  n_inner: 3072  # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

  # Total parameters: ~125M (estimate for the values above; re-check once
  # vocab_size is updated after tokenizer training)