# Lyra 1B Model Configuration

```yaml
# GPT-style decoder-only transformer
model:
  name: "lyra-1B"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048  # Context window
  n_embd: 2048       # Embedding dimension
  n_layer: 24        # Number of transformer layers
  n_head: 16         # Number of attention heads
  n_inner: 8192      # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~1.3B (24 layers x ~50M each, plus ~107M for
# token and position embeddings)
```
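
The field names above follow GPT-2 conventions, so one way to sanity-check the configuration is to instantiate it with Hugging Face's `GPT2Config` and count parameters. The sketch below is illustrative, not this project's actual loading code; it assumes the config is saved as `lyra-1b.yaml` (a hypothetical filename) and that `pyyaml`, `torch`, and `transformers` are installed. Note that `GPT2Config` calls the activation key `activation_function`.

```python
# Minimal sketch: load the YAML config, map it onto GPT2Config,
# and verify the parameter count. Assumes "lyra-1b.yaml" exists.
import yaml
from transformers import GPT2Config, GPT2LMHeadModel

with open("lyra-1b.yaml") as f:
    cfg = yaml.safe_load(f)["model"]

config = GPT2Config(
    vocab_size=cfg["vocab_size"],
    n_positions=cfg["n_positions"],
    n_embd=cfg["n_embd"],
    n_layer=cfg["n_layer"],
    n_head=cfg["n_head"],
    n_inner=cfg["n_inner"],
    embd_pdrop=cfg["embd_pdrop"],
    resid_pdrop=cfg["resid_pdrop"],
    attn_pdrop=cfg["attn_pdrop"],
    activation_function=cfg["activation"],  # GPT2Config's name for this key
)

model = GPT2LMHeadModel(config)
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params:,} parameters")
```

With these dimensions the count comes out near 1.3e9: each of the 24 layers contributes roughly 16.8M attention parameters (4 x n_embd^2) plus 33.6M FFN parameters (2 x n_embd x n_inner), and the token and position embeddings add about 107M more.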