Stage one of the project is done

This commit is contained in:
2025-09-29 22:40:17 -04:00
parent a3f14b18dc
commit c719c5873f
11 changed files with 529 additions and 0 deletions

24
configs/model_125M.yaml Normal file
View File

@@ -0,0 +1,24 @@
---
# Lyra 125M model configuration
# GPT-style decoder-only transformer.
model:
  name: "lyra-125M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257  # will be updated after tokenizer training
  n_positions: 1024  # context window (max sequence length)
  n_embd: 768        # embedding dimension
  n_layer: 12        # number of transformer layers
  n_head: 12         # number of attention heads
  n_inner: 3072      # FFN inner dimension (4 * n_embd)

  # Regularization (dropout probabilities)
  embd_pdrop: 0.1    # embedding dropout
  resid_pdrop: 0.1   # residual dropout
  attn_pdrop: 0.1    # attention dropout

  # Activation function for the FFN
  activation: "gelu"

# Total parameters: ~125M