Stage one of the project, done
configs/model_1B.yaml (new file, 24 lines added)
@@ -0,0 +1,24 @@
# Lyra 1B Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-1B"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048      # Context window
  n_embd: 2048           # Embedding dimension
  n_layer: 24            # Number of transformer layers
  n_head: 16             # Number of attention heads
  n_inner: 8192          # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~1B
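As a quick sanity check on the "~1B" note, the sketch below loads this YAML with PyYAML and estimates the parameter count implied by these dimensions. The file path and the counting formula (which ignores biases, LayerNorm weights, and any embedding/output weight tying) are assumptions for illustration, not part of the commit.

# Rough estimate of the parameter count implied by configs/model_1B.yaml.
# Ignores biases, LayerNorm weights, and any weight tying.
import yaml

with open("configs/model_1B.yaml") as f:
    cfg = yaml.safe_load(f)["model"]

d = cfg["n_embd"]

# Token and learned position embeddings.
embeddings = cfg["vocab_size"] * d + cfg["n_positions"] * d

# Per transformer block: Q/K/V + output projections (4 * d^2)
# plus the two FFN projections (2 * d * n_inner).
per_layer = 4 * d * d + 2 * d * cfg["n_inner"]

total = embeddings + cfg["n_layer"] * per_layer
print(f"~{total / 1e9:.2f}B parameters")  # ~1.32B with the values above

With n_embd=2048, n_layer=24, and n_inner=8192 this comes out to roughly 1.3B parameters, consistent with the order-of-magnitude "~1B" comment at the end of the config.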