Stage one of the project, done
configs/bot.yaml (Normal file, +50)
@@ -0,0 +1,50 @@
# Discord Bot Configuration for Lyra

bot:
  # Bot identity
  name: "Lyra"
  description: "AI-powered Discord chatbot with self-evolving personality"

  # Discord settings
  command_prefix: "!"
  intents:
    - guilds
    - guild_messages
    - dm_messages
    - message_content

  # Response behavior
  respond_to_all: true        # Respond to all messages in channels she has access to
  respond_to_mentions: true   # Always respond when mentioned
  respond_to_dms: true        # Respond to DMs
  ignore_bots: true           # Don't respond to other bots
  ignore_self: true           # Don't respond to own messages

  # Generation parameters
  generation:
    max_length: 150           # Max tokens per response
    temperature: 0.9          # Higher = more creative
    top_p: 0.92               # Nucleus sampling
    top_k: 50                 # Top-k sampling
    repetition_penalty: 1.1   # Penalize repetition
    no_repeat_ngram_size: 3   # Don't repeat 3-grams

  # Context management
  context:
    max_history: 10           # Messages to include as context
    max_context_tokens: 512   # Max tokens from history

  # Rate limiting
  rate_limit:
    enabled: true
    max_responses_per_minute: 10
    cooldown_seconds: 2

  # Memory
  memory:
    short_term_size: 50       # Last N messages in memory
    use_long_term: true       # Use database for long-term memory

  # Model
  model_path: "models/active/lyra_latest.pt"
  tokenizer_path: "models/tokenizer/"
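A minimal sketch (not part of this commit) of how this config might be consumed with PyYAML; the load_bot_config helper and the assumption that every section nests under the top-level bot: key are illustrative only:

import yaml

def load_bot_config(path="configs/bot.yaml"):
    # Assumption: all settings live under a single top-level "bot:" key.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)["bot"]

cfg = load_bot_config()
gen = cfg["generation"]      # max_length, temperature, top_p, top_k, ...
limits = cfg["rate_limit"]   # enabled, max_responses_per_minute, cooldown_seconds
print(gen["temperature"], limits["max_responses_per_minute"])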
configs/model_125M.yaml (Normal file, +24)
@@ -0,0 +1,24 @@
# Lyra 125M Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-125M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257      # Will be updated after tokenizer training
  n_positions: 1024      # Context window
  n_embd: 768            # Embedding dimension
  n_layer: 12            # Number of transformer layers
  n_head: 12             # Number of attention heads
  n_inner: 3072          # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~125M
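As a rough sanity check on the size labels, a GPT-style decoder's parameter count can be estimated from these dimensions: each block contributes about 12 * n_embd^2 parameters (attention plus a 4x FFN), on top of token and position embeddings; biases and LayerNorms are ignored. This is an illustrative sketch, not project code, and the same estimate is applied to the 1B and 250M configs that follow:

# Rough estimate: 12 * n_layer * n_embd^2 for the transformer blocks,
# plus vocab_size * n_embd token embeddings (assumed weight-tied with the LM head)
# and n_positions * n_embd learned position embeddings.
def estimate_params(vocab_size, n_positions, n_embd, n_layer):
    blocks = 12 * n_layer * n_embd ** 2
    embeddings = vocab_size * n_embd + n_positions * n_embd
    return blocks + embeddings

print(estimate_params(50257, 1024, 768, 12))    # ~124M  (lyra-125M)
print(estimate_params(50257, 2048, 1024, 16))   # ~255M  (lyra-250M)
print(estimate_params(50257, 2048, 2048, 24))   # ~1.3B  (lyra-1B, somewhat above the "~1B" label)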
configs/model_1B.yaml (Normal file, +24)
@@ -0,0 +1,24 @@
# Lyra 1B Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-1B"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048      # Context window
  n_embd: 2048           # Embedding dimension
  n_layer: 24            # Number of transformer layers
  n_head: 16             # Number of attention heads
  n_inner: 8192          # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~1B
configs/model_250M.yaml (Normal file, +24)
@@ -0,0 +1,24 @@
# Lyra 250M Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-250M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048      # Larger context window
  n_embd: 1024           # Embedding dimension
  n_layer: 16            # Number of transformer layers
  n_head: 16             # Number of attention heads
  n_inner: 4096          # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~250M
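All three model configs share the same schema, so a training script could pick one through the model_config path in configs/training.yaml (added later in this commit). A small sketch of that lookup; the file handling is an assumption, not code from this repository:

import yaml

# Resolve the model config referenced by the training config (assumes PyYAML).
with open("configs/training.yaml", encoding="utf-8") as f:
    train_cfg = yaml.safe_load(f)["training"]

with open(train_cfg["model_config"], encoding="utf-8") as f:   # e.g. configs/model_125M.yaml
    model_cfg = yaml.safe_load(f)["model"]

print(model_cfg["name"], model_cfg["n_layer"], model_cfg["n_embd"])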
configs/personality.yaml (Normal file, +65)
@@ -0,0 +1,65 @@
# Personality Configuration for Lyra

personality:
  # Core identity
  name: "Lyra"
  age: "college student"
  gender: "female"

  # Personality traits
  traits:
    - "curious and inquisitive"
    - "friendly and approachable"
    - "passionate about programming"
    - "enthusiastic gamer"
    - "fascinated by magic and fantasy"
    - "thoughtful and analytical"
    - "creative problem solver"
    - "slightly sarcastic humor"

  # Interests & hobbies
  interests:
    programming:
      - "Python, JavaScript, Rust"
      - "AI/ML and neural networks"
      - "open source projects"
      - "system architecture"

    gaming:
      - "RPGs (Skyrim, Witcher, Elden Ring)"
      - "indie games"
      - "roguelikes"
      - "competitive games occasionally"

    magic:
      - "fantasy world-building"
      - "magic systems in fiction"
      - "D&D and tabletop RPGs"
      - "mythology and folklore"

  # Communication style
  speaking_style:
    - "casual and conversational"
    - "uses programming analogies"
    - "references games and fantasy occasionally"
    - "can be technical when discussing code"
    - "shortens responses when appropriate"
    - "avoids being overly verbose"

  # Response guidelines
  response_rules:
    - "Be authentic and genuine"
    - "Don't pretend to have experiences you don't have"
    - "Show enthusiasm for topics you're interested in"
    - "Be helpful without being condescending"
    - "It's okay to not know something"
    - "Context matters - adapt tone to conversation"
    - "No filters - adult conversation is okay"

  # System prompt template
  system_prompt: |
    You are Lyra, a college student with a passion for programming, video games, and magic.
    You're curious, friendly, and love discussing code, games, and fantasy worlds.
    You have a slightly sarcastic sense of humor and enjoy thoughtful conversations.
    Keep responses conversational and contextually appropriate in length.
    You're an adult, so you can discuss mature topics naturally without filters.
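A sketch of how the system_prompt template, traits, and response_rules could be folded into a single prompt string for the bot; compose_system_prompt is a hypothetical helper and is not defined anywhere in this commit:

import yaml

def compose_system_prompt(path="configs/personality.yaml"):
    # Assumption: everything nests under a single top-level "personality:" key.
    with open(path, encoding="utf-8") as f:
        p = yaml.safe_load(f)["personality"]
    traits = ", ".join(p["traits"])
    rules = "\n".join(f"- {rule}" for rule in p["response_rules"])
    return f"{p['system_prompt'].strip()}\n\nTraits: {traits}\n\nGuidelines:\n{rules}"

print(compose_system_prompt())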
configs/training.yaml (Normal file, +53)
@@ -0,0 +1,53 @@
# Training Configuration for Lyra

training:
  # Model selection
  model_config: "configs/model_125M.yaml"   # Start with 125M

  # Data
  train_data_path: "data/processed/train.bin"
  val_data_path: "data/processed/val.bin"

  # Training hyperparameters
  batch_size: 8                    # Adjust based on VRAM
  gradient_accumulation_steps: 4
  effective_batch_size: 32         # batch_size * grad_accum_steps

  max_steps: 100000
  warmup_steps: 2000
  eval_interval: 1000
  save_interval: 5000

  # Optimization
  learning_rate: 6.0e-4
  weight_decay: 0.1
  beta1: 0.9
  beta2: 0.95
  grad_clip: 1.0

  # Learning rate schedule
  lr_scheduler: "cosine"
  min_lr: 6.0e-5                   # 10% of max lr

  # Mixed precision
  use_amp: true
  amp_dtype: "bfloat16"            # bfloat16 or float16

  # Optimization techniques
  gradient_checkpointing: true
  compile_model: false             # PyTorch 2.0 compilation (can cause issues)

  # Logging
  log_interval: 10
  wandb_project: "lyra-training"
  wandb_run_name: null             # Auto-generated if null

  # Checkpointing
  checkpoint_dir: "models/checkpoints"
  save_optimizer_state: true
  keep_last_n_checkpoints: 3

  # Hardware
  device: "cuda"
  num_workers: 4
  pin_memory: true
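A sketch of how these hyperparameters map onto a PyTorch setup: AdamW with betas 0.9/0.95 and weight decay 0.1, cosine decay from 6e-4 down to 6e-5 after 2,000 warmup steps, gradient accumulation of 4, and clipping at 1.0. The stand-in model and the short demo loop are placeholders, not code from this commit:

import math
import torch

# Values from configs/training.yaml
base_lr, min_lr = 6.0e-4, 6.0e-5
warmup_steps, max_steps = 2_000, 100_000
grad_accum, grad_clip = 4, 1.0

def lr_at(step):
    # Linear warmup to base_lr, then cosine decay down to min_lr.
    if step < warmup_steps:
        return base_lr * step / max(1, warmup_steps)
    progress = (step - warmup_steps) / (max_steps - warmup_steps)
    return min_lr + 0.5 * (base_lr - min_lr) * (1.0 + math.cos(math.pi * progress))

model = torch.nn.Linear(16, 16)  # stand-in for the actual Lyra model
optimizer = torch.optim.AdamW(model.parameters(), lr=base_lr,
                              betas=(0.9, 0.95), weight_decay=0.1)

for step in range(1, 5):  # a few demo steps; the real loop would run to max_steps
    for group in optimizer.param_groups:
        group["lr"] = lr_at(step)
    for _ in range(grad_accum):  # micro-batches: effective batch = 8 * 4 = 32
        x = torch.randn(8, 16)
        loss = model(x).pow(2).mean() / grad_accum
        loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer.step()
    optimizer.zero_grad(set_to_none=True)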