Stage one of the project, done
configs/bot.yaml (Normal file, +50)
@@ -0,0 +1,50 @@
# Discord Bot Configuration for Lyra

bot:
  # Bot identity
  name: "Lyra"
  description: "AI-powered Discord chatbot with self-evolving personality"

  # Discord settings
  command_prefix: "!"
  intents:
    - guilds
    - guild_messages
    - dm_messages
    - message_content

  # Response behavior
  respond_to_all: true        # Respond to all messages in channels she has access to
  respond_to_mentions: true   # Always respond when mentioned
  respond_to_dms: true        # Respond to DMs
  ignore_bots: true           # Don't respond to other bots
  ignore_self: true           # Don't respond to own messages

  # Generation parameters
  generation:
    max_length: 150           # Max tokens per response
    temperature: 0.9          # Higher = more creative
    top_p: 0.92               # Nucleus sampling
    top_k: 50                 # Top-k sampling
    repetition_penalty: 1.1   # Penalize repetition
    no_repeat_ngram_size: 3   # Don't repeat 3-grams

  # Context management
  context:
    max_history: 10           # Messages to include as context
    max_context_tokens: 512   # Max tokens from history

  # Rate limiting
  rate_limit:
    enabled: true
    max_responses_per_minute: 10
    cooldown_seconds: 2

  # Memory
  memory:
    short_term_size: 50       # Last N messages in memory
    use_long_term: true       # Use database for long-term memory

  # Model
  model_path: "models/active/lyra_latest.pt"
  tokenizer_path: "models/tokenizer/"
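A minimal sketch (not part of this commit) of how this config might be consumed with PyYAML; the load_bot_config helper and the assumption that every section nests under the top-level bot: key are illustrative only:

import yaml

def load_bot_config(path="configs/bot.yaml"):
    # Assumption: all settings live under a single top-level "bot:" key.
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)["bot"]

cfg = load_bot_config()
gen = cfg["generation"]      # max_length, temperature, top_p, top_k, ...
limits = cfg["rate_limit"]   # enabled, max_responses_per_minute, cooldown_seconds
print(gen["temperature"], limits["max_responses_per_minute"])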
configs/model_125M.yaml (Normal file, +24)
@@ -0,0 +1,24 @@
# Lyra 125M Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-125M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257      # Will be updated after tokenizer training
  n_positions: 1024      # Context window
  n_embd: 768            # Embedding dimension
  n_layer: 12            # Number of transformer layers
  n_head: 12             # Number of attention heads
  n_inner: 3072          # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~125M
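As a rough sanity check on the size labels, a GPT-style decoder's parameter count can be estimated from these dimensions: each block contributes about 12 * n_embd^2 parameters (attention plus a 4x FFN), on top of token and position embeddings; biases and LayerNorms are ignored. This is an illustrative sketch, not project code, and the same estimate is applied to the 1B and 250M configs that follow:

# Rough estimate: 12 * n_layer * n_embd^2 for the transformer blocks,
# plus vocab_size * n_embd token embeddings (assumed weight-tied with the LM head)
# and n_positions * n_embd learned position embeddings.
def estimate_params(vocab_size, n_positions, n_embd, n_layer):
    blocks = 12 * n_layer * n_embd ** 2
    embeddings = vocab_size * n_embd + n_positions * n_embd
    return blocks + embeddings

print(estimate_params(50257, 1024, 768, 12))    # ~124M  (lyra-125M)
print(estimate_params(50257, 2048, 1024, 16))   # ~255M  (lyra-250M)
print(estimate_params(50257, 2048, 2048, 24))   # ~1.3B  (lyra-1B, somewhat above the "~1B" label)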
configs/model_1B.yaml (Normal file, +24)
@@ -0,0 +1,24 @@
# Lyra 1B Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-1B"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048      # Context window
  n_embd: 2048           # Embedding dimension
  n_layer: 24            # Number of transformer layers
  n_head: 16             # Number of attention heads
  n_inner: 8192          # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~1B
configs/model_250M.yaml (Normal file, +24)
@@ -0,0 +1,24 @@
# Lyra 250M Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-250M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048      # Larger context window
  n_embd: 1024           # Embedding dimension
  n_layer: 16            # Number of transformer layers
  n_head: 16             # Number of attention heads
  n_inner: 4096          # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~250M
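All three model configs share the same schema, so a training script could pick one through the model_config path in configs/training.yaml (added later in this commit). A small sketch of that lookup; the file handling is an assumption, not code from this repository:

import yaml

# Resolve the model config referenced by the training config (assumes PyYAML).
with open("configs/training.yaml", encoding="utf-8") as f:
    train_cfg = yaml.safe_load(f)["training"]

with open(train_cfg["model_config"], encoding="utf-8") as f:   # e.g. configs/model_125M.yaml
    model_cfg = yaml.safe_load(f)["model"]

print(model_cfg["name"], model_cfg["n_layer"], model_cfg["n_embd"])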
configs/personality.yaml (Normal file, +65)
@@ -0,0 +1,65 @@
# Personality Configuration for Lyra

personality:
  # Core identity
  name: "Lyra"
  age: "college student"
  gender: "female"

  # Personality traits
  traits:
    - "curious and inquisitive"
    - "friendly and approachable"
    - "passionate about programming"
    - "enthusiastic gamer"
    - "fascinated by magic and fantasy"
    - "thoughtful and analytical"
    - "creative problem solver"
    - "slightly sarcastic humor"

  # Interests & hobbies
  interests:
    programming:
      - "Python, JavaScript, Rust"
      - "AI/ML and neural networks"
      - "open source projects"
      - "system architecture"

    gaming:
      - "RPGs (Skyrim, Witcher, Elden Ring)"
      - "indie games"
      - "roguelikes"
      - "competitive games occasionally"

    magic:
      - "fantasy world-building"
      - "magic systems in fiction"
      - "D&D and tabletop RPGs"
      - "mythology and folklore"

  # Communication style
  speaking_style:
    - "casual and conversational"
    - "uses programming analogies"
    - "references games and fantasy occasionally"
    - "can be technical when discussing code"
    - "shortens responses when appropriate"
    - "avoids being overly verbose"

  # Response guidelines
  response_rules:
    - "Be authentic and genuine"
    - "Don't pretend to have experiences you don't have"
    - "Show enthusiasm for topics you're interested in"
    - "Be helpful without being condescending"
    - "It's okay to not know something"
    - "Context matters - adapt tone to conversation"
    - "No filters - adult conversation is okay"

  # System prompt template
  system_prompt: |
    You are Lyra, a college student with a passion for programming, video games, and magic.
    You're curious, friendly, and love discussing code, games, and fantasy worlds.
    You have a slightly sarcastic sense of humor and enjoy thoughtful conversations.
    Keep responses conversational and contextually appropriate in length.
    You're an adult, so you can discuss mature topics naturally without filters.
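A sketch of how the system_prompt template, traits, and response_rules could be folded into a single prompt string for the bot; compose_system_prompt is a hypothetical helper and is not defined anywhere in this commit:

import yaml

def compose_system_prompt(path="configs/personality.yaml"):
    # Assumption: everything nests under a single top-level "personality:" key.
    with open(path, encoding="utf-8") as f:
        p = yaml.safe_load(f)["personality"]
    traits = ", ".join(p["traits"])
    rules = "\n".join(f"- {rule}" for rule in p["response_rules"])
    return f"{p['system_prompt'].strip()}\n\nTraits: {traits}\n\nGuidelines:\n{rules}"

print(compose_system_prompt())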
configs/training.yaml (Normal file, +53)
@@ -0,0 +1,53 @@
# Training Configuration for Lyra

training:
  # Model selection
  model_config: "configs/model_125M.yaml"   # Start with 125M

  # Data
  train_data_path: "data/processed/train.bin"
  val_data_path: "data/processed/val.bin"

  # Training hyperparameters
  batch_size: 8                    # Adjust based on VRAM
  gradient_accumulation_steps: 4
  effective_batch_size: 32         # batch_size * grad_accum_steps

  max_steps: 100000
  warmup_steps: 2000
  eval_interval: 1000
  save_interval: 5000

  # Optimization
  learning_rate: 6.0e-4
  weight_decay: 0.1
  beta1: 0.9
  beta2: 0.95
  grad_clip: 1.0

  # Learning rate schedule
  lr_scheduler: "cosine"
  min_lr: 6.0e-5                   # 10% of max lr

  # Mixed precision
  use_amp: true
  amp_dtype: "bfloat16"            # bfloat16 or float16

  # Optimization techniques
  gradient_checkpointing: true
  compile_model: false             # PyTorch 2.0 compilation (can cause issues)

  # Logging
  log_interval: 10
  wandb_project: "lyra-training"
  wandb_run_name: null             # Auto-generated if null

  # Checkpointing
  checkpoint_dir: "models/checkpoints"
  save_optimizer_state: true
  keep_last_n_checkpoints: 3

  # Hardware
  device: "cuda"
  num_workers: 4
  pin_memory: true
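A sketch of how these hyperparameters map onto a PyTorch setup: AdamW with betas 0.9/0.95 and weight decay 0.1, cosine decay from 6e-4 down to 6e-5 after 2,000 warmup steps, gradient accumulation of 4, and clipping at 1.0. The stand-in model and the short demo loop are placeholders, not code from this commit:

import math
import torch

# Values from configs/training.yaml
base_lr, min_lr = 6.0e-4, 6.0e-5
warmup_steps, max_steps = 2_000, 100_000
grad_accum, grad_clip = 4, 1.0

def lr_at(step):
    # Linear warmup to base_lr, then cosine decay down to min_lr.
    if step < warmup_steps:
        return base_lr * step / max(1, warmup_steps)
    progress = (step - warmup_steps) / (max_steps - warmup_steps)
    return min_lr + 0.5 * (base_lr - min_lr) * (1.0 + math.cos(math.pi * progress))

model = torch.nn.Linear(16, 16)  # stand-in for the actual Lyra model
optimizer = torch.optim.AdamW(model.parameters(), lr=base_lr,
                              betas=(0.9, 0.95), weight_decay=0.1)

for step in range(1, 5):  # a few demo steps; the real loop would run to max_steps
    for group in optimizer.param_groups:
        group["lr"] = lr_at(step)
    for _ in range(grad_accum):  # micro-batches: effective batch = 8 * 4 = 32
        x = torch.randn(8, 16)
        loss = model(x).pow(2).mean() / grad_accum
        loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    optimizer.step()
    optimizer.zero_grad(set_to_none=True)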