Stage one of the project, done
.env.example (Normal file, 15 lines added)
@@ -0,0 +1,15 @@
# Discord Bot Configuration
DISCORD_TOKEN=your_discord_token_here
DISCORD_BOT_PREFIX=!

# Training Configuration
WANDB_API_KEY=your_wandb_key_here
WANDB_PROJECT=lyra-training

# Model Configuration
MODEL_SIZE=125M
CONTEXT_LENGTH=1024
BATCH_SIZE=8

# Database
DATABASE_PATH=lyra.db
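Note: these values are consumed at runtime through python-dotenv (listed in requirements.txt below). A minimal sketch of how a component might read them, assuming the defaults shown here are acceptable fallbacks:

import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory

DISCORD_TOKEN = os.getenv("DISCORD_TOKEN")             # required; no sensible default
MODEL_SIZE = os.getenv("MODEL_SIZE", "125M")           # falls back to the documented default
DATABASE_PATH = os.getenv("DATABASE_PATH", "lyra.db")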
.gitignore (vendored, 29 lines added)
@@ -214,3 +214,32 @@ __marimo__/

# Streamlit
.streamlit/secrets.toml

# Lyra Project Specific
# Data files
data/raw/
data/processed/
data/discord/

# Model files
models/checkpoints/
models/active/
*.pt
*.pth
*.safetensors
*.bin
*.ckpt

# Database
*.db
*.db-journal
*.db-wal
*.db-shm

# Logs
logs/
wandb/
tensorboard/

# Discord bot token
.env
configs/bot.yaml (Normal file, 50 lines added)
@@ -0,0 +1,50 @@
# Discord Bot Configuration for Lyra

bot:
  # Bot identity
  name: "Lyra"
  description: "AI-powered Discord chatbot with self-evolving personality"

  # Discord settings
  command_prefix: "!"
  intents:
    - guilds
    - guild_messages
    - dm_messages
    - message_content

  # Response behavior
  respond_to_all: true        # Respond to all messages in channels she has access to
  respond_to_mentions: true   # Always respond when mentioned
  respond_to_dms: true        # Respond to DMs
  ignore_bots: true           # Don't respond to other bots
  ignore_self: true           # Don't respond to own messages

  # Generation parameters
  generation:
    max_length: 150           # Max tokens per response
    temperature: 0.9          # Higher = more creative
    top_p: 0.92               # Nucleus sampling
    top_k: 50                 # Top-k sampling
    repetition_penalty: 1.1   # Penalize repetition
    no_repeat_ngram_size: 3   # Don't repeat 3-grams

  # Context management
  context:
    max_history: 10           # Messages to include as context
    max_context_tokens: 512   # Max tokens from history

  # Rate limiting
  rate_limit:
    enabled: true
    max_responses_per_minute: 10
    cooldown_seconds: 2

  # Memory
  memory:
    short_term_size: 50       # Last N messages in memory
    use_long_term: true       # Use database for long-term memory

  # Model
  model_path: "models/active/lyra_latest.pt"
  tokenizer_path: "models/tokenizer/"
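Note: the generation block mirrors Hugging Face-style sampling arguments, so it can be forwarded to a generate() call nearly verbatim. A sketch assuming a transformers-compatible model (model and input_ids are placeholders, and max_length is mapped to max_new_tokens to match its "per response" comment):

import yaml

with open("configs/bot.yaml") as f:
    gen_cfg = yaml.safe_load(f)["bot"]["generation"]

output_ids = model.generate(
    input_ids,
    do_sample=True,
    max_new_tokens=gen_cfg["max_length"],
    temperature=gen_cfg["temperature"],
    top_p=gen_cfg["top_p"],
    top_k=gen_cfg["top_k"],
    repetition_penalty=gen_cfg["repetition_penalty"],
    no_repeat_ngram_size=gen_cfg["no_repeat_ngram_size"],
)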
configs/model_125M.yaml (Normal file, 24 lines added)
@@ -0,0 +1,24 @@
# Lyra 125M Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-125M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257   # Will be updated after tokenizer training
  n_positions: 1024   # Context window
  n_embd: 768         # Embedding dimension
  n_layer: 12         # Number of transformer layers
  n_head: 12          # Number of attention heads
  n_inner: 3072       # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~125M
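Note: the ~125M figure checks out from the dimensions above. A back-of-the-envelope count, ignoring biases and layer norms:

vocab_size, n_embd, n_layer, n_inner, n_positions = 50257, 768, 12, 3072, 1024

embeddings = vocab_size * n_embd + n_positions * n_embd  # token + position tables: ~39.4M
attn_per_layer = 4 * n_embd ** 2                         # q, k, v, and output projections
ffn_per_layer = 2 * n_embd * n_inner                     # up and down projections
total = embeddings + n_layer * (attn_per_layer + ffn_per_layer)
print(f"{total / 1e6:.1f}M")                             # ~124.3M, matching the label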
configs/model_1B.yaml (Normal file, 24 lines added)
@@ -0,0 +1,24 @@
# Lyra 1B Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-1B"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048   # Context window
  n_embd: 2048        # Embedding dimension
  n_layer: 24         # Number of transformer layers
  n_head: 16          # Number of attention heads
  n_inner: 8192       # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~1B
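Note: the same back-of-the-envelope count applied here (50257 x 2048 token embeddings plus 2048 x 2048 positions, plus 24 layers of 12 x 2048^2) comes to roughly 1.3B parameters, so the ~1B label is best read as the model family name rather than an exact count.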
configs/model_250M.yaml (Normal file, 24 lines added)
@@ -0,0 +1,24 @@
# Lyra 250M Model Configuration
# GPT-style decoder-only transformer

model:
  name: "lyra-250M"
  architecture: "gpt"

  # Model dimensions
  vocab_size: 50257
  n_positions: 2048   # Larger context window
  n_embd: 1024        # Embedding dimension
  n_layer: 16         # Number of transformer layers
  n_head: 16          # Number of attention heads
  n_inner: 4096       # FFN inner dimension (4 * n_embd)

  # Regularization
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1

  # Activation
  activation: "gelu"

# Total parameters: ~250M
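Note: this config lands close to its label: ~54M of embedding parameters plus 16 layers of 12 x 1024^2 gives roughly 255M.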
configs/personality.yaml (Normal file, 65 lines added)
@@ -0,0 +1,65 @@
# Personality Configuration for Lyra

personality:
  # Core identity
  name: "Lyra"
  age: "college student"
  gender: "female"

  # Personality traits
  traits:
    - "curious and inquisitive"
    - "friendly and approachable"
    - "passionate about programming"
    - "enthusiastic gamer"
    - "fascinated by magic and fantasy"
    - "thoughtful and analytical"
    - "creative problem solver"
    - "slightly sarcastic humor"

  # Interests & hobbies
  interests:
    programming:
      - "Python, JavaScript, Rust"
      - "AI/ML and neural networks"
      - "open source projects"
      - "system architecture"

    gaming:
      - "RPGs (Skyrim, Witcher, Elden Ring)"
      - "indie games"
      - "roguelikes"
      - "competitive games occasionally"

    magic:
      - "fantasy world-building"
      - "magic systems in fiction"
      - "D&D and tabletop RPGs"
      - "mythology and folklore"

  # Communication style
  speaking_style:
    - "casual and conversational"
    - "uses programming analogies"
    - "references games and fantasy occasionally"
    - "can be technical when discussing code"
    - "shortens responses when appropriate"
    - "avoids being overly verbose"

  # Response guidelines
  response_rules:
    - "Be authentic and genuine"
    - "Don't pretend to have experiences you don't have"
    - "Show enthusiasm for topics you're interested in"
    - "Be helpful without being condescending"
    - "It's okay to not know something"
    - "Context matters - adapt tone to conversation"
    - "No filters - adult conversation is okay"

  # System prompt template
  system_prompt: |
    You are Lyra, a college student with a passion for programming, video games, and magic.
    You're curious, friendly, and love discussing code, games, and fantasy worlds.
    You have a slightly sarcastic sense of humor and enjoy thoughtful conversations.
    Keep responses conversational and contextually appropriate in length.
    You're an adult, so you can discuss mature topics naturally without filters.
configs/training.yaml (Normal file, 53 lines added)
@@ -0,0 +1,53 @@
# Training Configuration for Lyra

training:
  # Model selection
  model_config: "configs/model_125M.yaml"   # Start with 125M

  # Data
  train_data_path: "data/processed/train.bin"
  val_data_path: "data/processed/val.bin"

  # Training hyperparameters
  batch_size: 8                    # Adjust based on VRAM
  gradient_accumulation_steps: 4
  effective_batch_size: 32         # batch_size * grad_accum_steps

  max_steps: 100000
  warmup_steps: 2000
  eval_interval: 1000
  save_interval: 5000

  # Optimization
  learning_rate: 6.0e-4
  weight_decay: 0.1
  beta1: 0.9
  beta2: 0.95
  grad_clip: 1.0

  # Learning rate schedule
  lr_scheduler: "cosine"
  min_lr: 6.0e-5                   # 10% of max lr

  # Mixed precision
  use_amp: true
  amp_dtype: "bfloat16"            # bfloat16 or float16

  # Optimization techniques
  gradient_checkpointing: true
  compile_model: false             # PyTorch 2.0 compilation (can cause issues)

  # Logging
  log_interval: 10
  wandb_project: "lyra-training"
  wandb_run_name: null             # Auto-generated if null

  # Checkpointing
  checkpoint_dir: "models/checkpoints"
  save_optimizer_state: true
  keep_last_n_checkpoints: 3

  # Hardware
  device: "cuda"
  num_workers: 4
  pin_memory: true
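Note: the combination of warmup_steps, lr_scheduler: "cosine", and min_lr implies the usual linear-warmup-then-cosine-decay curve. A minimal sketch of that schedule, assuming decay runs from warmup_steps to max_steps (the function is illustrative, not the trainer's actual implementation):

import math

def get_lr(step, max_lr=6.0e-4, min_lr=6.0e-5, warmup_steps=2000, max_steps=100000):
    if step < warmup_steps:            # linear warmup from 0 to max_lr
        return max_lr * step / warmup_steps
    if step >= max_steps:              # hold the floor once decay completes
        return min_lr
    progress = (step - warmup_steps) / (max_steps - warmup_steps)
    return min_lr + 0.5 * (max_lr - min_lr) * (1 + math.cos(math.pi * progress))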
requirements.txt (Normal file, 40 lines added)
@@ -0,0 +1,40 @@
# Core Deep Learning
torch>=2.0.0
torchvision
torchaudio

# Discord Bot
discord.py[voice]>=2.3.0
PyNaCl

# NLP & Tokenization
transformers>=4.30.0
tokenizers>=0.13.0
datasets>=2.12.0
sentencepiece

# Training & Monitoring
wandb
tensorboard

# Database
sqlalchemy>=2.0.0
aiosqlite

# Utilities
numpy>=1.24.0
tqdm
einops
safetensors

# Configuration
pyyaml
python-dotenv

# Optimization
accelerate
bitsandbytes

# Testing & Quality
pytest
black
scripts/test_gpu.py (Normal file, 81 lines added)
@@ -0,0 +1,81 @@
"""
GPU/CUDA Verification Script for Lyra
Tests PyTorch CUDA functionality and reports GPU capabilities
"""

import torch


def test_cuda():
    print("=" * 60)
    print("CUDA/GPU Verification for Lyra")
    print("=" * 60)

    # Basic CUDA info
    print(f"\n1. PyTorch Version: {torch.__version__}")
    print(f"2. CUDA Available: {torch.cuda.is_available()}")

    if not torch.cuda.is_available():
        print("\n[ERROR] CUDA is not available!")
        return False

    print(f"3. CUDA Version: {torch.version.cuda}")
    print(f"4. cuDNN Version: {torch.backends.cudnn.version()}")
    print(f"5. Number of GPUs: {torch.cuda.device_count()}")

    # GPU details
    for i in range(torch.cuda.device_count()):
        print(f"\n--- GPU {i} ---")
        print(f"Name: {torch.cuda.get_device_name(i)}")
        props = torch.cuda.get_device_properties(i)
        print(f"Compute Capability: {props.major}.{props.minor}")
        print(f"Total Memory: {props.total_memory / 1024**3:.2f} GB")
        print(f"Multi-Processors: {props.multi_processor_count}")

    # Memory status
    print("\n--- Memory Status ---")
    print(f"Allocated: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
    print(f"Cached: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB")
    print(f"Free: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1024**3:.2f} GB")

    # Tensor operations test
    print("\n--- Testing Tensor Operations ---")
    try:
        # Create tensors on the GPU
        x = torch.randn(1000, 1000, device='cuda')
        y = torch.randn(1000, 1000, device='cuda')

        # Matrix multiplication
        z = torch.matmul(x, y)

        print(f"[OK] Matrix multiplication: {z.shape}")
        print(f"[OK] Tensor device: {z.device}")
        print(f"[OK] Tensor dtype: {z.dtype}")

        # Test FP16
        x_fp16 = x.half()
        y_fp16 = y.half()
        z_fp16 = torch.matmul(x_fp16, y_fp16)
        print(f"[OK] FP16 operations: {z_fp16.dtype}")

        # Test BF16
        if torch.cuda.is_bf16_supported():
            x_bf16 = x.bfloat16()
            y_bf16 = y.bfloat16()
            z_bf16 = torch.matmul(x_bf16, y_bf16)
            print(f"[OK] BF16 operations: {z_bf16.dtype}")
        else:
            print("[WARNING] BF16 not supported")

        del x, y, z, x_fp16, y_fp16, z_fp16
        torch.cuda.empty_cache()

        print("\n[SUCCESS] All GPU tests passed!")
        return True

    except Exception as e:
        print(f"\n[ERROR] GPU test failed: {e}")
        return False


if __name__ == "__main__":
    success = test_cuda()
    exit(0 if success else 1)
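Note: the script is meant to be run directly (python scripts/test_gpu.py). Because test_cuda() returns False when CUDA is missing or a tensor operation fails, and the process exit code mirrors that result, it can gate setup scripts or CI jobs on a working GPU.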
src/config.py (Normal file, 124 lines added)
@@ -0,0 +1,124 @@
"""
Configuration loader for Lyra
Handles loading and merging YAML configuration files
"""

import os
import yaml
from typing import Any, Dict
from pathlib import Path


class Config:
    """Configuration manager for Lyra"""

    def __init__(self, config_dir: str = "configs"):
        self.config_dir = Path(config_dir)
        self._configs = {}

    def load(self, config_name: str) -> Dict[str, Any]:
        """
        Load a configuration file

        Args:
            config_name: Name of config file (without .yaml extension)

        Returns:
            Dictionary containing configuration
        """
        config_path = self.config_dir / f"{config_name}.yaml"

        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")

        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)

        self._configs[config_name] = config
        return config

    def get(self, config_name: str) -> Dict[str, Any]:
        """Get a loaded config or load it if not cached"""
        if config_name not in self._configs:
            return self.load(config_name)
        return self._configs[config_name]

    def load_model_config(self, model_size: str = "125M") -> Dict[str, Any]:
        """Load model configuration by size"""
        return self.load(f"model_{model_size}")

    def load_training_config(self) -> Dict[str, Any]:
        """Load training configuration"""
        return self.load("training")

    def load_bot_config(self) -> Dict[str, Any]:
        """Load bot configuration"""
        return self.load("bot")

    def load_personality_config(self) -> Dict[str, Any]:
        """Load personality configuration"""
        return self.load("personality")

    def load_all(self, model_size: str = "125M") -> Dict[str, Dict[str, Any]]:
        """Load all configurations"""
        return {
            "model": self.load_model_config(model_size),
            "training": self.load_training_config(),
            "bot": self.load_bot_config(),
            "personality": self.load_personality_config(),
        }

    @staticmethod
    def from_env():
        """Load configuration with environment variable overrides"""
        from dotenv import load_dotenv
        load_dotenv()

        config = Config()
        return config


def load_config(model_size: str = "125M") -> Config:
    """
    Convenience function to load configuration

    Args:
        model_size: Model size to load (125M, 250M, or 1B)

    Returns:
        Config object with all configurations loaded
    """
    config = Config.from_env()
    config.load_all(model_size)
    return config


if __name__ == "__main__":
    # Test configuration loading
    print("Testing configuration loading...")

    config = load_config("125M")

    print("\n=== Model Config ===")
    model_cfg = config.get("model_125M")
    print(f"Model: {model_cfg['model']['name']}")
    print(f"Layers: {model_cfg['model']['n_layer']}")
    print(f"Hidden size: {model_cfg['model']['n_embd']}")

    print("\n=== Training Config ===")
    train_cfg = config.load_training_config()
    print(f"Batch size: {train_cfg['training']['batch_size']}")
    print(f"Learning rate: {train_cfg['training']['learning_rate']}")
    print(f"Max steps: {train_cfg['training']['max_steps']}")

    print("\n=== Bot Config ===")
    bot_cfg = config.load_bot_config()
    print(f"Bot name: {bot_cfg['bot']['name']}")
    print(f"Temperature: {bot_cfg['bot']['generation']['temperature']}")

    print("\n=== Personality Config ===")
    pers_cfg = config.load_personality_config()
    print(f"Name: {pers_cfg['personality']['name']}")
    print(f"Traits: {', '.join(pers_cfg['personality']['traits'][:3])}...")

    print("\n[SUCCESS] All configs loaded successfully!")
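Note: as committed, Config.from_env() loads the .env file but never applies any overrides, so its docstring promises more than stage one delivers. A hedged sketch of one way an override could work, using the MODEL_SIZE key from .env.example (this helper is hypothetical, not part of the commit):

import os

def config_from_env() -> Config:
    """Variant of Config.from_env() that honors MODEL_SIZE from the environment."""
    from dotenv import load_dotenv
    load_dotenv()

    config = Config()
    config.load_all(os.getenv("MODEL_SIZE", "125M"))  # assumed override key
    return config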