feat: Add database setup guide and local configuration files
- Added DATABASE_SETUP.md with a comprehensive guide to installing PostgreSQL and Redis on Windows
- Created .claude/settings.local.json with permission settings for pytest and database fix scripts
- Updated .gitignore to exclude .env.backup
- Included database connection test utilities in lyra/database_setup.py
- Added environment variable configuration examples for local development
lyra/training/pipeline.py (new file, 574 lines)
@@ -0,0 +1,574 @@
"""
Advanced training pipeline for Lyra with sliding context window and adaptive learning.

Implements sophisticated training strategies including:
- Sliding context window for long conversations
- Dynamic curriculum based on Lyra's emotional and personality state
- Memory consolidation and replay
- Human-like learning patterns
"""

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import numpy as np
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from datetime import datetime
import json
import asyncio
from collections import deque
import random

from ..config import config
from ..core.lyra_model import LyraModel
from ..database.manager import DatabaseManager
from ..emotions.system import EmotionalState

logger = logging.getLogger(__name__)


@dataclass
class TrainingBatch:
    """Represents a training batch with context."""
    input_ids: torch.Tensor
    attention_mask: torch.Tensor
    target_ids: torch.Tensor
    emotional_context: torch.Tensor
    personality_context: torch.Tensor
    conversation_id: str
    turn_index: int
    metadata: Dict[str, Any]


@dataclass
class LearningMemory:
    """Represents a significant learning memory."""
    conversation_embedding: torch.Tensor
    emotional_state: EmotionalState
    user_feedback: float
    learning_outcome: str
    timestamp: datetime
    replay_count: int = 0


class ConversationDataset(Dataset):
    """Dataset for conversation training with sliding windows."""

    def __init__(
        self,
        conversations: List[Dict[str, Any]],
        tokenizer,
        max_length: int = 512,
        sliding_window: int = 256,
        overlap: int = 64
    ):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.sliding_window = sliding_window
        self.overlap = overlap
        self.samples = self._prepare_samples()

    def _prepare_samples(self) -> List[Dict[str, Any]]:
        """Prepare training samples with sliding windows."""
        samples = []

        for conv in self.conversations:
            # Extract conversation turns
            turns = conv.get('turns', [])
            full_text = ""

            # Build conversation context
            for turn in turns:
                if turn['role'] == 'user':
                    full_text += f"User: {turn['content']}\n"
                elif turn['role'] == 'assistant':
                    full_text += f"Lyra: {turn['content']}\n"

            # Create sliding windows. The loop bound uses the overlap rather
            # than the full window size so that conversations shorter than one
            # window still yield a single (truncated) sample instead of being
            # silently dropped.
            tokens = self.tokenizer.encode(full_text)

            for start_idx in range(0, max(len(tokens) - self.overlap, 1),
                                   self.sliding_window - self.overlap):
                end_idx = min(start_idx + self.sliding_window, len(tokens))
                window_tokens = tokens[start_idx:end_idx]

                if len(window_tokens) < 32:  # Skip very short windows
                    continue

                # Target is the input shifted by one token (next-token prediction)
                input_tokens = window_tokens[:-1]
                target_tokens = window_tokens[1:]

                samples.append({
                    'input_ids': input_tokens,
                    'target_ids': target_tokens,
                    'conversation_id': conv.get('id', ''),
                    'emotional_context': conv.get('emotional_state', {}),
                    'personality_context': conv.get('personality_state', {}),
                    'metadata': conv.get('metadata', {})
                })

        return samples

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        return self.samples[idx]
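

# __getitem__ returns variable-length token lists, so a DataLoader over this
# dataset needs a collate function that pads each field to a common length.
# The sketch below is illustrative and not part of the original commit; it
# assumes a pad token id of 0 and relies on CrossEntropyLoss ignoring -100
# targets (its default ignore_index).
def pad_collate(batch: List[Dict[str, Any]], pad_token_id: int = 0) -> Dict[str, Any]:
    """Minimal collate_fn sketch: pad ids and build attention masks."""
    max_len = max(len(s['input_ids']) for s in batch)
    input_ids, target_ids, attention_mask = [], [], []
    for s in batch:
        pad = max_len - len(s['input_ids'])
        input_ids.append(s['input_ids'] + [pad_token_id] * pad)
        target_ids.append(s['target_ids'] + [-100] * pad)  # -100 is ignored by the loss
        attention_mask.append([1] * len(s['input_ids']) + [0] * pad)
    return {
        'input_ids': torch.tensor(input_ids),
        'target_ids': torch.tensor(target_ids),
        'attention_mask': torch.tensor(attention_mask),
        'conversation_id': [s['conversation_id'] for s in batch],
        'metadata': [s['metadata'] for s in batch],
    }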


class AdaptiveLearningScheduler:
    """Adaptive learning rate based on emotional and personality state."""

    def __init__(self, base_lr: float = 1e-4):
        self.base_lr = base_lr
        self.emotional_multipliers = {
            'joy': 1.2,          # Learn faster when happy
            'curiosity': 1.5,    # Learn much faster when curious
            'frustration': 0.7,  # Learn slower when frustrated
            'confusion': 0.5,    # Learn slower when confused
            'confidence': 1.1    # Learn slightly faster when confident
        }

    def get_learning_rate(
        self,
        emotional_state: EmotionalState,
        personality_openness: float,
        recent_performance: float
    ) -> float:
        """Calculate adaptive learning rate."""
        # Base rate adjustment
        lr = self.base_lr

        # Emotional adjustment
        dominant_emotion, intensity = emotional_state.get_dominant_emotion()
        if dominant_emotion in self.emotional_multipliers:
            lr *= self.emotional_multipliers[dominant_emotion] * intensity

        # Personality adjustment (openness to experience)
        lr *= (1.0 + personality_openness * 0.3)

        # Performance adjustment
        if recent_performance > 0.8:
            lr *= 1.1  # Increase when performing well
        elif recent_performance < 0.4:
            lr *= 0.8  # Decrease when struggling

        return max(lr, self.base_lr * 0.1)  # Don't go too low
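        # Worked example (illustrative, not from the original commit):
        # with base_lr = 1e-4, dominant emotion 'curiosity' at intensity 0.8,
        # openness 0.7, and recent_performance 0.85:
        #   1e-4   * (1.5 * 0.8) = 1.2e-4    (emotional multiplier)
        #   1.2e-4 * 1.21        = 1.452e-4  (openness: 1 + 0.7 * 0.3)
        #   1.452e-4 * 1.1       ~= 1.6e-4   (performance boost)
        # which stays well above the 1e-5 floor.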


class LyraTrainingPipeline:
    """Complete training pipeline for Lyra with human-like learning patterns."""

    def __init__(
        self,
        model: LyraModel,
        tokenizer,
        device: torch.device,
        database_manager: Optional[DatabaseManager] = None
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device
        self.database_manager = database_manager

        # Training components
        self.optimizer = AdamW(model.parameters(), lr=config.learning_rate)
        self.scheduler = CosineAnnealingWarmRestarts(
            self.optimizer, T_0=1000, eta_min=1e-6
        )
        self.adaptive_scheduler = AdaptiveLearningScheduler()

        # Memory systems
        self.learning_memories = deque(maxlen=1000)
        self.replay_buffer = deque(maxlen=5000)

        # Training state
        self.global_step = 0
        self.epoch = 0
        self.best_performance = 0.0
        self.training_history = []

        # Human-like learning patterns
        self.forgetting_curve = self._initialize_forgetting_curve()
        self.consolidation_schedule = self._create_consolidation_schedule()

    def _initialize_forgetting_curve(self) -> Dict[str, float]:
        """Initialize forgetting curve parameters."""
        return {
            'initial_strength': 1.0,
            'decay_rate': 0.05,
            'consolidation_boost': 1.3,
            'interference_factor': 0.1
        }
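
    # Interpretation note (not in the original commit): these parameters read
    # as an Ebbinghaus-style curve, retention(t) = initial_strength *
    # exp(-decay_rate * t), with consolidation_boost multiplying memory
    # strength at each consolidation pass and interference_factor modelling
    # competition from newer memories. The dict is saved and restored with
    # checkpoints, but the decay itself is not applied anywhere in this file.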

    def _create_consolidation_schedule(self) -> List[int]:
        """Create memory consolidation schedule (like sleep cycles)."""
        # Consolidate at increasing intervals: 1h, 6h, 24h, 72h, 168h,
        # expressed in training steps at an assumed ~100 steps per hour
        return [100, 600, 2400, 7200, 16800]

    async def train_epoch(
        self,
        train_dataloader: DataLoader,
        val_dataloader: Optional[DataLoader] = None
    ) -> Dict[str, float]:
        """Train for one epoch with adaptive learning."""
        self.model.train()

        epoch_loss = 0.0
        num_batches = 0
        emotional_adjustments = 0

        for batch_idx, batch in enumerate(train_dataloader):
            # Move batch to device
            batch = self._prepare_batch(batch)

            # Get current emotional and personality state
            emotional_state = self._get_current_emotional_state()
            personality_state = self._get_current_personality_state()

            # Adaptive learning rate
            current_performance = self._calculate_recent_performance()
            adaptive_lr = self.adaptive_scheduler.get_learning_rate(
                emotional_state,
                personality_state.get('openness', 0.5),
                current_performance
            )

            # Adjust optimizer learning rate if significantly different
            current_lr = self.optimizer.param_groups[0]['lr']
            if abs(adaptive_lr - current_lr) > current_lr * 0.1:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = adaptive_lr
                emotional_adjustments += 1

            # Forward pass
            self.optimizer.zero_grad()

            outputs, lyra_info = self.model(
                input_ids=batch['input_ids'],
                attention_mask=batch['attention_mask'],
                user_id=batch.get('user_id'),
                conversation_context=batch.get('context')
            )

            # Calculate loss
            loss = self._calculate_adaptive_loss(
                outputs, batch['target_ids'], emotional_state
            )

            # Backward pass
            loss.backward()

            # Gradient clipping (human-like learning stability)
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

            # Optimizer step
            self.optimizer.step()
            self.scheduler.step()

            # Update training state
            epoch_loss += loss.item()
            num_batches += 1
            self.global_step += 1

            # Memory consolidation
            if self.global_step in self.consolidation_schedule:
                await self._consolidate_memories()

            # Experience replay (20% chance)
            if random.random() < 0.2 and len(self.replay_buffer) > 10:
                await self._experience_replay()

            # Log progress
            if batch_idx % 100 == 0:
                logger.info(
                    f"Epoch {self.epoch}, Batch {batch_idx}, "
                    f"Loss: {loss.item():.4f}, "
                    f"LR: {adaptive_lr:.2e}, "
                    f"Emotional adjustments: {emotional_adjustments}"
                )

        # Validation
        val_metrics = {}
        if val_dataloader:
            val_metrics = await self._validate(val_dataloader)

        # Record training history (guard against an empty dataloader)
        epoch_metrics = {
            'epoch': self.epoch,
            'train_loss': epoch_loss / max(num_batches, 1),
            'learning_rate': self.optimizer.param_groups[0]['lr'],
            'emotional_adjustments': emotional_adjustments,
            'global_step': self.global_step,
            **val_metrics
        }

        self.training_history.append(epoch_metrics)
        self.epoch += 1

        return epoch_metrics

    def _prepare_batch(self, batch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
        """Prepare batch for training."""
        prepared = {}

        for key, value in batch.items():
            if isinstance(value, torch.Tensor):
                prepared[key] = value.to(self.device)
            elif isinstance(value, list):
                # Convert list to tensor if numeric; catch only conversion
                # errors rather than using a bare except, which would also
                # swallow KeyboardInterrupt and SystemExit
                try:
                    prepared[key] = torch.tensor(value).to(self.device)
                except (TypeError, ValueError):
                    prepared[key] = value
            else:
                prepared[key] = value

        return prepared

    def _get_current_emotional_state(self) -> EmotionalState:
        """Get Lyra's current emotional state."""
        # This would normally come from the emotional system;
        # for now, create a random default state
        emotions = torch.rand(19)  # 19 emotion dimensions
        return EmotionalState.from_tensor(emotions, self.device)

    def _get_current_personality_state(self) -> Dict[str, float]:
        """Get current personality traits."""
        return {
            'openness': 0.7,
            'conscientiousness': 0.8,
            'extraversion': 0.6,
            'agreeableness': 0.9,
            'neuroticism': 0.3
        }

    def _calculate_recent_performance(self) -> float:
        """Calculate recent performance score."""
        if not self.training_history:
            return 0.5

        recent_epochs = self.training_history[-5:]  # Last 5 epochs

        # Simple performance metric based on relative loss improvement,
        # mapped to [0, 1] around a neutral 0.5 (e.g. loss falling from
        # 2.0 to 1.6 gives improvement 0.2 and a score of 0.7)
        losses = [epoch['train_loss'] for epoch in recent_epochs]
        if len(losses) < 2:
            return 0.5

        improvement = (losses[0] - losses[-1]) / losses[0]
        return min(max(0.5 + improvement, 0.0), 1.0)

    def _calculate_adaptive_loss(
        self,
        outputs: torch.Tensor,
        targets: torch.Tensor,
        emotional_state: EmotionalState
    ) -> torch.Tensor:
        """Calculate loss adjusted for emotional state."""
        # Base cross-entropy loss
        base_loss = nn.CrossEntropyLoss()(
            outputs.view(-1, outputs.size(-1)),
            targets.view(-1)
        )

        # Emotional adjustment: scaling the loss scales the gradients, so this
        # acts as a per-step learning-rate modifier. Out-of-place multiplication
        # keeps autograd happy.
        dominant_emotion, intensity = emotional_state.get_dominant_emotion()

        if dominant_emotion == 'frustration' and intensity > 0.7:
            # Reduce learning when frustrated (like humans)
            base_loss = base_loss * 0.8
        elif dominant_emotion == 'curiosity' and intensity > 0.6:
            # Increase learning when curious
            base_loss = base_loss * 1.2

        return base_loss

    async def _consolidate_memories(self):
        """Consolidate important memories (like sleep-based learning)."""
        if not self.learning_memories:
            return

        logger.info(f"Consolidating {len(self.learning_memories)} memories...")

        # Sort memories by importance: feedback score, discounted by how often
        # a memory has already been replayed
        important_memories = sorted(
            self.learning_memories,
            key=lambda m: m.user_feedback * (1.0 - m.replay_count * 0.1),
            reverse=True
        )[:50]  # Top 50 memories

        # Replay important memories
        for memory in important_memories[:10]:
            # Convert memory to training sample
            self.replay_buffer.append({
                'conversation_embedding': memory.conversation_embedding,
                'emotional_state': memory.emotional_state,
                'feedback': memory.user_feedback,
                'outcome': memory.learning_outcome
            })
            memory.replay_count += 1

        logger.info("Memory consolidation complete")

    async def _experience_replay(self):
        """Replay past experiences for better learning."""
        if len(self.replay_buffer) < 5:
            return

        # Sample random memories
        replay_samples = random.sample(
            list(self.replay_buffer), min(5, len(self.replay_buffer))
        )

        # Process replay samples (simplified)
        for sample in replay_samples:
            # This would normally involve re-training on the sample;
            # for now, just log the replay
            logger.debug(f"Replaying memory with feedback: {sample['feedback']}")

    async def _validate(self, val_dataloader: DataLoader) -> Dict[str, float]:
        """Validate model performance."""
        self.model.eval()

        total_loss = 0.0
        num_batches = 0

        with torch.no_grad():
            for batch in val_dataloader:
                batch = self._prepare_batch(batch)

                outputs, _ = self.model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask']
                )

                loss = nn.CrossEntropyLoss()(
                    outputs.view(-1, outputs.size(-1)),
                    batch['target_ids'].view(-1)
                )

                total_loss += loss.item()
                num_batches += 1

        self.model.train()

        avg_val_loss = total_loss / num_batches if num_batches > 0 else 0.0

        return {
            'val_loss': avg_val_loss,
            # Perplexity is the exponential of the mean cross-entropy loss
            'perplexity': torch.exp(torch.tensor(avg_val_loss)).item()
        }

    async def save_checkpoint(self, filepath: Path, metadata: Optional[Dict] = None):
        """Save training checkpoint."""
        checkpoint = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'global_step': self.global_step,
            'epoch': self.epoch,
            'training_history': self.training_history,
            'best_performance': self.best_performance,
            'learning_memories': list(self.learning_memories),
            'forgetting_curve': self.forgetting_curve,
            'metadata': metadata or {}
        }

        filepath.parent.mkdir(parents=True, exist_ok=True)
        torch.save(checkpoint, filepath)

        logger.info(f"Checkpoint saved to {filepath}")

    async def load_checkpoint(self, filepath: Path):
        """Load training checkpoint."""
        checkpoint = torch.load(filepath, map_location=self.device)

        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

        self.global_step = checkpoint.get('global_step', 0)
        self.epoch = checkpoint.get('epoch', 0)
        self.training_history = checkpoint.get('training_history', [])
        self.best_performance = checkpoint.get('best_performance', 0.0)
        self.learning_memories = deque(
            checkpoint.get('learning_memories', []), maxlen=1000
        )
        self.forgetting_curve = checkpoint.get('forgetting_curve', self.forgetting_curve)

        logger.info(f"Checkpoint loaded from {filepath}")

    def add_learning_memory(
        self,
        conversation_embedding: torch.Tensor,
        emotional_state: EmotionalState,
        user_feedback: float,
        learning_outcome: str
    ):
        """Add a significant learning memory."""
        memory = LearningMemory(
            conversation_embedding=conversation_embedding,
            emotional_state=emotional_state,
            user_feedback=user_feedback,
            learning_outcome=learning_outcome,
            timestamp=datetime.now()
        )

        self.learning_memories.append(memory)

    def get_training_statistics(self) -> Dict[str, Any]:
        """Get comprehensive training statistics."""
        if not self.training_history:
            return {'status': 'no_training_data'}

        recent_performance = self._calculate_recent_performance()

        # The early return above guarantees training_history is non-empty here
        return {
            'global_step': self.global_step,
            'current_epoch': self.epoch,
            'total_epochs_trained': len(self.training_history),
            'recent_performance': recent_performance,
            'best_performance': self.best_performance,
            'learning_memories_count': len(self.learning_memories),
            'replay_buffer_size': len(self.replay_buffer),
            'current_learning_rate': self.optimizer.param_groups[0]['lr'],
            'last_consolidation': max(
                [step for step in self.consolidation_schedule if step <= self.global_step],
                default=0
            ),
            'training_history_summary': {
                'best_train_loss': min(h['train_loss'] for h in self.training_history),
                'latest_train_loss': self.training_history[-1]['train_loss'],
                'average_emotional_adjustments': np.mean([
                    h['emotional_adjustments'] for h in self.training_history
                ])
            }
        }


async def create_training_pipeline(
    model: LyraModel,
    tokenizer,
    device: torch.device,
    database_manager: Optional[DatabaseManager] = None
) -> LyraTrainingPipeline:
    """Create and initialize training pipeline."""
    pipeline = LyraTrainingPipeline(model, tokenizer, device, database_manager)

    # Load existing checkpoint if available
    checkpoint_path = Path(config.models_dir) / "checkpoints" / "latest_training.pt"
    if checkpoint_path.exists():
        try:
            await pipeline.load_checkpoint(checkpoint_path)
            logger.info("Loaded existing training checkpoint")
        except Exception as e:
            logger.warning(f"Could not load checkpoint: {e}")

    return pipeline
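
A minimal driver for this pipeline might look like the sketch below. It is not part of the commit: the model, tokenizer, and conversation records are assumed to come from the rest of the Lyra codebase, and pad_collate refers to the illustrative collate function sketched after ConversationDataset above.

async def run_training(model, tokenizer, conversations):
    """Sketch: build the pipeline, train a few epochs, save a checkpoint."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    pipeline = await create_training_pipeline(model, tokenizer, device)

    dataset = ConversationDataset(conversations, tokenizer)
    loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=pad_collate)

    for _ in range(3):  # a few epochs
        metrics = await pipeline.train_epoch(loader)
        logger.info(f"Epoch metrics: {metrics}")

    await pipeline.save_checkpoint(
        Path(config.models_dir) / "checkpoints" / "latest_training.pt"
    )

# e.g. asyncio.run(run_training(model, tokenizer, conversations))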