feat: Add database setup guide and local configuration files
- Added DATABASE_SETUP.md with a comprehensive guide to installing PostgreSQL and Redis on Windows
- Created .claude/settings.local.json with permission settings for pytest and database fix scripts
- Updated .gitignore to exclude .env.backup
- Included database connection test utilities in lyra/database_setup.py
- Added environment variable configuration examples for local development
lyra/training/pipeline.py (new file, 574 lines)
@@ -0,0 +1,574 @@
"""
Advanced training pipeline for Lyra with sliding context window and adaptive learning.

Implements sophisticated training strategies including:
- Sliding context window for long conversations
- Dynamic curriculum based on Lyra's emotional and personality state
- Memory consolidation and replay
- Human-like learning patterns
"""

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import numpy as np
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass
from datetime import datetime
import json
import asyncio
from collections import deque
import random

from ..config import config
from ..core.lyra_model import LyraModel
from ..database.manager import DatabaseManager
from ..emotions.system import EmotionalState

logger = logging.getLogger(__name__)


@dataclass
class TrainingBatch:
    """Represents a training batch with context."""
    input_ids: torch.Tensor
    attention_mask: torch.Tensor
    target_ids: torch.Tensor
    emotional_context: torch.Tensor
    personality_context: torch.Tensor
    conversation_id: str
    turn_index: int
    metadata: Dict[str, Any]


@dataclass
class LearningMemory:
    """Represents a significant learning memory."""
    conversation_embedding: torch.Tensor
    emotional_state: EmotionalState
    user_feedback: float
    learning_outcome: str
    timestamp: datetime
    replay_count: int = 0


class ConversationDataset(Dataset):
    """Dataset for conversation training with sliding windows."""

    def __init__(
        self,
        conversations: List[Dict[str, Any]],
        tokenizer,
        max_length: int = 512,
        sliding_window: int = 256,
        overlap: int = 64
    ):
        self.conversations = conversations
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.sliding_window = sliding_window
        self.overlap = overlap
        self.samples = self._prepare_samples()

    def _prepare_samples(self) -> List[Dict[str, Any]]:
        """Prepare training samples with sliding windows."""
        samples = []

        for conv in self.conversations:
            # Extract conversation turns
            turns = conv.get('turns', [])
            full_text = ""

            # Build conversation context
            for turn in turns:
                if turn['role'] == 'user':
                    full_text += f"User: {turn['content']}\n"
                elif turn['role'] == 'assistant':
                    full_text += f"Lyra: {turn['content']}\n"

            # Create sliding windows. The loop bound uses the overlap rather
            # than the full window size so that conversations shorter than one
            # window still yield a single (truncated) sample instead of being
            # silently dropped.
            tokens = self.tokenizer.encode(full_text)

            for start_idx in range(0, max(len(tokens) - self.overlap, 1),
                                   self.sliding_window - self.overlap):
                end_idx = min(start_idx + self.sliding_window, len(tokens))
                window_tokens = tokens[start_idx:end_idx]

                if len(window_tokens) < 32:  # Skip very short windows
                    continue

                # Target is the input shifted by one token (next-token prediction)
                input_tokens = window_tokens[:-1]
                target_tokens = window_tokens[1:]

                samples.append({
                    'input_ids': input_tokens,
                    'target_ids': target_tokens,
                    'conversation_id': conv.get('id', ''),
                    'emotional_context': conv.get('emotional_state', {}),
                    'personality_context': conv.get('personality_state', {}),
                    'metadata': conv.get('metadata', {})
                })

        return samples

    def __len__(self) -> int:
        return len(self.samples)

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        return self.samples[idx]
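

# __getitem__ returns variable-length token lists, so a DataLoader over this
# dataset needs a collate function that pads each field to a common length.
# The sketch below is illustrative and not part of the original commit; it
# assumes a pad token id of 0 and relies on CrossEntropyLoss ignoring -100
# targets (its default ignore_index).
def pad_collate(batch: List[Dict[str, Any]], pad_token_id: int = 0) -> Dict[str, Any]:
    """Minimal collate_fn sketch: pad ids and build attention masks."""
    max_len = max(len(s['input_ids']) for s in batch)
    input_ids, target_ids, attention_mask = [], [], []
    for s in batch:
        pad = max_len - len(s['input_ids'])
        input_ids.append(s['input_ids'] + [pad_token_id] * pad)
        target_ids.append(s['target_ids'] + [-100] * pad)  # -100 is ignored by the loss
        attention_mask.append([1] * len(s['input_ids']) + [0] * pad)
    return {
        'input_ids': torch.tensor(input_ids),
        'target_ids': torch.tensor(target_ids),
        'attention_mask': torch.tensor(attention_mask),
        'conversation_id': [s['conversation_id'] for s in batch],
        'metadata': [s['metadata'] for s in batch],
    }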


class AdaptiveLearningScheduler:
    """Adaptive learning rate based on emotional and personality state."""

    def __init__(self, base_lr: float = 1e-4):
        self.base_lr = base_lr
        self.emotional_multipliers = {
            'joy': 1.2,          # Learn faster when happy
            'curiosity': 1.5,    # Learn much faster when curious
            'frustration': 0.7,  # Learn slower when frustrated
            'confusion': 0.5,    # Learn slower when confused
            'confidence': 1.1    # Learn slightly faster when confident
        }

    def get_learning_rate(
        self,
        emotional_state: EmotionalState,
        personality_openness: float,
        recent_performance: float
    ) -> float:
        """Calculate adaptive learning rate."""
        # Base rate adjustment
        lr = self.base_lr

        # Emotional adjustment
        dominant_emotion, intensity = emotional_state.get_dominant_emotion()
        if dominant_emotion in self.emotional_multipliers:
            lr *= self.emotional_multipliers[dominant_emotion] * intensity

        # Personality adjustment (openness to experience)
        lr *= (1.0 + personality_openness * 0.3)

        # Performance adjustment
        if recent_performance > 0.8:
            lr *= 1.1  # Increase when performing well
        elif recent_performance < 0.4:
            lr *= 0.8  # Decrease when struggling

        return max(lr, self.base_lr * 0.1)  # Don't go too low
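        # Worked example (illustrative, not from the original commit):
        # with base_lr = 1e-4, dominant emotion 'curiosity' at intensity 0.8,
        # openness 0.7, and recent_performance 0.85:
        #   1e-4   * (1.5 * 0.8) = 1.2e-4    (emotional multiplier)
        #   1.2e-4 * 1.21        = 1.452e-4  (openness: 1 + 0.7 * 0.3)
        #   1.452e-4 * 1.1       ~= 1.6e-4   (performance boost)
        # which stays well above the 1e-5 floor.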


class LyraTrainingPipeline:
    """Complete training pipeline for Lyra with human-like learning patterns."""

    def __init__(
        self,
        model: LyraModel,
        tokenizer,
        device: torch.device,
        database_manager: Optional[DatabaseManager] = None
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device
        self.database_manager = database_manager

        # Training components
        self.optimizer = AdamW(model.parameters(), lr=config.learning_rate)
        self.scheduler = CosineAnnealingWarmRestarts(
            self.optimizer, T_0=1000, eta_min=1e-6
        )
        self.adaptive_scheduler = AdaptiveLearningScheduler()

        # Memory systems
        self.learning_memories = deque(maxlen=1000)
        self.replay_buffer = deque(maxlen=5000)

        # Training state
        self.global_step = 0
        self.epoch = 0
        self.best_performance = 0.0
        self.training_history = []

        # Human-like learning patterns
        self.forgetting_curve = self._initialize_forgetting_curve()
        self.consolidation_schedule = self._create_consolidation_schedule()

    def _initialize_forgetting_curve(self) -> Dict[str, float]:
        """Initialize forgetting curve parameters."""
        return {
            'initial_strength': 1.0,
            'decay_rate': 0.05,
            'consolidation_boost': 1.3,
            'interference_factor': 0.1
        }
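
    # Interpretation note (not in the original commit): these parameters read
    # as an Ebbinghaus-style curve, retention(t) = initial_strength *
    # exp(-decay_rate * t), with consolidation_boost multiplying memory
    # strength at each consolidation pass and interference_factor modelling
    # competition from newer memories. The dict is saved and restored with
    # checkpoints, but the decay itself is not applied anywhere in this file.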

    def _create_consolidation_schedule(self) -> List[int]:
        """Create memory consolidation schedule (like sleep cycles)."""
        # Consolidate at increasing intervals: 1h, 6h, 24h, 72h, 168h,
        # expressed in training steps at an assumed ~100 steps per hour
        return [100, 600, 2400, 7200, 16800]

    async def train_epoch(
        self,
        train_dataloader: DataLoader,
        val_dataloader: Optional[DataLoader] = None
    ) -> Dict[str, float]:
        """Train for one epoch with adaptive learning."""
        self.model.train()

        epoch_loss = 0.0
        num_batches = 0
        emotional_adjustments = 0

        for batch_idx, batch in enumerate(train_dataloader):
            # Move batch to device
            batch = self._prepare_batch(batch)

            # Get current emotional and personality state
            emotional_state = self._get_current_emotional_state()
            personality_state = self._get_current_personality_state()

            # Adaptive learning rate
            current_performance = self._calculate_recent_performance()
            adaptive_lr = self.adaptive_scheduler.get_learning_rate(
                emotional_state,
                personality_state.get('openness', 0.5),
                current_performance
            )

            # Adjust optimizer learning rate if significantly different
            current_lr = self.optimizer.param_groups[0]['lr']
            if abs(adaptive_lr - current_lr) > current_lr * 0.1:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = adaptive_lr
                emotional_adjustments += 1

            # Forward pass
            self.optimizer.zero_grad()

            outputs, lyra_info = self.model(
                input_ids=batch['input_ids'],
                attention_mask=batch['attention_mask'],
                user_id=batch.get('user_id'),
                conversation_context=batch.get('context')
            )

            # Calculate loss
            loss = self._calculate_adaptive_loss(
                outputs, batch['target_ids'], emotional_state
            )

            # Backward pass
            loss.backward()

            # Gradient clipping (human-like learning stability)
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

            # Optimizer step
            self.optimizer.step()
            self.scheduler.step()

            # Update training state
            epoch_loss += loss.item()
            num_batches += 1
            self.global_step += 1

            # Memory consolidation
            if self.global_step in self.consolidation_schedule:
                await self._consolidate_memories()

            # Experience replay (20% chance)
            if random.random() < 0.2 and len(self.replay_buffer) > 10:
                await self._experience_replay()

            # Log progress
            if batch_idx % 100 == 0:
                logger.info(
                    f"Epoch {self.epoch}, Batch {batch_idx}, "
                    f"Loss: {loss.item():.4f}, "
                    f"LR: {adaptive_lr:.2e}, "
                    f"Emotional adjustments: {emotional_adjustments}"
                )

        # Validation
        val_metrics = {}
        if val_dataloader:
            val_metrics = await self._validate(val_dataloader)

        # Record training history (guard against an empty dataloader)
        epoch_metrics = {
            'epoch': self.epoch,
            'train_loss': epoch_loss / max(num_batches, 1),
            'learning_rate': self.optimizer.param_groups[0]['lr'],
            'emotional_adjustments': emotional_adjustments,
            'global_step': self.global_step,
            **val_metrics
        }

        self.training_history.append(epoch_metrics)
        self.epoch += 1

        return epoch_metrics

    def _prepare_batch(self, batch: Dict[str, Any]) -> Dict[str, torch.Tensor]:
        """Prepare batch for training."""
        prepared = {}

        for key, value in batch.items():
            if isinstance(value, torch.Tensor):
                prepared[key] = value.to(self.device)
            elif isinstance(value, list):
                # Convert list to tensor if numeric; catch only conversion
                # errors rather than using a bare except, which would also
                # swallow KeyboardInterrupt and SystemExit
                try:
                    prepared[key] = torch.tensor(value).to(self.device)
                except (TypeError, ValueError):
                    prepared[key] = value
            else:
                prepared[key] = value

        return prepared

    def _get_current_emotional_state(self) -> EmotionalState:
        """Get Lyra's current emotional state."""
        # This would normally come from the emotional system;
        # for now, create a random default state
        emotions = torch.rand(19)  # 19 emotion dimensions
        return EmotionalState.from_tensor(emotions, self.device)

    def _get_current_personality_state(self) -> Dict[str, float]:
        """Get current personality traits."""
        return {
            'openness': 0.7,
            'conscientiousness': 0.8,
            'extraversion': 0.6,
            'agreeableness': 0.9,
            'neuroticism': 0.3
        }

    def _calculate_recent_performance(self) -> float:
        """Calculate recent performance score."""
        if not self.training_history:
            return 0.5

        recent_epochs = self.training_history[-5:]  # Last 5 epochs

        # Simple performance metric based on relative loss improvement,
        # mapped to [0, 1] around a neutral 0.5 (e.g. loss falling from
        # 2.0 to 1.6 gives improvement 0.2 and a score of 0.7)
        losses = [epoch['train_loss'] for epoch in recent_epochs]
        if len(losses) < 2:
            return 0.5

        improvement = (losses[0] - losses[-1]) / losses[0]
        return min(max(0.5 + improvement, 0.0), 1.0)

    def _calculate_adaptive_loss(
        self,
        outputs: torch.Tensor,
        targets: torch.Tensor,
        emotional_state: EmotionalState
    ) -> torch.Tensor:
        """Calculate loss adjusted for emotional state."""
        # Base cross-entropy loss
        base_loss = nn.CrossEntropyLoss()(
            outputs.view(-1, outputs.size(-1)),
            targets.view(-1)
        )

        # Emotional adjustment: scaling the loss scales the gradients, so this
        # acts as a per-step learning-rate modifier. Out-of-place multiplication
        # keeps autograd happy.
        dominant_emotion, intensity = emotional_state.get_dominant_emotion()

        if dominant_emotion == 'frustration' and intensity > 0.7:
            # Reduce learning when frustrated (like humans)
            base_loss = base_loss * 0.8
        elif dominant_emotion == 'curiosity' and intensity > 0.6:
            # Increase learning when curious
            base_loss = base_loss * 1.2

        return base_loss

    async def _consolidate_memories(self):
        """Consolidate important memories (like sleep-based learning)."""
        if not self.learning_memories:
            return

        logger.info(f"Consolidating {len(self.learning_memories)} memories...")

        # Sort memories by importance: feedback score, discounted by how often
        # a memory has already been replayed
        important_memories = sorted(
            self.learning_memories,
            key=lambda m: m.user_feedback * (1.0 - m.replay_count * 0.1),
            reverse=True
        )[:50]  # Top 50 memories

        # Replay important memories
        for memory in important_memories[:10]:
            # Convert memory to training sample
            self.replay_buffer.append({
                'conversation_embedding': memory.conversation_embedding,
                'emotional_state': memory.emotional_state,
                'feedback': memory.user_feedback,
                'outcome': memory.learning_outcome
            })
            memory.replay_count += 1

        logger.info("Memory consolidation complete")

    async def _experience_replay(self):
        """Replay past experiences for better learning."""
        if len(self.replay_buffer) < 5:
            return

        # Sample random memories
        replay_samples = random.sample(
            list(self.replay_buffer), min(5, len(self.replay_buffer))
        )

        # Process replay samples (simplified)
        for sample in replay_samples:
            # This would normally involve re-training on the sample;
            # for now, just log the replay
            logger.debug(f"Replaying memory with feedback: {sample['feedback']}")

    async def _validate(self, val_dataloader: DataLoader) -> Dict[str, float]:
        """Validate model performance."""
        self.model.eval()

        total_loss = 0.0
        num_batches = 0

        with torch.no_grad():
            for batch in val_dataloader:
                batch = self._prepare_batch(batch)

                outputs, _ = self.model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask']
                )

                loss = nn.CrossEntropyLoss()(
                    outputs.view(-1, outputs.size(-1)),
                    batch['target_ids'].view(-1)
                )

                total_loss += loss.item()
                num_batches += 1

        self.model.train()

        avg_val_loss = total_loss / num_batches if num_batches > 0 else 0.0

        return {
            'val_loss': avg_val_loss,
            # Perplexity is the exponential of the mean cross-entropy loss
            'perplexity': torch.exp(torch.tensor(avg_val_loss)).item()
        }

    async def save_checkpoint(self, filepath: Path, metadata: Optional[Dict] = None):
        """Save training checkpoint."""
        checkpoint = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'global_step': self.global_step,
            'epoch': self.epoch,
            'training_history': self.training_history,
            'best_performance': self.best_performance,
            'learning_memories': list(self.learning_memories),
            'forgetting_curve': self.forgetting_curve,
            'metadata': metadata or {}
        }

        filepath.parent.mkdir(parents=True, exist_ok=True)
        torch.save(checkpoint, filepath)

        logger.info(f"Checkpoint saved to {filepath}")

    async def load_checkpoint(self, filepath: Path):
        """Load training checkpoint."""
        checkpoint = torch.load(filepath, map_location=self.device)

        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

        self.global_step = checkpoint.get('global_step', 0)
        self.epoch = checkpoint.get('epoch', 0)
        self.training_history = checkpoint.get('training_history', [])
        self.best_performance = checkpoint.get('best_performance', 0.0)
        self.learning_memories = deque(
            checkpoint.get('learning_memories', []), maxlen=1000
        )
        self.forgetting_curve = checkpoint.get('forgetting_curve', self.forgetting_curve)

        logger.info(f"Checkpoint loaded from {filepath}")

    def add_learning_memory(
        self,
        conversation_embedding: torch.Tensor,
        emotional_state: EmotionalState,
        user_feedback: float,
        learning_outcome: str
    ):
        """Add a significant learning memory."""
        memory = LearningMemory(
            conversation_embedding=conversation_embedding,
            emotional_state=emotional_state,
            user_feedback=user_feedback,
            learning_outcome=learning_outcome,
            timestamp=datetime.now()
        )

        self.learning_memories.append(memory)

    def get_training_statistics(self) -> Dict[str, Any]:
        """Get comprehensive training statistics."""
        if not self.training_history:
            return {'status': 'no_training_data'}

        recent_performance = self._calculate_recent_performance()

        # The early return above guarantees training_history is non-empty here
        return {
            'global_step': self.global_step,
            'current_epoch': self.epoch,
            'total_epochs_trained': len(self.training_history),
            'recent_performance': recent_performance,
            'best_performance': self.best_performance,
            'learning_memories_count': len(self.learning_memories),
            'replay_buffer_size': len(self.replay_buffer),
            'current_learning_rate': self.optimizer.param_groups[0]['lr'],
            'last_consolidation': max(
                [step for step in self.consolidation_schedule if step <= self.global_step],
                default=0
            ),
            'training_history_summary': {
                'best_train_loss': min(h['train_loss'] for h in self.training_history),
                'latest_train_loss': self.training_history[-1]['train_loss'],
                'average_emotional_adjustments': np.mean([
                    h['emotional_adjustments'] for h in self.training_history
                ])
            }
        }


async def create_training_pipeline(
    model: LyraModel,
    tokenizer,
    device: torch.device,
    database_manager: Optional[DatabaseManager] = None
) -> LyraTrainingPipeline:
    """Create and initialize training pipeline."""
    pipeline = LyraTrainingPipeline(model, tokenizer, device, database_manager)

    # Load existing checkpoint if available
    checkpoint_path = Path(config.models_dir) / "checkpoints" / "latest_training.pt"
    if checkpoint_path.exists():
        try:
            await pipeline.load_checkpoint(checkpoint_path)
            logger.info("Loaded existing training checkpoint")
        except Exception as e:
            logger.warning(f"Could not load checkpoint: {e}")

    return pipeline
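
A minimal driver for this pipeline might look like the sketch below. It is not part of the commit: the model, tokenizer, and conversation records are assumed to come from the rest of the Lyra codebase, and pad_collate refers to the illustrative collate function sketched after ConversationDataset above.

async def run_training(model, tokenizer, conversations):
    """Sketch: build the pipeline, train a few epochs, save a checkpoint."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    pipeline = await create_training_pipeline(model, tokenizer, device)

    dataset = ConversationDataset(conversations, tokenizer)
    loader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=pad_collate)

    for _ in range(3):  # a few epochs
        metrics = await pipeline.train_epoch(loader)
        logger.info(f"Epoch metrics: {metrics}")

    await pipeline.save_checkpoint(
        Path(config.models_dir) / "checkpoints" / "latest_training.pt"
    )

# e.g. asyncio.run(run_training(model, tokenizer, conversations))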