Initial commit: NOVA - Neuro-Optimizing Versatile Agent
Complete transformer LLM built from scratch with:

Core Features:
- Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache)
- SentencePiece tokenizer (BPE/Unigram)
- Training pipeline (AMP, gradient checkpointing, DDP)
- Persona system with personality matrix (NO AI disclosure by default)
- Genetic evolution (NOVA-EVO) for hyperparameter optimization
- Legal-only data pipeline with license tracking
- Chat interface (CLI + REST API)
- Conversation memory (SQLite)

Model Sizes:
- 125M, 350M, 1.3B, 3B parameters
- Local-first, runs on CPU or GPU
- Python 3.10.6+, PyTorch 2.0+

Personas:
- girlfriend_gentle (high warmth, high empathy)
- girlfriend_playful (high humor, high playfulness)
- girlfriend_supportive (balanced, default)

Documentation:
- Complete README with quickstart
- Model card with ethical considerations
- Privacy documentation (local-first, zero telemetry)
- Data licenses and attribution
- Contributing guide

Infrastructure:
- GitHub Actions CI/CD
- Comprehensive test suite
- Quickstart script
- CLI tool

License: Apache 2.0

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
nova_evo/config.py (new file, +117 lines)
@@ -0,0 +1,117 @@
"""
Evolution configuration for NOVA-EVO
"""

from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional


@dataclass
class EvolutionConfig:
    """Configuration for genetic algorithm evolution"""

    # Population settings
    population_size: int = 20
    num_generations: int = 10
    elite_ratio: float = 0.2  # Top performers to keep
    mutation_rate: float = 0.3

    # Search space - hyperparameters
    search_learning_rate: bool = True
    lr_min: float = 1e-5
    lr_max: float = 1e-3

    search_batch_size: bool = True
    batch_size_options: List[int] = field(default_factory=lambda: [4, 8, 16, 32])

    search_warmup_steps: bool = True
    warmup_min: int = 100
    warmup_max: int = 2000

    search_weight_decay: bool = True
    wd_min: float = 0.0
    wd_max: float = 0.3

    # Search space - architecture toggles
    search_rope_theta: bool = True
    rope_theta_options: List[float] = field(default_factory=lambda: [1000.0, 10000.0, 100000.0])

    search_activation: bool = True
    activation_options: List[str] = field(default_factory=lambda: ['swiglu', 'geglu', 'gelu'])

    search_norm: bool = True
    norm_options: List[str] = field(default_factory=lambda: ['rmsnorm', 'layernorm'])

    # Fitness evaluation
    eval_steps: int = 100  # How many steps to train for evaluation
    eval_dataset_size: int = 1000  # Number of samples for evaluation

    # Multi-objective weights
    loss_weight: float = 0.5
    latency_weight: float = 0.2
    memory_weight: float = 0.2
    quality_weight: float = 0.1  # Chat quality (if eval set available)

    # Compute budgets
    max_eval_time_seconds: float = 300.0  # Max time per individual eval
    max_total_time_hours: float = 24.0  # Max total evolution time

    # Checkpointing
    save_dir: str = "nova_evo/hall_of_fame"
    checkpoint_every_n_generations: int = 5

    # Reproducibility
    seed: int = 42


@dataclass
class Individual:
    """Single individual in evolution population"""

    # Hyperparameters
    learning_rate: float = 3e-4
    batch_size: int = 8
    warmup_steps: int = 1000
    weight_decay: float = 0.1

    # Architecture choices
    rope_theta: float = 10000.0
    hidden_act: str = "swiglu"
    norm_type: str = "rmsnorm"

    # Fitness scores
    loss: Optional[float] = None
    perplexity: Optional[float] = None
    latency_ms: Optional[float] = None
    memory_mb: Optional[float] = None
    quality_score: Optional[float] = None
    fitness: Optional[float] = None

    # Metadata
    generation: int = 0
    parent_ids: List[int] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary"""
        return {
            'learning_rate': self.learning_rate,
            'batch_size': self.batch_size,
            'warmup_steps': self.warmup_steps,
            'weight_decay': self.weight_decay,
            'rope_theta': self.rope_theta,
            'hidden_act': self.hidden_act,
            'norm_type': self.norm_type,
            'loss': self.loss,
            'perplexity': self.perplexity,
            'latency_ms': self.latency_ms,
            'memory_mb': self.memory_mb,
            'quality_score': self.quality_score,
            'fitness': self.fitness,
            'generation': self.generation,
            'parent_ids': self.parent_ids,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Individual':
        """Create from dictionary"""
        return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})
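A short usage example of the two dataclasses as committed (assuming the repository root is on PYTHONPATH so that nova_evo.config is importable; the values are arbitrary):

import json

from nova_evo.config import EvolutionConfig, Individual

cfg = EvolutionConfig(population_size=10, num_generations=5)
ind = Individual(learning_rate=1e-4, batch_size=16, generation=1)

# to_dict()/from_dict() round-trip, e.g. for persisting results under cfg.save_dir
payload = json.dumps(ind.to_dict())
restored = Individual.from_dict(json.loads(payload))
assert restored.learning_rate == 1e-4 and restored.batch_size == 16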
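The four *_weight fields imply a weighted multi-objective fitness over an Individual's measured loss, latency, memory, and chat quality. The evaluator itself is not part of this file, so the following is only a minimal sketch: it assumes each raw metric has already been normalized to [0, 1] with higher meaning better; the combine_fitness name and that normalization are assumptions, not code from this commit.

from nova_evo.config import EvolutionConfig, Individual


def combine_fitness(cfg: EvolutionConfig, ind: Individual,
                    norm_loss: float, norm_latency: float,
                    norm_memory: float, norm_quality: float) -> float:
    """Weighted sum of four objectives, each pre-normalized to [0, 1],
    higher = better (e.g. norm_loss = 1.0 for the best loss in the population)."""
    ind.fitness = (cfg.loss_weight * norm_loss
                   + cfg.latency_weight * norm_latency
                   + cfg.memory_weight * norm_memory
                   + cfg.quality_weight * norm_quality)
    return ind.fitness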
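Likewise, the genetic operators live outside config.py. As a rough illustration of how the search_* toggles, the min/max bounds, the *_options lists, and mutation_rate could drive a mutation step, here is a hypothetical helper; the function name and sampling choices (log-uniform learning rate, uniform picks from the option lists) are illustrative, not NOVA's actual implementation.

import math
import random

from nova_evo.config import EvolutionConfig, Individual


def mutate(cfg: EvolutionConfig, parent: Individual, rng: random.Random) -> Individual:
    """Hypothetical sketch: copy the parent, then mutate each searched field
    with probability cfg.mutation_rate."""
    child = Individual.from_dict(parent.to_dict())
    child.fitness = None  # the child must be re-evaluated
    child.loss = None
    child.generation = parent.generation + 1
    if cfg.search_learning_rate and rng.random() < cfg.mutation_rate:
        # log-uniform sample between lr_min and lr_max
        child.learning_rate = math.exp(rng.uniform(math.log(cfg.lr_min), math.log(cfg.lr_max)))
    if cfg.search_batch_size and rng.random() < cfg.mutation_rate:
        child.batch_size = rng.choice(cfg.batch_size_options)
    if cfg.search_warmup_steps and rng.random() < cfg.mutation_rate:
        child.warmup_steps = rng.randint(cfg.warmup_min, cfg.warmup_max)
    if cfg.search_weight_decay and rng.random() < cfg.mutation_rate:
        child.weight_decay = rng.uniform(cfg.wd_min, cfg.wd_max)
    if cfg.search_rope_theta and rng.random() < cfg.mutation_rate:
        child.rope_theta = rng.choice(cfg.rope_theta_options)
    if cfg.search_activation and rng.random() < cfg.mutation_rate:
        child.hidden_act = rng.choice(cfg.activation_options)
    if cfg.search_norm and rng.random() < cfg.mutation_rate:
        child.norm_type = rng.choice(cfg.norm_options)
    return child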