Complete transformer LLM built from scratch with: Core Features: - Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache) - SentencePiece tokenizer (BPE/Unigram) - Training pipeline (AMP, gradient checkpointing, DDP) - Persona system with personality matrix (NO AI disclosure by default) - Genetic evolution (NOVA-EVO) for hyperparameter optimization - Legal-only data pipeline with license tracking - Chat interface (CLI + REST API) - Conversation memory (SQLite) Model Sizes: - 125M, 350M, 1.3B, 3B parameters - Local-first, runs on CPU or GPU - Python 3.10.6+, PyTorch 2.0+ Personas: - girlfriend_gentle (high warmth, high empathy) - girlfriend_playful (high humor, high playfulness) - girlfriend_supportive (balanced, default) Documentation: - Complete README with quickstart - Model card with ethical considerations - Privacy documentation (local-first, zero telemetry) - Data licenses and attribution - Contributing guide Infrastructure: - GitHub Actions CI/CD - Comprehensive test suite - Quickstart script - CLI tool License: Apache 2.0 🤖 Generated with Claude Code https://claude.com/claude-code Co-Authored-By: Claude <noreply@anthropic.com>
118 lines
3.5 KiB
Python
118 lines
3.5 KiB
Python
"""
Evolution configuration for NOVA-EVO.
"""
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class EvolutionConfig:
    """Settings that drive the NOVA-EVO genetic hyperparameter search.

    Groups population sizing, the searchable hyperparameter/architecture
    space, fitness weighting, compute budgets, checkpointing, and the
    random seed into one configuration object.
    """

    # ----- Population -----
    population_size: int = 20
    num_generations: int = 10
    elite_ratio: float = 0.2       # fraction of top performers carried over unchanged
    mutation_rate: float = 0.3

    # ----- Search space: optimizer hyperparameters -----
    search_learning_rate: bool = True
    lr_min: float = 1e-5
    lr_max: float = 1e-3

    search_batch_size: bool = True
    batch_size_options: List[int] = field(default_factory=lambda: [4, 8, 16, 32])

    search_warmup_steps: bool = True
    warmup_min: int = 100
    warmup_max: int = 2000

    search_weight_decay: bool = True
    wd_min: float = 0.0
    wd_max: float = 0.3

    # ----- Search space: architecture toggles -----
    search_rope_theta: bool = True
    rope_theta_options: List[float] = field(default_factory=lambda: [1000.0, 10000.0, 100000.0])

    search_activation: bool = True
    activation_options: List[str] = field(default_factory=lambda: ['swiglu', 'geglu', 'gelu'])

    search_norm: bool = True
    norm_options: List[str] = field(default_factory=lambda: ['rmsnorm', 'layernorm'])

    # ----- Fitness evaluation -----
    eval_steps: int = 100              # training steps used to score each candidate
    eval_dataset_size: int = 1000      # number of samples in the evaluation set

    # ----- Multi-objective fitness weights -----
    loss_weight: float = 0.5
    latency_weight: float = 0.2
    memory_weight: float = 0.2
    quality_weight: float = 0.1        # chat quality, used only if an eval set is available

    # ----- Compute budgets -----
    max_eval_time_seconds: float = 300.0   # wall-clock cap per individual evaluation
    max_total_time_hours: float = 24.0     # wall-clock cap for the entire evolution run

    # ----- Checkpointing -----
    save_dir: str = "nova_evo/hall_of_fame"
    checkpoint_every_n_generations: int = 5

    # ----- Reproducibility -----
    seed: int = 42
@dataclass
class Individual:
    """A single candidate configuration in the NOVA-EVO population.

    Holds the sampled hyperparameters and architecture choices, the fitness
    measurements filled in after evaluation (``None`` until evaluated), and
    lineage metadata for tracking evolution history.
    """

    # Hyperparameters
    learning_rate: float = 3e-4
    batch_size: int = 8
    warmup_steps: int = 1000
    weight_decay: float = 0.1

    # Architecture choices
    rope_theta: float = 10000.0
    hidden_act: str = "swiglu"
    norm_type: str = "rmsnorm"

    # Fitness scores (populated by evaluation; None means not yet evaluated)
    loss: Optional[float] = None
    perplexity: Optional[float] = None
    latency_ms: Optional[float] = None
    memory_mb: Optional[float] = None
    quality_score: Optional[float] = None
    fitness: Optional[float] = None

    # Metadata
    generation: int = 0
    parent_ids: List[int] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every field to a plain dictionary.

        Uses ``dataclasses.asdict`` so the mapping stays in sync
        automatically when fields are added or renamed (the previous
        hand-written mapping had to be updated by hand), and so mutable
        fields such as ``parent_ids`` are copied rather than aliased to
        the instance's own list.
        """
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Individual':
        """Create an Individual from a dictionary, ignoring unknown keys.

        Unknown keys are dropped so dictionaries written by older or newer
        schema versions still load cleanly.
        """
        known = cls.__dataclass_fields__
        return cls(**{key: value for key, value in data.items() if key in known})