Initial commit: NOVA - Neuro-Optimizing Versatile Agent
Complete transformer LLM built from scratch with:

Core Features:
- Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache)
- SentencePiece tokenizer (BPE/Unigram)
- Training pipeline (AMP, gradient checkpointing, DDP)
- Persona system with personality matrix (NO AI disclosure by default)
- Genetic evolution (NOVA-EVO) for hyperparameter optimization
- Legal-only data pipeline with license tracking
- Chat interface (CLI + REST API)
- Conversation memory (SQLite)

Model Sizes:
- 125M, 350M, 1.3B, 3B parameters
- Local-first, runs on CPU or GPU
- Python 3.10.6+, PyTorch 2.0+

Personas:
- girlfriend_gentle (high warmth, high empathy)
- girlfriend_playful (high humor, high playfulness)
- girlfriend_supportive (balanced, default)

Documentation:
- Complete README with quickstart
- Model card with ethical considerations
- Privacy documentation (local-first, zero telemetry)
- Data licenses and attribution
- Contributing guide

Infrastructure:
- GitHub Actions CI/CD
- Comprehensive test suite
- Quickstart script
- CLI tool

License: Apache 2.0

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
nova_evo/config.py (new file, +117 lines)
@@ -0,0 +1,117 @@
"""
Evolution configuration for NOVA-EVO
"""

from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional


@dataclass
class EvolutionConfig:
    """Configuration for genetic algorithm evolution"""

    # Population settings
    population_size: int = 20
    num_generations: int = 10
    elite_ratio: float = 0.2  # Top performers to keep
    mutation_rate: float = 0.3

    # Search space - hyperparameters
    search_learning_rate: bool = True
    lr_min: float = 1e-5
    lr_max: float = 1e-3

    search_batch_size: bool = True
    batch_size_options: List[int] = field(default_factory=lambda: [4, 8, 16, 32])

    search_warmup_steps: bool = True
    warmup_min: int = 100
    warmup_max: int = 2000

    search_weight_decay: bool = True
    wd_min: float = 0.0
    wd_max: float = 0.3

    # Search space - architecture toggles
    search_rope_theta: bool = True
    rope_theta_options: List[float] = field(default_factory=lambda: [1000.0, 10000.0, 100000.0])

    search_activation: bool = True
    activation_options: List[str] = field(default_factory=lambda: ['swiglu', 'geglu', 'gelu'])

    search_norm: bool = True
    norm_options: List[str] = field(default_factory=lambda: ['rmsnorm', 'layernorm'])

    # Fitness evaluation
    eval_steps: int = 100  # How many steps to train for evaluation
    eval_dataset_size: int = 1000  # Number of samples for evaluation

    # Multi-objective weights
    loss_weight: float = 0.5
    latency_weight: float = 0.2
    memory_weight: float = 0.2
    quality_weight: float = 0.1  # Chat quality (if eval set available)

    # Compute budgets
    max_eval_time_seconds: float = 300.0  # Max time per individual eval
    max_total_time_hours: float = 24.0  # Max total evolution time

    # Checkpointing
    save_dir: str = "nova_evo/hall_of_fame"
    checkpoint_every_n_generations: int = 5

    # Reproducibility
    seed: int = 42


@dataclass
class Individual:
    """Single individual in evolution population"""

    # Hyperparameters
    learning_rate: float = 3e-4
    batch_size: int = 8
    warmup_steps: int = 1000
    weight_decay: float = 0.1

    # Architecture choices
    rope_theta: float = 10000.0
    hidden_act: str = "swiglu"
    norm_type: str = "rmsnorm"

    # Fitness scores
    loss: Optional[float] = None
    perplexity: Optional[float] = None
    latency_ms: Optional[float] = None
    memory_mb: Optional[float] = None
    quality_score: Optional[float] = None
    fitness: Optional[float] = None

    # Metadata
    generation: int = 0
    parent_ids: List[int] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary"""
        return {
            'learning_rate': self.learning_rate,
            'batch_size': self.batch_size,
            'warmup_steps': self.warmup_steps,
            'weight_decay': self.weight_decay,
            'rope_theta': self.rope_theta,
            'hidden_act': self.hidden_act,
            'norm_type': self.norm_type,
            'loss': self.loss,
            'perplexity': self.perplexity,
            'latency_ms': self.latency_ms,
            'memory_mb': self.memory_mb,
            'quality_score': self.quality_score,
            'fitness': self.fitness,
            'generation': self.generation,
            'parent_ids': self.parent_ids,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'Individual':
        """Create from dictionary"""
        return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})
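A short usage example of the two dataclasses as committed (assuming the repository root is on PYTHONPATH so that nova_evo.config is importable; the values are arbitrary):

import json

from nova_evo.config import EvolutionConfig, Individual

cfg = EvolutionConfig(population_size=10, num_generations=5)
ind = Individual(learning_rate=1e-4, batch_size=16, generation=1)

# to_dict()/from_dict() round-trip, e.g. for persisting results under cfg.save_dir
payload = json.dumps(ind.to_dict())
restored = Individual.from_dict(json.loads(payload))
assert restored.learning_rate == 1e-4 and restored.batch_size == 16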
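The four *_weight fields imply a weighted multi-objective fitness over an Individual's measured loss, latency, memory, and chat quality. The evaluator itself is not part of this file, so the following is only a minimal sketch: it assumes each raw metric has already been normalized to [0, 1] with higher meaning better; the combine_fitness name and that normalization are assumptions, not code from this commit.

from nova_evo.config import EvolutionConfig, Individual


def combine_fitness(cfg: EvolutionConfig, ind: Individual,
                    norm_loss: float, norm_latency: float,
                    norm_memory: float, norm_quality: float) -> float:
    """Weighted sum of four objectives, each pre-normalized to [0, 1],
    higher = better (e.g. norm_loss = 1.0 for the best loss in the population)."""
    ind.fitness = (cfg.loss_weight * norm_loss
                   + cfg.latency_weight * norm_latency
                   + cfg.memory_weight * norm_memory
                   + cfg.quality_weight * norm_quality)
    return ind.fitness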
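Likewise, the genetic operators live outside config.py. As a rough illustration of how the search_* toggles, the min/max bounds, the *_options lists, and mutation_rate could drive a mutation step, here is a hypothetical helper; the function name and sampling choices (log-uniform learning rate, uniform picks from the option lists) are illustrative, not NOVA's actual implementation.

import math
import random

from nova_evo.config import EvolutionConfig, Individual


def mutate(cfg: EvolutionConfig, parent: Individual, rng: random.Random) -> Individual:
    """Hypothetical sketch: copy the parent, then mutate each searched field
    with probability cfg.mutation_rate."""
    child = Individual.from_dict(parent.to_dict())
    child.fitness = None  # the child must be re-evaluated
    child.loss = None
    child.generation = parent.generation + 1
    if cfg.search_learning_rate and rng.random() < cfg.mutation_rate:
        # log-uniform sample between lr_min and lr_max
        child.learning_rate = math.exp(rng.uniform(math.log(cfg.lr_min), math.log(cfg.lr_max)))
    if cfg.search_batch_size and rng.random() < cfg.mutation_rate:
        child.batch_size = rng.choice(cfg.batch_size_options)
    if cfg.search_warmup_steps and rng.random() < cfg.mutation_rate:
        child.warmup_steps = rng.randint(cfg.warmup_min, cfg.warmup_max)
    if cfg.search_weight_decay and rng.random() < cfg.mutation_rate:
        child.weight_decay = rng.uniform(cfg.wd_min, cfg.wd_max)
    if cfg.search_rope_theta and rng.random() < cfg.mutation_rate:
        child.rope_theta = rng.choice(cfg.rope_theta_options)
    if cfg.search_activation and rng.random() < cfg.mutation_rate:
        child.hidden_act = rng.choice(cfg.activation_options)
    if cfg.search_norm and rng.random() < cfg.mutation_rate:
        child.norm_type = rng.choice(cfg.norm_options)
    return child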