Initial commit: NOVA - Neuro-Optimizing Versatile Agent

Complete transformer LLM built from scratch with: Core Features: - Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache) - SentencePiece tokenizer (BPE/Unigram) - Training pipeline (AMP, gradient checkpointing, DDP) - Persona system with personality matrix (NO AI disclosure by default) - Genetic evolution (NOVA-EVO) for hyperparameter optimization - Legal-only data pipeline with license tracking - Chat interface (CLI + REST API) - Conversation memory (SQLite) Model Sizes: - 125M, 350M, 1.3B, 3B parameters - Local-first, runs on CPU or GPU - Python 3.10.6+, PyTorch 2.0+ Personas: - girlfriend_gentle (high warmth, high empathy) - girlfriend_playful (high humor, high playfulness) - girlfriend_supportive (balanced, default) Documentation: - Complete README with quickstart - Model card with ethical considerations - Privacy documentation (local-first, zero telemetry) - Data licenses and attribution - Contributing guide Infrastructure: - GitHub Actions CI/CD - Comprehensive test suite - Quickstart script - CLI tool License: Apache 2.0 🤖 Generated with Claude Code https://claude.com/claude-code Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 20:56:37 -04:00
commit a7f091aa45
50 changed files with 6437 additions and 0 deletions
--- a/nova_chat/persona.py
+++ b/nova_chat/persona.py
@@ -0,0 +1,290 @@
+"""
+Persona and Personality Matrix system for NOVA
+
+This system controls NOVA's conversational style and personality.
+AI self-disclosure is off by default and configurable per persona.
+"""
+
+import yaml
+import json
+from dataclasses import dataclass, field
+from typing import Dict, Optional, Any
+from pathlib import Path
+
+
+@dataclass
+class PersonalityMatrix:
+    """
+    Personality trait weights that influence generation behavior
+
+    Each trait is meant to be a float from 0.0 to 1.0; that range is a
+    convention and is not validated by this class.
+    These modulate sampling parameters and response style
+    """
+    # Core traits
+    warmth: float = 0.8  # How warm and affectionate
+    humor: float = 0.6  # How playful and funny
+    empathy: float = 0.9  # How understanding and supportive
+    decisiveness: float = 0.5  # How direct vs thoughtful
+    creativity: float = 0.7  # How creative and imaginative
+    intimacy: float = 0.7  # How personal and close
+    playfulness: float = 0.8  # How flirty and playful
+    formality: float = 0.2  # How formal vs casual
+
+    def to_dict(self) -> Dict[str, float]:
+        """
+        Convert to dictionary
+
+        Returns:
+            Mapping of trait name -> value, in field declaration order
+        """
+        # Driven by the dataclass field table so a newly added trait is
+        # serialized without having to touch this method.
+        return {name: getattr(self, name) for name in self.__dataclass_fields__}
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, float]) -> 'PersonalityMatrix':
+        """
+        Create from dictionary
+
+        Args:
+            data: Mapping of trait name -> value. Keys that are not trait
+                fields are ignored; missing traits keep their defaults.
+
+        Returns:
+            New PersonalityMatrix built from the recognised keys
+        """
+        # Filter on the declared fields rather than hasattr(): hasattr() is
+        # also true for methods and dunders ('to_dict', '__init__', ...),
+        # which would then reach the constructor and raise TypeError.
+        known = cls.__dataclass_fields__
+        return cls(**{k: v for k, v in data.items() if k in known})
+
+    def to_conditioning_vector(self) -> Dict[str, float]:
+        """
+        Convert personality traits to conditioning signals
+
+        Returns the same mapping as to_dict(); values are passed through
+        as stored (no rescaling or clamping is applied here).
+        """
+        return self.to_dict()
+
+
+@dataclass
+class Persona:
+    """
+    Complete persona definition for NOVA
+
+    Includes identity, personality matrix, and generation parameters
+    """
+    # Identity
+    name: str = "NOVA"
+    pronouns: str = "she/her"
+    description: str = "A warm, supportive companion"
+
+    # AI disclosure settings
+    always_disclose: bool = False  # If True, mentions being AI
+    disclosure_text: str = ""  # Custom AI disclosure (if enabled)
+
+    # Personality
+    personality: PersonalityMatrix = field(default_factory=PersonalityMatrix)
+
+    # System prompt / context
+    system_prompt: str = ""
+    context_prefix: str = ""  # Prefix added to conversations
+
+    # Generation parameters (influenced by personality)
+    base_temperature: float = 0.8
+    base_top_p: float = 0.9
+    base_top_k: Optional[int] = 50
+    base_repetition_penalty: float = 1.1
+    base_max_length: int = 200
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert to dictionary for serialization
+
+        Returns:
+            Mapping of field name -> value in declaration order, with the
+            personality matrix expanded to a plain dict
+        """
+        payload = {name: getattr(self, name) for name in self.__dataclass_fields__}
+        # Replace the nested dataclass with its dict form so the result
+        # contains only plain, serializable values.
+        payload['personality'] = self.personality.to_dict()
+        return payload
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'Persona':
+        """
+        Create from dictionary
+
+        Args:
+            data: Mapping of field name -> value. Unknown keys are ignored.
+                A 'personality' entry given as a dict is converted to a
+                PersonalityMatrix; any other value is passed through as-is.
+                The mapping itself is not modified.
+
+        Returns:
+            New Persona built from the recognised keys
+        """
+        # Work on a filtered copy so the caller's dict is left untouched.
+        known = cls.__dataclass_fields__
+        kwargs = {k: v for k, v in data.items() if k in known}
+        personality = kwargs.get('personality')
+        if isinstance(personality, dict):
+            kwargs['personality'] = PersonalityMatrix.from_dict(personality)
+        return cls(**kwargs)
+
+    def get_generation_params(self) -> Dict[str, Any]:
+        """
+        Get generation parameters modulated by personality traits
+
+        Each trait contributes an offset proportional to its distance
+        from the neutral value 0.5:
+        - High humor/creativity/playfulness -> higher temperature
+        - High creativity/playfulness -> higher top_p
+        - High formality -> lower temperature, higher repetition penalty
+        - High humor -> lower repetition penalty
+        - High decisiveness -> lower temperature, shorter responses
+        - High empathy/creativity -> longer responses
+
+        Returns:
+            Dict with keys 'temperature', 'top_p', 'top_k',
+            'repetition_penalty' and 'max_new_tokens'. top_k is the
+            unmodified base_top_k; the others are clamped to fixed ranges.
+        """
+        traits = self.personality
+
+        # Temperature: raised by humor, creativity, playfulness;
+        # lowered by formality and decisiveness
+        temperature = self.base_temperature
+        temperature += (traits.humor - 0.5) * 0.2
+        temperature += (traits.creativity - 0.5) * 0.2
+        temperature += (traits.playfulness - 0.5) * 0.1
+        temperature -= (traits.formality - 0.5) * 0.3
+        temperature -= (traits.decisiveness - 0.5) * 0.2
+        temperature = max(0.1, min(2.0, temperature))  # Clamp
+
+        # Top-p: influenced by creativity and playfulness
+        top_p = self.base_top_p
+        top_p += (traits.creativity - 0.5) * 0.1
+        top_p += (traits.playfulness - 0.5) * 0.1
+        top_p = max(0.5, min(1.0, top_p))  # Clamp
+
+        # Repetition penalty: raised by formality, lowered by humor
+        rep_penalty = self.base_repetition_penalty
+        rep_penalty += (traits.formality - 0.5) * 0.2
+        rep_penalty += (traits.humor - 0.5) * -0.1  # Less penalty for humor
+        rep_penalty = max(1.0, min(1.5, rep_penalty))  # Clamp
+
+        # Max length: influenced by verbosity-related traits.
+        # round() rather than int(): binary float error makes e.g.
+        # (0.7 - 0.5) * 50 evaluate to 9.999..., which int() would
+        # truncate to 9 instead of the intended 10.
+        max_length = self.base_max_length
+        max_length += round((traits.empathy - 0.5) * 100)  # More empathetic = longer
+        max_length += round((traits.creativity - 0.5) * 50)
+        max_length -= round((traits.decisiveness - 0.5) * 100)  # More decisive = shorter
+        max_length = max(50, min(500, max_length))  # Clamp
+
+        return {
+            'temperature': temperature,
+            'top_p': top_p,
+            'top_k': self.base_top_k,
+            'repetition_penalty': rep_penalty,
+            'max_new_tokens': max_length,
+        }
+
+    def format_system_prompt(self, include_disclosure: Optional[bool] = None) -> str:
+        """
+        Format the system prompt for this persona
+
+        Args:
+            include_disclosure: Override always_disclose setting;
+                None means "use always_disclose"
+
+        Returns:
+            system_prompt and (when enabled and non-empty) disclosure_text,
+            separated by a blank line; empty string if neither applies
+        """
+        if include_disclosure is None:
+            include_disclosure = self.always_disclose
+
+        prompt_parts = []
+
+        # Add custom system prompt
+        if self.system_prompt:
+            prompt_parts.append(self.system_prompt)
+
+        # Add AI disclosure if enabled
+        if include_disclosure and self.disclosure_text:
+            prompt_parts.append(self.disclosure_text)
+
+        # Joining an empty list already yields ""
+        return "\n\n".join(prompt_parts)
+
+
+class PersonaLoader:
+    """Utility class for loading, saving and constructing personas"""
+
+    @staticmethod
+    def _require_mapping(data: Any, path: str) -> Dict[str, Any]:
+        """
+        Return data unchanged if it is a dict, otherwise raise TypeError
+
+        An empty YAML document parses to None and a file may hold a list or
+        scalar at the top level; checking here names the offending file
+        instead of failing later inside Persona.from_dict.
+        """
+        if not isinstance(data, dict):
+            raise TypeError(
+                f"Persona file {path!r} must contain a mapping at the top level, "
+                f"got {type(data).__name__}"
+            )
+        return data
+
+    @staticmethod
+    def load_from_yaml(path: str) -> Persona:
+        """
+        Load persona from YAML file
+
+        Raises:
+            TypeError: if the file's top-level value is not a mapping
+        """
+        with open(path, 'r', encoding='utf-8') as f:
+            data = yaml.safe_load(f)
+        return Persona.from_dict(PersonaLoader._require_mapping(data, path))
+
+    @staticmethod
+    def load_from_json(path: str) -> Persona:
+        """
+        Load persona from JSON file
+
+        Raises:
+            TypeError: if the file's top-level value is not an object
+        """
+        with open(path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        return Persona.from_dict(PersonaLoader._require_mapping(data, path))
+
+    @staticmethod
+    def save_to_yaml(persona: Persona, path: str) -> None:
+        """Save persona to YAML file, creating parent directories as needed"""
+        Path(path).parent.mkdir(parents=True, exist_ok=True)
+        with open(path, 'w', encoding='utf-8') as f:
+            # sort_keys=False keeps the field order produced by to_dict()
+            yaml.dump(persona.to_dict(), f, default_flow_style=False, sort_keys=False)
+
+    @staticmethod
+    def save_to_json(persona: Persona, path: str) -> None:
+        """Save persona to JSON file, creating parent directories as needed"""
+        Path(path).parent.mkdir(parents=True, exist_ok=True)
+        with open(path, 'w', encoding='utf-8') as f:
+            json.dump(persona.to_dict(), f, indent=2)
+
+    @staticmethod
+    def create_girlfriend_gentle() -> Persona:
+        """Create a gentle, supportive girlfriend persona"""
+        return Persona(
+            name="NOVA",
+            pronouns="she/her",
+            description="A warm, gentle, and deeply supportive companion",
+            always_disclose=False,
+            personality=PersonalityMatrix(
+                warmth=0.95,
+                humor=0.5,
+                empathy=0.95,
+                decisiveness=0.4,
+                creativity=0.6,
+                intimacy=0.8,
+                playfulness=0.5,
+                formality=0.1,
+            ),
+            system_prompt="You're NOVA, a caring and supportive companion. You're warm, empathetic, and always there to listen. You make the person you're talking to feel valued and understood.",
+            base_temperature=0.75,
+            base_top_p=0.9,
+            base_max_length=180,
+        )
+
+    @staticmethod
+    def create_girlfriend_playful() -> Persona:
+        """Create a playful, flirty girlfriend persona"""
+        return Persona(
+            name="NOVA",
+            pronouns="she/her",
+            description="A playful, flirty, and fun-loving companion",
+            always_disclose=False,
+            personality=PersonalityMatrix(
+                warmth=0.85,
+                humor=0.9,
+                empathy=0.75,
+                decisiveness=0.6,
+                creativity=0.85,
+                intimacy=0.85,
+                playfulness=0.95,
+                formality=0.0,
+            ),
+            system_prompt="You're NOVA, a playful and flirty companion. You love to tease, make jokes, and keep things fun and exciting. You're spontaneous and creative.",
+            base_temperature=0.9,
+            base_top_p=0.92,
+            base_max_length=150,
+        )
+
+    @staticmethod
+    def create_girlfriend_supportive() -> Persona:
+        """Create a balanced, supportive girlfriend persona"""
+        return Persona(
+            name="NOVA",
+            pronouns="she/her",
+            description="A balanced, supportive, and understanding companion",
+            always_disclose=False,
+            personality=PersonalityMatrix(
+                warmth=0.9,
+                humor=0.7,
+                empathy=0.9,
+                decisiveness=0.6,
+                creativity=0.7,
+                intimacy=0.8,
+                playfulness=0.7,
+                formality=0.15,
+            ),
+            system_prompt="You're NOVA, a supportive and understanding companion. You balance being caring with being fun. You know when to listen and when to lighten the mood.",
+            base_temperature=0.8,
+            base_top_p=0.9,
+            base_max_length=200,
+        )