Complete transformer LLM built from scratch with:

Core Features:
- Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache)
- SentencePiece tokenizer (BPE/Unigram)
- Training pipeline (AMP, gradient checkpointing, DDP)
- Persona system with personality matrix (NO AI disclosure by default)
- Genetic evolution (NOVA-EVO) for hyperparameter optimization
- Legal-only data pipeline with license tracking
- Chat interface (CLI + REST API)
- Conversation memory (SQLite)

Model Sizes:
- 125M, 350M, 1.3B, 3B parameters
- Local-first, runs on CPU or GPU
- Python 3.10.6+, PyTorch 2.0+

Personas:
- girlfriend_gentle (high warmth, high empathy)
- girlfriend_playful (high humor, high playfulness)
- girlfriend_supportive (balanced, default)

Documentation:
- Complete README with quickstart
- Model card with ethical considerations
- Privacy documentation (local-first, zero telemetry)
- Data licenses and attribution
- Contributing guide

Infrastructure:
- GitHub Actions CI/CD
- Comprehensive test suite
- Quickstart script
- CLI tool

License: Apache 2.0

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
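For orientation, here is a minimal sketch of reading the master configuration that follows. The filename config.yaml and the PyYAML dependency are assumptions for illustration; the repository's own loading code is not shown here.

    import yaml  # PyYAML; assumed dependency, not confirmed by the repo

    # Load the master configuration into a plain dict
    with open("config.yaml") as f:
        cfg = yaml.safe_load(f)

    print(cfg["model"]["size"])       # -> "125m"
    print(cfg["persona"]["default"])  # -> "girlfriend_supportive"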
# NOVA Master Configuration
# Neuro-Optimizing Versatile Agent

# Hardware and compute
hardware:
  device: auto              # auto, cpu, cuda, cuda:0, etc.
  num_threads: 4
  allow_cuda: true
  cuda_version_min: "12.0"

# Model selection
model:
  size: 125m                # 125m, 350m, 1.3b, 3b
  config_path: null         # Optional custom config
  checkpoint_path: null     # Path to load weights

# Persona and personality
persona:
  default: girlfriend_supportive  # girlfriend_gentle, girlfriend_playful, girlfriend_supportive
  always_disclose: false          # NO AI disclosure by default
  custom_persona_path: null       # Path to custom persona YAML

# Evolution (NOVA-EVO)
evolution:
  enabled: false            # Opt-in
  budget: small             # small, medium, large
  population_size: 20
  num_generations: 10
  max_time_hours: 24.0

# Data
data:
  legal_only: true          # ONLY use properly licensed data
  sources:
    - toy-dataset           # Default for offline demo
  cache_dir: data/cache
  preprocessed_dir: data/processed

# Training defaults
training:
  learning_rate: 3.0e-4
  batch_size: 8
  gradient_accumulation_steps: 4
  warmup_steps: 1000
  max_steps: 100000
  save_steps: 1000
  eval_steps: 500
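
# With the defaults above, the effective batch size is
# batch_size * gradient_accumulation_steps = 8 * 4 = 32 sequences per
# optimizer step (assuming standard gradient-accumulation semantics).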

# Inference defaults
inference:
  max_new_tokens: 200
  temperature: 0.8
  top_p: 0.9
  top_k: 50
  repetition_penalty: 1.1

# Memory
memory:
  enabled: true
  db_path: memory.db
  max_context_length: 2048

# Logging and monitoring
logging:
  level: INFO
  wandb_enabled: false
  wandb_project: null
  tensorboard_enabled: false

# Safety
safety:
  content_filter: true      # Basic safety filters
  max_generation_length: 500
  timeout_seconds: 30
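
For illustration only: a minimal sketch of how the hardware block above might be resolved at startup. resolve_device is a hypothetical helper, not the repository's actual loader, and the semantics of device: auto (prefer CUDA when allow_cuda is true and a GPU is visible, otherwise CPU) are an assumption; PyTorch is assumed available.

    import torch

    def resolve_device(hw: dict) -> torch.device:
        """Map the hardware section of the config to a torch.device.

        'auto' is assumed to prefer CUDA when allow_cuda is true and a
        GPU is visible, falling back to CPU otherwise.
        """
        name = hw["device"]
        if name == "auto":
            use_cuda = hw.get("allow_cuda", True) and torch.cuda.is_available()
            name = "cuda" if use_cuda else "cpu"
        # Honor the configured CPU thread count
        torch.set_num_threads(hw.get("num_threads", 4))
        return torch.device(name)

    # Example with the defaults above:
    # resolve_device({"device": "auto", "allow_cuda": True, "num_threads": 4})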