Complete transformer LLM built from scratch with:

Core Features:
- Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache)
- SentencePiece tokenizer (BPE/Unigram)
- Training pipeline (AMP, gradient checkpointing, DDP)
- Persona system with personality matrix (NO AI disclosure by default)
- Genetic evolution (NOVA-EVO) for hyperparameter optimization
- Legal-only data pipeline with license tracking
- Chat interface (CLI + REST API)
- Conversation memory (SQLite)

Model Sizes:
- 125M, 350M, 1.3B, 3B parameters
- Local-first, runs on CPU or GPU
- Python 3.10.6+, PyTorch 2.0+

Personas:
- girlfriend_gentle (high warmth, high empathy)
- girlfriend_playful (high humor, high playfulness)
- girlfriend_supportive (balanced, default)

Documentation:
- Complete README with quickstart
- Model card with ethical considerations
- Privacy documentation (local-first, zero telemetry)
- Data licenses and attribution
- Contributing guide

Infrastructure:
- GitHub Actions CI/CD
- Comprehensive test suite
- Quickstart script
- CLI tool

License: Apache 2.0

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
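Of the architecture components listed above, RMSNorm is compact enough to sketch inline. The snippet below is a generic PyTorch illustration of the technique (normalize by the root mean square over the hidden dimension with a learned per-dimension gain, skipping LayerNorm's mean-centering and bias), not the actual nova_core module:

import torch
import torch.nn as nn

class RMSNorm(nn.Module):
    """Generic RMSNorm sketch: x / RMS(x) scaled by a learned per-dim gain."""
    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Reciprocal root of the mean square over the hidden dimension
        inv_rms = torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return self.weight * (x * inv_rms)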
"""
|
|
Tests for NOVA core transformer
|
|
"""
|
|
|
|
import pytest
|
|
import torch
|
|
from nova_core import NovaTransformer, ModelConfig, MODEL_125M
|
|
|
|
|
|
def test_model_config():
    """Test model configuration"""
    config = ModelConfig(
        vocab_size=1000,
        hidden_size=256,
        num_hidden_layers=4,
        num_attention_heads=4,
    )

    assert config.vocab_size == 1000
    assert config.hidden_size == 256
    assert config.num_hidden_layers == 4

def test_model_creation():
    """Test creating a small model"""
    config = ModelConfig(
        vocab_size=1000,
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=512,
        max_position_embeddings=512,
    )

    model = NovaTransformer(config)

    assert model is not None
    assert model.config == config
    assert model.vocab_size == 1000

def test_model_forward():
    """Test forward pass"""
    config = ModelConfig(
        vocab_size=1000,
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=512,
        max_position_embeddings=512,
    )

    model = NovaTransformer(config)
    model.eval()

    # Create dummy input
    batch_size = 2
    seq_len = 10
    input_ids = torch.randint(0, 1000, (batch_size, seq_len))

    # Forward pass
    with torch.no_grad():
        outputs = model(input_ids=input_ids)

    assert 'logits' in outputs
    assert outputs['logits'].shape == (batch_size, seq_len, 1000)

def test_model_generation():
    """Test text generation"""
    config = ModelConfig(
        vocab_size=1000,
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=512,
        max_position_embeddings=512,
    )

    model = NovaTransformer(config)
    model.eval()

    # Create dummy input
    input_ids = torch.randint(0, 1000, (1, 5))

    # Generate
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            max_new_tokens=10,
            temperature=1.0,
            do_sample=True,
        )

    assert output_ids.shape[1] == 15  # 5 input + 10 generated

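# Illustrative helper, not part of nova_core: it documents what the
# temperature/do_sample arguments above conventionally mean (scale the
# final-position logits, softmax, then draw from the resulting distribution).
# NovaTransformer.generate may differ internally.
def _reference_temperature_sample(logits: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
    """Sample one next-token id per batch row from (batch, seq, vocab) logits."""
    scaled = logits[:, -1, :] / temperature  # <1 sharpens, >1 flattens the distribution
    probs = torch.softmax(scaled, dim=-1)
    return torch.multinomial(probs, num_samples=1)  # shape: (batch, 1)
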
def test_kv_cache():
    """Test KV-cache functionality"""
    config = ModelConfig(
        vocab_size=1000,
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        use_cache=True,
    )

    model = NovaTransformer(config)
    model.eval()

    input_ids = torch.randint(0, 1000, (1, 5))

    with torch.no_grad():
        # First forward with cache
        outputs1 = model(input_ids=input_ids, use_cache=True)
        past_kv = outputs1['past_key_values']

        assert past_kv is not None
        assert len(past_kv) == config.num_hidden_layers

        # Second forward with cache
        new_input = torch.randint(0, 1000, (1, 1))
        outputs2 = model(input_ids=new_input, past_key_values=past_kv, use_cache=True)

        assert outputs2['logits'].shape[1] == 1  # Only new token

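# Illustrative follow-on test. Assumption: the same forward contract as
# test_kv_cache above (model(...) returns a dict with 'logits' and
# 'past_key_values'). It shows the standard way a greedy decode loop reuses
# the cache so each step feeds only the newest token, not the full prefix.
def test_kv_cache_incremental_greedy_decode():
    """Greedy decoding with the KV-cache, one token per forward pass"""
    config = ModelConfig(
        vocab_size=1000,
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        use_cache=True,
    )

    model = NovaTransformer(config)
    model.eval()

    tokens = torch.randint(0, 1000, (1, 5))

    with torch.no_grad():
        # Prefill: run the whole prompt once and keep the cache
        outputs = model(input_ids=tokens, use_cache=True)
        past_kv = outputs['past_key_values']

        # Decode: three greedy steps, each feeding only the new token
        for _ in range(3):
            next_token = outputs['logits'][:, -1, :].argmax(dim=-1, keepdim=True)
            tokens = torch.cat([tokens, next_token], dim=1)
            outputs = model(input_ids=next_token, past_key_values=past_kv, use_cache=True)
            past_kv = outputs['past_key_values']

    assert tokens.shape == (1, 8)  # 5 prompt + 3 generated
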
def test_param_count():
    """Test parameter counting"""
    config = MODEL_125M

    model = NovaTransformer(config)

    num_params = model.get_num_params(non_embedding=False)

    # Should be around 125M
    assert 100_000_000 < num_params < 150_000_000

if __name__ == "__main__":
|
|
pytest.main([__file__, "-v"])
|