Initial commit: NOVA - Neuro-Optimizing Versatile Agent
Complete transformer LLM built from scratch with:

Core Features:
- Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache)
- SentencePiece tokenizer (BPE/Unigram)
- Training pipeline (AMP, gradient checkpointing, DDP)
- Persona system with personality matrix (NO AI disclosure by default)
- Genetic evolution (NOVA-EVO) for hyperparameter optimization
- Legal-only data pipeline with license tracking
- Chat interface (CLI + REST API)
- Conversation memory (SQLite)

Model Sizes:
- 125M, 350M, 1.3B, 3B parameters
- Local-first, runs on CPU or GPU
- Python 3.10.6+, PyTorch 2.0+

Personas:
- girlfriend_gentle (high warmth, high empathy)
- girlfriend_playful (high humor, high playfulness)
- girlfriend_supportive (balanced, default)

Documentation:
- Complete README with quickstart
- Model card with ethical considerations
- Privacy documentation (local-first, zero telemetry)
- Data licenses and attribution
- Contributing guide

Infrastructure:
- GitHub Actions CI/CD
- Comprehensive test suite
- Quickstart script
- CLI tool

License: Apache 2.0

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
nova_core/normalization.py (new file, +74 lines)
@@ -0,0 +1,74 @@
"""
Normalization layers for NOVA
"""

import torch
import torch.nn as nn


class RMSNorm(nn.Module):
    """
    Root Mean Square Layer Normalization
    More efficient than LayerNorm, used in LLaMA and other modern LLMs
    """

    def __init__(self, hidden_size: int, eps: float = 1e-6):
        """
        Args:
            hidden_size: Size of the hidden dimension
            eps: Small constant for numerical stability
        """
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """
        Apply RMS normalization

        Args:
            hidden_states: Input tensor [..., hidden_size]

        Returns:
            Normalized tensor
        """
        input_dtype = hidden_states.dtype
        hidden_states = hidden_states.to(torch.float32)

        # Compute RMS
        variance = hidden_states.pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)

        return self.weight * hidden_states.to(input_dtype)


class LayerNorm(nn.LayerNorm):
    """
    Standard LayerNorm with optional bias
    Wrapper around PyTorch's LayerNorm for consistency
    """

    def __init__(self, hidden_size: int, eps: float = 1e-6, bias: bool = True):
        super().__init__(hidden_size, eps=eps, elementwise_affine=True)
        if not bias:
            self.bias = None


def get_norm_layer(norm_type: str, hidden_size: int, eps: float = 1e-6) -> nn.Module:
    """
    Factory function to get normalization layer

    Args:
        norm_type: Type of normalization ('rmsnorm' or 'layernorm')
        hidden_size: Size of hidden dimension
        eps: Epsilon for numerical stability

    Returns:
        Normalization layer
    """
    if norm_type.lower() == "rmsnorm":
        return RMSNorm(hidden_size, eps)
    elif norm_type.lower() == "layernorm":
        return LayerNorm(hidden_size, eps)
    else:
        raise ValueError(f"Unknown norm_type: {norm_type}. Use 'rmsnorm' or 'layernorm'")
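Below is a minimal usage sketch for the module added in this commit. It assumes the repository root is on the Python path so that nova_core is importable; the hidden size and tensor shape are arbitrary illustration values, not taken from NOVA's configs.

# usage sketch -- illustrative only, not part of the commit
import torch
from nova_core.normalization import get_norm_layer

hidden_size = 512                                # arbitrary example dimension
norm = get_norm_layer("rmsnorm", hidden_size)    # -> RMSNorm(512, eps=1e-6)

x = torch.randn(2, 16, hidden_size)              # [batch, seq_len, hidden_size]
y = norm(x)

# RMSNorm upcasts to float32, rescales each vector to unit root-mean-square,
# then applies the learned weight (initialized to ones), so per-token RMS ~= 1.
rms = y.float().pow(2).mean(-1).sqrt()
print(y.shape, rms.mean().item())                # torch.Size([2, 16, 512]), ~1.0

The float32 upcast inside RMSNorm.forward keeps the reduction numerically stable when the surrounding model runs under mixed precision (the AMP training path mentioned in the commit message), and the result is cast back to the input dtype before the learned scale is applied.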