NOVA/nova_core/normalization.py
Dani a7f091aa45 Initial commit: NOVA - Neuro-Optimizing Versatile Agent
Complete transformer LLM built from scratch with:

Core Features:
- Full transformer architecture (RoPE, RMSNorm, SwiGLU, KV-cache)
- SentencePiece tokenizer (BPE/Unigram)
- Training pipeline (AMP, gradient checkpointing, DDP)
- Persona system with personality matrix (NO AI disclosure by default)
- Genetic evolution (NOVA-EVO) for hyperparameter optimization
- Legal-only data pipeline with license tracking
- Chat interface (CLI + REST API)
- Conversation memory (SQLite)

Model Sizes:
- 125M, 350M, 1.3B, 3B parameters
- Local-first, runs on CPU or GPU
- Python 3.10.6+, PyTorch 2.0+

Personas:
- girlfriend_gentle (high warmth, high empathy)
- girlfriend_playful (high humor, high playfulness)
- girlfriend_supportive (balanced, default)

Documentation:
- Complete README with quickstart
- Model card with ethical considerations
- Privacy documentation (local-first, zero telemetry)
- Data licenses and attribution
- Contributing guide

Infrastructure:
- GitHub Actions CI/CD
- Comprehensive test suite
- Quickstart script
- CLI tool

License: Apache 2.0

🤖 Generated with Claude Code
https://claude.com/claude-code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-12 20:56:37 -04:00


"""
Normalization layers for NOVA
"""
import torch
import torch.nn as nn
class RMSNorm(nn.Module):
    """
    Root Mean Square Layer Normalization

    More efficient than LayerNorm, used in LLaMA and other modern LLMs
    """

    def __init__(self, hidden_size: int, eps: float = 1e-6):
        """
        Args:
            hidden_size: Size of the hidden dimension
            eps: Small constant for numerical stability
        """
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.eps = eps

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        """
        Apply RMS normalization

        Args:
            hidden_states: Input tensor [..., hidden_size]

        Returns:
            Normalized tensor
        """
        input_dtype = hidden_states.dtype
        # Compute statistics in float32 for numerical stability, then cast back
        hidden_states = hidden_states.to(torch.float32)

        # Compute RMS over the last dimension
        variance = hidden_states.pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)

        return self.weight * hidden_states.to(input_dtype)
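
As a quick check of the forward pass above (an illustrative sketch, not part of the original file; it assumes the RMSNorm class defined here is in scope), the output should match a manual computation of x / sqrt(mean(x^2) + eps) scaled by the learned weight:

    import torch

    norm = RMSNorm(hidden_size=8)
    x = torch.randn(2, 4, 8)

    out = norm(x)
    # Manual RMS normalization over the last dimension
    expected = norm.weight * (x / torch.sqrt(x.pow(2).mean(-1, keepdim=True) + norm.eps))

    assert out.shape == x.shape
    assert torch.allclose(out, expected, atol=1e-6)
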
class LayerNorm(nn.LayerNorm):
    """
    Standard LayerNorm with optional bias

    Wrapper around PyTorch's LayerNorm for consistency
    """

    def __init__(self, hidden_size: int, eps: float = 1e-6, bias: bool = True):
        super().__init__(hidden_size, eps=eps, elementwise_affine=True)
        if not bias:
            # Assigning None re-registers the bias parameter as None, removing it;
            # F.layer_norm (called by nn.LayerNorm.forward) accepts bias=None.
            self.bias = None
def get_norm_layer(norm_type: str, hidden_size: int, eps: float = 1e-6) -> nn.Module:
    """
    Factory function to get a normalization layer

    Args:
        norm_type: Type of normalization ('rmsnorm' or 'layernorm')
        hidden_size: Size of the hidden dimension
        eps: Epsilon for numerical stability

    Returns:
        Normalization layer
    """
    if norm_type.lower() == "rmsnorm":
        return RMSNorm(hidden_size, eps)
    elif norm_type.lower() == "layernorm":
        return LayerNorm(hidden_size, eps)
    else:
        raise ValueError(f"Unknown norm_type: {norm_type}. Use 'rmsnorm' or 'layernorm'")