""" Fitness evaluator for NOVA-EVO """ import torch import time from typing import Dict from pathlib import Path from .config import Individual, EvolutionConfig from nova_core import NovaTransformer, ModelConfig from nova_train import NovaTrainer, TrainingConfig class FitnessEvaluator: """ Evaluates fitness of individuals by training and measuring metrics Metrics: - Loss/perplexity (quality of learning) - Latency (inference speed) - Memory usage (peak RAM/VRAM) - Chat quality (optional, if eval set available) """ def __init__( self, base_model_config: ModelConfig, evo_config: EvolutionConfig, train_dataset, eval_dataset=None, device: str = "auto", ): """ Args: base_model_config: Base model configuration evo_config: Evolution configuration train_dataset: Training dataset for fitness eval eval_dataset: Optional evaluation dataset device: Device for training """ self.base_model_config = base_model_config self.evo_config = evo_config self.train_dataset = train_dataset self.eval_dataset = eval_dataset self.device = device def evaluate(self, individual: Individual) -> Dict[str, float]: """ Evaluate fitness of an individual Args: individual: Individual to evaluate Returns: Dictionary of metrics """ # Create model with individual's architecture choices model_config = self._create_model_config(individual) model = NovaTransformer(model_config) # Create training config with individual's hyperparameters train_config = self._create_training_config(individual) # Train for eval_steps train_loader = self._create_dataloader( self.train_dataset, batch_size=individual.batch_size ) # Quick training loss = self._quick_train(model, train_config, train_loader) # Measure latency latency_ms = self._measure_latency(model) # Measure memory memory_mb = self._measure_memory(model) # Calculate perplexity perplexity = torch.exp(torch.tensor(loss)).item() if loss < 100 else float('inf') return { 'loss': loss, 'perplexity': perplexity, 'latency_ms': latency_ms, 'memory_mb': memory_mb, 'quality_score': 0.0, # TODO: Implement chat quality eval } def _create_model_config(self, individual: Individual) -> ModelConfig: """Create model config from individual's genes""" config = ModelConfig( vocab_size=self.base_model_config.vocab_size, hidden_size=self.base_model_config.hidden_size, num_hidden_layers=self.base_model_config.num_hidden_layers, num_attention_heads=self.base_model_config.num_attention_heads, intermediate_size=self.base_model_config.intermediate_size, max_position_embeddings=self.base_model_config.max_position_embeddings, # Individual's choices rope_theta=individual.rope_theta, hidden_act=individual.hidden_act, norm_type=individual.norm_type, ) return config def _create_training_config(self, individual: Individual) -> TrainingConfig: """Create training config from individual's hyperparameters""" config = TrainingConfig( learning_rate=individual.learning_rate, batch_size=individual.batch_size, warmup_steps=individual.warmup_steps, weight_decay=individual.weight_decay, num_epochs=1, # Just one pass for eval save_steps=999999, # Don't save during eval device=self.device, ) return config def _create_dataloader(self, dataset, batch_size: int): """Create dataloader for training""" from torch.utils.data import DataLoader return DataLoader( dataset, batch_size=batch_size, shuffle=True, num_workers=0, ) def _quick_train( self, model: NovaTransformer, train_config: TrainingConfig, train_loader ) -> float: """ Quick training for evaluation Returns: Final loss """ # Limit to eval_steps limited_loader = [] for i, batch in 
enumerate(train_loader): if i >= self.evo_config.eval_steps: break limited_loader.append(batch) if not limited_loader: return float('inf') # Simple training loop device = torch.device(self.device if self.device != "auto" else "cuda" if torch.cuda.is_available() else "cpu") model.to(device) model.train() optimizer = torch.optim.AdamW( model.parameters(), lr=train_config.learning_rate, weight_decay=train_config.weight_decay, ) total_loss = 0.0 num_batches = 0 for batch in limited_loader: input_ids = batch['input_ids'].to(device) labels = batch.get('labels', input_ids).to(device) outputs = model(input_ids=input_ids) logits = outputs['logits'] # Calculate loss shift_logits = logits[..., :-1, :].contiguous() shift_labels = labels[..., 1:].contiguous() loss = torch.nn.functional.cross_entropy( shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1), ignore_index=-100 ) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() num_batches += 1 return total_loss / num_batches if num_batches > 0 else float('inf') @torch.no_grad() def _measure_latency(self, model: NovaTransformer) -> float: """ Measure average inference latency in milliseconds Args: model: Model to measure Returns: Average latency in ms """ device = next(model.parameters()).device model.eval() # Dummy input input_ids = torch.randint(0, model.config.vocab_size, (1, 128), device=device) # Warmup for _ in range(3): _ = model(input_ids=input_ids) # Measure num_runs = 10 start = time.time() for _ in range(num_runs): _ = model(input_ids=input_ids) if device.type == 'cuda': torch.cuda.synchronize() elapsed = (time.time() - start) / num_runs return elapsed * 1000 # Convert to ms def _measure_memory(self, model: NovaTransformer) -> float: """ Measure peak memory usage in MB Args: model: Model to measure Returns: Peak memory in MB """ # Count parameters num_params = sum(p.numel() for p in model.parameters()) # Approximate memory (4 bytes per float32 parameter) memory_mb = (num_params * 4) / (1024 ** 2) return memory_mb
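

if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Minimal usage sketch (smoke test), not part of the evaluator API.
    # Assumptions: ModelConfig, EvolutionConfig, and Individual accept the
    # keyword arguments referenced elsewhere in this module (any other
    # required fields are assumed to have defaults), and _ToyDataset is a
    # hypothetical stand-in for a real tokenized dataset whose items are
    # dicts with an 'input_ids' LongTensor.
    # ------------------------------------------------------------------
    from torch.utils.data import Dataset

    class _ToyDataset(Dataset):
        """Tiny synthetic dataset of random token ids, only for smoke-testing."""

        def __init__(self, vocab_size: int = 1000, seq_len: int = 32, size: int = 64):
            self.samples = torch.randint(0, vocab_size, (size, seq_len))

        def __len__(self):
            return len(self.samples)

        def __getitem__(self, idx):
            return {'input_ids': self.samples[idx]}

    base_cfg = ModelConfig(vocab_size=1000)    # assumed constructor signature
    evo_cfg = EvolutionConfig(eval_steps=5)    # assumed constructor signature
    evaluator = FitnessEvaluator(base_cfg, evo_cfg, train_dataset=_ToyDataset())

    candidate = Individual(                    # assumed gene/hyperparameter fields
        rope_theta=10000.0,
        hidden_act="silu",                     # assumed to be a valid activation name
        norm_type="rmsnorm",                   # assumed to be a valid norm type
        learning_rate=3e-4,
        batch_size=8,
        warmup_steps=10,
        weight_decay=0.01,
    )
    print(evaluator.evaluate(candidate))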