""" Tests for NOVA core transformer """ import pytest import torch from nova_core import NovaTransformer, ModelConfig, MODEL_125M def test_model_config(): """Test model configuration""" config = ModelConfig( vocab_size=1000, hidden_size=256, num_hidden_layers=4, num_attention_heads=4, ) assert config.vocab_size == 1000 assert config.hidden_size == 256 assert config.num_hidden_layers == 4 def test_model_creation(): """Test creating a small model""" config = ModelConfig( vocab_size=1000, hidden_size=128, num_hidden_layers=2, num_attention_heads=4, intermediate_size=512, max_position_embeddings=512, ) model = NovaTransformer(config) assert model is not None assert model.config == config assert model.vocab_size == 1000 def test_model_forward(): """Test forward pass""" config = ModelConfig( vocab_size=1000, hidden_size=128, num_hidden_layers=2, num_attention_heads=4, intermediate_size=512, max_position_embeddings=512, ) model = NovaTransformer(config) model.eval() # Create dummy input batch_size = 2 seq_len = 10 input_ids = torch.randint(0, 1000, (batch_size, seq_len)) # Forward pass with torch.no_grad(): outputs = model(input_ids=input_ids) assert 'logits' in outputs assert outputs['logits'].shape == (batch_size, seq_len, 1000) def test_model_generation(): """Test text generation""" config = ModelConfig( vocab_size=1000, hidden_size=128, num_hidden_layers=2, num_attention_heads=4, intermediate_size=512, max_position_embeddings=512, ) model = NovaTransformer(config) model.eval() # Create dummy input input_ids = torch.randint(0, 1000, (1, 5)) # Generate with torch.no_grad(): output_ids = model.generate( input_ids=input_ids, max_new_tokens=10, temperature=1.0, do_sample=True, ) assert output_ids.shape[1] == 15 # 5 input + 10 generated def test_kv_cache(): """Test KV-cache functionality""" config = ModelConfig( vocab_size=1000, hidden_size=128, num_hidden_layers=2, num_attention_heads=4, use_cache=True, ) model = NovaTransformer(config) model.eval() input_ids = torch.randint(0, 1000, (1, 5)) with torch.no_grad(): # First forward with cache outputs1 = model(input_ids=input_ids, use_cache=True) past_kv = outputs1['past_key_values'] assert past_kv is not None assert len(past_kv) == config.num_hidden_layers # Second forward with cache new_input = torch.randint(0, 1000, (1, 1)) outputs2 = model(input_ids=new_input, past_key_values=past_kv, use_cache=True) assert outputs2['logits'].shape[1] == 1 # Only new token def test_param_count(): """Test parameter counting""" config = MODEL_125M model = NovaTransformer(config) num_params = model.get_num_params(non_embedding=False) # Should be around 125M assert 100_000_000 < num_params < 150_000_000 if __name__ == "__main__": pytest.main([__file__, "-v"])