"""GPU/CUDA verification script for Lyra.

Tests PyTorch CUDA functionality and reports GPU capabilities.
"""

import sys

import torch


def test_cuda():
    """Probe CUDA availability and exercise basic GPU tensor operations.

    Prints a human-readable report covering the CUDA/cuDNN versions,
    per-device properties, device-0 memory status, and matmul checks in
    FP32/FP16 (and BF16 where the hardware supports it).

    Returns:
        bool: True when CUDA is available and every check passes,
        False otherwise (also when CUDA is absent entirely).
    """
    print("=" * 60)
    print("CUDA/GPU Verification for Lyra")
    print("=" * 60)

    # Basic CUDA info
    print(f"\n1. PyTorch Version: {torch.__version__}")
    print(f"2. CUDA Available: {torch.cuda.is_available()}")

    if not torch.cuda.is_available():
        print("\n[ERROR] CUDA is not available!")
        return False

    print(f"3. CUDA Version: {torch.version.cuda}")
    print(f"4. cuDNN Version: {torch.backends.cudnn.version()}")
    print(f"5. Number of GPUs: {torch.cuda.device_count()}")

    # Per-GPU details
    for i in range(torch.cuda.device_count()):
        print(f"\n--- GPU {i} ---")
        print(f"Name: {torch.cuda.get_device_name(i)}")
        props = torch.cuda.get_device_properties(i)
        print(f"Compute Capability: {props.major}.{props.minor}")
        print(f"Total Memory: {props.total_memory / 1024**3:.2f} GB")
        print(f"Multi-Processors: {props.multi_processor_count}")

    # Memory status for device 0 ("Free" here is total minus allocated;
    # it ignores memory held by other processes on the device)
    print("\n--- Memory Status ---")
    print(f"Allocated: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
    print(f"Cached: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB")
    print(f"Free: {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1024**3:.2f} GB")

    # Tensor operations test
    print("\n--- Testing Tensor Operations ---")
    try:
        x = torch.randn(1000, 1000, device='cuda')
        y = torch.randn(1000, 1000, device='cuda')

        # FP32 matrix multiplication
        z = torch.matmul(x, y)
        print(f"[OK] Matrix multiplication: {z.shape}")
        print(f"[OK] Tensor device: {z.device}")
        print(f"[OK] Tensor dtype: {z.dtype}")

        # FP16 path
        x_fp16 = x.half()
        y_fp16 = y.half()
        z_fp16 = torch.matmul(x_fp16, y_fp16)
        print(f"[OK] FP16 operations: {z_fp16.dtype}")

        # BF16 path, only on hardware that supports it
        if torch.cuda.is_bf16_supported():
            x_bf16 = x.bfloat16()
            y_bf16 = y.bfloat16()
            z_bf16 = torch.matmul(x_bf16, y_bf16)
            print(f"[OK] BF16 operations: {z_bf16.dtype}")
            # BUGFIX: the original never deleted the BF16 tensors, so
            # empty_cache() below could not reclaim their memory.
            del x_bf16, y_bf16, z_bf16
        else:
            print("[WARNING] BF16 not supported")

        # Drop references before flushing the caching allocator so the
        # blocks are actually returned to the driver.
        del x, y, z, x_fp16, y_fp16, z_fp16
        torch.cuda.empty_cache()

        print("\n[SUCCESS] All GPU tests passed!")
        return True
    except Exception as e:
        # Broad catch is deliberate: this is a diagnostic script and any
        # failure should be reported rather than crash the run.
        print(f"\n[ERROR] GPU test failed: {e}")
        return False


if __name__ == "__main__":
    success = test_cuda()
    # sys.exit is guaranteed to exist; the bare exit() builtin is only
    # injected by the site module and may be absent.
    sys.exit(0 if success else 1)