"""System resource monitoring for intelligent model selection.""" import psutil import time from typing import Dict, List, Optional, Tuple import logging # Try to import pynvml for NVIDIA GPU monitoring try: import pynvml PYNVML_AVAILABLE = True except ImportError: PYNVML_AVAILABLE = False pynvml = None class ResourceMonitor: """Monitor system resources for model selection decisions.""" def __init__(self, memory_threshold: float = 80.0, cpu_threshold: float = 80.0): """Initialize resource monitor. Args: memory_threshold: Memory usage % that triggers model switching cpu_threshold: CPU usage % that triggers model switching """ self.memory_threshold = memory_threshold self.cpu_threshold = cpu_threshold self.logger = logging.getLogger(__name__) # Track resource history for trend analysis self.resource_history: List[Dict[str, float]] = [] self.max_history_size = 100 # Keep last 100 samples # Cache GPU info to avoid repeated initialization overhead self._gpu_cache: Optional[Dict[str, float]] = None self._gpu_cache_time: float = 0 self._gpu_cache_duration: float = 1.0 # Cache for 1 second # Track if we've already tried pynvml and failed self._pynvml_failed: bool = False def get_current_resources(self) -> Dict[str, float]: """Get current system resource usage. Returns: Dict with: - memory_percent: Memory usage percentage (0-100) - cpu_percent: CPU usage percentage (0-100) - available_memory_gb: Available RAM in GB - gpu_vram_gb: Available GPU VRAM in GB (0 if no GPU) - gpu_total_vram_gb: Total VRAM capacity in GB (0 if no GPU) - gpu_used_vram_gb: Used VRAM in GB (0 if no GPU) - gpu_free_vram_gb: Available VRAM in GB (0 if no GPU) - gpu_utilization_percent: GPU utilization (0-100, 0 if no GPU) - gpu_temperature_c: GPU temperature in Celsius (0 if no GPU) """ try: # Memory information memory = psutil.virtual_memory() memory_percent = memory.percent available_memory_gb = memory.available / (1024**3) # CPU information (use very short interval for performance) cpu_percent = psutil.cpu_percent(interval=0.05) # GPU information (if available) - with caching for performance gpu_info = self._get_cached_gpu_info() return { "memory_percent": memory_percent, "cpu_percent": cpu_percent, "available_memory_gb": available_memory_gb, "gpu_vram_gb": gpu_info.get( "free_vram_gb", 0.0 ), # Backward compatibility "gpu_total_vram_gb": gpu_info.get("total_vram_gb", 0.0), "gpu_used_vram_gb": gpu_info.get("used_vram_gb", 0.0), "gpu_free_vram_gb": gpu_info.get("free_vram_gb", 0.0), "gpu_utilization_percent": gpu_info.get("utilization_percent", 0.0), "gpu_temperature_c": gpu_info.get("temperature_c", 0.0), } except Exception as e: self.logger.error(f"Failed to get system resources: {e}") return { "memory_percent": 0.0, "cpu_percent": 0.0, "available_memory_gb": 0.0, "gpu_vram_gb": 0.0, "gpu_total_vram_gb": 0.0, "gpu_used_vram_gb": 0.0, "gpu_free_vram_gb": 0.0, "gpu_utilization_percent": 0.0, "gpu_temperature_c": 0.0, } def get_resource_trend(self, window_minutes: int = 5) -> Dict[str, str]: """Analyze resource usage trend over time window. 

        Args:
            window_minutes: Time window in minutes to analyze

        Returns:
            Dict with trend indicators: "increasing", "decreasing", "stable"
        """
        cutoff_time = time.time() - (window_minutes * 60)

        # Filter recent history
        recent_data = [
            entry
            for entry in self.resource_history
            if entry.get("timestamp", 0) > cutoff_time
        ]

        if len(recent_data) < 2:
            return {"memory": "insufficient_data", "cpu": "insufficient_data"}

        # Calculate trends (history entries use the get_current_resources() keys)
        memory_trend = self._calculate_trend(
            [entry["memory_percent"] for entry in recent_data]
        )
        cpu_trend = self._calculate_trend(
            [entry["cpu_percent"] for entry in recent_data]
        )

        return {
            "memory": memory_trend,
            "cpu": cpu_trend,
        }

    def can_load_model(self, model_size_gb: float) -> bool:
        """Check if enough resources are available to load a model.

        Args:
            model_size_gb: Required memory in GB for the model

        Returns:
            True if model can be loaded, False otherwise
        """
        resources = self.get_current_resources()

        # Check if enough memory is available (with 50% safety margin)
        required_memory_with_margin = model_size_gb * 1.5
        available_memory = resources["available_memory_gb"]

        if available_memory < required_memory_with_margin:
            self.logger.warning(
                f"Insufficient memory: need {required_memory_with_margin:.1f}GB, "
                f"have {available_memory:.1f}GB"
            )
            return False

        # Check if the GPU has enough VRAM, if a GPU is available
        if resources["gpu_vram_gb"] > 0:
            if resources["gpu_vram_gb"] < model_size_gb:
                self.logger.warning(
                    f"Insufficient GPU VRAM: need {model_size_gb:.1f}GB, "
                    f"have {resources['gpu_vram_gb']:.1f}GB"
                )
                return False

        return True

    def is_system_overloaded(self) -> bool:
        """Check if system resources exceed configured thresholds.

        Returns:
            True if system is overloaded, False otherwise
        """
        resources = self.get_current_resources()

        # Check memory threshold
        if resources["memory_percent"] > self.memory_threshold:
            return True

        # Check CPU threshold
        if resources["cpu_percent"] > self.cpu_threshold:
            return True

        return False

    def update_history(self) -> None:
        """Update resource history for trend analysis."""
        resources = self.get_current_resources()

        # Add timestamp and sample
        resources["timestamp"] = time.time()
        self.resource_history.append(resources)

        # Trim history if too large
        if len(self.resource_history) > self.max_history_size:
            self.resource_history = self.resource_history[-self.max_history_size:]

    def get_best_model_size(self) -> str:
        """Recommend model size category based on current resources.

        Returns:
            Model size category: "small", "medium", or "large"
        """
        resources = self.get_current_resources()
        available_memory_gb = resources["available_memory_gb"]

        if available_memory_gb >= 8:
            return "large"
        elif available_memory_gb >= 4:
            return "medium"
        else:
            return "small"

    def _get_cached_gpu_info(self) -> Dict[str, float]:
        """Get GPU info with caching to avoid repeated initialization overhead.

        Returns:
            GPU info dict (cached or fresh)
        """
        current_time = time.time()

        # Return cached info if still valid
        if (
            self._gpu_cache is not None
            and current_time - self._gpu_cache_time < self._gpu_cache_duration
        ):
            return self._gpu_cache

        # Get fresh GPU info and cache it
        self._gpu_cache = self._get_gpu_info()
        self._gpu_cache_time = current_time
        return self._gpu_cache

    def _get_gpu_info(self) -> Dict[str, float]:
        """Get detailed GPU information using pynvml or fallback methods.

        Returns:
            Dict with GPU metrics:
                - total_vram_gb: Total VRAM capacity in GB
                - used_vram_gb: Used VRAM in GB
                - free_vram_gb: Available VRAM in GB
                - utilization_percent: GPU utilization (0-100)
                - temperature_c: GPU temperature in Celsius
        """
        gpu_info = {
            "total_vram_gb": 0.0,
            "used_vram_gb": 0.0,
            "free_vram_gb": 0.0,
            "utilization_percent": 0.0,
            "temperature_c": 0.0,
        }

        # Try pynvml first for NVIDIA GPUs (but not if we already know it failed)
        if PYNVML_AVAILABLE and pynvml is not None and not self._pynvml_failed:
            try:
                # Initialize pynvml
                pynvml.nvmlInit()

                # Get number of GPUs
                device_count = pynvml.nvmlDeviceGetCount()

                if device_count > 0:
                    # Use first GPU (can be extended for multi-GPU support)
                    handle = pynvml.nvmlDeviceGetHandleByIndex(0)

                    # Get memory info
                    memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
                    total_bytes = memory_info.total
                    used_bytes = memory_info.used
                    free_bytes = memory_info.free

                    # Convert to GB
                    gpu_info["total_vram_gb"] = total_bytes / (1024**3)
                    gpu_info["used_vram_gb"] = used_bytes / (1024**3)
                    gpu_info["free_vram_gb"] = free_bytes / (1024**3)

                    # Get utilization (GPU and memory)
                    try:
                        utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
                        gpu_info["utilization_percent"] = utilization.gpu
                    except Exception:
                        # Some GPUs don't support utilization queries
                        pass

                    # Get temperature
                    try:
                        temp = pynvml.nvmlDeviceGetTemperature(
                            handle, pynvml.NVML_TEMPERATURE_GPU
                        )
                        gpu_info["temperature_c"] = float(temp)
                    except Exception:
                        # Some GPUs don't support temperature queries
                        pass

                # Always shutdown pynvml when done
                pynvml.nvmlShutdown()

                self.logger.debug(
                    f"GPU detected via pynvml: {gpu_info['total_vram_gb']:.1f}GB total, "
                    f"{gpu_info['used_vram_gb']:.1f}GB used, "
                    f"{gpu_info['utilization_percent']:.0f}% utilization, "
                    f"{gpu_info['temperature_c']:.0f}°C"
                )
                return gpu_info

            except Exception as e:
                self.logger.debug(f"pynvml GPU detection failed: {e}")
                # Mark pynvml as failed to avoid repeated attempts
                self._pynvml_failed = True
                # Fall through to gpu-tracker

        # Fallback to gpu-tracker for other GPUs or when pynvml fails
        try:
            import gpu_tracker as gt

            gpu_list = gt.get_gpus()
            if gpu_list:
                gpu = gpu_list[0]  # Use first GPU

                # Convert MB to GB for consistency
                total_mb = getattr(gpu, "memory_total", 0)
                used_mb = getattr(gpu, "memory_used", 0)

                gpu_info["total_vram_gb"] = total_mb / 1024.0
                gpu_info["used_vram_gb"] = used_mb / 1024.0
                gpu_info["free_vram_gb"] = (total_mb - used_mb) / 1024.0

                self.logger.debug(
                    f"GPU detected via gpu-tracker: {gpu_info['total_vram_gb']:.1f}GB total, "
                    f"{gpu_info['used_vram_gb']:.1f}GB used"
                )
                return gpu_info

        except ImportError:
            self.logger.debug("gpu-tracker not available")
        except Exception as e:
            self.logger.debug(f"gpu-tracker failed: {e}")

        # No GPU detected - return default values
        self.logger.debug("No GPU detected")
        return gpu_info

    def _calculate_trend(self, values: List[float]) -> str:
        """Calculate trend direction from a list of values.

        Args:
            values: List of numeric values in chronological order

        Returns:
            Trend indicator: "increasing", "decreasing", or "stable"
        """
        if len(values) < 2:
            return "insufficient_data"

        # Simple linear regression to determine trend
        n = len(values)
        x_values = list(range(n))

        # Calculate slope
        sum_x = sum(x_values)
        sum_y = sum(values)
        sum_xy = sum(x * y for x, y in zip(x_values, values))
        sum_x2 = sum(x * x for x in x_values)

        slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x)

        # Determine trend based on slope magnitude
        if abs(slope) < 0.1:
            return "stable"
        elif slope > 0:
            return "increasing"
        else:
            return "decreasing"
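

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only), exercising the public API above.
# Assumes psutil is installed; GPU fields fall back to zeros when neither
# pynvml nor gpu-tracker is available. The three-sample loop and the 0.5 s
# sleep are arbitrary choices for the demo, not requirements of the class.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    monitor = ResourceMonitor(memory_threshold=80.0, cpu_threshold=80.0)

    # Collect a few samples so get_resource_trend() has history to analyze
    for _ in range(3):
        monitor.update_history()
        time.sleep(0.5)

    snapshot = monitor.get_current_resources()
    print(
        f"Memory: {snapshot['memory_percent']:.1f}% "
        f"({snapshot['available_memory_gb']:.1f} GB available)"
    )
    print(f"CPU: {snapshot['cpu_percent']:.1f}%")
    print(f"GPU free VRAM: {snapshot['gpu_free_vram_gb']:.1f} GB")

    print("Overloaded:", monitor.is_system_overloaded())
    print("Recommended model size:", monitor.get_best_model_size())
    print("Can load a 4 GB model:", monitor.can_load_model(4.0))
    print("Trend (last 5 min):", monitor.get_resource_trend(window_minutes=5))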