feat(03-02): implement HardwareTierDetector class
Some checks failed
Discord Webhook / git (push) Has been cancelled
Some checks failed
Discord Webhook / git (push) Has been cancelled
- Created comprehensive hardware tier detection system - Loads configurable tier definitions from YAML - Classifies systems based on RAM, CPU cores, and GPU capabilities - Provides model recommendations and performance characteristics - Includes caching for performance and error handling - Integrates with ResourceMonitor for real-time data
This commit is contained in:
324
src/resource/tiers.py
Normal file
324
src/resource/tiers.py
Normal file
@@ -0,0 +1,324 @@
|
|||||||
|
"""Hardware tier detection and management system."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
import logging
|
||||||
|
from typing import Dict, List, Optional, Any, Tuple
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from ..models.resource_monitor import ResourceMonitor
|
||||||
|
|
||||||
|
|
||||||
|
class HardwareTierDetector:
|
||||||
|
"""Detects and classifies hardware capabilities into performance tiers.
|
||||||
|
|
||||||
|
This class loads configurable tier definitions and uses system resource
|
||||||
|
monitoring to classify the current system into appropriate tiers for
|
||||||
|
intelligent model selection.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config_path: Optional[str] = None):
|
||||||
|
"""Initialize hardware tier detector.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
config_path: Path to tier configuration file. If None, uses default.
|
||||||
|
"""
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Set default config path relative to this file
|
||||||
|
if config_path is None:
|
||||||
|
config_path = (
|
||||||
|
Path(__file__).parent.parent / "config" / "resource_tiers.yaml"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.config_path = Path(config_path)
|
||||||
|
self.tier_config: Optional[Dict[str, Any]] = None
|
||||||
|
self.resource_monitor = ResourceMonitor()
|
||||||
|
|
||||||
|
# Cache tier detection result
|
||||||
|
self._cached_tier: Optional[str] = None
|
||||||
|
self._cache_time: float = 0
|
||||||
|
self._cache_duration: float = 60.0 # Cache for 1 minute
|
||||||
|
|
||||||
|
# Load configuration
|
||||||
|
self._load_tier_config()
|
||||||
|
|
||||||
|
def _load_tier_config(self) -> None:
|
||||||
|
"""Load tier definitions from YAML configuration file.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
FileNotFoundError: If config file doesn't exist
|
||||||
|
yaml.YAMLError: If config file is invalid
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||||
|
self.tier_config = yaml.safe_load(f)
|
||||||
|
self.logger.info(f"Loaded tier configuration from {self.config_path}")
|
||||||
|
except FileNotFoundError:
|
||||||
|
self.logger.error(f"Tier configuration file not found: {self.config_path}")
|
||||||
|
raise
|
||||||
|
except yaml.YAMLError as e:
|
||||||
|
self.logger.error(f"Invalid YAML in tier configuration: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def detect_current_tier(self) -> str:
|
||||||
|
"""Determine system tier based on current resources.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tier name: 'low_end', 'mid_range', or 'high_end'
|
||||||
|
"""
|
||||||
|
# Check cache first
|
||||||
|
import time
|
||||||
|
|
||||||
|
current_time = time.time()
|
||||||
|
if (
|
||||||
|
self._cached_tier is not None
|
||||||
|
and current_time - self._cache_time < self._cache_duration
|
||||||
|
):
|
||||||
|
return self._cached_tier
|
||||||
|
|
||||||
|
try:
|
||||||
|
resources = self.resource_monitor.get_current_resources()
|
||||||
|
tier = self._classify_resources(resources)
|
||||||
|
|
||||||
|
# Cache result
|
||||||
|
self._cached_tier = tier
|
||||||
|
self._cache_time = current_time
|
||||||
|
|
||||||
|
self.logger.info(f"Detected hardware tier: {tier}")
|
||||||
|
return tier
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Failed to detect tier: {e}")
|
||||||
|
return "low_end" # Conservative fallback
|
||||||
|
|
||||||
|
def _classify_resources(self, resources: Dict[str, float]) -> str:
|
||||||
|
"""Classify system resources into tier based on configuration.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
resources: Current system resources from ResourceMonitor
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tier classification
|
||||||
|
"""
|
||||||
|
if not self.tier_config or "tiers" not in self.tier_config:
|
||||||
|
self.logger.error("No tier configuration loaded")
|
||||||
|
return "low_end"
|
||||||
|
|
||||||
|
tiers = self.tier_config["tiers"]
|
||||||
|
|
||||||
|
# Extract key metrics
|
||||||
|
ram_gb = resources.get("available_memory_gb", 0)
|
||||||
|
cpu_cores = os.cpu_count() or 1
|
||||||
|
gpu_vram_gb = resources.get("gpu_free_vram_gb", 0)
|
||||||
|
gpu_total_vram_gb = resources.get("gpu_total_vram_gb", 0)
|
||||||
|
|
||||||
|
self.logger.debug(
|
||||||
|
f"Resources: RAM={ram_gb:.1f}GB, CPU={cpu_cores}, GPU={gpu_total_vram_gb:.1f}GB"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check tiers in order: high_end -> mid_range -> low_end
|
||||||
|
for tier_name in ["high_end", "mid_range", "low_end"]:
|
||||||
|
if tier_name not in tiers:
|
||||||
|
continue
|
||||||
|
|
||||||
|
tier_config = tiers[tier_name]
|
||||||
|
|
||||||
|
if self._meets_tier_requirements(
|
||||||
|
tier_config, ram_gb, cpu_cores, gpu_vram_gb, gpu_total_vram_gb
|
||||||
|
):
|
||||||
|
return tier_name
|
||||||
|
|
||||||
|
return "low_end" # Conservative fallback
|
||||||
|
|
||||||
|
def _meets_tier_requirements(
|
||||||
|
self,
|
||||||
|
tier_config: Dict[str, Any],
|
||||||
|
ram_gb: float,
|
||||||
|
cpu_cores: int,
|
||||||
|
gpu_vram_gb: float,
|
||||||
|
gpu_total_vram_gb: float,
|
||||||
|
) -> bool:
|
||||||
|
"""Check if system meets tier requirements.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tier_config: Configuration for the tier to check
|
||||||
|
ram_gb: Available system RAM in GB
|
||||||
|
cpu_cores: Number of CPU cores
|
||||||
|
gpu_vram_gb: Available GPU VRAM in GB
|
||||||
|
gpu_total_vram_gb: Total GPU VRAM in GB
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if system meets all requirements for this tier
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Check RAM requirements
|
||||||
|
ram_req = tier_config.get("ram_gb", {})
|
||||||
|
ram_min = ram_req.get("min", 0)
|
||||||
|
ram_max = ram_req.get("max")
|
||||||
|
|
||||||
|
if ram_gb < ram_min:
|
||||||
|
return False
|
||||||
|
if ram_max is not None and ram_gb > ram_max:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check CPU core requirements
|
||||||
|
cpu_req = tier_config.get("cpu_cores", {})
|
||||||
|
cpu_min = cpu_req.get("min", 1)
|
||||||
|
cpu_max = cpu_req.get("max")
|
||||||
|
|
||||||
|
if cpu_cores < cpu_min:
|
||||||
|
return False
|
||||||
|
if cpu_max is not None and cpu_cores > cpu_max:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check GPU requirements
|
||||||
|
gpu_required = tier_config.get("gpu_required", False)
|
||||||
|
if gpu_required:
|
||||||
|
gpu_vram_req = tier_config.get("gpu_vram_gb", {}).get("min", 0)
|
||||||
|
if gpu_total_vram_gb < gpu_vram_req:
|
||||||
|
return False
|
||||||
|
elif gpu_total_vram_gb > 0: # GPU present but not required
|
||||||
|
gpu_vram_max = tier_config.get("gpu_vram_gb", {}).get("max")
|
||||||
|
if gpu_vram_max is not None and gpu_total_vram_gb > gpu_vram_max:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error checking tier requirements: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_tier_config(self, tier_name: Optional[str] = None) -> Dict[str, Any]:
|
||||||
|
"""Get configuration for a specific tier.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tier_name: Tier to get config for. If None, uses detected tier.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tier configuration dictionary
|
||||||
|
"""
|
||||||
|
if tier_name is None:
|
||||||
|
tier_name = self.detect_current_tier()
|
||||||
|
|
||||||
|
if not self.tier_config or "tiers" not in self.tier_config:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
return self.tier_config["tiers"].get(tier_name, {})
|
||||||
|
|
||||||
|
def get_preferred_models(self, tier_name: Optional[str] = None) -> List[str]:
|
||||||
|
"""Get preferred model list for detected or specified tier.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tier_name: Tier to get models for. If None, uses detected tier.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of preferred model sizes for the tier
|
||||||
|
"""
|
||||||
|
tier_config = self.get_tier_config(tier_name)
|
||||||
|
return tier_config.get("preferred_models", ["small"])
|
||||||
|
|
||||||
|
def get_scaling_thresholds(
|
||||||
|
self, tier_name: Optional[str] = None
|
||||||
|
) -> Dict[str, float]:
|
||||||
|
"""Get scaling thresholds for detected or specified tier.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tier_name: Tier to get thresholds for. If None, uses detected tier.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with memory_percent and cpu_percent thresholds
|
||||||
|
"""
|
||||||
|
tier_config = self.get_tier_config(tier_name)
|
||||||
|
return tier_config.get(
|
||||||
|
"scaling_thresholds", {"memory_percent": 75.0, "cpu_percent": 80.0}
|
||||||
|
)
|
||||||
|
|
||||||
|
def is_gpu_required(self, tier_name: Optional[str] = None) -> bool:
|
||||||
|
"""Check if detected or specified tier requires GPU.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tier_name: Tier to check. If None, uses detected tier.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if GPU is required for this tier
|
||||||
|
"""
|
||||||
|
tier_config = self.get_tier_config(tier_name)
|
||||||
|
return tier_config.get("gpu_required", False)
|
||||||
|
|
||||||
|
def get_performance_characteristics(
|
||||||
|
self, tier_name: Optional[str] = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Get performance characteristics for detected or specified tier.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tier_name: Tier to get characteristics for. If None, uses detected tier.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with performance characteristics
|
||||||
|
"""
|
||||||
|
tier_config = self.get_tier_config(tier_name)
|
||||||
|
return tier_config.get("performance_characteristics", {})
|
||||||
|
|
||||||
|
def can_upgrade_model(
|
||||||
|
self, current_model_size: str, target_model_size: str
|
||||||
|
) -> bool:
|
||||||
|
"""Check if system can handle a larger model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
current_model_size: Current model size (e.g., 'small', 'medium')
|
||||||
|
target_model_size: Target model size (e.g., 'medium', 'large')
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if system can handle the target model size
|
||||||
|
"""
|
||||||
|
preferred_models = self.get_preferred_models()
|
||||||
|
|
||||||
|
# If target model is in preferred list, system should handle it
|
||||||
|
if target_model_size in preferred_models:
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Check if target is larger than current but still within capabilities
|
||||||
|
size_order = ["small", "medium", "large"]
|
||||||
|
try:
|
||||||
|
current_idx = size_order.index(current_model_size)
|
||||||
|
target_idx = size_order.index(target_model_size)
|
||||||
|
|
||||||
|
# Only allow upgrade if target is in preferred models
|
||||||
|
return target_idx <= max(
|
||||||
|
[
|
||||||
|
size_order.index(size)
|
||||||
|
for size in preferred_models
|
||||||
|
if size in size_order
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
except ValueError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_model_recommendations(self) -> Dict[str, Any]:
|
||||||
|
"""Get comprehensive model recommendations for current system.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with model recommendations and capabilities
|
||||||
|
"""
|
||||||
|
tier = self.detect_current_tier()
|
||||||
|
tier_config = self.get_tier_config(tier)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"detected_tier": tier,
|
||||||
|
"preferred_models": self.get_preferred_models(tier),
|
||||||
|
"model_size_range": tier_config.get("model_size_range", {}),
|
||||||
|
"performance_characteristics": self.get_performance_characteristics(tier),
|
||||||
|
"scaling_thresholds": self.get_scaling_thresholds(tier),
|
||||||
|
"gpu_required": self.is_gpu_required(tier),
|
||||||
|
"description": tier_config.get("description", ""),
|
||||||
|
}
|
||||||
|
|
||||||
|
def refresh_config(self) -> None:
|
||||||
|
"""Reload tier configuration from file.
|
||||||
|
|
||||||
|
Useful for runtime configuration updates without restarting.
|
||||||
|
"""
|
||||||
|
self._load_tier_config()
|
||||||
|
self._cached_tier = None # Clear cache to force re-detection
|
||||||
Reference in New Issue
Block a user