Mai/src/resource/scaling.py
Mai Development 4d7749da7b
feat(03-03): implement ProactiveScaler class with hybrid monitoring
- Created ProactiveScaler class for proactive resource management
- Implemented continuous background monitoring with configurable intervals
- Added pre-flight resource checks before operations
- Implemented graceful degradation cascades with stabilization periods
- Added trend analysis for predictive scaling decisions
- Included hysteresis to prevent model-switching thrashing
- Provided callbacks for integration with ModelManager
- Thread-safe implementation with proper shutdown handling
2026-01-27 18:40:58 -05:00

"""Proactive scaling system with hybrid monitoring and graceful degradation."""
import threading
import time
import logging
from typing import Dict, List, Optional, Any, Callable, Tuple
from dataclasses import dataclass
from enum import Enum
from collections import deque
from .tiers import HardwareTierDetector
from ..models.resource_monitor import ResourceMonitor
class ScalingDecision(Enum):
"""Types of scaling decisions."""
NO_CHANGE = "no_change"
UPGRADE = "upgrade"
DOWNGRADE = "downgrade"
DEGRADATION_CASCADE = "degradation_cascade"
@dataclass
class ScalingEvent:
"""Record of a scaling decision and its context."""
timestamp: float
decision: ScalingDecision
old_model_size: Optional[str]
new_model_size: Optional[str]
reason: str
resources: Dict[str, float]
tier: str
class ProactiveScaler:
"""
Proactive scaling system with hybrid monitoring and graceful degradation.
Combines continuous background monitoring with pre-flight checks to
anticipate resource constraints and scale models before performance
degradation impacts user experience.
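    Example (illustrative wiring only, not the project's canonical integration):
        scaler = ProactiveScaler(upgrade_threshold=0.75)
        scaler.set_scaling_callback(lambda event: print(event.decision.value))
        scaler.start_continuous_monitoring()
        ok, reason = scaler.check_preflight_resources("model_inference")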
"""
def __init__(
self,
resource_monitor: Optional[ResourceMonitor] = None,
tier_detector: Optional[HardwareTierDetector] = None,
upgrade_threshold: float = 0.8,
downgrade_threshold: float = 0.9,
stabilization_minutes: int = 5,
monitoring_interval: float = 2.0,
trend_window_minutes: int = 10,
):
"""Initialize proactive scaler.
Args:
resource_monitor: ResourceMonitor instance for metrics
tier_detector: HardwareTierDetector for tier-based thresholds
upgrade_threshold: Resource usage threshold for upgrades (default 0.8 = 80%)
downgrade_threshold: Resource usage threshold for downgrades (default 0.9 = 90%)
stabilization_minutes: Minimum time between upgrades (default 5 minutes)
monitoring_interval: Background monitoring interval in seconds
trend_window_minutes: Window for trend analysis in minutes
"""
self.logger = logging.getLogger(__name__)
# Core dependencies
self.resource_monitor = resource_monitor or ResourceMonitor()
self.tier_detector = tier_detector or HardwareTierDetector()
# Configuration
self.upgrade_threshold = upgrade_threshold
self.downgrade_threshold = downgrade_threshold
self.stabilization_seconds = stabilization_minutes * 60
self.monitoring_interval = monitoring_interval
self.trend_window_seconds = trend_window_minutes * 60
# State management
self._monitoring_active = False
self._monitoring_thread: Optional[threading.Thread] = None
self._shutdown_event = threading.Event()
# Resource history and trend analysis
self._resource_history: deque = deque(maxlen=500) # Store last 500 samples
self._performance_metrics: deque = deque(maxlen=100) # Last 100 operations
self._scaling_history: List[ScalingEvent] = []
# Stabilization tracking
self._last_upgrade_time: float = 0
self._last_downgrade_time: float = 0
self._current_model_size: Optional[str] = None
self._stabilization_cooldown: bool = False
# Callbacks for external systems
self._on_scaling_decision: Optional[Callable[[ScalingEvent], None]] = None
# Hysteresis to prevent thrashing
self._hysteresis_margin = 0.05 # 5% margin between upgrade/downgrade
self.logger.info("ProactiveScaler initialized with hybrid monitoring")
def set_scaling_callback(self, callback: Callable[[ScalingEvent], None]) -> None:
"""Set callback function for scaling decisions.
Args:
callback: Function to call when scaling decision is made
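        Example (hypothetical handler; a real integration would forward the
        event to ModelManager rather than print it):
            def on_scale(event: ScalingEvent) -> None:
                print(event.decision.value, event.new_model_size)
            scaler.set_scaling_callback(on_scale)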
"""
self._on_scaling_decision = callback
def start_continuous_monitoring(self) -> None:
"""Start background continuous monitoring."""
if self._monitoring_active:
self.logger.warning("Monitoring already active")
return
self._monitoring_active = True
self._shutdown_event.clear()
self._monitoring_thread = threading.Thread(
target=self._monitoring_loop, daemon=True, name="ProactiveScaler-Monitor"
)
self._monitoring_thread.start()
self.logger.info("Started continuous background monitoring")
def stop_continuous_monitoring(self) -> None:
"""Stop background continuous monitoring."""
if not self._monitoring_active:
return
self._monitoring_active = False
self._shutdown_event.set()
if self._monitoring_thread and self._monitoring_thread.is_alive():
self._monitoring_thread.join(timeout=5.0)
self.logger.info("Stopped continuous background monitoring")
def check_preflight_resources(
self, operation_type: str = "model_inference"
) -> Tuple[bool, str]:
"""Perform quick pre-flight resource check before operation.
Args:
operation_type: Type of operation being attempted
Returns:
Tuple of (can_proceed, reason_if_denied)
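        Example (sketch of gating an operation on the check; `logger` stands
        in for the caller's logger):
            ok, reason = scaler.check_preflight_resources("model_inference")
            if not ok:
                logger.warning(f"Deferring operation: {reason}")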
"""
try:
resources = self.resource_monitor.get_current_resources()
# Critical resource checks
if resources["memory_percent"] > self.downgrade_threshold * 100:
return (
False,
f"Memory usage too high: {resources['memory_percent']:.1f}%",
)
if resources["cpu_percent"] > self.downgrade_threshold * 100:
return False, f"CPU usage too high: {resources['cpu_percent']:.1f}%"
# Check for immediate degradation needs
if self._should_immediate_degrade(resources):
return (
False,
"Immediate degradation required - resources critically constrained",
)
return True, "Resources adequate for operation"
except Exception as e:
self.logger.error(f"Error in pre-flight check: {e}")
return False, f"Pre-flight check failed: {e}"
def should_upgrade_model(
self, current_resources: Optional[Dict[str, float]] = None
) -> bool:
"""Check if conditions allow for model upgrade.
Args:
current_resources: Current resource snapshot (optional)
Returns:
True if upgrade conditions are met
"""
try:
resources = (
current_resources or self.resource_monitor.get_current_resources()
)
current_time = time.time()
# Check stabilization cooldown
if current_time - self._last_upgrade_time < self.stabilization_seconds:
return False
# Check if resources are consistently low enough for upgrade
if not self._resources_support_upgrade(resources):
return False
# Analyze trends to ensure stability
if not self._trend_supports_upgrade():
return False
            # Check if we're in stabilization cooldown from a recent scaling change
if self._stabilization_cooldown:
return False
return True
except Exception as e:
self.logger.error(f"Error checking upgrade conditions: {e}")
return False
def initiate_graceful_degradation(
self, reason: str, immediate: bool = False
) -> Optional[str]:
"""Initiate graceful degradation to smaller model.
Args:
reason: Reason for degradation
immediate: Whether degradation should happen immediately
Returns:
Recommended smaller model size or None
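        Example (sketch; invoked from an external memory alarm):
            target = scaler.initiate_graceful_degradation(
                "memory pressure from concurrent jobs", immediate=True
            )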
"""
try:
resources = self.resource_monitor.get_current_resources()
current_tier = self.tier_detector.detect_current_tier()
# Determine target model size based on current constraints
if self._current_model_size == "large":
target_size = "medium"
elif self._current_model_size == "medium":
target_size = "small"
else:
target_size = "small" # Stay at small if already small
# Check if degradation is beneficial
if target_size == self._current_model_size:
self.logger.warning(
"Already at minimum model size, cannot degrade further"
)
return None
current_time = time.time()
if not immediate:
# Apply stabilization period for downgrades too
if (
current_time - self._last_downgrade_time
< self.stabilization_seconds
):
self.logger.info("Degradation blocked by stabilization period")
return None
# Create scaling event
event = ScalingEvent(
timestamp=current_time,
decision=ScalingDecision.DOWNGRADE,
old_model_size=self._current_model_size,
new_model_size=target_size,
reason=reason,
resources=resources,
tier=current_tier,
)
# Record the decision
self._record_scaling_decision(event)
# Update timing
self._last_downgrade_time = current_time
self._current_model_size = target_size
self.logger.info(
f"Initiated graceful degradation to {target_size}: {reason}"
)
# Trigger callback if set
if self._on_scaling_decision:
self._on_scaling_decision(event)
return target_size
except Exception as e:
self.logger.error(f"Error initiating degradation: {e}")
return None
def analyze_resource_trends(self) -> Dict[str, Any]:
"""Analyze resource usage trends for predictive scaling.
Returns:
Dictionary with trend analysis and predictions
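        Example return shape (values are illustrative):
            {"status": "analyzed", "memory_trend": "stable",
             "cpu_trend": "increasing", "predicted_memory_usage": None,
             "predicted_cpu_usage": 0.9, "recommendation": "monitor_closely",
             "confidence": 0.62}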
"""
try:
if len(self._resource_history) < 10:
return {"status": "insufficient_data"}
# Calculate trends for key metrics
memory_trend = self._calculate_trend(
[entry["memory"] for entry in self._resource_history]
)
cpu_trend = self._calculate_trend(
[entry["cpu"] for entry in self._resource_history]
)
# Predict future usage based on trends
future_memory = self._predict_future_usage(memory_trend)
future_cpu = self._predict_future_usage(cpu_trend)
# Determine scaling recommendation
recommendation = self._generate_trend_recommendation(
memory_trend, cpu_trend, future_memory, future_cpu
)
return {
"status": "analyzed",
"memory_trend": memory_trend,
"cpu_trend": cpu_trend,
"predicted_memory_usage": future_memory,
"predicted_cpu_usage": future_cpu,
"recommendation": recommendation,
"confidence": self._calculate_trend_confidence(),
}
except Exception as e:
self.logger.error(f"Error analyzing trends: {e}")
return {"status": "error", "error": str(e)}
def update_performance_metrics(
self, operation_type: str, duration_ms: float, success: bool
) -> None:
"""Update performance metrics for scaling decisions.
Args:
operation_type: Type of operation performed
duration_ms: Duration in milliseconds
success: Whether operation was successful
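        Example:
            scaler.update_performance_metrics("model_inference", 152.3, True)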
"""
metric = {
"timestamp": time.time(),
"operation_type": operation_type,
"duration_ms": duration_ms,
"success": success,
}
self._performance_metrics.append(metric)
# Keep only recent metrics (maintained by deque maxlen)
def get_scaling_status(self) -> Dict[str, Any]:
"""Get current scaling status and recommendations.
Returns:
Dictionary with scaling status information
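        Example (sketch; polling the scaler from a health check):
            status = scaler.get_scaling_status()
            if status.get("degradation_needed"):
                scaler.initiate_graceful_degradation("health check flagged pressure")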
"""
try:
current_resources = self.resource_monitor.get_current_resources()
current_tier = self.tier_detector.detect_current_tier()
return {
"monitoring_active": self._monitoring_active,
"current_model_size": self._current_model_size,
"current_tier": current_tier,
"current_resources": current_resources,
"upgrade_available": self.should_upgrade_model(current_resources),
"degradation_needed": self._should_immediate_degrade(current_resources),
"stabilization_cooldown": self._stabilization_cooldown,
"last_upgrade_time": self._last_upgrade_time,
"last_downgrade_time": self._last_downgrade_time,
"recent_decisions": self._scaling_history[-5:], # Last 5 decisions
"trend_analysis": self.analyze_resource_trends(),
}
except Exception as e:
self.logger.error(f"Error getting scaling status: {e}")
return {"status": "error", "error": str(e)}
def _monitoring_loop(self) -> None:
"""Background monitoring loop."""
self.logger.info("Starting proactive scaling monitoring loop")
while not self._shutdown_event.wait(self.monitoring_interval):
try:
if not self._monitoring_active:
break
# Collect current resources
resources = self.resource_monitor.get_current_resources()
timestamp = time.time()
# Update resource history
self._update_resource_history(resources, timestamp)
# Check for scaling opportunities
self._check_scaling_opportunities(resources, timestamp)
except Exception as e:
self.logger.error(f"Error in monitoring loop: {e}")
time.sleep(1.0) # Brief pause on error
self.logger.info("Proactive scaling monitoring loop stopped")
def _update_resource_history(
self, resources: Dict[str, float], timestamp: float
) -> None:
"""Update resource history with current snapshot."""
history_entry = {
"timestamp": timestamp,
"memory": resources["memory_percent"],
"cpu": resources["cpu_percent"],
"available_memory_gb": resources["available_memory_gb"],
"gpu_utilization": resources.get("gpu_utilization_percent", 0),
}
self._resource_history.append(history_entry)
# Also update the resource monitor's history
self.resource_monitor.update_history()
def _check_scaling_opportunities(
self, resources: Dict[str, float], timestamp: float
) -> None:
"""Check for proactive scaling opportunities."""
try:
# Check for immediate degradation needs
if self._should_immediate_degrade(resources):
degradation_reason = f"Critical resource usage: Memory {resources['memory_percent']:.1f}%, CPU {resources['cpu_percent']:.1f}%"
self.initiate_graceful_degradation(degradation_reason, immediate=True)
return
# Check for upgrade opportunities
if self.should_upgrade_model(resources):
if not self._stabilization_cooldown:
upgrade_recommendation = self._determine_upgrade_target()
if upgrade_recommendation:
self._execute_upgrade(
upgrade_recommendation, resources, timestamp
)
# Update stabilization cooldown status
self._update_stabilization_status()
except Exception as e:
self.logger.error(f"Error checking scaling opportunities: {e}")
def _should_immediate_degrade(self, resources: Dict[str, float]) -> bool:
"""Check if immediate degradation is required."""
# Critical thresholds that require immediate action
memory_critical = resources["memory_percent"] > self.downgrade_threshold * 100
cpu_critical = resources["cpu_percent"] > self.downgrade_threshold * 100
# Also check available memory (avoid OOM)
memory_low = resources["available_memory_gb"] < 1.0 # Less than 1GB available
return memory_critical or cpu_critical or memory_low
def _resources_support_upgrade(self, resources: Dict[str, float]) -> bool:
"""Check if current resources support model upgrade."""
memory_ok = resources["memory_percent"] < self.upgrade_threshold * 100
cpu_ok = resources["cpu_percent"] < self.upgrade_threshold * 100
memory_available = (
resources["available_memory_gb"] >= 4.0
) # Need at least 4GB free
return memory_ok and cpu_ok and memory_available
def _trend_supports_upgrade(self) -> bool:
"""Check if resource trends support model upgrade."""
if len(self._resource_history) < 20: # Need more data
return False
# Analyze recent trends
recent_entries = list(self._resource_history)[-20:]
memory_values = [entry["memory"] for entry in recent_entries]
cpu_values = [entry["cpu"] for entry in recent_entries]
memory_trend = self._calculate_trend(memory_values)
cpu_trend = self._calculate_trend(cpu_values)
# Only upgrade if trends are stable or decreasing
return memory_trend in ["stable", "decreasing"] and cpu_trend in [
"stable",
"decreasing",
]
def _determine_upgrade_target(self) -> Optional[str]:
"""Determine the best upgrade target based on current tier."""
try:
current_tier = self.tier_detector.detect_current_tier()
preferred_models = self.tier_detector.get_preferred_models(current_tier)
if not preferred_models:
return None
# Find next larger model in preferred list
size_order = ["small", "medium", "large"]
current_idx = (
size_order.index(self._current_model_size)
if self._current_model_size
else -1
)
# Find the largest model we can upgrade to
for size in reversed(size_order): # Check large to small
if size in preferred_models and size_order.index(size) > current_idx:
return size
return None
except Exception as e:
self.logger.error(f"Error determining upgrade target: {e}")
return None
def _execute_upgrade(
self, target_size: str, resources: Dict[str, float], timestamp: float
) -> None:
"""Execute model upgrade with proper recording."""
try:
current_time = time.time()
# Check stabilization period
if current_time - self._last_upgrade_time < self.stabilization_seconds:
self.logger.debug("Upgrade blocked by stabilization period")
return
# Create scaling event
event = ScalingEvent(
timestamp=current_time,
decision=ScalingDecision.UPGRADE,
old_model_size=self._current_model_size,
new_model_size=target_size,
reason=f"Proactive upgrade based on resource availability: {resources['memory_percent']:.1f}% memory, {resources['cpu_percent']:.1f}% CPU",
resources=resources,
tier=self.tier_detector.detect_current_tier(),
)
# Record the decision
self._record_scaling_decision(event)
# Update state
self._last_upgrade_time = current_time
self._current_model_size = target_size
# Set stabilization cooldown
self._stabilization_cooldown = True
self.logger.info(f"Executed proactive upgrade to {target_size}")
# Trigger callback if set
if self._on_scaling_decision:
self._on_scaling_decision(event)
except Exception as e:
self.logger.error(f"Error executing upgrade: {e}")
def _update_stabilization_status(self) -> None:
"""Update stabilization cooldown status."""
current_time = time.time()
        # Time since the most recent scaling change (upgrade or downgrade)
        time_since_last_change = min(
            current_time - self._last_upgrade_time,
            current_time - self._last_downgrade_time,
        )
        self._stabilization_cooldown = (
            time_since_last_change <= self.stabilization_seconds
        )
def _record_scaling_decision(self, event: ScalingEvent) -> None:
"""Record a scaling decision in history."""
self._scaling_history.append(event)
# Keep only recent history (last 50 decisions)
if len(self._scaling_history) > 50:
self._scaling_history = self._scaling_history[-50:]
def _calculate_trend(self, values: List[float]) -> str:
"""Calculate trend direction from a list of values."""
if len(values) < 5:
return "insufficient_data"
# Simple linear regression for trend
n = len(values)
x_values = list(range(n))
sum_x = sum(x_values)
sum_y = sum(values)
sum_xy = sum(x * y for x, y in zip(x_values, values))
sum_x2 = sum(x * x for x in x_values)
# Calculate slope
try:
slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x)
            # Slope is in percentage points per sample; treat small drift as stable
if abs(slope) < 0.1:
return "stable"
elif slope > 0:
return "increasing"
else:
return "decreasing"
except ZeroDivisionError:
return "stable"
    def _predict_future_usage(self, trend: str) -> Optional[float]:
        """Predict future resource usage from the trend direction.
        Note: this is a coarse fixed heuristic keyed only on the trend
        direction; it does not extrapolate the measured values.
        """
        if trend == "stable":
            return None  # No change predicted
        elif trend == "increasing":
            return 0.9  # Assume usage climbs toward ~90% over the window
        elif trend == "decreasing":
            return 0.5  # Assume usage settles near ~50%
        return None
def _generate_trend_recommendation(
self,
memory_trend: str,
cpu_trend: str,
future_memory: Optional[float],
future_cpu: Optional[float],
) -> str:
"""Generate scaling recommendation based on trend analysis."""
if memory_trend == "increasing" or cpu_trend == "increasing":
return "monitor_closely" # Resources trending up
elif memory_trend == "decreasing" and cpu_trend == "decreasing":
return "consider_upgrade" # Resources trending down
elif memory_trend == "stable" and cpu_trend == "stable":
return "maintain_current" # Stable conditions
else:
return "monitor_closely" # Mixed signals
def _calculate_trend_confidence(self) -> float:
"""Calculate confidence in trend predictions."""
if len(self._resource_history) < 20:
return 0.3 # Low confidence with limited data
# Higher confidence with more data and stable trends
data_factor = min(1.0, len(self._resource_history) / 100.0)
# Calculate consistency of recent trends
recent_entries = list(self._resource_history)[-20:]
memory_variance = self._calculate_variance(
[entry["memory"] for entry in recent_entries]
)
cpu_variance = self._calculate_variance(
[entry["cpu"] for entry in recent_entries]
)
# Lower variance = higher confidence
variance_factor = max(0.3, 1.0 - (memory_variance + cpu_variance) / 200.0)
return data_factor * variance_factor
def _calculate_variance(self, values: List[float]) -> float:
"""Calculate variance of a list of values."""
if not values:
return 0.0
mean = sum(values) / len(values)
variance = sum((x - mean) ** 2 for x in values) / len(values)
return variance
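
# Minimal usage sketch (illustrative only): wires the scaler to a print-based
# callback instead of a real ModelManager, runs the monitor briefly, and
# performs a pre-flight check. Assumes ResourceMonitor and
# HardwareTierDetector resolve on this machine.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    def _print_event(event: ScalingEvent) -> None:
        # Hypothetical handler: a real deployment would swap models here.
        print(f"scaling: {event.old_model_size} -> {event.new_model_size} ({event.reason})")

    scaler = ProactiveScaler(monitoring_interval=1.0)
    scaler.set_scaling_callback(_print_event)
    scaler.start_continuous_monitoring()
    try:
        ok, reason = scaler.check_preflight_resources("model_inference")
        print(f"preflight ok={ok}: {reason}")
        time.sleep(5.0)  # Let the monitor collect a few samples
        print(scaler.analyze_resource_trends())
    finally:
        scaler.stop_continuous_monitoring()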