Files
Mai/src/mai/sandbox/docker_executor.py
2026-01-26 22:40:49 -05:00

433 lines
15 KiB
Python

"""
Docker Executor for Mai Safe Code Execution
Provides isolated container execution using Docker with comprehensive
resource limits, security restrictions, and audit logging integration.
"""
import logging
import tempfile
import time
import uuid
from dataclasses import dataclass
from pathlib import Path
from typing import Any
try:
import docker
from docker.errors import APIError, ContainerError, DockerException, ImageNotFound
from docker.models.containers import Container
DOCKER_AVAILABLE = True
except ImportError:
docker = None
Container = None
DockerException = Exception
APIError = Exception
ContainerError = Exception
ImageNotFound = Exception
DOCKER_AVAILABLE = False
from .audit_logger import AuditLogger
@dataclass
class ContainerConfig:
    """Settings used to create and constrain one sandbox container.

    Every field has a default, so ``ContainerConfig()`` yields a locked-down
    configuration: small memory/CPU budget, networking disabled, read-only
    root filesystem and a non-root user.
    """

    # Image the container is started from.
    image: str = "python:3.10-slim"
    # Maximum number of seconds to wait for the container to finish.
    timeout_seconds: int = 30
    # Memory cap, written in Docker's memory limit format.
    memory_limit: str = "128m"
    # CPU quota as a fraction of one CPU (0.5 = 50% of one CPU).
    cpu_limit: str = "0.5"
    # When True the container is started with networking disabled.
    network_disabled: bool = True
    # When True the container's root filesystem is read-only.
    read_only_filesystem: bool = True
    # Size of the temporary filesystem made available to the container.
    tmpfs_size: str = "64m"
    # Working directory inside the container.
    working_dir: str = "/app"
    # User the process runs as inside the container (non-root by default).
    user: str = "nobody"
@dataclass
class ContainerResult:
"""Result of container execution"""
success: bool
container_id: str
exit_code: int
stdout: str | None = None
stderr: str | None = None
execution_time: float = 0.0
error: str | None = None
resource_usage: dict[str, Any] | None = None
class DockerExecutor:
    """
    Docker-based container executor for isolated code execution.

    Provides sandboxing using Docker containers with resource limits,
    network restrictions, and audit logging. Every container started by this
    class carries the ``_SANDBOX_LABEL`` label so that housekeeping can tell
    its own containers apart from unrelated ones on the same host.
    """

    # Label stamped on every container this executor creates; used by
    # cleanup_containers() so it never removes containers it does not own.
    _SANDBOX_LABEL = "mai.sandbox"

    def __init__(self, audit_logger: AuditLogger | None = None):
        """
        Initialize Docker executor

        Args:
            audit_logger: Optional audit logger for execution logging
        """
        # The logger must exist before _initialize_docker() runs, because that
        # method logs a warning when the Docker daemon cannot be reached.
        self.logger = logging.getLogger(__name__)
        self.audit_logger = audit_logger
        self.client = None
        self.available = False
        self._initialize_docker()

    def _initialize_docker(self) -> None:
        """Create the Docker client and record whether the daemon answers.

        On any failure the executor is left unavailable (``client`` is None,
        ``available`` is False) and a warning is logged; nothing is raised.
        """
        self.client = None
        self.available = False
        if not DOCKER_AVAILABLE or docker is None:
            return

        try:
            client = docker.from_env()
            # Verify that the daemon is actually reachable.
            client.ping()
        except Exception as e:
            self.logger.warning("Docker not available: %s", e)
            return

        self.client = client
        self.available = True

    def is_available(self) -> bool:
        """Check if Docker executor is available"""
        return self.available and self.client is not None

    def execute_code(
        self,
        code: str,
        config: ContainerConfig | None = None,
        environment: dict[str, str] | None = None,
        files: dict[str, str] | None = None,
    ) -> ContainerResult:
        """
        Execute code in isolated Docker container

        The code is written to ``execute.py`` in a temporary directory, bind
        mounted read-only into the container's working directory and run with
        ``python execute.py``. The container is always force-removed once the
        attempt is over, whether it succeeded, failed or timed out.

        Args:
            code: Python code to execute
            config: Container configuration (defaults to ``ContainerConfig()``)
            environment: Environment variables
            files: Additional files to mount in container, mapping a relative
                file name to its text content. Names that would land outside
                the staging directory are rejected.

        Returns:
            ContainerResult with execution details. Failures are reported
            through ``success=False`` and ``error``; no exception is raised.
        """
        if not self.is_available() or self.client is None:
            return ContainerResult(
                success=False, container_id="", exit_code=-1, error="Docker executor not available"
            )

        config = config or ContainerConfig()
        # Placeholder id, replaced by the real container id once one exists.
        container_id = str(uuid.uuid4())[:8]
        # Monotonic clock: the value is only used to measure a duration.
        start_time = time.monotonic()

        try:
            # The staging directory must outlive the container because the
            # files are bind mounted from it.
            with tempfile.TemporaryDirectory() as temp_dir:
                volumes = self._stage_files(Path(temp_dir), code, files, config.working_dir)

                container = self.client.containers.run(
                    image=config.image,
                    command=["python", "execute.py"],
                    volumes=volumes,
                    detach=True,
                    **self._build_container_config(config, environment),
                )
                container_id = getattr(container, "id", container_id)

                try:
                    # NOTE(review): per the Docker SDK docs a timeout surfaces
                    # as an exception from wait(); it is reported through the
                    # generic "Unexpected error" handler below.
                    result = container.wait(timeout=config.timeout_seconds)
                    exit_code = result["StatusCode"]

                    # Output comes from untrusted code, so undecodable bytes
                    # are replaced instead of aborting the whole run.
                    stdout = container.logs(stdout=True, stderr=False).decode(
                        "utf-8", errors="replace"
                    )
                    stderr = container.logs(stdout=False, stderr=True).decode(
                        "utf-8", errors="replace"
                    )

                    stats = self._get_container_stats(container)

                    # A run only counts as successful when it exits with 0 and
                    # writes nothing to stderr.
                    success = exit_code == 0 and not stderr

                    execution_result = ContainerResult(
                        success=success,
                        container_id=container_id,
                        exit_code=exit_code,
                        stdout=stdout,
                        stderr=stderr,
                        execution_time=time.monotonic() - start_time,
                        resource_usage=stats,
                    )

                    if self.audit_logger:
                        self._log_container_execution(code, execution_result, config)

                    return execution_result
                finally:
                    # Best-effort cleanup; force=True also stops a container
                    # that is still running (e.g. after a timeout).
                    try:
                        container.remove(force=True)
                    except Exception as cleanup_error:
                        self.logger.debug(
                            "Failed to remove container %s: %s", container_id, cleanup_error
                        )
        except ContainerError as e:
            return ContainerResult(
                success=False,
                container_id=container_id or "unknown",
                # The SDK exposes the status as ``exit_status``.
                exit_code=getattr(e, "exit_status", -1),
                stderr=str(e),
                execution_time=time.monotonic() - start_time,
                error=f"Container execution error: {e}",
            )
        except ImageNotFound as e:
            return ContainerResult(
                success=False,
                container_id=container_id,
                exit_code=-1,
                execution_time=time.monotonic() - start_time,
                error=f"Docker image not found: {e}",
            )
        except APIError as e:
            return ContainerResult(
                success=False,
                container_id=container_id,
                exit_code=-1,
                execution_time=time.monotonic() - start_time,
                error=f"Docker API error: {e}",
            )
        except Exception as e:
            return ContainerResult(
                success=False,
                container_id=container_id,
                exit_code=-1,
                execution_time=time.monotonic() - start_time,
                error=f"Unexpected error: {e}",
            )

    def _stage_files(
        self,
        staging_dir: Path,
        code: str,
        files: dict[str, str] | None,
        working_dir: str,
    ) -> dict[str, dict[str, str]]:
        """Write the code and extra files to disk and build the volume map.

        Args:
            staging_dir: Host directory the files are written into
            code: Source written to ``execute.py``
            files: Optional mapping of relative file name to text content
            working_dir: Directory inside the container the files appear in

        Returns:
            Mapping of host path to a read-only bind specification.

        Raises:
            ValueError: If a file name resolves outside ``staging_dir``.
        """
        root = staging_dir.resolve()
        # The code is staged first so that an "execute.py" entry in ``files``
        # replaces it, which is the write order this class has always used.
        payload = {"execute.py": code, **(files or {})}

        volumes: dict[str, dict[str, str]] = {}
        for filename, content in payload.items():
            resolved = (root / filename).resolve()
            # Reject absolute names, "..", and anything else that would write
            # outside the staging directory on the host.
            if root not in resolved.parents:
                raise ValueError(
                    f"File name escapes the sandbox staging directory: {filename!r}"
                )
            relative = resolved.relative_to(root)
            host_path = staging_dir / relative
            # Allow nested names such as "pkg/module.py".
            host_path.parent.mkdir(parents=True, exist_ok=True)
            host_path.write_text(content, encoding="utf-8")
            volumes[str(host_path)] = {
                "bind": f"{working_dir}/{relative.as_posix()}",
                "mode": "ro",  # read-only
            }
        return volumes

    def _build_container_config(
        self, config: ContainerConfig, environment: dict[str, str] | None = None
    ) -> dict[str, Any]:
        """Build the keyword arguments passed to ``containers.run``.

        Args:
            config: Resource and isolation settings
            environment: Optional caller-supplied environment variables; the
                fixed ``PYTHONPATH`` and ``PYTHONDONTWRITEBYTECODE`` entries
                take precedence over same-named keys.

        Returns:
            Dictionary of container creation options.
        """
        return {
            "mem_limit": config.memory_limit,
            "cpu_quota": int(float(config.cpu_limit) * 100000),  # microseconds per period
            "cpu_period": 100000,  # 100ms period
            "network_disabled": config.network_disabled,
            "read_only": config.read_only_filesystem,
            "tmpfs": {"/tmp": f"size={config.tmpfs_size},noexec,nosuid,nodev"},
            "user": config.user,
            "working_dir": config.working_dir,
            # No auto-remove: the caller still needs the exited container to
            # read its status, logs and stats, and removes it explicitly.
            "labels": {self._SANDBOX_LABEL: "true"},
            "environment": {
                **(environment or {}),
                "PYTHONPATH": config.working_dir,
                "PYTHONDONTWRITEBYTECODE": "1",
            },
            # Docker's default seccomp profile is left in force; turning it
            # off would remove syscall filtering from the sandbox.
            "security_opt": ["no-new-privileges:true"],
            "cap_drop": ["ALL"],
            # NOTE(review): these were added as "minimal capabilities for
            # Python"; confirm they are really needed for a non-root user.
            "cap_add": ["CHOWN", "DAC_OVERRIDE"],
        }

    def _get_container_stats(self, container) -> dict[str, Any]:
        """Get resource usage statistics from container

        Args:
            container: Object exposing ``stats(stream=False)`` that returns a
                Docker stats dictionary

        Returns:
            Dictionary with ``cpu_percent``, ``memory_usage_bytes``,
            ``memory_limit_bytes``, ``memory_percent`` and
            ``memory_usage_mb``. All values are zero when the statistics
            cannot be read.
        """
        try:
            stats = container.stats(stream=False)

            cpu_stats = stats.get("cpu_stats") or {}
            precpu_stats = stats.get("precpu_stats") or {}
            cpu_usage = (cpu_stats.get("cpu_usage") or {}).get("total_usage", 0)
            precpu_usage = (precpu_stats.get("cpu_usage") or {}).get("total_usage", 0)
            system_delta = cpu_stats.get("system_cpu_usage", 0) - precpu_stats.get(
                "system_cpu_usage", 0
            )
            cpu_count = cpu_stats.get("online_cpus") or 1

            cpu_percent = 0.0
            if system_delta > 0:
                cpu_percent = (cpu_usage - precpu_usage) / system_delta * cpu_count * 100.0

            memory_stats = stats.get("memory_stats") or {}
            memory_usage = memory_stats.get("usage", 0)
            memory_limit = memory_stats.get("limit", 0)
            # A missing or zero limit must not wipe out the other figures.
            memory_percent = (memory_usage / memory_limit) * 100.0 if memory_limit else 0.0

            return {
                "cpu_percent": round(cpu_percent, 2),
                "memory_usage_bytes": memory_usage,
                "memory_limit_bytes": memory_limit,
                "memory_percent": round(memory_percent, 2),
                "memory_usage_mb": round(memory_usage / (1024 * 1024), 2),
            }
        except Exception:
            # Statistics are informational only; never fail a run over them.
            return {
                "cpu_percent": 0.0,
                "memory_usage_bytes": 0,
                "memory_limit_bytes": 0,
                "memory_percent": 0.0,
                "memory_usage_mb": 0.0,
            }

    def _log_container_execution(
        self, code: str, result: ContainerResult, config: ContainerConfig
    ) -> None:
        """Log container execution to audit logger

        Args:
            code: Source code that was executed
            result: Outcome of the execution
            config: Configuration the container was started with
        """
        if not self.audit_logger:
            return

        execution_data = {
            "type": "docker_container",
            "container_id": result.container_id,
            "exit_code": result.exit_code,
            "stdout": result.stdout,
            "stderr": result.stderr,
            "execution_time": result.execution_time,
            "config": {
                "image": config.image,
                "timeout": config.timeout_seconds,
                "memory_limit": config.memory_limit,
                "cpu_limit": config.cpu_limit,
                "network_disabled": config.network_disabled,
                "read_only_filesystem": config.read_only_filesystem,
            },
            "resource_usage": result.resource_usage,
        }
        # Note: execution_type parameter not available in current AuditLogger
        self.audit_logger.log_execution(code=code, execution_result=execution_data)

    def get_available_images(self) -> list[str]:
        """Get list of available Docker images

        Returns:
            The first tag of every tagged local image, or an empty list when
            Docker is unavailable or the listing fails.
        """
        if not self.is_available() or self.client is None:
            return []
        try:
            return [img.tags[0] for img in self.client.images.list() if img.tags]
        except Exception as e:
            self.logger.debug("Failed to list Docker images: %s", e)
            return []

    def pull_image(self, image_name: str) -> bool:
        """Pull Docker image

        Args:
            image_name: Name of the image to pull

        Returns:
            True when the pull succeeded, False otherwise.
        """
        if not self.is_available() or self.client is None:
            return False
        try:
            self.client.images.pull(image_name)
        except Exception as e:
            self.logger.warning("Failed to pull image %s: %s", image_name, e)
            return False
        return True

    def cleanup_containers(self) -> int:
        """Clean up any dangling containers

        Only exited containers carrying this executor's label are removed, so
        unrelated containers on the host are left alone.

        Returns:
            Number of containers that were removed.
        """
        if not self.is_available() or self.client is None:
            return 0
        try:
            containers = self.client.containers.list(
                all=True,
                filters={"status": "exited", "label": f"{self._SANDBOX_LABEL}=true"},
            )
        except Exception as e:
            self.logger.debug("Failed to list containers for cleanup: %s", e)
            return 0

        count = 0
        for container in containers:
            try:
                container.remove(force=True)
            except Exception as e:
                # Best effort: keep going with the remaining containers.
                self.logger.debug("Failed to remove container during cleanup: %s", e)
            else:
                count += 1
        return count

    def get_system_info(self) -> dict[str, Any]:
        """Get Docker system information

        Returns:
            Dictionary with ``available`` plus version and capacity figures,
            or ``{"available": False}`` (with an ``error`` entry when the
            query itself failed).
        """
        if not self.is_available() or self.client is None:
            return {"available": False}
        try:
            info = self.client.info()
            version = self.client.version()
            return {
                "available": True,
                "version": version.get("Version", "unknown"),
                "api_version": version.get("ApiVersion", "unknown"),
                "containers": info.get("Containers", 0),
                "containers_running": info.get("ContainersRunning", 0),
                "containers_paused": info.get("ContainersPaused", 0),
                "containers_stopped": info.get("ContainersStopped", 0),
                "images": info.get("Images", 0),
                "memory_total": info.get("MemTotal", 0),
                "ncpu": info.get("NCPU", 0),
            }
        except Exception as e:
            self.logger.debug("Failed to get Docker system info: %s", e)
            return {"available": False, "error": "Failed to get system info"}