Initial commit: Clean slate for Mai project
This commit is contained in:
341
tests/test_docker_integration.py
Normal file
341
tests/test_docker_integration.py
Normal file
@@ -0,0 +1,341 @@
|
||||
"""
|
||||
Integration test for complete Docker sandbox execution
|
||||
|
||||
Tests the full integration of the Docker executor with the sandbox manager,
risk analysis, resource enforcement, and audit logging. Calls into the Docker
executor are mocked in every test.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
import tempfile
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, Mock
|
||||
|
||||
from src.mai.sandbox.manager import SandboxManager, ExecutionRequest
|
||||
from src.mai.sandbox.audit_logger import AuditLogger
|
||||
|
||||
|
||||
@pytest.mark.integration
class TestDockerSandboxIntegration:
    """Exercise SandboxManager's Docker execution path from request to audit log.

    Every test patches methods on ``sandbox_manager.docker_executor``, so the
    assertions describe how the manager reacts to canned executor responses.
    """

    @pytest.fixture
    def temp_log_dir(self):
        """Yield a temporary directory path that is removed after the test."""
        with tempfile.TemporaryDirectory() as log_dir:
            yield log_dir

    @pytest.fixture
    def sandbox_manager(self, temp_log_dir):
        """Return a SandboxManager constructed with the temporary log directory."""
        manager = SandboxManager(log_dir=temp_log_dir)
        return manager

    def test_full_docker_execution_workflow(self, sandbox_manager):
        """Check result fields, executor call arguments and the audit entry."""
        from src.mai.sandbox.docker_executor import ContainerResult

        source = "result = 6 * 7\nprint(result)"
        canned_container = ContainerResult(
            success=True,
            container_id="integration-test-container",
            exit_code=0,
            stdout="42\n",
            stderr="",
            execution_time=2.3,
            resource_usage={
                "cpu_percent": 15.2,
                "memory_usage_mb": 28.5,
                "memory_percent": 5.5,
            },
        )
        canned_response = {
            "success": True,
            "output": "42\n",
            "container_result": canned_container,
        }

        executor = sandbox_manager.docker_executor
        docker_up = patch.object(executor, "is_available", return_value=True)
        docker_run = patch.object(executor, "execute_code", return_value=canned_response)

        with docker_up, docker_run as mock_docker:
            request = ExecutionRequest(
                code=source,
                use_docker=True,
                docker_image="python:3.10-slim",
                timeout_seconds=30,
                cpu_limit_percent=50.0,
                memory_limit_percent=40.0,
                network_allowed=False,
                filesystem_restricted=True,
            )
            result = sandbox_manager.execute_code(request)

            # What the caller gets back.
            assert result.success is True
            assert result.execution_method == "docker"
            assert result.output == "42\n"

            container = result.container_result
            assert container is not None
            assert container.container_id == "integration-test-container"
            assert container.exit_code == 0
            assert container.execution_time == 2.3
            assert container.resource_usage["cpu_percent"] == 15.2
            assert container.resource_usage["memory_usage_mb"] == 28.5

            # What the manager handed to the executor.
            mock_docker.assert_called_once()
            invocation = mock_docker.call_args
            assert invocation.args[0] == source

            config = invocation.kwargs["config"]
            assert config.image == "python:3.10-slim"
            assert config.timeout_seconds == 30
            # presumably 40% of a 128m baseline (51.2 -> "51m"); confirm in manager
            assert config.memory_limit == "51m"
            assert config.cpu_limit == "0.5"  # from cpu_limit_percent=50.0
            assert config.network_disabled is True
            assert config.read_only_filesystem is True

            # What ended up in the audit trail.
            assert result.audit_entry_id is not None

            history = sandbox_manager.get_execution_history(limit=1)
            assert len(history) == 1

            entry = history[0]
            assert entry["code"] == source
            assert entry["execution_result"]["success"] is True
            # NOTE(review): this is a membership test on execution_result (a key
            # check if it is a dict), whereas
            # test_audit_logging_docker_execution_details expects
            # "docker_container" as the value of "type" -- confirm both match
            # the audit log schema.
            assert "docker_container" in entry["execution_result"]

    def test_docker_execution_with_additional_files(self, sandbox_manager):
        """Check that ``additional_files`` reaches the executor as ``files``."""
        from src.mai.sandbox.docker_executor import ContainerResult

        canned_response = {
            "success": True,
            "output": "Hello, Alice!\n",
            "container_result": ContainerResult(
                success=True,
                container_id="files-test-container",
                exit_code=0,
                stdout="Hello, Alice!\n",
            ),
        }

        executor = sandbox_manager.docker_executor
        docker_up = patch.object(executor, "is_available", return_value=True)
        docker_run = patch.object(executor, "execute_code", return_value=canned_response)

        with docker_up, docker_run as mock_docker:
            request = ExecutionRequest(
                code=(
                    "with open('template.txt', 'r') as f: template = f.read()\n"
                    "print(template.replace('{name}', 'Alice'))"
                ),
                use_docker=True,
                additional_files={"template.txt": "Hello, {name}!"},
            )
            result = sandbox_manager.execute_code(request)

            assert result.success is True
            assert result.execution_method == "docker"

            # Compared against a fresh literal rather than the dict passed in.
            forwarded = mock_docker.call_args.kwargs
            assert "files" in forwarded
            assert forwarded["files"] == {"template.txt": "Hello, {name}!"}

    def test_docker_execution_blocked_by_risk_analysis(self, sandbox_manager):
        """Check that high-risk code is rejected without calling the executor."""
        executor = sandbox_manager.docker_executor
        docker_up = patch.object(executor, "is_available", return_value=True)
        docker_run = patch.object(executor, "execute_code")

        with docker_up, docker_run as mock_docker:
            # No risk mocking here: the manager's own analysis must flag this.
            request = ExecutionRequest(
                code="import subprocess; subprocess.run(['rm', '-rf', '/'], shell=True)",
                use_docker=True,
            )
            result = sandbox_manager.execute_code(request)

            assert result.success is False
            assert "blocked" in result.error.lower()
            assert result.risk_assessment.score >= 70
            # presumably the value assigned before the Docker path is chosen
            assert result.execution_method == "local"

            mock_docker.assert_not_called()

            # A blocked request is still expected to produce an audit entry.
            assert result.audit_entry_id is not None

    def test_docker_execution_fallback_to_local(self, sandbox_manager):
        """Check that the local sandbox is used when Docker is unavailable."""
        from src.mai.sandbox.resource_enforcer import ResourceUsage

        canned_usage = ResourceUsage(
            cpu_percent=35.0,
            memory_percent=25.0,
            memory_used_gb=0.4,
            elapsed_seconds=1.8,
            approaching_limits=False,
        )

        docker_down = patch.object(
            sandbox_manager.docker_executor, "is_available", return_value=False
        )
        local_run = patch.object(
            sandbox_manager,
            "_execute_in_sandbox",
            return_value={"success": True, "output": "Local fallback result"},
        )
        monitor_stop = patch.object(
            sandbox_manager.resource_enforcer, "stop_monitoring", return_value=canned_usage
        )

        with docker_down, local_run as mock_local, monitor_stop:
            # Docker is requested even though it is reported as unavailable.
            request = ExecutionRequest(code="print('fallback test')", use_docker=True)
            result = sandbox_manager.execute_code(request)

            assert result.success is True
            assert result.execution_method == "local"
            assert result.output == "Local fallback result"
            assert result.container_result is None
            assert result.resource_usage is not None
            assert result.resource_usage.cpu_percent == 35.0

            mock_local.assert_called_once()

    def test_audit_logging_docker_execution_details(self, sandbox_manager):
        """Check the Docker-specific fields recorded in the audit log entry."""
        from src.mai.sandbox.docker_executor import ContainerResult

        canned_response = {
            "success": True,
            "output": "Calculation complete: 144\n",
            "container_result": ContainerResult(
                success=True,
                container_id="audit-test-container",
                exit_code=0,
                stdout="Calculation complete: 144\n",
                stderr="",
                execution_time=3.7,
                resource_usage={
                    "cpu_percent": 22.8,
                    "memory_usage_mb": 45.2,
                    "memory_percent": 8.9,
                    "memory_usage_bytes": 47395648,
                    "memory_limit_bytes": 536870912,
                },
            ),
        }

        executor = sandbox_manager.docker_executor
        docker_up = patch.object(executor, "is_available", return_value=True)
        docker_run = patch.object(executor, "execute_code", return_value=canned_response)

        with docker_up, docker_run:
            request = ExecutionRequest(
                code="result = 12 * 12\nprint(f'Calculation complete: {result}')",
                use_docker=True,
                docker_image="python:3.9-alpine",
                timeout_seconds=45,
            )
            # The return value is not inspected; this test reads the audit log.
            sandbox_manager.execute_code(request)

            history = sandbox_manager.get_execution_history(limit=1)
            assert len(history) == 1

            recorded = history[0]["execution_result"]

            # Container identity and output.
            assert recorded["type"] == "docker_container"
            assert recorded["container_id"] == "audit-test-container"
            assert recorded["exit_code"] == 0
            assert recorded["stdout"] == "Calculation complete: 144\n"

            # Container configuration as logged.
            logged_config = recorded["config"]
            assert logged_config["image"] == "python:3.9-alpine"
            assert logged_config["timeout"] == 45
            assert logged_config["network_disabled"] is True
            assert logged_config["read_only_filesystem"] is True

            # Resource figures copied from the container result.
            logged_usage = recorded["resource_usage"]
            assert logged_usage["cpu_percent"] == 22.8
            assert logged_usage["memory_usage_mb"] == 45.2
            assert logged_usage["memory_percent"] == 8.9

    def test_system_status_includes_docker_info(self, sandbox_manager):
        """Check that get_system_status() reports Docker availability and info."""
        canned_info = {
            "available": True,
            "version": "20.10.12",
            "api_version": "1.41",
            "containers": 5,
            "containers_running": 2,
            "images": 8,
            "ncpu": 4,
            "memory_total": 8589934592,
        }

        executor = sandbox_manager.docker_executor
        docker_up = patch.object(executor, "is_available", return_value=True)
        docker_info = patch.object(executor, "get_system_info", return_value=canned_info)

        with docker_up, docker_info:
            status = sandbox_manager.get_system_status()

            assert "docker_available" in status
            assert "docker_info" in status
            assert status["docker_available"] is True

            reported = status["docker_info"]
            assert reported["available"] is True
            assert reported["version"] == "20.10.12"
            assert reported["containers"] == 5
            assert reported["images"] == 8

    def test_docker_status_management(self, sandbox_manager):
        """Check the status, image-pull and container-cleanup manager methods."""
        executor = sandbox_manager.docker_executor
        docker_up = patch.object(executor, "is_available", return_value=True)
        list_images = patch.object(
            executor,
            "get_available_images",
            return_value=["python:3.10-slim", "python:3.9-alpine"],
        )
        pull = patch.object(executor, "pull_image", return_value=True)
        cleanup = patch.object(executor, "cleanup_containers", return_value=3)

        with docker_up, list_images, pull as mock_pull, cleanup as mock_cleanup:
            # Status lookup.
            status = sandbox_manager.get_docker_status()
            assert status["available"] is True
            assert "python:3.10-slim" in status["images"]
            assert "python:3.9-alpine" in status["images"]

            # Image pull.
            pulled = sandbox_manager.pull_docker_image("node:16-alpine")
            assert pulled is True
            mock_pull.assert_called_once_with("node:16-alpine")

            # Container cleanup.
            removed = sandbox_manager.cleanup_docker_containers()
            assert removed == 3
            mock_cleanup.assert_called_once()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # pytest.main() returns the run's exit code instead of exiting; raise it
    # so that running this file directly fails when any test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Reference in New Issue
Block a user