Phase 5 · Evaluation and Security · 6 min read

Safe Sandboxing: Docker & API Key Security

Phase 5 of 8

When agents execute code, they need a safe environment. In this guide, you'll learn to sandbox agent execution with Docker and protect your API keys from exposure.

Coming from Software Engineering? You already know Docker — this is using it the same way CI/CD systems do: spin up an isolated container, run untrusted code inside it, capture the output, tear it down. The difference is the "untrusted code" is generated by an LLM at runtime rather than written by a developer. Your Docker, resource limiting, and security hardening skills transfer completely.


Why Sandboxing Matters

Risks of unsandboxed code execution:

  • File system access - Delete or modify important files
  • Network access - Make unauthorized requests
  • Resource exhaustion - Infinite loops, memory bombs
  • Data exfiltration - Steal sensitive information
  • Privilege escalation - Gain system access

Docker Basics for Sandboxing

Docker creates isolated containers that protect your host system:

# Install Docker (if not installed)
# macOS: brew install --cask docker   (installs Docker Desktop; the plain `docker` formula is only the CLI, with no daemon)
# Ubuntu: apt install docker.io
# Windows: Download Docker Desktop

# Verify installation
docker --version

Your First Sandbox

# script_id: day_065_docker_sandboxing_part1/basic_sandbox
import docker
import tempfile
import os

def run_code_in_sandbox(code: str, timeout: int = 30) -> dict:
    """
    Run Python code safely in a Docker container.

    The code is written to ``script.py`` inside a fresh, private temporary
    directory. Only that directory is bind-mounted (read-only) at ``/code``,
    so the container cannot see anything else from the host's temp folder.

    Args:
        code: Python code to execute
        timeout: Maximum execution time in seconds

    Returns:
        dict with stdout, stderr, and exit code. ``exit_code`` is the
        container's exit status, or -1 when the run timed out or the Docker
        API reported an error (details are then in ``stderr``).
    """

    # Initialize Docker client
    client = docker.from_env()

    # A dedicated directory (instead of a randomly named file in the shared
    # temp dir) guarantees the script really is at /code/script.py and that
    # nothing else gets mounted alongside it.
    with tempfile.TemporaryDirectory() as code_dir:
        script_path = os.path.join(code_dir, "script.py")
        # Python reads source files as UTF-8, so write UTF-8 regardless of
        # the host locale.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(code)

        container = None
        try:
            # containers.run() does not accept a timeout argument, so start
            # the container detached and enforce the limit through wait().
            container = client.containers.run(
                image="python:3.11-slim",
                command=["python", "/code/script.py"],
                volumes={code_dir: {'bind': '/code', 'mode': 'ro'}},
                working_dir="/code",
                detach=True,
                mem_limit="256m",  # Limit memory
                network_disabled=True,  # No network access
                read_only=True,  # Read-only filesystem
            )

            try:
                status = container.wait(timeout=timeout)
            except docker.errors.APIError:
                raise
            except Exception:
                # docker-py reports an exceeded wait timeout through the
                # underlying HTTP library's timeout/connection exceptions,
                # which are not imported here, hence the broad match.
                # The still-running container is force-removed in `finally`.
                return {
                    "stdout": container.logs(stdout=True, stderr=False).decode(
                        'utf-8', errors='replace'
                    ),
                    "stderr": f"Execution timed out after {timeout} seconds",
                    "exit_code": -1
                }

            return {
                "stdout": container.logs(stdout=True, stderr=False).decode(
                    'utf-8', errors='replace'
                ),
                "stderr": container.logs(stdout=False, stderr=True).decode(
                    'utf-8', errors='replace'
                ),
                "exit_code": status["StatusCode"]
            }

        except docker.errors.APIError as e:
            return {
                "stdout": "",
                "stderr": str(e),
                "exit_code": -1
            }
        finally:
            if container is not None:
                try:
                    # force=True also kills a container that is still running
                    container.remove(force=True)
                except docker.errors.APIError:
                    # Best-effort cleanup: the result (or error) computed
                    # above is more useful to the caller than this failure.
                    pass

# Example usage
# The program to run is plain text here: it is handed to the sandbox as a
# string rather than being executed by this (host) interpreter.
code = """
print("Hello from sandbox!")
print(2 + 2)
"""

# The returned dict carries 'stdout', 'stderr' and 'exit_code' keys.
result = run_code_in_sandbox(code)
print(f"Output: {result['stdout']}")
print(f"Exit code: {result['exit_code']}")

Building a Secure Sandbox Image

Create a custom Docker image with security restrictions:

# Dockerfile.sandbox
# Image for running sandboxed Python as an unprivileged user.
FROM python:3.11-slim

# Create non-root user "sandbox" (-m: with a home directory, -s: bash as login shell)
RUN useradd -m -s /bin/bash sandbox

# Install common packages while pip is still available
# (--no-cache-dir keeps pip's download cache out of the image)
RUN pip install --no-cache-dir \
    numpy \
    pandas \
    matplotlib \
    requests

# Remove the packaging tools themselves. This has to come after the install
# step above, which needs pip.
# NOTE(review): presumably so sandboxed code cannot install extra packages - confirm intent
RUN pip uninstall -y pip setuptools wheel

# Set working directory
WORKDIR /sandbox

# Switch to non-root user; the default command below runs as "sandbox"
USER sandbox

# Default command (callers may override it with an explicit command)
CMD ["python"]

Build and use the image:

docker build -t sandbox:latest -f Dockerfile.sandbox .
# script_id: day_065_docker_sandboxing_part1/secure_sandbox
def run_in_secure_sandbox(code: str, timeout: int = 30) -> dict:
    """Run code in a custom secure sandbox.

    The code is written to ``script.py`` in a private temporary directory
    that is bind-mounted read-only at ``/sandbox`` inside a container
    created from the ``sandbox:latest`` image.

    Args:
        code: Python code to execute
        timeout: Maximum time in seconds to wait for the container to exit

    Returns:
        ``{"stdout": <container logs>, "exit_code": <status code>}`` when the
        container finished, or ``{"error": <message>}`` when anything went
        wrong (including the wait timing out). The container is always
        removed before returning.
    """

    client = docker.from_env()

    # A dedicated directory guarantees the script really is at
    # /sandbox/script.py and that nothing else from the host's temp folder
    # is exposed to the container.
    with tempfile.TemporaryDirectory() as code_dir:
        script_path = os.path.join(code_dir, "script.py")
        # Python reads source files as UTF-8, so write UTF-8 regardless of
        # the host locale.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write(code)

        # Temp dirs/files are created owner-only. The container runs without
        # any capabilities (cap_drop=ALL), so its user can only read the
        # script if the permission bits allow it.
        os.chmod(code_dir, 0o755)
        os.chmod(script_path, 0o644)

        container = None
        try:
            container = client.containers.run(
                image="sandbox:latest",  # Our custom image
                command=["python", "/sandbox/script.py"],
                volumes={code_dir: {'bind': '/sandbox', 'mode': 'ro'}},
                detach=True,
                mem_limit="512m",
                memswap_limit="512m",  # No swap
                cpu_period=100000,
                cpu_quota=50000,  # 50% of one CPU
                network_disabled=True,
                read_only=True,
                security_opt=["no-new-privileges"],
                cap_drop=["ALL"],  # Drop all capabilities
            )

            # Wait for completion with timeout
            result = container.wait(timeout=timeout)
            logs = container.logs()

            return {
                "stdout": logs.decode('utf-8'),
                "exit_code": result['StatusCode']
            }

        except Exception as e:
            return {"error": str(e)}
        finally:
            if container is not None:
                try:
                    # force=True kills the container if it is still running,
                    # e.g. after a timeout; otherwise it would keep running
                    # and leak.
                    container.remove(force=True)
                except Exception:
                    # Best-effort cleanup: this function reports problems via
                    # the returned dict, so a failed removal must not replace
                    # the result computed above.
                    pass

Container Resource Limits

Prevent resource exhaustion attacks:

# script_id: day_065_docker_sandboxing_part1/resource_limits
def create_limited_container(code: str) -> dict:
    """Run ``code`` with ``python -c`` in a container under strict resource limits.

    Args:
        code: Python source passed to the interpreter via ``-c``

    Returns:
        ``{"output": <decoded container output>}`` on success, or
        ``{"error": <message>}`` when the run raises
        ``docker.errors.ContainerError``.
    """

    docker_client = docker.from_env()

    # Limits are grouped by the resource they protect and merged into a
    # single run() call below.
    memory_caps = {
        "mem_limit": "128m",       # Max 128MB RAM
        "memswap_limit": "128m",   # No swap
    }
    cpu_caps = {
        "cpu_period": 100000,
        "cpu_quota": 25000,        # 25% of one CPU
        "cpu_shares": 256,         # Low priority
    }
    process_caps = {"pids_limit": 50}  # Max 50 processes
    storage_caps = {
        "read_only": True,
        "tmpfs": {"/tmp": "size=10m"},  # 10MB temp space
    }
    hardening = {
        "network_disabled": True,
        "security_opt": ["no-new-privileges"],
        "cap_drop": ["ALL"],
    }

    try:
        raw_output = docker_client.containers.run(
            image="python:3.11-slim",
            command=["python", "-c", code],
            remove=True,  # Auto cleanup
            **memory_caps,
            **cpu_caps,
            **process_caps,
            **storage_caps,
            **hardening,
        )
    except docker.errors.ContainerError as exc:
        return {"error": str(exc)}

    return {"output": raw_output.decode('utf-8')}

Input/Output Handling

Safely pass data to and from sandboxed code:

# script_id: day_065_docker_sandboxing_part1/sandbox_io
import json
import base64

class SandboxIO:
    """Handle input/output with sandboxed code.

    Input is handed to the sandboxed program as a JSON document bound to the
    name ``INPUT_DATA``. If the program assigns a variable called ``result``,
    it is printed as JSON between two marker lines and parsed back on the
    host.
    """

    def __init__(self):
        self.client = docker.from_env()

    def run_with_data(self, code: str, input_data: dict) -> dict:
        """
        Run code with input data and capture structured output.

        Args:
            code: Python code to execute
            input_data: Data to pass to the code (must be JSON-serializable)

        Returns:
            The dict produced by ``_execute``. When the output markers are
            present in stdout, a ``"data"`` key is added holding the parsed
            ``result`` value (``None`` if it was not valid JSON).
        """

        result = self._execute(self._wrap_code(code, input_data))

        # Parse output
        stdout = result.get("stdout", "")
        if "__OUTPUT_START__" in stdout:
            result["data"] = self._parse_output(stdout)

        return result

    @staticmethod
    def _wrap_code(code: str, input_data: dict) -> str:
        """Return ``code`` wrapped with the input prologue and output epilogue."""

        # Embed the JSON document as a *string literal* and decode it inside
        # the sandbox. Pasting raw JSON into Python source breaks on
        # true/false/null, which are not Python names.
        payload = json.dumps(input_data)

        return f'''
import json
import sys

# Input data (passed from host)
INPUT_DATA = json.loads({payload!r})

# User code
{code}

# Capture output if 'result' variable exists
if 'result' in dir():
    print("__OUTPUT_START__")
    print(json.dumps(result))
    print("__OUTPUT_END__")
'''

    @staticmethod
    def _parse_output(stdout: str):
        """Return the JSON value between the output markers, or None if invalid."""

        # The wrapper prints its markers after the user code has run, so the
        # LAST start marker is the genuine one even if user code printed the
        # marker text itself.
        output_section = stdout.rpartition("__OUTPUT_START__")[2]
        output_section = output_section.partition("__OUTPUT_END__")[0].strip()
        try:
            return json.loads(output_section)
        except json.JSONDecodeError:
            return None

    def _execute(self, code: str) -> dict:
        """Execute code in container.

        Returns ``{"stdout", "exit_code"}`` on success, or
        ``{"stderr", "exit_code"}`` when the container exits non-zero.
        """

        # A dedicated directory guarantees the script really is at
        # /code/script.py and that nothing else from the host's temp folder
        # is mounted into the container.
        with tempfile.TemporaryDirectory() as code_dir:
            script_path = os.path.join(code_dir, "script.py")
            # Python reads source files as UTF-8, so write UTF-8 regardless
            # of the host locale.
            with open(script_path, "w", encoding="utf-8") as f:
                f.write(code)

            try:
                output = self.client.containers.run(
                    image="python:3.11-slim",
                    command=["python", "/code/script.py"],
                    volumes={code_dir: {'bind': '/code', 'mode': 'ro'}},
                    remove=True,
                    network_disabled=True,
                    mem_limit="128m"
                )
                return {"stdout": output.decode('utf-8'), "exit_code": 0}
            except docker.errors.ContainerError as e:
                return {"stderr": str(e), "exit_code": e.exit_status}

# Example usage
# Constructing SandboxIO connects to Docker via docker.from_env().
sandbox = SandboxIO()

# Program text sent to the sandbox. It reads the INPUT_DATA name that
# run_with_data injects and assigns `result`, which the wrapper prints back
# as JSON.
code = """
# Access input data
numbers = INPUT_DATA['numbers']

# Process
result = {
    'sum': sum(numbers),
    'average': sum(numbers) / len(numbers),
    'count': len(numbers)
}
"""

output = sandbox.run_with_data(code, {"numbers": [1, 2, 3, 4, 5]})
# 'data' is only present when the output markers were found, hence .get()
print(f"Result: {output.get('data')}")
# Result: {'sum': 15, 'average': 3.0, 'count': 5}