Files
letta-server/letta/services/tool_sandbox/modal_sandbox.py
2025-08-12 15:39:17 -07:00

421 lines
17 KiB
Python

from typing import Any, Dict, Optional
import modal
from letta.log import get_logger
from letta.otel.tracing import log_event, trace_method
from letta.schemas.agent import AgentState
from letta.schemas.enums import SandboxType
from letta.schemas.sandbox_config import SandboxConfig
from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.services.helpers.tool_parser_helper import parse_stdout_best_effort
from letta.services.tool_sandbox.base import AsyncToolSandboxBase
from letta.settings import tool_settings
from letta.types import JsonDict
from letta.utils import get_friendly_error_msg
logger = get_logger(__name__)
# class AsyncToolSandboxModalBase(AsyncToolSandboxBase):
# pass
class AsyncToolSandboxModal(AsyncToolSandboxBase):
    """Execute a Python tool remotely inside a Modal app.

    `run` generates an execution script for the tool, registers a Modal
    function that `exec`s that script, invokes it remotely and converts the
    returned payload into a `ToolExecutionResult`.
    """

    def __init__(
        self,
        tool_name: str,
        args: JsonDict,
        user,
        tool_object: Tool | None = None,
        sandbox_config: SandboxConfig | None = None,
        sandbox_env_vars: dict[str, Any] | None = None,
    ):
        """Initialise the sandbox and derive the Modal app name.

        Raises:
            ValueError: if `tool_settings.modal_token_id` or
                `tool_settings.modal_token_secret` is not set.
        """
        super().__init__(tool_name, args, user, tool_object, sandbox_config=sandbox_config, sandbox_env_vars=sandbox_env_vars)

        if not tool_settings.modal_token_id or not tool_settings.modal_token_secret:
            raise ValueError("MODAL_TOKEN_ID and MODAL_TOKEN_SECRET must be set.")

        # App name is derived from the organization and the tool name
        self._app_name = self._generate_app_name()

    def _generate_app_name(self) -> str:
        """Return the Modal app name: `<organization id>-<tool name>`."""
        return f"{self.user.organization_id}-{self.tool_name}"

    async def _fetch_or_create_modal_app(self, sbx_config: SandboxConfig, env_vars: Dict[str, str]) -> modal.App:
        """Look up the Modal app by name, or build one with the tool executor registered.

        Args:
            sbx_config: sandbox configuration providing the Modal config (timeout, pip requirements).
            env_vars: not used here; environment variables are handed to the
                remote function at call time by `run`.

        Returns:
            The looked-up app, or a new `modal.App` whose `remote_executor`
            attribute is the registered remote function.
        """
        try:
            # NOTE(review): an app returned by the lookup does not get `remote_executor`
            # attached, while `run` reads that attribute unconditionally - confirm
            # whether this path is expected to succeed for Python tools.
            return await modal.App.lookup.aio(self._app_name)
        except Exception as lookup_error:
            # `except Exception` (not a bare except) so task cancellation and
            # KeyboardInterrupt are not swallowed by the fallback.
            logger.debug(f"Modal app lookup for {self._app_name} failed ({lookup_error}); building a new app")

        app = modal.App(self._app_name)
        modal_config = sbx_config.get_modal_config()

        # Get the base image with dependencies
        image = self._get_modal_image(sbx_config)

        # Decorator for the tool, note information on running untrusted code: https://modal.com/docs/guide/restricted-access
        # The `@app.function` decorator must apply to functions in global scope, unless `serialized=True` is set.
        @app.function(image=image, timeout=modal_config.timeout, restrict_modal_access=True, max_inputs=1, serialized=True)
        def execute_tool_with_script(execution_script: str, environment_vars: dict[str, str]):
            """Execute the generated tool script and report its output.

            Returns a dict with the keys `run` consumes: "stdout", "stderr"
            and "error" (None on success, otherwise a dict with "name",
            "value" and "traceback").
            """
            # Imports live inside the function because it is serialized and shipped to Modal
            import contextlib
            import io
            import os
            import traceback

            # Note: We pass environment variables directly instead of relying on Modal secrets
            # This is more flexible and doesn't require pre-configured secrets
            for key, value in environment_vars.items():
                os.environ[key] = str(value)

            stdout_buffer = io.StringIO()
            stderr_buffer = io.StringIO()
            error = None
            exec_globals = {}
            try:
                with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(stderr_buffer):
                    # The script is generated by the sandbox from the tool source; it runs
                    # inside the restricted Modal container, not in the server process.
                    exec(execution_script, exec_globals)
            except Exception as exc:
                error = {
                    "name": type(exc).__name__,
                    "value": str(exc),
                    "traceback": traceback.format_exc(),
                }

            return {
                "stdout": stdout_buffer.getvalue(),
                "stderr": stderr_buffer.getvalue(),
                "error": error,
            }

        # Store the function reference in the app for later use
        app.remote_executor = execute_tool_with_script
        return app

    @trace_method
    async def run(
        self,
        agent_state: Optional[AgentState] = None,
        additional_env_vars: Optional[Dict] = None,
    ) -> ToolExecutionResult:
        """Run the tool in Modal and return its result.

        Args:
            agent_state: optional agent state forwarded to env-var gathering and script generation.
            additional_env_vars: extra environment variables merged into the sandbox environment.

        Returns:
            A `ToolExecutionResult` with status "success" or "error". Failures of
            the Modal call itself are caught and reported as an error result
            rather than raised.
        """
        if self.provided_sandbox_config:
            sbx_config = self.provided_sandbox_config
        else:
            sbx_config = await self.sandbox_config_manager.get_or_create_default_sandbox_config_async(
                sandbox_type=SandboxType.MODAL, actor=self.user
            )

        envs = await self._gather_env_vars(agent_state, additional_env_vars or {}, sbx_config.id, is_local=False)

        # Generate execution script (this includes the tool source code and execution logic)
        execution_script = await self.generate_execution_script(agent_state=agent_state)

        try:
            log_event(
                "modal_execution_started",
                {"tool": self.tool_name, "app_name": self._app_name, "env_vars": list(envs)},
            )

            # Create Modal app with the tool function registered
            app = await self._fetch_or_create_modal_app(sbx_config, envs)

            # Use the async variants so the remote call does not block the event loop
            async with app.run():
                result = await app.remote_executor.remote.aio(execution_script, envs)

            error = result["error"]
            # Kept separate from the `agent_state` parameter so the input is not overwritten
            updated_agent_state = None

            if error:
                # Tool errors are expected behavior - tools can raise exceptions as part of their normal operation
                # Only log at debug level to avoid triggering Sentry alerts for expected errors
                logger.debug(f"Tool {self.tool_name} raised a {error['name']}: {error['value']}")
                logger.debug(f"Traceback from Modal sandbox: \n{error['traceback']}")
                func_return = get_friendly_error_msg(
                    function_name=self.tool_name, exception_name=error["name"], exception_message=error["value"]
                )
                log_event(
                    "modal_execution_failed",
                    {
                        "tool": self.tool_name,
                        "app_name": self._app_name,
                        "error_type": error["name"],
                        "error_message": error["value"],
                        "func_return": func_return,
                    },
                )
                # Parse the result from stdout even if there was an error
                # (in case the function returned something before failing)
                try:
                    func_return_parsed, agent_state_parsed = parse_stdout_best_effort(result["stdout"])
                    if func_return_parsed is not None:
                        func_return = func_return_parsed
                        updated_agent_state = agent_state_parsed
                except Exception as parse_error:
                    # Best effort only: if parsing fails, keep the friendly error message
                    logger.debug(f"Could not parse stdout of failed tool {self.tool_name}: {parse_error}")
            else:
                func_return, updated_agent_state = parse_stdout_best_effort(result["stdout"])
                log_event(
                    "modal_execution_succeeded",
                    {
                        "tool": self.tool_name,
                        "app_name": self._app_name,
                        "func_return": func_return,
                    },
                )

            return ToolExecutionResult(
                func_return=func_return,
                agent_state=updated_agent_state,
                stdout=[result["stdout"]] if result["stdout"] else [],
                stderr=[result["stderr"]] if result["stderr"] else [],
                status="error" if error else "success",
                sandbox_config_fingerprint=sbx_config.fingerprint(),
            )
        except Exception as e:
            logger.error(f"Modal execution for tool {self.tool_name} encountered an error: {e}")
            func_return = get_friendly_error_msg(
                function_name=self.tool_name,
                exception_name=type(e).__name__,
                exception_message=str(e),
            )
            log_event(
                "modal_execution_error",
                {
                    "tool": self.tool_name,
                    "app_name": self._app_name,
                    "error": str(e),
                    "func_return": func_return,
                },
            )
            return ToolExecutionResult(
                func_return=func_return,
                agent_state=None,
                stdout=[],
                stderr=[str(e)],
                status="error",
                sandbox_config_fingerprint=sbx_config.fingerprint(),
            )

    def _get_modal_image(self, sbx_config: SandboxConfig) -> modal.Image:
        """Get Modal image with required public python dependencies.

        The image is `debian_slim` with Python 3.12 and always installs
        `letta`, followed by the sandbox-level and then the tool-level pip
        requirements.

        Caching and rebuilding is handled in a cascading manner
        https://modal.com/docs/guide/images#image-caching-and-rebuilds
        """
        image = modal.Image.debian_slim(python_version="3.12")
        requirements = ["letta"]

        # Add sandbox-specific pip requirements
        modal_config = sbx_config.get_modal_config()
        if modal_config.pip_requirements:
            requirements.extend(str(req) for req in modal_config.pip_requirements)

        # Add tool-specific pip requirements
        if self.tool and self.tool.pip_requirements:
            requirements.extend(str(req) for req in self.tool.pip_requirements)

        # The list always contains at least "letta", so the install is unconditional
        return image.pip_install(*requirements)

    def use_top_level_await(self) -> bool:
        """
        Modal functions don't have an active event loop by default,
        so we should use asyncio.run() like local execution.
        """
        return False
class TypescriptToolSandboxModal(AsyncToolSandboxModal):
    """Modal sandbox implementation for TypeScript tools.

    The tool source is baked into a Node.js image, the `NodeShimServer`
    class is deployed to Modal, and `run` calls its `remote_executor`
    method with the JSON-encoded tool arguments.
    """

    @trace_method
    async def run(
        self,
        agent_state: Optional[AgentState] = None,
        additional_env_vars: Optional[Dict] = None,
    ) -> ToolExecutionResult:
        """Run TypeScript tool in Modal sandbox using Node.js server.

        Args:
            agent_state: optional agent state, used for env-var gathering only;
                the returned result always carries `agent_state=None`.
            additional_env_vars: extra environment variables merged into the sandbox environment.

        Returns:
            A `ToolExecutionResult`. A dict result containing an "error" key
            is reported with status "error"; any other result is stringified
            and reported as "success". Exceptions from the Modal call are
            caught and reported as an error result rather than raised.
        """
        if self.provided_sandbox_config:
            sbx_config = self.provided_sandbox_config
        else:
            sbx_config = await self.sandbox_config_manager.get_or_create_default_sandbox_config_async(
                sandbox_type=SandboxType.MODAL, actor=self.user
            )

        # NOTE(review): `envs` is only handed to `_fetch_or_create_modal_app`, which does not
        # use it - the variables are never forwarded to the Node.js server. Confirm intent.
        envs = await self._gather_env_vars(agent_state, additional_env_vars or {}, sbx_config.id, is_local=False)

        # Generate execution script (JSON args for TypeScript)
        json_args = await self.generate_execution_script(agent_state=agent_state)

        try:
            log_event(
                "modal_typescript_execution_started",
                {"tool": self.tool_name, "app_name": self._app_name, "args": json_args},
            )

            # Create Modal app with the TypeScript Node.js server
            app = await self._fetch_or_create_modal_app(sbx_config, envs)

            # Execute the TypeScript tool remotely via the Node.js server.
            # Async variants are used so the remote call does not block the event loop.
            async with app.run():
                # Get the NodeShimServer class from Modal
                node_server = modal.Cls.from_name(self._app_name, "NodeShimServer")
                # Call the remote_executor method with the JSON arguments
                # The server will parse the JSON and call the TypeScript function
                result = await node_server().remote_executor.remote.aio(json_args)

            # Process the TypeScript execution result
            if isinstance(result, dict) and "error" in result:
                # Handle errors from TypeScript execution
                error_text = str(result["error"])
                logger.debug(f"TypeScript tool {self.tool_name} raised an error: {result['error']}")
                func_return = get_friendly_error_msg(
                    function_name=self.tool_name,
                    exception_name="TypeScriptError",
                    exception_message=error_text,
                )
                log_event(
                    "modal_typescript_execution_failed",
                    {
                        "tool": self.tool_name,
                        "app_name": self._app_name,
                        "error": result["error"],
                        "func_return": func_return,
                    },
                )
                return ToolExecutionResult(
                    func_return=func_return,
                    agent_state=None,  # TypeScript tools don't support agent_state yet
                    stdout=[],
                    stderr=[error_text],
                    status="error",
                    sandbox_config_fingerprint=sbx_config.fingerprint(),
                )

            # Success case - TypeScript function returned a result
            func_return = str(result) if result is not None else ""
            log_event(
                "modal_typescript_execution_succeeded",
                {
                    "tool": self.tool_name,
                    "app_name": self._app_name,
                    "func_return": func_return,
                },
            )
            return ToolExecutionResult(
                func_return=func_return,
                agent_state=None,  # TypeScript tools don't support agent_state yet
                stdout=[],
                stderr=[],
                status="success",
                sandbox_config_fingerprint=sbx_config.fingerprint(),
            )
        except Exception as e:
            logger.error(f"Modal TypeScript execution for tool {self.tool_name} encountered an error: {e}")
            func_return = get_friendly_error_msg(
                function_name=self.tool_name,
                exception_name=type(e).__name__,
                exception_message=str(e),
            )
            log_event(
                "modal_typescript_execution_error",
                {
                    "tool": self.tool_name,
                    "app_name": self._app_name,
                    "error": str(e),
                    "func_return": func_return,
                },
            )
            return ToolExecutionResult(
                func_return=func_return,
                agent_state=None,
                stdout=[],
                stderr=[str(e)],
                status="error",
                sandbox_config_fingerprint=sbx_config.fingerprint(),
            )

    async def _fetch_or_create_modal_app(self, sbx_config: SandboxConfig, env_vars: Dict[str, str]) -> modal.App:
        """Create or fetch a Modal app with TypeScript execution capabilities.

        Args:
            sbx_config: sandbox configuration providing the Modal config (timeout, npm requirements).
            env_vars: not used by this method.

        Returns:
            The app found by name, or a newly built and deployed app with
            `NodeShimServer` registered.
        """
        try:
            return await modal.App.lookup.aio(self._app_name)
        except Exception as lookup_error:
            # `except Exception` (not a bare except) so task cancellation and
            # KeyboardInterrupt are not swallowed by the fallback.
            logger.debug(f"Modal app lookup for {self._app_name} failed ({lookup_error}); deploying a new app")

        app = modal.App(self._app_name)
        modal_config = sbx_config.get_modal_config()

        # Get the base image with dependencies
        image = self._get_modal_image(sbx_config)

        # Import the NodeShimServer that will handle TypeScript execution
        from sandbox.node_server import NodeShimServer

        # Fall back to 60 when no Modal config is available
        timeout = modal_config.timeout if modal_config else 60

        # Register the NodeShimServer class with Modal
        # This creates a serverless function that can handle concurrent requests
        concurrent_server = modal.concurrent(max_inputs=100, target_inputs=50)(NodeShimServer)
        app.cls(image=image, restrict_modal_access=True, include_source=False, timeout=timeout)(concurrent_server)

        # Deploy the app to Modal
        with modal.enable_output():
            await app.deploy.aio()

        return app

    async def generate_execution_script(self, agent_state: Optional[AgentState], wrap_print_with_markers: bool = False) -> str:
        """Generate the execution script for TypeScript tools.

        For TypeScript tools, this returns the JSON-encoded arguments that will be passed
        to the Node.js server via the remote_executor method. `agent_state` and
        `wrap_print_with_markers` are accepted but not used.
        """
        import json

        # The Node.js server expects JSON-encoded arguments
        return json.dumps(self.args)

    def _get_modal_image(self, sbx_config: SandboxConfig) -> modal.Image:
        """Build a Modal image with Node.js, TypeScript, and the user's tool function.

        Raises:
            ValueError: if the `sandbox` module cannot be located, or the tool
                has no source code.
        """
        import base64
        import importlib.util
        import shlex
        from pathlib import Path

        # Find the sandbox module location
        spec = importlib.util.find_spec("sandbox")
        if not spec or not spec.origin:
            raise ValueError("Could not find sandbox module")
        server_dir = Path(spec.origin).parent

        # Get the TypeScript function source code
        if not self.tool or not self.tool.source_code:
            raise ValueError("TypeScript tool must have source code")
        ts_function = self.tool.source_code

        # Get npm dependencies from sandbox config and tool
        modal_config = sbx_config.get_modal_config()
        npm_dependencies = []
        if modal_config and modal_config.npm_requirements:
            npm_dependencies.extend(modal_config.npm_requirements)
        if self.tool.npm_requirements:
            npm_dependencies.extend(self.tool.npm_requirements)

        # De-duplicate while keeping first-seen order, so the generated command is the
        # same on every run (set ordering of strings varies between processes).
        unique_deps = list(dict.fromkeys(str(dep) for dep in npm_dependencies))

        # Build npm install command for user dependencies; each spec is shell-quoted
        user_dependencies_cmd = ""
        if unique_deps:
            user_dependencies_cmd = " && npm install " + " ".join(shlex.quote(dep) for dep in unique_deps)

        # Ship the source base64-encoded: the encoded text has no quotes, backslashes or
        # newlines, so it needs no shell escaping. The trailing newline mirrors what
        # `echo` appended to the file before.
        encoded_ts_function = base64.b64encode((ts_function + "\n").encode("utf-8")).decode("ascii")

        # Build the Docker image with Node.js and TypeScript
        image = (
            modal.Image.from_registry("node:22-slim", add_python="3.12")
            .add_local_dir(server_dir, "/root/sandbox", ignore=["node_modules", "build"], copy=True)
            .run_commands(
                # Install dependencies and build the TypeScript server
                f"cd /root/sandbox/resources/server && npm install{user_dependencies_cmd}",
                # Write the user's TypeScript function to a file
                f"echo {encoded_ts_function} | base64 -d > /root/sandbox/user-function.ts",
            )
        )
        return image
# TODO: probably need to use parse_stdout_best_effort here as well (to be confirmed)