feat: route all sandbox errors to stderr (#2222)
Co-authored-by: Caren Thomas <caren@caren-mac.local>
This commit is contained in:
@@ -1957,6 +1957,21 @@ class SyncServer(Server):
|
||||
if sandbox_run_result is None:
|
||||
raise ValueError(f"Tool with id {tool.id} returned execution with None")
|
||||
function_response = str(sandbox_run_result.func_return)
|
||||
stdout = [s for s in sandbox_run_result.stdout if s.strip()]
|
||||
stderr = [s for s in sandbox_run_result.stderr if s.strip()]
|
||||
|
||||
# expected error
|
||||
if stderr:
|
||||
error_msg = self.get_error_msg_for_func_return(tool.name, stderr[-1])
|
||||
return FunctionReturn(
|
||||
id="null",
|
||||
function_call_id="null",
|
||||
date=get_utc_time(),
|
||||
status="error",
|
||||
function_return=error_msg,
|
||||
stdout=stdout,
|
||||
stderr=stderr,
|
||||
)
|
||||
|
||||
return FunctionReturn(
|
||||
id="null",
|
||||
@@ -1964,17 +1979,13 @@ class SyncServer(Server):
|
||||
date=get_utc_time(),
|
||||
status="success",
|
||||
function_return=function_response,
|
||||
stdout=sandbox_run_result.stdout,
|
||||
stderr=sandbox_run_result.stderr,
|
||||
stdout=stdout,
|
||||
stderr=stderr,
|
||||
)
|
||||
|
||||
# unexpected error TODO(@cthomas): consolidate error handling
|
||||
except Exception as e:
|
||||
# same as agent.py
|
||||
from letta.constants import MAX_ERROR_MESSAGE_CHAR_LIMIT
|
||||
|
||||
error_msg = f"Error executing tool {tool.name}: {e}"
|
||||
if len(error_msg) > MAX_ERROR_MESSAGE_CHAR_LIMIT:
|
||||
error_msg = error_msg[:MAX_ERROR_MESSAGE_CHAR_LIMIT]
|
||||
|
||||
error_msg = self.get_error_msg_for_func_return(tool.name, e)
|
||||
return FunctionReturn(
|
||||
id="null",
|
||||
function_call_id="null",
|
||||
@@ -1985,6 +1996,17 @@ class SyncServer(Server):
|
||||
stderr=[traceback.format_exc()],
|
||||
)
|
||||
|
||||
|
||||
def get_error_msg_for_func_return(self, tool_name, exception_message):
    """Build the error text reported as a failed tool's function return.

    Args:
        tool_name: Name of the tool whose execution failed.
        exception_message: The error detail to embed; it is interpolated
            with an f-string, so an exception object works as well as a str.

    Returns:
        "Error executing tool <tool_name>: <exception_message>", cut down to
        at most MAX_ERROR_MESSAGE_CHAR_LIMIT characters.
    """
    # same as agent.py
    from letta.constants import MAX_ERROR_MESSAGE_CHAR_LIMIT as char_limit

    message = f"Error executing tool {tool_name}: {exception_message}"
    # Slicing leaves a message that is already within the limit unchanged,
    # so no explicit length check is needed.
    return message[:char_limit]
|
||||
|
||||
|
||||
# Composio wrappers
|
||||
def get_composio_client(self, api_key: Optional[str] = None):
|
||||
if api_key:
|
||||
|
||||
@@ -7,6 +7,7 @@ import runpy
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
import uuid
|
||||
import venv
|
||||
from typing import Any, Dict, Optional, TextIO
|
||||
@@ -174,41 +175,16 @@ class ToolExecutionSandbox:
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
|
||||
# Handle error with optimistic error parsing from the string
|
||||
# This is very brittle, so we fall back to a RuntimeError if parsing fails
|
||||
if result.returncode != 0:
|
||||
# Log the error
|
||||
logger.error(f"Sandbox execution error:\n{result.stderr}")
|
||||
|
||||
# Parse and raise the actual error from stderr
|
||||
tb_lines = result.stderr.strip().splitlines()
|
||||
exception_line = tb_lines[-1] # The last line contains the exception
|
||||
|
||||
try:
|
||||
# Split exception type and message
|
||||
exception_type, exception_message = exception_line.split(": ", 1)
|
||||
exception_type = exception_type.strip()
|
||||
exception_message = exception_message.strip()
|
||||
|
||||
# Dynamically raise the exception
|
||||
exception_class = eval(exception_type) # Look up the exception type
|
||||
|
||||
except Exception:
|
||||
# Fallback to RuntimeError if parsing fails
|
||||
raise RuntimeError(result.stderr)
|
||||
|
||||
raise exception_class(exception_message)
|
||||
|
||||
func_result, stdout = self.parse_out_function_results_markers(result.stdout)
|
||||
func_return, agent_state = self.parse_best_effort(func_result)
|
||||
return SandboxRunResult(
|
||||
func_return=func_return,
|
||||
func_return=func_return,
|
||||
agent_state=agent_state,
|
||||
stdout=[stdout],
|
||||
stderr=[result.stderr],
|
||||
sandbox_config_fingerprint=sbx_config.fingerprint(),
|
||||
)
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
raise TimeoutError(f"Executing tool {self.tool_name} has timed out.")
|
||||
except subprocess.CalledProcessError as e:
|
||||
@@ -217,39 +193,49 @@ class ToolExecutionSandbox:
|
||||
except Exception as e:
|
||||
logger.error(f"Executing tool {self.tool_name} has an unexpected error: {e}")
|
||||
raise e
|
||||
|
||||
|
||||
def run_local_dir_sandbox_runpy(
|
||||
self, sbx_config: SandboxConfig, env_vars: Dict[str, str], temp_file_path: str, old_stdout: TextIO, old_stderr: TextIO
|
||||
) -> SandboxRunResult:
|
||||
func_return, agent_state, error_msg = None, None, None
|
||||
|
||||
# Redirect stdout and stderr to capture script output
|
||||
captured_stdout = io.StringIO()
|
||||
captured_stderr = io.StringIO()
|
||||
captured_stdout, captured_stderr = io.StringIO(), io.StringIO()
|
||||
sys.stdout = captured_stdout
|
||||
sys.stderr = captured_stderr
|
||||
|
||||
# Execute the temp file
|
||||
with self.temporary_env_vars(env_vars):
|
||||
result = runpy.run_path(temp_file_path, init_globals=env_vars)
|
||||
try:
|
||||
# Execute the temp file
|
||||
with self.temporary_env_vars(env_vars):
|
||||
result = runpy.run_path(temp_file_path, init_globals=env_vars)
|
||||
|
||||
# Fetch the result
|
||||
func_result = result.get(self.LOCAL_SANDBOX_RESULT_VAR_NAME)
|
||||
func_return, agent_state = self.parse_best_effort(func_result)
|
||||
# Fetch the result
|
||||
func_result = result.get(self.LOCAL_SANDBOX_RESULT_VAR_NAME)
|
||||
func_return, agent_state = self.parse_best_effort(func_result)
|
||||
|
||||
except Exception as e:
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
error_msg = f"{type(e).__name__}: {str(e)}"
|
||||
|
||||
# Restore stdout and stderr and collect captured output
|
||||
sys.stdout = old_stdout
|
||||
sys.stderr = old_stderr
|
||||
stdout_output = captured_stdout.getvalue()
|
||||
stderr_output = captured_stderr.getvalue()
|
||||
stdout_output = [captured_stdout.getvalue()]
|
||||
stderr_output = [captured_stderr.getvalue()]
|
||||
stderr_output.append(error_msg if error_msg else '')
|
||||
|
||||
return SandboxRunResult(
|
||||
func_return=func_return,
|
||||
agent_state=agent_state,
|
||||
stdout=[stdout_output],
|
||||
stderr=[stderr_output],
|
||||
stdout=stdout_output,
|
||||
stderr=stderr_output,
|
||||
sandbox_config_fingerprint=sbx_config.fingerprint(),
|
||||
)
|
||||
|
||||
def parse_out_function_results_markers(self, text: str):
|
||||
if self.LOCAL_SANDBOX_RESULT_START_MARKER not in text:
|
||||
return '', text
|
||||
marker_len = len(self.LOCAL_SANDBOX_RESULT_START_MARKER)
|
||||
start_index = text.index(self.LOCAL_SANDBOX_RESULT_START_MARKER) + marker_len
|
||||
end_index = text.index(self.LOCAL_SANDBOX_RESULT_END_MARKER)
|
||||
@@ -294,21 +280,22 @@ class ToolExecutionSandbox:
|
||||
env_vars = self.sandbox_config_manager.get_sandbox_env_vars_as_dict(sandbox_config_id=sbx_config.id, actor=self.user, limit=100)
|
||||
code = self.generate_execution_script(agent_state=agent_state)
|
||||
execution = sbx.run_code(code, envs=env_vars)
|
||||
func_return, agent_state = None, None
|
||||
if execution.error is not None:
|
||||
logger.error(f"Executing tool {self.tool_name} failed with {execution.error}")
|
||||
# Raise a concise exception as this gets returned to the LLM
|
||||
raise self.parse_exception_from_e2b_execution(execution)
|
||||
execution.logs.stderr.append(execution.error.traceback)
|
||||
execution.logs.stderr.append(f"{execution.error.name}: {execution.error.value}")
|
||||
elif len(execution.results) == 0:
|
||||
return None
|
||||
else:
|
||||
func_return, agent_state = self.parse_best_effort(execution.results[0].text)
|
||||
return SandboxRunResult(
|
||||
func_return=func_return,
|
||||
agent_state=agent_state,
|
||||
stdout=execution.logs.stdout,
|
||||
stderr=execution.logs.stderr,
|
||||
sandbox_config_fingerprint=sbx_config.fingerprint(),
|
||||
)
|
||||
return SandboxRunResult(
|
||||
func_return=func_return,
|
||||
agent_state=agent_state,
|
||||
stdout=execution.logs.stdout,
|
||||
stderr=execution.logs.stderr,
|
||||
sandbox_config_fingerprint=sbx_config.fingerprint(),
|
||||
)
|
||||
|
||||
def parse_exception_from_e2b_execution(self, e2b_execution: "Execution") -> Exception:
|
||||
builtins_dict = __builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)
|
||||
@@ -356,6 +343,8 @@ class ToolExecutionSandbox:
|
||||
# general utility functions
|
||||
|
||||
def parse_best_effort(self, text: str) -> Any:
|
||||
if not text:
|
||||
return None, None
|
||||
result = pickle.loads(base64.b64decode(text))
|
||||
agent_state = None
|
||||
if not result["agent_state"] is None:
|
||||
|
||||
@@ -177,6 +177,7 @@ def always_err_tool(test_user):
|
||||
str: not important
|
||||
"""
|
||||
# Raise an unusual error so we know it's from this function
|
||||
print("Going to error now")
|
||||
raise ZeroDivisionError("This is an intentionally weird division!")
|
||||
|
||||
tool = create_tool_from_func(error)
|
||||
@@ -314,15 +315,16 @@ def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_re
|
||||
assert result.func_return is None
|
||||
|
||||
|
||||
@pytest.mark.e2b_sandbox
|
||||
@pytest.mark.local_sandbox
|
||||
def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
|
||||
nonexistent_name = "Alexander Wang"
|
||||
args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
|
||||
sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
|
||||
|
||||
# run the sandbox
|
||||
with pytest.raises(ValueError, match=f"Old content '{nonexistent_name}' not found in memory block 'human'"):
|
||||
sandbox.run(agent_state=agent_state)
|
||||
result = sandbox.run(agent_state=agent_state)
|
||||
assert len(result.stderr) != 0, "stderr not empty"
|
||||
assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0], "stderr contains expected error"
|
||||
|
||||
|
||||
@pytest.mark.local_sandbox
|
||||
@@ -402,8 +404,11 @@ def test_local_sandbox_with_venv_errors(mock_e2b_api_key_none, custom_test_sandb
|
||||
sandbox = ToolExecutionSandbox(always_err_tool.name, {}, user_id=test_user.id)
|
||||
|
||||
# run the sandbox
|
||||
with pytest.raises(ZeroDivisionError, match="This is an intentionally weird division!"):
|
||||
sandbox.run()
|
||||
result = sandbox.run()
|
||||
assert len(result.stdout) != 0, "stdout not empty"
|
||||
assert "error" in result.stdout[0], "stdout contains printed string"
|
||||
assert len(result.stderr) != 0, "stderr not empty"
|
||||
assert "ZeroDivisionError: This is an intentionally weird division!" in result.stderr[0], "stderr contains expected error"
|
||||
|
||||
|
||||
# E2B sandbox tests
|
||||
@@ -500,8 +505,9 @@ def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memor
|
||||
sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
|
||||
|
||||
# run the sandbox
|
||||
with pytest.raises(ValueError, match=f"Old content '{nonexistent_name}' not found in memory block 'human'"):
|
||||
sandbox.run(agent_state=agent_state)
|
||||
result = sandbox.run(agent_state=agent_state)
|
||||
assert len(result.stderr) != 0, "stderr not empty"
|
||||
assert f"ValueError: Old content '{nonexistent_name}' not found in memory block 'human'" in result.stderr[0], "stderr contains expected error"
|
||||
|
||||
|
||||
@pytest.mark.e2b_sandbox
|
||||
|
||||
@@ -798,8 +798,8 @@ def test_tool_run(server, mock_e2b_api_key_none, user_id, agent_id):
|
||||
print(result)
|
||||
assert result.status == "success"
|
||||
assert result.function_return == "Ingested message Hello, world!", result.function_return
|
||||
assert result.stdout == ['']
|
||||
assert result.stderr == ['']
|
||||
assert not result.stdout
|
||||
assert not result.stderr
|
||||
|
||||
result = server.run_tool_from_source(
|
||||
user_id=user_id,
|
||||
@@ -811,8 +811,8 @@ def test_tool_run(server, mock_e2b_api_key_none, user_id, agent_id):
|
||||
print(result)
|
||||
assert result.status == "success"
|
||||
assert result.function_return == "Ingested message Well well well", result.function_return
|
||||
assert result.stdout == ['']
|
||||
assert result.stderr == ['']
|
||||
assert not result.stdout
|
||||
assert not result.stderr
|
||||
|
||||
result = server.run_tool_from_source(
|
||||
user_id=user_id,
|
||||
@@ -825,8 +825,9 @@ def test_tool_run(server, mock_e2b_api_key_none, user_id, agent_id):
|
||||
assert result.status == "error"
|
||||
assert "Error" in result.function_return, result.function_return
|
||||
assert "missing 1 required positional argument" in result.function_return, result.function_return
|
||||
assert result.stdout == ['']
|
||||
assert result.stderr != [''], "missing 1 required positional argument" in result.stderr[0]
|
||||
assert not result.stdout
|
||||
assert result.stderr
|
||||
assert "missing 1 required positional argument" in result.stderr[0]
|
||||
|
||||
# Test that we can still pull the tool out by default (pulls that last tool in the source)
|
||||
result = server.run_tool_from_source(
|
||||
@@ -839,8 +840,9 @@ def test_tool_run(server, mock_e2b_api_key_none, user_id, agent_id):
|
||||
print(result)
|
||||
assert result.status == "success"
|
||||
assert result.function_return == "Ingested message Well well well", result.function_return
|
||||
assert result.stdout != [''], "I'm a distractor" in result.stdout[0]
|
||||
assert result.stderr == ['']
|
||||
assert result.stdout
|
||||
assert "I'm a distractor" in result.stdout[0]
|
||||
assert not result.stderr
|
||||
|
||||
# Test that we can pull the tool out by name
|
||||
result = server.run_tool_from_source(
|
||||
@@ -853,8 +855,9 @@ def test_tool_run(server, mock_e2b_api_key_none, user_id, agent_id):
|
||||
print(result)
|
||||
assert result.status == "success"
|
||||
assert result.function_return == "Ingested message Well well well", result.function_return
|
||||
assert result.stdout != [''], "I'm a distractor" in result.stdout[0]
|
||||
assert result.stderr == ['']
|
||||
assert result.stdout
|
||||
assert "I'm a distractor" in result.stdout[0]
|
||||
assert not result.stderr
|
||||
|
||||
# Test that we can pull a different tool out by name
|
||||
result = server.run_tool_from_source(
|
||||
@@ -867,8 +870,9 @@ def test_tool_run(server, mock_e2b_api_key_none, user_id, agent_id):
|
||||
print(result)
|
||||
assert result.status == "success"
|
||||
assert result.function_return == str(None), result.function_return
|
||||
assert result.stdout != [''], "I'm a distractor" in result.stdout[0]
|
||||
assert result.stderr == ['']
|
||||
assert result.stdout
|
||||
assert "I'm a distractor" in result.stdout[0]
|
||||
assert not result.stderr
|
||||
|
||||
|
||||
def test_composio_client_simple(server):
|
||||
|
||||
Reference in New Issue
Block a user