diff --git a/letta/services/tool_execution_sandbox.py b/letta/services/tool_execution_sandbox.py
index 98c8603a..4c95184c 100644
--- a/letta/services/tool_execution_sandbox.py
+++ b/letta/services/tool_execution_sandbox.py
@@ -172,9 +172,31 @@ class ToolExecutionSandbox:
                 capture_output=True,
                 text=True,
             )
-            if result.stderr:
-                logger.error(f"Sandbox execution error: {result.stderr}")
-                raise RuntimeError(f"Sandbox execution error: {result.stderr}")
+
+            # Handle error with optimistic error parsing from the string
+            # This is very brittle, so we fall back to a RuntimeError if parsing fails
+            if result.returncode != 0:
+                import builtins  # function-scope import keeps this change self-contained
+
+                # Log the error
+                logger.error(f"Sandbox execution error:\n{result.stderr}")
+
+                try:
+                    # The last line of a traceback reads "<ExceptionType>: <message>"
+                    exception_line = result.stderr.strip().splitlines()[-1]
+                    exception_type, exception_message = exception_line.split(": ", 1)
+
+                    # stderr comes from the sandboxed tool, so never eval() it: only accept
+                    # names that resolve to a builtin Exception subclass
+                    exception_class = getattr(builtins, exception_type.strip())
+                    if not (isinstance(exception_class, type) and issubclass(exception_class, Exception)):
+                        raise TypeError(f"{exception_type!r} is not a builtin exception type")
+                    parsed_error = exception_class(exception_message.strip())
+                except Exception:
+                    # Fallback to RuntimeError if parsing fails
+                    raise RuntimeError(result.stderr)
+
+                raise parsed_error
 
             func_result, stdout = self.parse_out_function_results_markers(result.stdout)
             func_return, agent_state = self.parse_best_effort(func_result)
@@ -184,9 +206,11 @@ class ToolExecutionSandbox:
         except subprocess.TimeoutExpired:
             raise TimeoutError(f"Executing tool {self.tool_name} has timed out.")
         except subprocess.CalledProcessError as e:
-            raise RuntimeError(f"Executing tool {self.tool_name} has process error: {e}")
+            logger.error(f"Executing tool {self.tool_name} has process error: {e}")
+            raise
         except Exception as e:
-            raise RuntimeError(f"Executing tool {self.tool_name} has an unexpected error: {e}")
+            logger.error(f"Executing tool {self.tool_name} has an unexpected error: {e}")
+            raise
 
     def run_local_dir_sandbox_runpy(
         self, sbx_config: SandboxConfig, env_vars: Dict[str, str], temp_file_path: str, old_stdout: TextIO
diff --git a/tests/integration_test_tool_execution_sandbox.py b/tests/integration_test_tool_execution_sandbox.py
index 09129e49..20df0051 100644
--- a/tests/integration_test_tool_execution_sandbox.py
+++ b/tests/integration_test_tool_execution_sandbox.py
@@ -183,6 +183,23 @@ def get_warning_tool(test_user):
     yield tool
 
 
+@pytest.fixture
+def always_err_tool(test_user):
+    def error() -> str:
+        """
+        Simple function that errors
+
+        Returns:
+            str: not important
+        """
+        # Raise an unusual error so we know it's from this function
+        raise ZeroDivisionError("This is an intentionally weird division!")
+
+    tool = create_tool_from_func(error)
+    tool = ToolManager().create_or_update_tool(tool, test_user)
+    yield tool
+
+
 @pytest.fixture
 def list_tool(test_user):
     def create_list():
@@ -244,6 +261,33 @@ def agent_state():
     yield agent_state
 
 
+@pytest.fixture
+def custom_test_sandbox_config(test_user):
+    """
+    Fixture to create a consistent local sandbox configuration for tests.
+
+    Args:
+        test_user: The test user to be used for creating the sandbox configuration.
+
+    Returns:
+        A tuple containing the SandboxConfigManager and the LocalSandboxConfig that was registered.
+    """
+    # Create the SandboxConfigManager
+    manager = SandboxConfigManager(tool_settings)
+
+    # Set the sandbox to be within the external codebase path and use a venv
+    external_codebase_path = str(Path(__file__).parent / "test_tool_sandbox" / "restaurant_management_system")
+    local_sandbox_config = LocalSandboxConfig(sandbox_dir=external_codebase_path, use_venv=True)
+
+    # Create the sandbox configuration
+    config_create = SandboxConfigCreate(config=local_sandbox_config.model_dump())
+
+    # Create or update the sandbox configuration
+    manager.create_or_update_sandbox_config(sandbox_config_create=config_create, actor=test_user)
+
+    return manager, local_sandbox_config
+
+
 # Local sandbox tests
 
 
@@ -347,16 +391,7 @@ def test_local_sandbox_e2e_composio_star_github(mock_e2b_api_key_none, check_com
 
 
 @pytest.mark.local_sandbox
-def test_local_sandbox_external_codebase(mock_e2b_api_key_none, external_codebase_tool, test_user):
-    # Make the external codebase the sandbox config
-    manager = SandboxConfigManager(tool_settings)
-
-    # Set the sandbox to be within the external codebase path and use a venv
-    external_codebase_path = str(Path(__file__).parent / "test_tool_sandbox" / "restaurant_management_system")
-    local_sandbox_config = LocalSandboxConfig(sandbox_dir=external_codebase_path, use_venv=True)
-    config_create = SandboxConfigCreate(config=local_sandbox_config.model_dump())
-    manager.create_or_update_sandbox_config(sandbox_config_create=config_create, actor=test_user)
-
+def test_local_sandbox_external_codebase(mock_e2b_api_key_none, custom_test_sandbox_config, external_codebase_tool, test_user):
     # Set the args
     args = {"percentage": 10}
 
@@ -370,21 +405,23 @@ def test_local_sandbox_external_codebase(mock_e2b_api_key_none, external_codebas
 
 
 @pytest.mark.local_sandbox
-def test_local_sandbox_with_venv_and_warnings_does_not_error(mock_e2b_api_key_none, get_warning_tool, test_user):
-    # Make the external codebase the sandbox config
-    manager = SandboxConfigManager(tool_settings)
-
-    # Set the sandbox to be within the external codebase path and use a venv
-    external_codebase_path = str(Path(__file__).parent / "test_tool_sandbox" / "restaurant_management_system")
-    local_sandbox_config = LocalSandboxConfig(sandbox_dir=external_codebase_path, use_venv=True)
-    config_create = SandboxConfigCreate(config=local_sandbox_config.model_dump())
-    manager.create_or_update_sandbox_config(sandbox_config_create=config_create, actor=test_user)
-
+def test_local_sandbox_with_venv_and_warnings_does_not_error(
+    mock_e2b_api_key_none, custom_test_sandbox_config, get_warning_tool, test_user
+):
     sandbox = ToolExecutionSandbox(get_warning_tool.name, {}, user_id=test_user.id)
     result = sandbox.run()
     assert result.func_return == "Hello World"
 
 
+@pytest.mark.local_sandbox
+def test_local_sandbox_with_venv_errors(mock_e2b_api_key_none, custom_test_sandbox_config, always_err_tool, test_user):
+    sandbox = ToolExecutionSandbox(always_err_tool.name, {}, user_id=test_user.id)
+
+    # run the sandbox
+    with pytest.raises(ZeroDivisionError, match="This is an intentionally weird division!"):
+        sandbox.run()
+
+
 # E2B sandbox tests
 
 