From 860cf1949ea59d995c56c4800a96c7164810fb7b Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Mon, 2 Dec 2024 11:08:44 -0800 Subject: [PATCH] feat: Improve error messages from tool sandbox (#2135) --- .github/workflows/integration_tests.yml | 5 ++++- .github/workflows/tests.yml | 7 ++---- letta/services/tool_execution_sandbox.py | 13 +++++++++-- ...ntegration_test_tool_execution_sandbox.py} | 22 +++++++++++++++++++ 4 files changed, 39 insertions(+), 8 deletions(-) rename tests/{test_tool_execution_sandbox.py => integration_test_tool_execution_sandbox.py} (93%) diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index e3c673a3..350c4c4d 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -8,6 +8,8 @@ env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }} + E2B_API_KEY: ${{ secrets.E2B_API_KEY }} + E2B_SANDBOX_TEMPLATE_ID: ${{ secrets.E2B_SANDBOX_TEMPLATE_ID }} on: push: @@ -24,6 +26,7 @@ jobs: matrix: integration_test_suite: - "integration_test_summarizer.py" + - "integration_test_tool_execution_sandbox.py" services: qdrant: image: qdrant/qdrant @@ -52,7 +55,7 @@ jobs: with: python-version: "3.12" poetry-version: "1.8.2" - install-args: "-E dev -E postgres -E milvus -E external-tools -E tests" + install-args: "-E dev -E postgres -E milvus -E external-tools -E tests -E cloud-tool-sandbox" - name: Migrate database env: LETTA_PG_PORT: 5432 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ffaa2704..d81681e4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,8 +6,6 @@ env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} - E2B_API_KEY: ${{ secrets.E2B_API_KEY }} - E2B_SANDBOX_TEMPLATE_ID: ${{ secrets.E2B_SANDBOX_TEMPLATE_ID }} on: push: @@ -30,7 +28,6 
@@ jobs: - "test_o1_agent.py" - "test_tool_rule_solver.py" - "test_agent_tool_graph.py" - - "test_tool_execution_sandbox.py" - "test_utils.py" - "test_tool_schema_parsing.py" services: @@ -61,7 +58,7 @@ jobs: with: python-version: "3.12" poetry-version: "1.8.2" - install-args: "-E dev -E postgres -E milvus -E external-tools -E tests -E cloud-tool-sandbox" + install-args: "-E dev -E postgres -E milvus -E external-tools -E tests" - name: Migrate database env: LETTA_PG_PORT: 5432 @@ -135,4 +132,4 @@ jobs: LETTA_SERVER_PASS: test_server_token PYTHONPATH: ${{ github.workspace }}:${{ env.PYTHONPATH }} run: | - poetry run pytest -s -vv -k "not test_model_letta_perfomance.py and not test_utils.py and not test_client.py and not test_tool_execution_sandbox.py and not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_performance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client_legacy.py" tests + poetry run pytest -s -vv -k "not test_model_letta_perfomance.py and not test_utils.py and not test_client.py and not integration_test_tool_execution_sandbox.py and not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_performance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client_legacy.py" tests diff --git a/letta/services/tool_execution_sandbox.py b/letta/services/tool_execution_sandbox.py index 97a4903c..c1c48979 100644 --- a/letta/services/tool_execution_sandbox.py +++ b/letta/services/tool_execution_sandbox.py @@ -132,7 +132,8 @@ class 
ToolExecutionSandbox: sandbox_config_fingerprint=sbx_config.fingerprint(), ) except Exception as e: - raise RuntimeError(f"Executing tool {self.tool_name} has an unexpected error: {e}") + logger.error(f"Executing tool {self.tool_name} has an unexpected error: {e}") + raise e finally: # Clean up the temp file and restore stdout sys.stdout = old_stdout @@ -154,7 +155,9 @@ class ToolExecutionSandbox: env_vars = self.sandbox_config_manager.get_sandbox_env_vars_as_dict(sandbox_config_id=sbx_config.id, actor=self.user, limit=100) execution = sbx.run_code(code, envs=env_vars) if execution.error is not None: - raise Exception(f"Executing tool {self.tool_name} failed with {execution.error}") + logger.error(f"Executing tool {self.tool_name} failed with {execution.error}") + # Raise a concise exception as this gets returned to the LLM + raise self.parse_exception_from_e2b_execution(execution) elif len(execution.results) == 0: return None else: @@ -166,6 +169,12 @@ class ToolExecutionSandbox: sandbox_config_fingerprint=sbx_config.fingerprint(), ) + def parse_exception_from_e2b_execution(self, e2b_execution: "Execution") -> Exception: + # Rebuild a host-side exception from the sandbox error name/value. Only builtin Exception subclasses are honoured: a name resolving to a non-exception builtin (e.g. "print") or to a BaseException-only type (e.g. "SystemExit") falls back to Exception + candidate = (__builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)).get(e2b_execution.error.name) + exception_class = candidate if isinstance(candidate, type) and issubclass(candidate, Exception) else Exception + return exception_class(e2b_execution.error.value) + def get_running_e2b_sandbox_with_same_state(self, sandbox_config: SandboxConfig) -> Optional["Sandbox"]: from e2b_code_interpreter import Sandbox diff --git a/tests/test_tool_execution_sandbox.py b/tests/integration_test_tool_execution_sandbox.py similarity index 93% rename from tests/test_tool_execution_sandbox.py rename to tests/integration_test_tool_execution_sandbox.py index 977a674e..1d5f556e 100644 --- a/tests/test_tool_execution_sandbox.py +++ b/tests/integration_test_tool_execution_sandbox.py @@ -266,6 +266,17 @@ def
test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_re assert result.func_return is None +@pytest.mark.local_sandbox +def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state): + nonexistent_name = "Alexander Wang" + args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} + sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) + + # old_content is absent from the block, so the tool's own ValueError must propagate from the local run + with pytest.raises(ValueError, match=f"Old content '{nonexistent_name}' not found in memory block 'human'"): + sandbox.run(agent_state=agent_state) + + @pytest.mark.local_sandbox def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test_user): sandbox = ToolExecutionSandbox(list_tool.name, {}, user_id=test_user.id) @@ -390,6 +401,17 @@ def test_e2b_sandbox_core_memory_replace(check_e2b_key_is_set, core_memory_repla assert result.func_return is None +@pytest.mark.e2b_sandbox +def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state): + nonexistent_name = "Alexander Wang" + args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"} + sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id) + + # old_content is absent from the block, so the error reported by the e2b run must surface as a ValueError + with pytest.raises(ValueError, match=f"Old content '{nonexistent_name}' not found in memory block 'human'"): + sandbox.run(agent_state=agent_state) + + @pytest.mark.e2b_sandbox def test_e2b_sandbox_inject_env_var_existing_sandbox(check_e2b_key_is_set, get_env_tool, test_user): manager = SandboxConfigManager(tool_settings)