feat: Improve error messages from tool sandbox (#2135)

2024-12-02 11:08:44 -08:00
parent 4181c0d81e
commit 860cf1949e
4 changed files with 39 additions and 8 deletions
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -8,6 +8,8 @@ env:
  GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
  AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
+  E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
+  E2B_SANDBOX_TEMPLATE_ID: ${{ secrets.E2B_SANDBOX_TEMPLATE_ID }}

 on:
  push:
@@ -24,6 +26,7 @@ jobs:
      matrix:
        integration_test_suite:
          - "integration_test_summarizer.py"
+          - "integration_test_tool_execution_sandbox.py"
    services:
      qdrant:
        image: qdrant/qdrant
@@ -52,7 +55,7 @@ jobs:
        with:
          python-version: "3.12"
          poetry-version: "1.8.2"
-          install-args: "-E dev -E postgres -E milvus -E external-tools -E tests"
+          install-args: "-E dev -E postgres -E milvus -E external-tools -E tests -E cloud-tool-sandbox"
      - name: Migrate database
        env:
          LETTA_PG_PORT: 5432
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -6,8 +6,6 @@ env:
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
  GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
-  E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
-  E2B_SANDBOX_TEMPLATE_ID: ${{ secrets.E2B_SANDBOX_TEMPLATE_ID }}

 on:
  push:
@@ -30,7 +28,6 @@ jobs:
          - "test_o1_agent.py"
          - "test_tool_rule_solver.py"
          - "test_agent_tool_graph.py"
-          - "test_tool_execution_sandbox.py"
          - "test_utils.py"
          - "test_tool_schema_parsing.py"
    services:
@@ -61,7 +58,7 @@ jobs:
        with:
          python-version: "3.12"
          poetry-version: "1.8.2"
-          install-args: "-E dev -E postgres -E milvus -E external-tools -E tests -E cloud-tool-sandbox"
+          install-args: "-E dev -E postgres -E milvus -E external-tools -E tests"
      - name: Migrate database
        env:
          LETTA_PG_PORT: 5432
@@ -135,4 +132,4 @@ jobs:
          LETTA_SERVER_PASS: test_server_token
          PYTHONPATH: ${{ github.workspace }}:${{ env.PYTHONPATH }}
        run: |
-          poetry run pytest -s -vv -k "not test_model_letta_perfomance.py and not test_utils.py and not test_client.py and not test_tool_execution_sandbox.py and not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_performance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client_legacy.py" tests
+          poetry run pytest -s -vv -k "not test_model_letta_perfomance.py and not test_utils.py and not test_client.py and not integration_test_tool_execution_sandbox.py and not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_performance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client_legacy.py" tests
--- a/letta/services/tool_execution_sandbox.py
+++ b/letta/services/tool_execution_sandbox.py
@@ -132,7 +132,8 @@ class ToolExecutionSandbox:
                sandbox_config_fingerprint=sbx_config.fingerprint(),
            )
        except Exception as e:
-            raise RuntimeError(f"Executing tool {self.tool_name} has an unexpected error: {e}")
+            logger.error(f"Executing tool {self.tool_name} has an unexpected error: {e}")
+            raise e
        finally:
            # Clean up the temp file and restore stdout
            sys.stdout = old_stdout
@@ -154,7 +155,9 @@ class ToolExecutionSandbox:
        env_vars = self.sandbox_config_manager.get_sandbox_env_vars_as_dict(sandbox_config_id=sbx_config.id, actor=self.user, limit=100)
        execution = sbx.run_code(code, envs=env_vars)
        if execution.error is not None:
-            raise Exception(f"Executing tool {self.tool_name} failed with {execution.error}")
+            logger.error(f"Executing tool {self.tool_name} failed with {execution.error}")
+            # Raise a concise exception as this gets returned to the LLM
+            raise self.parse_exception_from_e2b_execution(execution)
        elif len(execution.results) == 0:
            return None
        else:
@@ -166,6 +169,12 @@ class ToolExecutionSandbox:
                sandbox_config_fingerprint=sbx_config.fingerprint(),
            )

+    def parse_exception_from_e2b_execution(self, e2b_execution: "Execution") -> Exception:
+        """Build a concise local exception from the error reported by an E2B execution.
+
+        The reported error name is looked up among Python's builtins. The match is
+        used only when it is a genuine ``Exception`` subclass; anything else
+        (unknown names, non-exception builtins such as ``print``, or
+        ``BaseException``-only types such as ``SystemExit``) falls back to a
+        plain ``Exception``.
+
+        Args:
+            e2b_execution: Execution result whose ``error`` exposes ``name`` and ``value``.
+
+        Returns:
+            An exception instance carrying ``error.value`` as its message. It is
+            returned, not raised; the caller decides when to raise it.
+        """
+        import builtins
+
+        error = e2b_execution.error
+        exception_class = vars(builtins).get(error.name)
+        # Guard against names that resolve to something that must not be raised here:
+        # callables that are not exceptions, or interpreter-control exceptions.
+        if not (isinstance(exception_class, type) and issubclass(exception_class, Exception)):
+            exception_class = Exception
+        try:
+            return exception_class(error.value)
+        except TypeError:
+            # Some builtin exceptions (e.g. UnicodeDecodeError) require several
+            # constructor arguments and cannot be rebuilt from a message alone.
+            return Exception(error.value)
+
    def get_running_e2b_sandbox_with_same_state(self, sandbox_config: SandboxConfig) -> Optional["Sandbox"]:
        from e2b_code_interpreter import Sandbox

--- a/tests/integration_test_tool_execution_sandbox.py
+++ b/tests/integration_test_tool_execution_sandbox.py
@@ -266,6 +266,17 @@ def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_re
    assert result.func_return is None


+@pytest.mark.local_sandbox
+def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
+    """core_memory_replace must raise ValueError when the old content is absent from the 'human' block."""
+    nonexistent_name = "Alexander Wang"
+    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
+    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)
+
+    with pytest.raises(ValueError, match=f"Old content '{nonexistent_name}' not found in memory block 'human'"):
+        sandbox.run(agent_state=agent_state)
+
+
@pytest.mark.local_sandbox
 def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test_user):
    sandbox = ToolExecutionSandbox(list_tool.name, {}, user_id=test_user.id)
@@ -390,6 +401,17 @@ def test_e2b_sandbox_core_memory_replace(check_e2b_key_is_set, core_memory_repla
    assert result.func_return is None


+@pytest.mark.e2b_sandbox
+def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
+    """Running core_memory_replace with old content that is not in the 'human' block must raise ValueError."""
+    missing_name = "Alexander Wang"
+    expected_message = f"Old content '{missing_name}' not found in memory block 'human'"
+    replace_args = {"label": "human", "old_content": missing_name, "new_content": "Matt"}
+    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, replace_args, user_id=test_user.id)
+    with pytest.raises(ValueError, match=expected_message):
+        sandbox.run(agent_state=agent_state)
+
+
@pytest.mark.e2b_sandbox
 def test_e2b_sandbox_inject_env_var_existing_sandbox(check_e2b_key_is_set, get_env_tool, test_user):
    manager = SandboxConfigManager(tool_settings)