feat: Improve error messages from tool sandbox (#2135)

This commit is contained in:
Matthew Zhou
2024-12-02 11:08:44 -08:00
committed by GitHub
parent 4181c0d81e
commit 860cf1949e
4 changed files with 39 additions and 8 deletions

View File

@@ -8,6 +8,8 @@ env:
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_BASE_URL: ${{ secrets.AZURE_BASE_URL }}
E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
E2B_SANDBOX_TEMPLATE_ID: ${{ secrets.E2B_SANDBOX_TEMPLATE_ID }}
on:
push:
@@ -24,6 +26,7 @@ jobs:
matrix:
integration_test_suite:
- "integration_test_summarizer.py"
- "integration_test_tool_execution_sandbox.py"
services:
qdrant:
image: qdrant/qdrant
@@ -52,7 +55,7 @@ jobs:
with:
python-version: "3.12"
poetry-version: "1.8.2"
install-args: "-E dev -E postgres -E milvus -E external-tools -E tests"
install-args: "-E dev -E postgres -E milvus -E external-tools -E tests -E cloud-tool-sandbox"
- name: Migrate database
env:
LETTA_PG_PORT: 5432

View File

@@ -6,8 +6,6 @@ env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
E2B_API_KEY: ${{ secrets.E2B_API_KEY }}
E2B_SANDBOX_TEMPLATE_ID: ${{ secrets.E2B_SANDBOX_TEMPLATE_ID }}
on:
push:
@@ -30,7 +28,6 @@ jobs:
- "test_o1_agent.py"
- "test_tool_rule_solver.py"
- "test_agent_tool_graph.py"
- "test_tool_execution_sandbox.py"
- "test_utils.py"
- "test_tool_schema_parsing.py"
services:
@@ -61,7 +58,7 @@ jobs:
with:
python-version: "3.12"
poetry-version: "1.8.2"
install-args: "-E dev -E postgres -E milvus -E external-tools -E tests -E cloud-tool-sandbox"
install-args: "-E dev -E postgres -E milvus -E external-tools -E tests"
- name: Migrate database
env:
LETTA_PG_PORT: 5432
@@ -135,4 +132,4 @@ jobs:
LETTA_SERVER_PASS: test_server_token
PYTHONPATH: ${{ github.workspace }}:${{ env.PYTHONPATH }}
run: |
poetry run pytest -s -vv -k "not test_model_letta_perfomance.py and not test_utils.py and not test_client.py and not test_tool_execution_sandbox.py and not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_performance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client_legacy.py" tests
poetry run pytest -s -vv -k "not test_model_letta_perfomance.py and not test_utils.py and not test_client.py and not integration_test_tool_execution_sandbox.py and not integration_test_summarizer.py and not test_agent_tool_graph.py and not test_tool_rule_solver.py and not test_local_client.py and not test_o1_agent.py and not test_cli.py and not test_concurrent_connections.py and not test_quickstart and not test_model_letta_performance and not test_storage and not test_server and not test_openai_client and not test_providers and not test_client_legacy.py" tests

View File

@@ -132,7 +132,8 @@ class ToolExecutionSandbox:
sandbox_config_fingerprint=sbx_config.fingerprint(),
)
except Exception as e:
raise RuntimeError(f"Executing tool {self.tool_name} has an unexpected error: {e}")
logger.error(f"Executing tool {self.tool_name} has an unexpected error: {e}")
raise e
finally:
# Clean up the temp file and restore stdout
sys.stdout = old_stdout
@@ -154,7 +155,9 @@ class ToolExecutionSandbox:
env_vars = self.sandbox_config_manager.get_sandbox_env_vars_as_dict(sandbox_config_id=sbx_config.id, actor=self.user, limit=100)
execution = sbx.run_code(code, envs=env_vars)
if execution.error is not None:
raise Exception(f"Executing tool {self.tool_name} failed with {execution.error}")
logger.error(f"Executing tool {self.tool_name} failed with {execution.error}")
# Raise a concise exception as this gets returned to the LLM
raise self.parse_exception_from_e2b_execution(execution)
elif len(execution.results) == 0:
return None
else:
@@ -166,6 +169,12 @@ class ToolExecutionSandbox:
sandbox_config_fingerprint=sbx_config.fingerprint(),
)
def parse_exception_from_e2b_execution(self, e2b_execution: "Execution") -> Exception:
    """Rebuild a concise host-side exception from a failed e2b execution.

    The failure is read from ``e2b_execution.error``: its ``name`` is looked up
    among Python's builtin exception classes and its ``value`` becomes the
    message of the returned exception.

    Args:
        e2b_execution: Execution result whose ``error`` attribute is set
            (exposes ``error.name`` and ``error.value``).

    Returns:
        An instance of the builtin ``Exception`` subclass named by
        ``error.name`` carrying ``error.value`` as its message, or a plain
        ``Exception`` with the same message when the name is unknown, is not
        an ``Exception`` subclass, or cannot be built from a single message.
    """
    # Local import: the `builtins` module is the portable way to reach builtin
    # names; `__builtins__` is a module or a dict depending on the context.
    import builtins

    error = e2b_execution.error
    candidate = getattr(builtins, str(error.name), None)

    # Only honour genuine Exception subclasses. This rejects non-exception
    # builtins that happen to share the name (e.g. `print`) and
    # BaseException-only classes such as SystemExit / KeyboardInterrupt, which
    # would otherwise slip past `except Exception` handlers on the host.
    if not (isinstance(candidate, type) and issubclass(candidate, Exception)):
        candidate = Exception

    try:
        return candidate(error.value)
    except TypeError:
        # Some builtin exceptions (e.g. UnicodeDecodeError) require more than a
        # message argument; keep the message but drop to the generic class.
        return Exception(error.value)
def get_running_e2b_sandbox_with_same_state(self, sandbox_config: SandboxConfig) -> Optional["Sandbox"]:
from e2b_code_interpreter import Sandbox

View File

@@ -266,6 +266,17 @@ def test_local_sandbox_core_memory_replace(mock_e2b_api_key_none, core_memory_re
assert result.func_return is None
# Marked as a local-sandbox test: it runs with the e2b API key mocked out
# (`mock_e2b_api_key_none`), so the `e2b_sandbox` marker would mis-classify it.
@pytest.mark.local_sandbox
def test_local_sandbox_core_memory_replace_errors(mock_e2b_api_key_none, core_memory_replace_tool, test_user, agent_state):
    """A core-memory replace of content that is not in the block raises a ValueError.

    The exception type and message are asserted directly on the error raised
    by ``sandbox.run``.
    """
    nonexistent_name = "Alexander Wang"
    args = {"label": "human", "old_content": nonexistent_name, "new_content": "Matt"}
    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, args, user_id=test_user.id)

    # Run the sandbox; the missing old content must surface as a ValueError
    with pytest.raises(ValueError, match=f"Old content '{nonexistent_name}' not found in memory block 'human'"):
        sandbox.run(agent_state=agent_state)
@pytest.mark.local_sandbox
def test_local_sandbox_with_list_rv(mock_e2b_api_key_none, list_tool, test_user):
sandbox = ToolExecutionSandbox(list_tool.name, {}, user_id=test_user.id)
@@ -390,6 +401,17 @@ def test_e2b_sandbox_core_memory_replace(check_e2b_key_is_set, core_memory_repla
assert result.func_return is None
@pytest.mark.e2b_sandbox
def test_e2b_sandbox_core_memory_replace_errors(check_e2b_key_is_set, core_memory_replace_tool, test_user, agent_state):
    """Replacing content absent from the 'human' block must raise a ValueError with a precise message."""
    missing_content = "Alexander Wang"
    expected_error = f"Old content '{missing_content}' not found in memory block 'human'"
    tool_args = dict(label="human", old_content=missing_content, new_content="Matt")

    sandbox = ToolExecutionSandbox(core_memory_replace_tool.name, tool_args, user_id=test_user.id)

    # Executing the tool is expected to fail with the original error type and text
    with pytest.raises(ValueError, match=expected_error):
        sandbox.run(agent_state=agent_state)
@pytest.mark.e2b_sandbox
def test_e2b_sandbox_inject_env_var_existing_sandbox(check_e2b_key_is_set, get_env_tool, test_user):
manager = SandboxConfigManager(tool_settings)