fix(mcp): handle MCP tool errors gracefully to prevent Datadog alerts (#8687)

MCP tool errors (ToolError, McpError) are expected user-facing errors from external MCP servers (e.g., "No connected account found"). These were propagating through the @trace_method decorator and being recorded as errors in Datadog APM.

Changes:
- Add try/except to catch expected MCP errors in ExternalMCPToolExecutor
- Return ToolExecutionResult with status="error" instead of re-raising
- Log expected errors at INFO level instead of letting them trace as ERROR
- Remove stray 'pass' statement that was a no-op

Fixes #8685

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: datadog-official[bot] <datadog-official[bot]@users.noreply.github.com>
Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com>
2026-01-18 22:00:52 -08:00
parent 81b5d71889
commit 940d648d42
1 changed files with 38 additions and 14 deletions
--- a/letta/services/tool_executor/mcp_tool_executor.py
+++ b/letta/services/tool_executor/mcp_tool_executor.py
@@ -1,6 +1,7 @@
 from typing import Any, Dict, Optional

 from letta.constants import MCP_TOOL_TAG_NAME_PREFIX
+from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
 from letta.schemas.sandbox_config import SandboxConfig
@@ -9,6 +10,13 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
 from letta.services.mcp_manager import MCPManager
 from letta.services.tool_executor.tool_executor_base import ToolExecutor
+from letta.utils import get_friendly_error_msg
+
+logger = get_logger(__name__)
+
+# MCP error class names that represent expected user-facing errors
+# These are checked by class name to avoid import dependencies on fastmcp/mcp packages
+MCP_EXPECTED_ERROR_CLASSES = {"McpError", "ToolError"}


 class ExternalMCPToolExecutor(ToolExecutor):
@@ -25,8 +33,6 @@ class ExternalMCPToolExecutor(ToolExecutor):
        sandbox_config: Optional[SandboxConfig] = None,
        sandbox_env_vars: Optional[Dict[str, Any]] = None,
    ) -> ToolExecutionResult:
-        pass
-
        mcp_server_tag = [tag for tag in tool.tags if tag.startswith(f"{MCP_TOOL_TAG_NAME_PREFIX}:")]
        if not mcp_server_tag:
            raise ValueError(f"Tool {tool.name} does not have a valid MCP server tag")
@@ -41,16 +47,34 @@ class ExternalMCPToolExecutor(ToolExecutor):
            environment_variables = agent_state.get_agent_env_vars_as_dict()
            agent_id = agent_state.id

-        function_response, success = await mcp_manager.execute_mcp_server_tool(
-            mcp_server_name=mcp_server_name,
-            tool_name=function_name,
-            tool_args=function_args,
-            environment_variables=environment_variables,
-            actor=actor,
-            agent_id=agent_id,
-        )
+        try:
+            function_response, success = await mcp_manager.execute_mcp_server_tool(
+                mcp_server_name=mcp_server_name,
+                tool_name=function_name,
+                tool_args=function_args,
+                environment_variables=environment_variables,
+                actor=actor,
+                agent_id=agent_id,
+            )

-        return ToolExecutionResult(
-            status="success" if success else "error",
-            func_return=function_response,
-        )
+            return ToolExecutionResult(
+                status="success" if success else "error",
+                func_return=function_response,
+            )
+        except Exception as e:
+            # Check if this is an expected MCP error (ToolError, McpError)
+            # These are user-facing errors from the external MCP server (e.g., "No connected account found")
+            # We handle them gracefully instead of letting them propagate as exceptions
+            if e.__class__.__name__ in MCP_EXPECTED_ERROR_CLASSES:
+                logger.info(f"MCP tool '{function_name}' returned expected error: {str(e)}")
+                error_message = get_friendly_error_msg(
+                    function_name=function_name,
+                    exception_name=e.__class__.__name__,
+                    exception_message=str(e),
+                )
+                return ToolExecutionResult(
+                    status="error",
+                    func_return=error_message,
+                )
+            # Re-raise unexpected errors
+            raise