From 940d648d42fa43deea76964be8cf4080ad6cf71d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 18 Jan 2026 22:00:52 -0800
Subject: [PATCH] fix(mcp): handle MCP tool errors gracefully to prevent
 Datadog alerts (#8687)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MCP tool errors (ToolError, McpError) are expected user-facing errors
from external MCP servers (e.g., "No connected account found"). These
were propagating through the @trace_method decorator and being
recorded as errors in Datadog APM.

Changes:
- Add try/except to catch expected MCP errors in ExternalMCPToolExecutor
- Return ToolExecutionResult with status="error" instead of re-raising
- Log expected errors at INFO level instead of letting them trace as ERROR
- Remove stray 'pass' statement that was a no-op

Fixes #8685

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: datadog-official[bot] <datadog-official[bot]@users.noreply.github.com>
Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com>
---
 .../tool_executor/mcp_tool_executor.py        | 52 ++++++++++++++-----
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/letta/services/tool_executor/mcp_tool_executor.py b/letta/services/tool_executor/mcp_tool_executor.py
index 69237cdf..3ea16161 100644
--- a/letta/services/tool_executor/mcp_tool_executor.py
+++ b/letta/services/tool_executor/mcp_tool_executor.py
@@ -1,6 +1,7 @@
 from typing import Any, Dict, Optional
 
 from letta.constants import MCP_TOOL_TAG_NAME_PREFIX
+from letta.log import get_logger
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
 from letta.schemas.sandbox_config import SandboxConfig
@@ -9,6 +10,13 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
 from letta.schemas.user import User
 from letta.services.mcp_manager import MCPManager
 from letta.services.tool_executor.tool_executor_base import ToolExecutor
+from letta.utils import get_friendly_error_msg
+
+logger = get_logger(__name__)
+
+# Class names of MCP exceptions that represent expected, user-facing errors.
+# Matched by exact class name (subclasses do not match) to avoid importing fastmcp/mcp.
+MCP_EXPECTED_ERROR_CLASSES = {"McpError", "ToolError"}
 
 
 class ExternalMCPToolExecutor(ToolExecutor):
@@ -25,8 +33,6 @@ class ExternalMCPToolExecutor(ToolExecutor):
         sandbox_config: Optional[SandboxConfig] = None,
         sandbox_env_vars: Optional[Dict[str, Any]] = None,
     ) -> ToolExecutionResult:
-        pass
-
         mcp_server_tag = [tag for tag in tool.tags if tag.startswith(f"{MCP_TOOL_TAG_NAME_PREFIX}:")]
         if not mcp_server_tag:
             raise ValueError(f"Tool {tool.name} does not have a valid MCP server tag")
@@ -41,16 +47,34 @@ class ExternalMCPToolExecutor(ToolExecutor):
             environment_variables = agent_state.get_agent_env_vars_as_dict()
             agent_id = agent_state.id
 
-        function_response, success = await mcp_manager.execute_mcp_server_tool(
-            mcp_server_name=mcp_server_name,
-            tool_name=function_name,
-            tool_args=function_args,
-            environment_variables=environment_variables,
-            actor=actor,
-            agent_id=agent_id,
-        )
+        try:
+            function_response, success = await mcp_manager.execute_mcp_server_tool(
+                mcp_server_name=mcp_server_name,
+                tool_name=function_name,
+                tool_args=function_args,
+                environment_variables=environment_variables,
+                actor=actor,
+                agent_id=agent_id,
+            )
 
-        return ToolExecutionResult(
-            status="success" if success else "error",
-            func_return=function_response,
-        )
+            return ToolExecutionResult(
+                status="success" if success else "error",
+                func_return=function_response,
+            )
+        except Exception as e:
+            # Check if this is an expected MCP error (ToolError, McpError)
+            # These are user-facing errors from the external MCP server (e.g., "No connected account found")
+            # We handle them gracefully instead of letting them propagate as exceptions
+            if e.__class__.__name__ in MCP_EXPECTED_ERROR_CLASSES:
+                logger.info(f"MCP tool '{function_name}' returned expected error: {str(e)}")
+                error_message = get_friendly_error_msg(
+                    function_name=function_name,
+                    exception_name=e.__class__.__name__,
+                    exception_message=str(e),
+                )
+                return ToolExecutionResult(
+                    status="error",
+                    func_return=error_message,
+                )
+            # Re-raise unexpected errors
+            raise