From ddcfeb26b1f2de284ec862b70a1b30ad69a6f3f4 Mon Sep 17 00:00:00 2001 From: Kian Jones <11655409+kianjones9@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:51:37 -0800 Subject: [PATCH] fix(core): catch all MCP tool execution errors instead of re-raising (#9419) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(core): catch all MCP tool execution errors instead of re-raising MCP tools are external user-configured servers - any failure during tool execution is expected and should be returned as (error_msg, False) to the agent, not raised as an exception that hits Datadog as a 500. Previously: - base_client.py only caught McpError/ToolError, re-raised everything else - fastmcp_client.py (both SSE and StreamableHTTP) always re-raised Now all three execute_tool() methods catch all exceptions and return the error message to the agent conversation. The agent handles tool failures via the error message naturally. This silences ~15 Datadog issue types including: - fastmcp.exceptions.ToolError (validation, permissions) - mcp.shared.exceptions.McpError (connection closed, credentials) - httpx.HTTPStatusError (503 from Zapier, etc.) - httpx.ConnectError, ReadTimeout, RemoteProtocolError - requests.exceptions.ConnectionError - builtins.ConnectionError 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix(core): log unexpected MCP errors at warning level with traceback Expected MCP errors (ToolError, McpError, httpx.*, ConnectionError, etc.) log at info level. Anything else (e.g. TypeError, AttributeError from our own code) logs at warning with exc_info=True so it still surfaces in Datadog without crashing the request. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta --------- Co-authored-by: Letta --- letta/services/mcp/base_client.py | 46 ++++++++++++++++++---------- letta/services/mcp/fastmcp_client.py | 37 ++++++---------------- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/letta/services/mcp/base_client.py b/letta/services/mcp/base_client.py index 31b25e29..c53b6f99 100644 --- a/letta/services/mcp/base_client.py +++ b/letta/services/mcp/base_client.py @@ -11,6 +11,31 @@ from letta.log import get_logger logger = get_logger(__name__) +EXPECTED_MCP_TOOL_ERRORS = ( + "McpError", + "ToolError", + "HTTPStatusError", + "ConnectError", + "ConnectTimeout", + "ReadTimeout", + "ReadError", + "RemoteProtocolError", + "LocalProtocolError", + "ConnectionError", + "SSLError", + "MaxRetryError", + "ProtocolError", + "BrokenResourceError", +) + + +def _log_mcp_tool_error(log: "get_logger", tool_name: str, exc: Exception) -> None: + exc_name = type(exc).__name__ + if exc_name in EXPECTED_MCP_TOOL_ERRORS: + log.info(f"MCP tool '{tool_name}' execution failed ({exc_name}): {exc}") + else: + log.warning(f"MCP tool '{tool_name}' execution failed with unexpected error ({exc_name}): {exc}", exc_info=True) + # TODO: Get rid of Async prefix on this class name once we deprecate old sync code class AsyncBaseMCPClient: @@ -81,24 +106,11 @@ class AsyncBaseMCPClient: try: result = await self.session.call_tool(tool_name, tool_args) except Exception as e: - # ToolError is raised by fastmcp for input validation errors (e.g., missing required properties) - # McpError is raised for other MCP-related errors - # Both are expected user-facing issues from external MCP servers - # Log at debug level to avoid triggering production alerts for expected failures - - # Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions) exception_to_check = e - if hasattr(e, "exceptions") and e.exceptions: - # If it's an ExceptionGroup with a single wrapped exception, unwrap it - if len(e.exceptions) == 1: - exception_to_check = e.exceptions[0] - - if exception_to_check.__class__.__name__ in ("McpError", "ToolError"): - logger.debug(f"MCP tool '{tool_name}' execution failed: {str(exception_to_check)}") - # Return error message with failure status instead of raising to avoid Datadog alerts - return str(e), False - # Re-raise unexpected errors - raise + if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1: + exception_to_check = e.exceptions[0] + _log_mcp_tool_error(logger, tool_name, exception_to_check) + return str(exception_to_check), False parsed_content = [] for content_piece in result.content: diff --git a/letta/services/mcp/fastmcp_client.py b/letta/services/mcp/fastmcp_client.py index 01190e56..2a2b05c2 100644 --- a/letta/services/mcp/fastmcp_client.py +++ b/letta/services/mcp/fastmcp_client.py @@ -19,6 +19,7 @@ from mcp import Tool as MCPTool from letta.errors import LettaMCPConnectionError from letta.functions.mcp_client.types import SSEServerConfig, StreamableHTTPServerConfig from letta.log import get_logger +from letta.services.mcp.base_client import _log_mcp_tool_error from letta.services.mcp.server_side_oauth import ServerSideOAuth logger = get_logger(__name__) @@ -142,21 +143,11 @@ class AsyncFastMCPSSEClient: try: result = await self.client.call_tool(tool_name, tool_args) except Exception as e: - # ToolError is raised by fastmcp for input validation errors (e.g., missing required properties) - # McpError is raised for other MCP-related errors - # Both are expected user-facing issues from external MCP servers - # Log at debug level to avoid triggering production alerts for expected failures - - # Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions) exception_to_check = e - if hasattr(e, "exceptions") and e.exceptions: - # If it's an ExceptionGroup with a single wrapped exception, unwrap it - if len(e.exceptions) == 1: - exception_to_check = e.exceptions[0] - - if exception_to_check.__class__.__name__ in ("McpError", "ToolError"): - logger.debug(f"MCP tool '{tool_name}' execution failed: {str(exception_to_check)}") - raise + if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1: + exception_to_check = e.exceptions[0] + _log_mcp_tool_error(logger, tool_name, exception_to_check) + return str(exception_to_check), False # Parse content from result parsed_content = [] @@ -309,21 +300,11 @@ class AsyncFastMCPStreamableHTTPClient: try: result = await self.client.call_tool(tool_name, tool_args) except Exception as e: - # ToolError is raised by fastmcp for input validation errors (e.g., missing required properties) - # McpError is raised for other MCP-related errors - # Both are expected user-facing issues from external MCP servers - # Log at debug level to avoid triggering production alerts for expected failures - - # Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions) exception_to_check = e - if hasattr(e, "exceptions") and e.exceptions: - # If it's an ExceptionGroup with a single wrapped exception, unwrap it - if len(e.exceptions) == 1: - exception_to_check = e.exceptions[0] - - if exception_to_check.__class__.__name__ in ("McpError", "ToolError"): - logger.debug(f"MCP tool '{tool_name}' execution failed: {str(exception_to_check)}") - raise + if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1: + exception_to_check = e.exceptions[0] + _log_mcp_tool_error(logger, tool_name, exception_to_check) + return str(exception_to_check), False # Parse content from result parsed_content = []