fix(core): catch all MCP tool execution errors instead of re-raising (#9419)
* fix(core): catch all MCP tool execution errors instead of re-raising MCP tools are external user-configured servers - any failure during tool execution is expected and should be returned as (error_msg, False) to the agent, not raised as an exception that hits Datadog as a 500. Previously: - base_client.py only caught McpError/ToolError, re-raised everything else - fastmcp_client.py (both SSE and StreamableHTTP) always re-raised Now all three execute_tool() methods catch all exceptions and return the error message to the agent conversation. The agent handles tool failures via the error message naturally. This silences ~15 Datadog issue types including: - fastmcp.exceptions.ToolError (validation, permissions) - mcp.shared.exceptions.McpError (connection closed, credentials) - httpx.HTTPStatusError (503 from Zapier, etc.) - httpx.ConnectError, ReadTimeout, RemoteProtocolError - requests.exceptions.ConnectionError - builtins.ConnectionError 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix(core): log unexpected MCP errors at warning level with traceback Expected MCP errors (ToolError, McpError, httpx.*, ConnectionError, etc.) log at info level. Anything else (e.g. TypeError, AttributeError from our own code) logs at warning with exc_info=True so it still surfaces in Datadog without crashing the request. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> --------- Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -11,6 +11,31 @@ from letta.log import get_logger
|
|||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
|
EXPECTED_MCP_TOOL_ERRORS = (
|
||||||
|
"McpError",
|
||||||
|
"ToolError",
|
||||||
|
"HTTPStatusError",
|
||||||
|
"ConnectError",
|
||||||
|
"ConnectTimeout",
|
||||||
|
"ReadTimeout",
|
||||||
|
"ReadError",
|
||||||
|
"RemoteProtocolError",
|
||||||
|
"LocalProtocolError",
|
||||||
|
"ConnectionError",
|
||||||
|
"SSLError",
|
||||||
|
"MaxRetryError",
|
||||||
|
"ProtocolError",
|
||||||
|
"BrokenResourceError",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _log_mcp_tool_error(log: "get_logger", tool_name: str, exc: Exception) -> None:
|
||||||
|
exc_name = type(exc).__name__
|
||||||
|
if exc_name in EXPECTED_MCP_TOOL_ERRORS:
|
||||||
|
log.info(f"MCP tool '{tool_name}' execution failed ({exc_name}): {exc}")
|
||||||
|
else:
|
||||||
|
log.warning(f"MCP tool '{tool_name}' execution failed with unexpected error ({exc_name}): {exc}", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
# TODO: Get rid of Async prefix on this class name once we deprecate old sync code
|
# TODO: Get rid of Async prefix on this class name once we deprecate old sync code
|
||||||
class AsyncBaseMCPClient:
|
class AsyncBaseMCPClient:
|
||||||
@@ -81,24 +106,11 @@ class AsyncBaseMCPClient:
|
|||||||
try:
|
try:
|
||||||
result = await self.session.call_tool(tool_name, tool_args)
|
result = await self.session.call_tool(tool_name, tool_args)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# ToolError is raised by fastmcp for input validation errors (e.g., missing required properties)
|
|
||||||
# McpError is raised for other MCP-related errors
|
|
||||||
# Both are expected user-facing issues from external MCP servers
|
|
||||||
# Log at debug level to avoid triggering production alerts for expected failures
|
|
||||||
|
|
||||||
# Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions)
|
|
||||||
exception_to_check = e
|
exception_to_check = e
|
||||||
if hasattr(e, "exceptions") and e.exceptions:
|
if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1:
|
||||||
# If it's an ExceptionGroup with a single wrapped exception, unwrap it
|
exception_to_check = e.exceptions[0]
|
||||||
if len(e.exceptions) == 1:
|
_log_mcp_tool_error(logger, tool_name, exception_to_check)
|
||||||
exception_to_check = e.exceptions[0]
|
return str(exception_to_check), False
|
||||||
|
|
||||||
if exception_to_check.__class__.__name__ in ("McpError", "ToolError"):
|
|
||||||
logger.debug(f"MCP tool '{tool_name}' execution failed: {str(exception_to_check)}")
|
|
||||||
# Return error message with failure status instead of raising to avoid Datadog alerts
|
|
||||||
return str(e), False
|
|
||||||
# Re-raise unexpected errors
|
|
||||||
raise
|
|
||||||
|
|
||||||
parsed_content = []
|
parsed_content = []
|
||||||
for content_piece in result.content:
|
for content_piece in result.content:
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ from mcp import Tool as MCPTool
|
|||||||
from letta.errors import LettaMCPConnectionError
|
from letta.errors import LettaMCPConnectionError
|
||||||
from letta.functions.mcp_client.types import SSEServerConfig, StreamableHTTPServerConfig
|
from letta.functions.mcp_client.types import SSEServerConfig, StreamableHTTPServerConfig
|
||||||
from letta.log import get_logger
|
from letta.log import get_logger
|
||||||
|
from letta.services.mcp.base_client import _log_mcp_tool_error
|
||||||
from letta.services.mcp.server_side_oauth import ServerSideOAuth
|
from letta.services.mcp.server_side_oauth import ServerSideOAuth
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
@@ -142,21 +143,11 @@ class AsyncFastMCPSSEClient:
|
|||||||
try:
|
try:
|
||||||
result = await self.client.call_tool(tool_name, tool_args)
|
result = await self.client.call_tool(tool_name, tool_args)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# ToolError is raised by fastmcp for input validation errors (e.g., missing required properties)
|
|
||||||
# McpError is raised for other MCP-related errors
|
|
||||||
# Both are expected user-facing issues from external MCP servers
|
|
||||||
# Log at debug level to avoid triggering production alerts for expected failures
|
|
||||||
|
|
||||||
# Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions)
|
|
||||||
exception_to_check = e
|
exception_to_check = e
|
||||||
if hasattr(e, "exceptions") and e.exceptions:
|
if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1:
|
||||||
# If it's an ExceptionGroup with a single wrapped exception, unwrap it
|
exception_to_check = e.exceptions[0]
|
||||||
if len(e.exceptions) == 1:
|
_log_mcp_tool_error(logger, tool_name, exception_to_check)
|
||||||
exception_to_check = e.exceptions[0]
|
return str(exception_to_check), False
|
||||||
|
|
||||||
if exception_to_check.__class__.__name__ in ("McpError", "ToolError"):
|
|
||||||
logger.debug(f"MCP tool '{tool_name}' execution failed: {str(exception_to_check)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Parse content from result
|
# Parse content from result
|
||||||
parsed_content = []
|
parsed_content = []
|
||||||
@@ -309,21 +300,11 @@ class AsyncFastMCPStreamableHTTPClient:
|
|||||||
try:
|
try:
|
||||||
result = await self.client.call_tool(tool_name, tool_args)
|
result = await self.client.call_tool(tool_name, tool_args)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# ToolError is raised by fastmcp for input validation errors (e.g., missing required properties)
|
|
||||||
# McpError is raised for other MCP-related errors
|
|
||||||
# Both are expected user-facing issues from external MCP servers
|
|
||||||
# Log at debug level to avoid triggering production alerts for expected failures
|
|
||||||
|
|
||||||
# Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions)
|
|
||||||
exception_to_check = e
|
exception_to_check = e
|
||||||
if hasattr(e, "exceptions") and e.exceptions:
|
if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1:
|
||||||
# If it's an ExceptionGroup with a single wrapped exception, unwrap it
|
exception_to_check = e.exceptions[0]
|
||||||
if len(e.exceptions) == 1:
|
_log_mcp_tool_error(logger, tool_name, exception_to_check)
|
||||||
exception_to_check = e.exceptions[0]
|
return str(exception_to_check), False
|
||||||
|
|
||||||
if exception_to_check.__class__.__name__ in ("McpError", "ToolError"):
|
|
||||||
logger.debug(f"MCP tool '{tool_name}' execution failed: {str(exception_to_check)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Parse content from result
|
# Parse content from result
|
||||||
parsed_content = []
|
parsed_content = []
|
||||||
|
|||||||
Reference in New Issue
Block a user