Files
letta-server/letta/services/mcp/base_client.py
Kian Jones ddcfeb26b1 fix(core): catch all MCP tool execution errors instead of re-raising (#9419)
* fix(core): catch all MCP tool execution errors instead of re-raising

MCP tools are external user-configured servers - any failure during
tool execution is expected and should be returned as (error_msg, False)
to the agent, not raised as an exception that hits Datadog as a 500.

Previously:
- base_client.py only caught McpError/ToolError, re-raised everything else
- fastmcp_client.py (both SSE and StreamableHTTP) always re-raised

Now all three execute_tool() methods catch all exceptions and return
the error message to the agent conversation. The agent handles tool
failures via the error message naturally.

This silences ~15 Datadog issue types including:
- fastmcp.exceptions.ToolError (validation, permissions)
- mcp.shared.exceptions.McpError (connection closed, credentials)
- httpx.HTTPStatusError (503 from Zapier, etc.)
- httpx.ConnectError, ReadTimeout, RemoteProtocolError
- requests.exceptions.ConnectionError
- builtins.ConnectionError

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): log unexpected MCP errors at warning level with traceback

Expected MCP errors (ToolError, McpError, httpx.*, ConnectionError, etc.)
log at info level. Anything else (e.g. TypeError, AttributeError from
our own code) logs at warning with exc_info=True so it still surfaces
in Datadog without crashing the request.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
2026-02-24 10:52:07 -08:00

154 lines
6.4 KiB
Python

import logging
from contextlib import AsyncExitStack
from typing import Optional, Tuple

from mcp import ClientSession, Tool as MCPTool
from mcp.client.auth import OAuthClientProvider
from mcp.types import TextContent

from letta.errors import LettaMCPConnectionError
from letta.functions.mcp_client.types import BaseServerConfig
from letta.log import get_logger
logger = get_logger(__name__)
EXPECTED_MCP_TOOL_ERRORS = (
"McpError",
"ToolError",
"HTTPStatusError",
"ConnectError",
"ConnectTimeout",
"ReadTimeout",
"ReadError",
"RemoteProtocolError",
"LocalProtocolError",
"ConnectionError",
"SSLError",
"MaxRetryError",
"ProtocolError",
"BrokenResourceError",
)
def _log_mcp_tool_error(log: "get_logger", tool_name: str, exc: Exception) -> None:
exc_name = type(exc).__name__
if exc_name in EXPECTED_MCP_TOOL_ERRORS:
log.info(f"MCP tool '{tool_name}' execution failed ({exc_name}): {exc}")
else:
log.warning(f"MCP tool '{tool_name}' execution failed with unexpected error ({exc_name}): {exc}", exc_info=True)
# TODO: Get rid of Async prefix on this class name once we deprecate old sync code
class AsyncBaseMCPClient:
# HTTP headers
AGENT_ID_HEADER = "X-Agent-Id"
def __init__(
self, server_config: BaseServerConfig, oauth_provider: Optional[OAuthClientProvider] = None, agent_id: Optional[str] = None
):
self.server_config = server_config
self.oauth_provider = oauth_provider
self.agent_id = agent_id
self.exit_stack = AsyncExitStack()
self.session: Optional[ClientSession] = None
self.initialized = False
async def connect_to_server(self):
try:
await self._initialize_connection(self.server_config)
await self.session.initialize()
self.initialized = True
except LettaMCPConnectionError:
raise
except ConnectionError as e:
logger.debug(f"MCP connection failed: {str(e)}")
raise LettaMCPConnectionError(message=str(e), server_name=getattr(self.server_config, "server_name", None)) from e
except Exception as e:
logger.warning(
f"Connecting to MCP server failed. Please review your server config: {self.server_config.model_dump_json(indent=4)}. Error: {str(e)}"
)
if hasattr(self.server_config, "server_url") and self.server_config.server_url:
server_info = f"server URL '{self.server_config.server_url}'"
elif hasattr(self.server_config, "command") and self.server_config.command:
server_info = f"command '{self.server_config.command}'"
else:
server_info = f"server '{self.server_config.server_name}'"
raise LettaMCPConnectionError(
message=f"Failed to connect to MCP {server_info}. Please check your configuration and ensure the server is accessible.",
server_name=getattr(self.server_config, "server_name", None),
) from e
async def _initialize_connection(self, server_config: BaseServerConfig) -> None:
raise NotImplementedError("Subclasses must implement _initialize_connection")
async def list_tools(self, serialize: bool = False) -> list[MCPTool]:
self._check_initialized()
response = await self.session.list_tools()
if serialize:
serializable_tools = []
for tool in response.tools:
if hasattr(tool, "model_dump"):
# Pydantic model - use model_dump
serializable_tools.append(tool.model_dump())
elif hasattr(tool, "dict"):
# Older Pydantic model - use dict()
serializable_tools.append(tool.dict())
elif hasattr(tool, "__dict__"):
# Regular object - use __dict__
serializable_tools.append(tool.__dict__)
else:
# Fallback - convert to string
serializable_tools.append(str(tool))
return serializable_tools
return response.tools
async def execute_tool(self, tool_name: str, tool_args: dict) -> Tuple[str, bool]:
self._check_initialized()
try:
result = await self.session.call_tool(tool_name, tool_args)
except Exception as e:
exception_to_check = e
if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1:
exception_to_check = e.exceptions[0]
_log_mcp_tool_error(logger, tool_name, exception_to_check)
return str(exception_to_check), False
parsed_content = []
for content_piece in result.content:
if isinstance(content_piece, TextContent):
parsed_content.append(content_piece.text)
logger.debug(f"MCP tool result parsed content (text): {parsed_content}")
else:
parsed_content.append(str(content_piece))
logger.debug(f"MCP tool result parsed content (other): {parsed_content}")
if len(parsed_content) > 0:
final_content = " ".join(parsed_content)
else:
# TODO move hardcoding to constants
final_content = "Empty response from tool"
return final_content, not result.isError
def _check_initialized(self):
if not self.initialized:
logger.error("MCPClient has not been initialized")
raise RuntimeError("MCPClient has not been initialized")
async def cleanup(self):
"""Clean up resources used by the MCP client.
This method handles ExceptionGroup errors that can occur when closing async context managers
(e.g., from the MCP library's internal TaskGroup usage). Cleanup is a best-effort operation
and errors are logged but not re-raised to prevent masking the original exception.
"""
try:
await self.exit_stack.aclose()
except* Exception as eg:
# ExceptionGroup can be raised when closing async context managers that use TaskGroup
# Log each sub-exception at debug level since cleanup errors are expected in some cases
# (e.g., connection already closed, server unavailable)
for exc in eg.exceptions:
logger.debug(f"MCP client cleanup error (suppressed): {type(exc).__name__}: {exc}")
def to_sync_client(self):
raise NotImplementedError("Subclasses must implement to_sync_client")