* fix(core): catch all MCP tool execution errors instead of re-raising MCP tools are external user-configured servers - any failure during tool execution is expected and should be returned as (error_msg, False) to the agent, not raised as an exception that hits Datadog as a 500. Previously: - base_client.py only caught McpError/ToolError, re-raised everything else - fastmcp_client.py (both SSE and StreamableHTTP) always re-raised Now all three execute_tool() methods catch all exceptions and return the error message to the agent conversation. The agent handles tool failures via the error message naturally. This silences ~15 Datadog issue types including: - fastmcp.exceptions.ToolError (validation, permissions) - mcp.shared.exceptions.McpError (connection closed, credentials) - httpx.HTTPStatusError (503 from Zapier, etc.) - httpx.ConnectError, ReadTimeout, RemoteProtocolError - requests.exceptions.ConnectionError - builtins.ConnectionError 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix(core): log unexpected MCP errors at warning level with traceback Expected MCP errors (ToolError, McpError, httpx.*, ConnectionError, etc.) log at info level. Anything else (e.g. TypeError, AttributeError from our own code) logs at warning with exc_info=True so it still surfaces in Datadog without crashing the request. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> --------- Co-authored-by: Letta <noreply@letta.com>
154 lines
6.4 KiB
Python
154 lines
6.4 KiB
Python
from contextlib import AsyncExitStack
|
|
from typing import Optional, Tuple
|
|
|
|
from mcp import ClientSession, Tool as MCPTool
|
|
from mcp.client.auth import OAuthClientProvider
|
|
from mcp.types import TextContent
|
|
|
|
from letta.errors import LettaMCPConnectionError
|
|
from letta.functions.mcp_client.types import BaseServerConfig
|
|
from letta.log import get_logger
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
EXPECTED_MCP_TOOL_ERRORS = (
|
|
"McpError",
|
|
"ToolError",
|
|
"HTTPStatusError",
|
|
"ConnectError",
|
|
"ConnectTimeout",
|
|
"ReadTimeout",
|
|
"ReadError",
|
|
"RemoteProtocolError",
|
|
"LocalProtocolError",
|
|
"ConnectionError",
|
|
"SSLError",
|
|
"MaxRetryError",
|
|
"ProtocolError",
|
|
"BrokenResourceError",
|
|
)
|
|
|
|
|
|
def _log_mcp_tool_error(log: "get_logger", tool_name: str, exc: Exception) -> None:
|
|
exc_name = type(exc).__name__
|
|
if exc_name in EXPECTED_MCP_TOOL_ERRORS:
|
|
log.info(f"MCP tool '{tool_name}' execution failed ({exc_name}): {exc}")
|
|
else:
|
|
log.warning(f"MCP tool '{tool_name}' execution failed with unexpected error ({exc_name}): {exc}", exc_info=True)
|
|
|
|
|
|
# TODO: Get rid of Async prefix on this class name once we deprecate old sync code
|
|
class AsyncBaseMCPClient:
|
|
# HTTP headers
|
|
AGENT_ID_HEADER = "X-Agent-Id"
|
|
|
|
def __init__(
|
|
self, server_config: BaseServerConfig, oauth_provider: Optional[OAuthClientProvider] = None, agent_id: Optional[str] = None
|
|
):
|
|
self.server_config = server_config
|
|
self.oauth_provider = oauth_provider
|
|
self.agent_id = agent_id
|
|
self.exit_stack = AsyncExitStack()
|
|
self.session: Optional[ClientSession] = None
|
|
self.initialized = False
|
|
|
|
async def connect_to_server(self):
|
|
try:
|
|
await self._initialize_connection(self.server_config)
|
|
await self.session.initialize()
|
|
self.initialized = True
|
|
except LettaMCPConnectionError:
|
|
raise
|
|
except ConnectionError as e:
|
|
logger.debug(f"MCP connection failed: {str(e)}")
|
|
raise LettaMCPConnectionError(message=str(e), server_name=getattr(self.server_config, "server_name", None)) from e
|
|
except Exception as e:
|
|
logger.warning(
|
|
f"Connecting to MCP server failed. Please review your server config: {self.server_config.model_dump_json(indent=4)}. Error: {str(e)}"
|
|
)
|
|
if hasattr(self.server_config, "server_url") and self.server_config.server_url:
|
|
server_info = f"server URL '{self.server_config.server_url}'"
|
|
elif hasattr(self.server_config, "command") and self.server_config.command:
|
|
server_info = f"command '{self.server_config.command}'"
|
|
else:
|
|
server_info = f"server '{self.server_config.server_name}'"
|
|
raise LettaMCPConnectionError(
|
|
message=f"Failed to connect to MCP {server_info}. Please check your configuration and ensure the server is accessible.",
|
|
server_name=getattr(self.server_config, "server_name", None),
|
|
) from e
|
|
|
|
async def _initialize_connection(self, server_config: BaseServerConfig) -> None:
|
|
raise NotImplementedError("Subclasses must implement _initialize_connection")
|
|
|
|
async def list_tools(self, serialize: bool = False) -> list[MCPTool]:
|
|
self._check_initialized()
|
|
response = await self.session.list_tools()
|
|
if serialize:
|
|
serializable_tools = []
|
|
for tool in response.tools:
|
|
if hasattr(tool, "model_dump"):
|
|
# Pydantic model - use model_dump
|
|
serializable_tools.append(tool.model_dump())
|
|
elif hasattr(tool, "dict"):
|
|
# Older Pydantic model - use dict()
|
|
serializable_tools.append(tool.dict())
|
|
elif hasattr(tool, "__dict__"):
|
|
# Regular object - use __dict__
|
|
serializable_tools.append(tool.__dict__)
|
|
else:
|
|
# Fallback - convert to string
|
|
serializable_tools.append(str(tool))
|
|
return serializable_tools
|
|
return response.tools
|
|
|
|
async def execute_tool(self, tool_name: str, tool_args: dict) -> Tuple[str, bool]:
|
|
self._check_initialized()
|
|
try:
|
|
result = await self.session.call_tool(tool_name, tool_args)
|
|
except Exception as e:
|
|
exception_to_check = e
|
|
if hasattr(e, "exceptions") and e.exceptions and len(e.exceptions) == 1:
|
|
exception_to_check = e.exceptions[0]
|
|
_log_mcp_tool_error(logger, tool_name, exception_to_check)
|
|
return str(exception_to_check), False
|
|
|
|
parsed_content = []
|
|
for content_piece in result.content:
|
|
if isinstance(content_piece, TextContent):
|
|
parsed_content.append(content_piece.text)
|
|
logger.debug(f"MCP tool result parsed content (text): {parsed_content}")
|
|
else:
|
|
parsed_content.append(str(content_piece))
|
|
logger.debug(f"MCP tool result parsed content (other): {parsed_content}")
|
|
if len(parsed_content) > 0:
|
|
final_content = " ".join(parsed_content)
|
|
else:
|
|
# TODO move hardcoding to constants
|
|
final_content = "Empty response from tool"
|
|
|
|
return final_content, not result.isError
|
|
|
|
def _check_initialized(self):
|
|
if not self.initialized:
|
|
logger.error("MCPClient has not been initialized")
|
|
raise RuntimeError("MCPClient has not been initialized")
|
|
|
|
async def cleanup(self):
|
|
"""Clean up resources used by the MCP client.
|
|
|
|
This method handles ExceptionGroup errors that can occur when closing async context managers
|
|
(e.g., from the MCP library's internal TaskGroup usage). Cleanup is a best-effort operation
|
|
and errors are logged but not re-raised to prevent masking the original exception.
|
|
"""
|
|
try:
|
|
await self.exit_stack.aclose()
|
|
except* Exception as eg:
|
|
# ExceptionGroup can be raised when closing async context managers that use TaskGroup
|
|
# Log each sub-exception at debug level since cleanup errors are expected in some cases
|
|
# (e.g., connection already closed, server unavailable)
|
|
for exc in eg.exceptions:
|
|
logger.debug(f"MCP client cleanup error (suppressed): {type(exc).__name__}: {exc}")
|
|
|
|
def to_sync_client(self):
|
|
raise NotImplementedError("Subclasses must implement to_sync_client")
|