# letta-server/letta/services/mcp/base_client.py

from contextlib import AsyncExitStack
from typing import Optional, Tuple

from mcp import ClientSession, Tool as MCPTool
from mcp.client.auth import OAuthClientProvider
from mcp.types import TextContent

from letta.functions.mcp_client.types import BaseServerConfig
from letta.log import get_logger

logger = get_logger(__name__)


# TODO: Get rid of Async prefix on this class name once we deprecate old sync code
class AsyncBaseMCPClient:
    # HTTP headers
    AGENT_ID_HEADER = "X-Agent-Id"

    def __init__(
        self, server_config: BaseServerConfig, oauth_provider: Optional[OAuthClientProvider] = None, agent_id: Optional[str] = None
    ):
        self.server_config = server_config
        self.oauth_provider = oauth_provider
        self.agent_id = agent_id
        self.exit_stack = AsyncExitStack()
        self.session: Optional[ClientSession] = None
        self.initialized = False

    async def connect_to_server(self):
        try:
            await self._initialize_connection(self.server_config)
            await self.session.initialize()
            self.initialized = True
        except ConnectionError as e:
            # MCP connection failures are often due to user misconfiguration, not system errors
            # Log at debug level to avoid triggering Sentry alerts for expected configuration issues
            logger.debug(f"MCP connection failed: {str(e)}")
            raise e
        except Exception as e:
            # MCP connection failures are often due to user misconfiguration, not system errors
            # Log as warning for visibility in monitoring
            logger.warning(
                f"Connecting to MCP server failed. Please review your server config: {self.server_config.model_dump_json(indent=4)}. Error: {str(e)}"
            )
            if hasattr(self.server_config, "server_url") and self.server_config.server_url:
                server_info = f"server URL '{self.server_config.server_url}'"
            elif hasattr(self.server_config, "command") and self.server_config.command:
                server_info = f"command '{self.server_config.command}'"
            else:
                server_info = f"server '{self.server_config.server_name}'"
            raise ConnectionError(
                f"Failed to connect to MCP {server_info}. Please check your configuration and ensure the server is accessible."
            ) from e

    async def _initialize_connection(self, server_config: BaseServerConfig) -> None:
        raise NotImplementedError("Subclasses must implement _initialize_connection")

    async def list_tools(self, serialize: bool = False) -> list[MCPTool]:
        self._check_initialized()
        response = await self.session.list_tools()
        if serialize:
            serializable_tools = []
            for tool in response.tools:
                if hasattr(tool, "model_dump"):
                    # Pydantic model - use model_dump
                    serializable_tools.append(tool.model_dump())
                elif hasattr(tool, "dict"):
                    # Older Pydantic model - use dict()
                    serializable_tools.append(tool.dict())
                elif hasattr(tool, "__dict__"):
                    # Regular object - use __dict__
                    serializable_tools.append(tool.__dict__)
                else:
                    # Fallback - convert to string
                    serializable_tools.append(str(tool))
            return serializable_tools
        return response.tools

    async def execute_tool(self, tool_name: str, tool_args: dict) -> Tuple[str, bool]:
        self._check_initialized()
        try:
            result = await self.session.call_tool(tool_name, tool_args)
        except Exception as e:
            # ToolError is raised by fastmcp for input validation errors (e.g., missing required properties)
            # McpError is raised for other MCP-related errors
            # Both are expected user-facing issues from external MCP servers
            # Log at debug level to avoid triggering production alerts for expected failures
            if e.__class__.__name__ in ("McpError", "ToolError"):
                logger.debug(f"MCP tool '{tool_name}' execution failed: {str(e)}")
                # Return error message with failure status instead of raising to avoid Datadog alerts
                return str(e), False
            # Re-raise unexpected errors
            raise

        parsed_content = []
        for content_piece in result.content:
            if isinstance(content_piece, TextContent):
                parsed_content.append(content_piece.text)
                logger.debug(f"MCP tool result parsed content (text): {parsed_content}")
            else:
                parsed_content.append(str(content_piece))
                logger.debug(f"MCP tool result parsed content (other): {parsed_content}")
        if len(parsed_content) > 0:
            final_content = " ".join(parsed_content)
        else:
            # TODO move hardcoding to constants
            final_content = "Empty response from tool"

        return final_content, not result.isError

    def _check_initialized(self):
        if not self.initialized:
            logger.error("MCPClient has not been initialized")
            raise RuntimeError("MCPClient has not been initialized")

    async def cleanup(self):
        """Clean up resources used by the MCP client.

        This method handles ExceptionGroup errors that can occur when closing async context managers
        (e.g., from the MCP library's internal TaskGroup usage). Cleanup is a best-effort operation
        and errors are logged but not re-raised to prevent masking the original exception.
        """
        try:
            await self.exit_stack.aclose()
        except* Exception as eg:
            # ExceptionGroup can be raised when closing async context managers that use TaskGroup
            # Log each sub-exception at debug level since cleanup errors are expected in some cases
            # (e.g., connection already closed, server unavailable)
            for exc in eg.exceptions:
                logger.debug(f"MCP client cleanup error (suppressed): {type(exc).__name__}: {exc}")

    def to_sync_client(self):
        raise NotImplementedError("Subclasses must implement to_sync_client")