fix(mcp): handle MCP tool errors gracefully to prevent Datadog alerts (#8687)
MCP tool errors (ToolError, McpError) are expected user-facing errors from external MCP servers (e.g., "No connected account found"). These were propagating through the @trace_method decorator and being recorded as errors in Datadog APM.

Changes:
- Add try/except to catch expected MCP errors in ExternalMCPToolExecutor
- Return ToolExecutionResult with status="error" instead of re-raising
- Log expected errors at INFO level instead of letting them trace as ERROR
- Remove stray 'pass' statement that was a no-op

Fixes #8685

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: datadog-official[bot] <datadog-official[bot]@users.noreply.github.com>
Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com>
This commit is contained in:
committed by
Sarah Wooders
parent
81b5d71889
commit
940d648d42
@@ -1,6 +1,7 @@
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from letta.constants import MCP_TOOL_TAG_NAME_PREFIX
|
||||
from letta.log import get_logger
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.schemas.agent import AgentState
|
||||
from letta.schemas.sandbox_config import SandboxConfig
|
||||
@@ -9,6 +10,13 @@ from letta.schemas.tool_execution_result import ToolExecutionResult
|
||||
from letta.schemas.user import User
|
||||
from letta.services.mcp_manager import MCPManager
|
||||
from letta.services.tool_executor.tool_executor_base import ToolExecutor
|
||||
from letta.utils import get_friendly_error_msg
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# MCP error class names that represent expected user-facing errors
|
||||
# These are checked by class name to avoid import dependencies on fastmcp/mcp packages
|
||||
MCP_EXPECTED_ERROR_CLASSES = {"McpError", "ToolError"}
|
||||
|
||||
|
||||
class ExternalMCPToolExecutor(ToolExecutor):
|
||||
@@ -25,8 +33,6 @@ class ExternalMCPToolExecutor(ToolExecutor):
|
||||
sandbox_config: Optional[SandboxConfig] = None,
|
||||
sandbox_env_vars: Optional[Dict[str, Any]] = None,
|
||||
) -> ToolExecutionResult:
|
||||
pass
|
||||
|
||||
mcp_server_tag = [tag for tag in tool.tags if tag.startswith(f"{MCP_TOOL_TAG_NAME_PREFIX}:")]
|
||||
if not mcp_server_tag:
|
||||
raise ValueError(f"Tool {tool.name} does not have a valid MCP server tag")
|
||||
@@ -41,16 +47,34 @@ class ExternalMCPToolExecutor(ToolExecutor):
|
||||
environment_variables = agent_state.get_agent_env_vars_as_dict()
|
||||
agent_id = agent_state.id
|
||||
|
||||
function_response, success = await mcp_manager.execute_mcp_server_tool(
|
||||
mcp_server_name=mcp_server_name,
|
||||
tool_name=function_name,
|
||||
tool_args=function_args,
|
||||
environment_variables=environment_variables,
|
||||
actor=actor,
|
||||
agent_id=agent_id,
|
||||
)
|
||||
try:
|
||||
function_response, success = await mcp_manager.execute_mcp_server_tool(
|
||||
mcp_server_name=mcp_server_name,
|
||||
tool_name=function_name,
|
||||
tool_args=function_args,
|
||||
environment_variables=environment_variables,
|
||||
actor=actor,
|
||||
agent_id=agent_id,
|
||||
)
|
||||
|
||||
return ToolExecutionResult(
|
||||
status="success" if success else "error",
|
||||
func_return=function_response,
|
||||
)
|
||||
return ToolExecutionResult(
|
||||
status="success" if success else "error",
|
||||
func_return=function_response,
|
||||
)
|
||||
except Exception as e:
|
||||
# Check if this is an expected MCP error (ToolError, McpError)
|
||||
# These are user-facing errors from the external MCP server (e.g., "No connected account found")
|
||||
# We handle them gracefully instead of letting them propagate as exceptions
|
||||
if e.__class__.__name__ in MCP_EXPECTED_ERROR_CLASSES:
|
||||
logger.info(f"MCP tool '{function_name}' returned expected error: {str(e)}")
|
||||
error_message = get_friendly_error_msg(
|
||||
function_name=function_name,
|
||||
exception_name=e.__class__.__name__,
|
||||
exception_message=str(e),
|
||||
)
|
||||
return ToolExecutionResult(
|
||||
status="error",
|
||||
func_return=error_message,
|
||||
)
|
||||
# Re-raise unexpected errors
|
||||
raise
|
||||
|
||||
Reference in New Issue
Block a user