fix(core): handle protocol errors and foreign key violations (#9308)

* fix(core): handle PermissionDeniedError in provider API key validation

Fixed OpenAI PermissionDeniedError being raised as unknown error when
validating provider API keys. The check_api_key methods in OpenAI-based
providers (OpenAI, OpenRouter, Azure, Together) now properly catch and
re-raise PermissionDeniedError as LLMPermissionDeniedError.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle Unicode surrogates in OpenAI requests

Sanitize invalid UTF-16 surrogates before sending requests to OpenAI API.
Fixes UnicodeEncodeError when message content contains unpaired surrogates
from corrupted emoji data or malformed Unicode sequences.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle MCP tool schema validation errors gracefully

Catch fastmcp.exceptions.ToolError in execute_mcp_tool endpoint and
convert to LettaInvalidArgumentError (400) instead of letting it
propagate as 500 error. This is an expected user error when tool
arguments don't match the MCP tool's schema.

Fixes Datadog issue 8f2d874a-f8e5-11f0-9b25-da7ad0900000

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle ExceptionGroup-wrapped ToolError in MCP executor

When MCP tools fail with validation errors (e.g., missing required parameters),
fastmcp raises ToolError exceptions that may be wrapped in ExceptionGroup by
Python's async TaskGroup. The exception handler now unwraps single-exception
groups before checking if the error should be handled gracefully.

Fixes Calendly API "organization parameter missing" errors being logged to
Datadog instead of returning friendly error messages to users.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: handle missing agent in create_conversation to prevent foreign key violation

* Update .gitignore

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kian Jones
2026-02-05 17:28:21 -08:00
committed by Caren Thomas
parent 85ee7ed7b4
commit be60697a62
8 changed files with 67 additions and 13 deletions

View File

@@ -67,6 +67,33 @@ from letta.settings import model_settings
logger = get_logger(__name__)
def sanitize_unicode_surrogates(obj: Any) -> Any:
    """Recursively sanitize invalid UTF-16 surrogates in strings within nested data structures.

    This fixes UnicodeEncodeError when the OpenAI SDK tries to encode requests containing
    unpaired UTF-16 surrogates (e.g., '\\ud83c' without its pair), which can occur in
    corrupted emoji data or malformed Unicode sequences.

    Args:
        obj: The object to sanitize (dict, list, tuple, str, or any other type).

    Returns:
        The sanitized object. Strings containing unpaired surrogates have each
        offending code unit replaced by '?' (the substitution character used by
        the 'replace' error handler when *encoding*); every other value is
        returned unchanged.
    """
    if isinstance(obj, dict):
        # NOTE(review): only values are sanitized; keys are assumed to already be
        # clean identifiers — confirm against callers if user-supplied keys appear.
        return {k: sanitize_unicode_surrogates(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [sanitize_unicode_surrogates(item) for item in obj]
    elif isinstance(obj, tuple):
        # Tuples can also reach the serializer; sanitize them element-wise too.
        return tuple(sanitize_unicode_surrogates(item) for item in obj)
    elif isinstance(obj, str):
        try:
            # Fast path: a round-trippable string needs no copying.
            obj.encode("utf-8")
            return obj
        except UnicodeEncodeError:
            # Lone surrogates cannot be UTF-8 encoded; 'replace' swaps each
            # unencodable code unit for b'?', which decodes back to '?'.
            return obj.encode("utf-8", errors="replace").decode("utf-8")
    else:
        return obj
def is_openai_reasoning_model(model: str) -> bool:
"""Utility function to check if the model is a 'reasoner'"""

View File

@@ -2,11 +2,11 @@ from collections import defaultdict
from typing import ClassVar, Literal
import httpx
from openai import AsyncAzureOpenAI
from openai import AsyncAzureOpenAI, AuthenticationError, PermissionDeniedError
from pydantic import Field, field_validator
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
from letta.errors import ErrorCode, LLMAuthenticationError
from letta.errors import ErrorCode, LLMAuthenticationError, LLMPermissionDeniedError
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
@@ -65,6 +65,9 @@ class AzureProvider(Provider):
try:
models_list = await client.models.list()
except (AuthenticationError, PermissionDeniedError):
# Re-raise auth/permission errors so they're properly handled upstream
raise
except Exception:
return []
@@ -176,5 +179,8 @@ class AzureProvider(Provider):
try:
await self.list_llm_models_async()
except (LLMAuthenticationError, LLMPermissionDeniedError):
# Re-raise specific LLM errors as-is
raise
except Exception as e:
raise LLMAuthenticationError(message=f"Failed to authenticate with Azure: {e}", code=ErrorCode.UNAUTHENTICATED)

View File

@@ -1,10 +1,10 @@
from typing import Literal
from openai import AsyncOpenAI, AuthenticationError
from openai import AsyncOpenAI, AuthenticationError, PermissionDeniedError
from pydantic import Field
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError, LLMPermissionDeniedError
from letta.log import get_logger
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
@@ -38,6 +38,8 @@ class OpenAIProvider(Provider):
await client.models.list()
except AuthenticationError as e:
raise LLMAuthenticationError(message=f"Failed to authenticate with OpenAI: {e}", code=ErrorCode.UNAUTHENTICATED)
except PermissionDeniedError as e:
raise LLMPermissionDeniedError(message=f"Permission denied by OpenAI: {e}", code=ErrorCode.PERMISSION_DENIED)
except Exception as e:
raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)

View File

@@ -1,9 +1,9 @@
from typing import Literal
from openai import AsyncOpenAI, AuthenticationError
from openai import AsyncOpenAI, AuthenticationError, PermissionDeniedError
from pydantic import Field
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError, LLMPermissionDeniedError
from letta.log import get_logger
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
@@ -41,6 +41,8 @@ class OpenRouterProvider(OpenAIProvider):
await client.models.list()
except AuthenticationError as e:
raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED)
except PermissionDeniedError as e:
raise LLMPermissionDeniedError(message=f"Permission denied by OpenRouter: {e}", code=ErrorCode.PERMISSION_DENIED)
except Exception as e:
raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)

View File

@@ -11,7 +11,7 @@ logger = get_logger(__name__)
from pydantic import Field
from letta.constants import MIN_CONTEXT_WINDOW
from letta.errors import ErrorCode, LLMAuthenticationError
from letta.errors import ErrorCode, LLMAuthenticationError, LLMPermissionDeniedError
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
@@ -99,5 +99,8 @@ class TogetherProvider(OpenAIProvider):
try:
await self.list_llm_models_async()
except (LLMAuthenticationError, LLMPermissionDeniedError):
# Re-raise specific LLM errors as-is
raise
except Exception as e:
raise LLMAuthenticationError(message=f"Failed to authenticate with Together: {e}", code=ErrorCode.UNAUTHENTICATED)

View File

@@ -3,6 +3,7 @@ from collections.abc import AsyncGenerator
from typing import Any, Dict, List, Literal, Optional, Union
from fastapi import APIRouter, Body, Depends, HTTPException, Query, Request
from fastmcp.exceptions import ToolError as FastMCPToolError
from httpx import ConnectError, HTTPStatusError
from pydantic import BaseModel, Field
from starlette.responses import StreamingResponse
@@ -819,7 +820,10 @@ async def execute_mcp_tool(
await client.connect_to_server()
# Execute the tool
result, success = await client.execute_tool(tool_name, request.args)
try:
result, success = await client.execute_tool(tool_name, request.args)
except FastMCPToolError as e:
raise LettaInvalidArgumentError(f"Invalid arguments for MCP tool '{tool_name}': {str(e)}", argument_name="args")
return {
"result": result,

View File

@@ -12,7 +12,6 @@ from letta.orm.block import Block as BlockModel
from letta.orm.blocks_conversations import BlocksConversations
from letta.orm.conversation import Conversation as ConversationModel
from letta.orm.conversation_messages import ConversationMessage as ConversationMessageModel
from letta.orm.errors import NoResultFound
from letta.orm.message import Message as MessageModel
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
@@ -22,6 +21,7 @@ from letta.schemas.letta_message import LettaMessage
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.services.helpers.agent_manager_helper import validate_agent_exists_async
from letta.utils import enforce_types
@@ -48,6 +48,8 @@ class ConversationManager:
The created conversation with isolated_block_ids if any were created
"""
async with db_registry.async_session() as session:
# Validate that the agent exists before creating the conversation
await validate_agent_exists_async(session, agent_id, actor)
conversation = ConversationModel(
agent_id=agent_id,
summary=conversation_create.summary,

View File

@@ -65,12 +65,20 @@ class ExternalMCPToolExecutor(ToolExecutor):
# Check if this is an expected MCP error (ToolError, McpError)
# These are user-facing errors from the external MCP server (e.g., "No connected account found")
# We handle them gracefully instead of letting them propagate as exceptions
if e.__class__.__name__ in MCP_EXPECTED_ERROR_CLASSES:
logger.info(f"MCP tool '{function_name}' returned expected error: {str(e)}")
# Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions)
exception_to_check = e
if hasattr(e, "exceptions") and e.exceptions:
# If it's an ExceptionGroup with a single wrapped exception, unwrap it
if len(e.exceptions) == 1:
exception_to_check = e.exceptions[0]
if exception_to_check.__class__.__name__ in MCP_EXPECTED_ERROR_CLASSES:
logger.info(f"MCP tool '{function_name}' returned expected error: {str(exception_to_check)}")
error_message = get_friendly_error_msg(
function_name=function_name,
exception_name=e.__class__.__name__,
exception_message=str(e),
exception_name=exception_to_check.__class__.__name__,
exception_message=str(exception_to_check),
)
return ToolExecutionResult(
status="error",