From be60697a62e441b9f44b03824e7a21cb62d78b91 Mon Sep 17 00:00:00 2001
From: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Date: Thu, 5 Feb 2026 17:28:21 -0800
Subject: [PATCH] fix(core): handle protocol errors and foreign key violations
 (#9308)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(core): handle PermissionDeniedError in provider API key validation

Fixed OpenAI PermissionDeniedError being raised as an unknown error when
validating provider API keys. The check_api_key methods in OpenAI-based
providers (OpenAI, OpenRouter, Azure, Together) now properly catch and
re-raise PermissionDeniedError as LLMPermissionDeniedError.

🐛 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle Unicode surrogates in OpenAI requests

Sanitize invalid UTF-16 surrogates before sending requests to OpenAI API.
Fixes UnicodeEncodeError when message content contains unpaired surrogates
from corrupted emoji data or malformed Unicode sequences.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle MCP tool schema validation errors gracefully

Catch fastmcp.exceptions.ToolError in execute_mcp_tool endpoint and
convert to LettaInvalidArgumentError (400) instead of letting it
propagate as a 500 error. This is an expected user error when tool
arguments don't match the MCP tool's schema.

Fixes Datadog issue 8f2d874a-f8e5-11f0-9b25-da7ad0900000

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle ExceptionGroup-wrapped ToolError in MCP executor

When MCP tools fail with validation errors (e.g., missing required parameters),
fastmcp raises ToolError exceptions that may be wrapped in ExceptionGroup by
Python's async TaskGroup. The exception handler now unwraps single-exception
groups before checking if the error should be handled gracefully.

Fixes Calendly API "organization parameter missing" errors being logged to
Datadog instead of returning friendly error messages to users.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: handle missing agent in create_conversation to prevent foreign key violation

* Update .gitignore

---------

Co-authored-by: Letta <noreply@letta.com>
---
 letta/llm_api/openai_client.py                | 27 +++++++++++++++++++
 letta/schemas/providers/azure.py              | 10 +++++--
 letta/schemas/providers/openai.py             |  6 +++--
 letta/schemas/providers/openrouter.py         |  6 +++--
 letta/schemas/providers/together.py           |  5 +++-
 letta/server/rest_api/routers/v1/tools.py     |  6 ++++-
 letta/services/conversation_manager.py        |  4 ++-
 .../tool_executor/mcp_tool_executor.py        | 16 ++++++++---
 8 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 44aec1ff..991e8d84 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -67,6 +67,33 @@ from letta.settings import model_settings
 logger = get_logger(__name__)
 
 
+def sanitize_unicode_surrogates(obj: Any) -> Any:
+    """Recursively replace lone UTF-16 surrogates in strings within nested data structures.
+
+    This prevents UnicodeEncodeError when a request containing unpaired surrogates
+    (e.g., '\\ud83c' without its pair) is encoded as UTF-8. Dict keys, dict values and
+    list items are sanitized; any other type is returned unchanged.
+
+    Args:
+        obj: The object to sanitize (dict, list, str, or other types)
+
+    Returns:
+        The sanitized object with each surrogate code point replaced by U+FFFD
+    """
+    if isinstance(obj, dict):
+        return {sanitize_unicode_surrogates(k): sanitize_unicode_surrogates(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [sanitize_unicode_surrogates(item) for item in obj]
+    if not isinstance(obj, str):
+        return obj
+    try:
+        obj.encode("utf-8")
+    except UnicodeEncodeError:
+        # str.encode(errors="replace") would substitute "?", so map surrogates to U+FFFD explicitly
+        return "".join("\ufffd" if "\ud800" <= ch <= "\udfff" else ch for ch in obj)
+    return obj
+
+
 def is_openai_reasoning_model(model: str) -> bool:
     """Utility function to check if the model is a 'reasoner'"""
 
diff --git a/letta/schemas/providers/azure.py b/letta/schemas/providers/azure.py
index da074420..b9ef7560 100644
--- a/letta/schemas/providers/azure.py
+++ b/letta/schemas/providers/azure.py
@@ -2,11 +2,11 @@ from collections import defaultdict
 from typing import ClassVar, Literal
 
 import httpx
-from openai import AsyncAzureOpenAI
+from openai import AsyncAzureOpenAI, AuthenticationError, PermissionDeniedError
 from pydantic import Field, field_validator
 
 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
-from letta.errors import ErrorCode, LLMAuthenticationError
+from letta.errors import ErrorCode, LLMAuthenticationError, LLMPermissionDeniedError
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
@@ -65,6 +65,9 @@ class AzureProvider(Provider):
 
         try:
             models_list = await client.models.list()
+        except (AuthenticationError, PermissionDeniedError):
+            # Re-raise auth/permission errors so they're properly handled upstream
+            raise
         except Exception:
             return []
 
@@ -176,5 +179,8 @@ class AzureProvider(Provider):
 
         try:
             await self.list_llm_models_async()
+        except (LLMAuthenticationError, LLMPermissionDeniedError):
+            # Re-raise specific LLM errors as-is
+            raise
         except Exception as e:
             raise LLMAuthenticationError(message=f"Failed to authenticate with Azure: {e}", code=ErrorCode.UNAUTHENTICATED)
diff --git a/letta/schemas/providers/openai.py b/letta/schemas/providers/openai.py
index a1bdbb26..bcbe633c 100644
--- a/letta/schemas/providers/openai.py
+++ b/letta/schemas/providers/openai.py
@@ -1,10 +1,10 @@
 from typing import Literal
 
-from openai import AsyncOpenAI, AuthenticationError
+from openai import AsyncOpenAI, AuthenticationError, PermissionDeniedError
 from pydantic import Field
 
 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
-from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
+from letta.errors import ErrorCode, LLMAuthenticationError, LLMError, LLMPermissionDeniedError
 from letta.log import get_logger
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
@@ -38,6 +38,8 @@ class OpenAIProvider(Provider):
             await client.models.list()
         except AuthenticationError as e:
             raise LLMAuthenticationError(message=f"Failed to authenticate with OpenAI: {e}", code=ErrorCode.UNAUTHENTICATED)
+        except PermissionDeniedError as e:
+            raise LLMPermissionDeniedError(message=f"Permission denied by OpenAI: {e}", code=ErrorCode.PERMISSION_DENIED)
         except Exception as e:
             raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
 
diff --git a/letta/schemas/providers/openrouter.py b/letta/schemas/providers/openrouter.py
index 7f1ba419..87fcc6a6 100644
--- a/letta/schemas/providers/openrouter.py
+++ b/letta/schemas/providers/openrouter.py
@@ -1,9 +1,9 @@
 from typing import Literal
 
-from openai import AsyncOpenAI, AuthenticationError
+from openai import AsyncOpenAI, AuthenticationError, PermissionDeniedError
 from pydantic import Field
 
-from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
+from letta.errors import ErrorCode, LLMAuthenticationError, LLMError, LLMPermissionDeniedError
 from letta.log import get_logger
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
@@ -41,6 +41,8 @@ class OpenRouterProvider(OpenAIProvider):
             await client.models.list()
         except AuthenticationError as e:
             raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED)
+        except PermissionDeniedError as e:
+            raise LLMPermissionDeniedError(message=f"Permission denied by OpenRouter: {e}", code=ErrorCode.PERMISSION_DENIED)
         except Exception as e:
             raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
 
diff --git a/letta/schemas/providers/together.py b/letta/schemas/providers/together.py
index 013afffe..00dee3dc 100644
--- a/letta/schemas/providers/together.py
+++ b/letta/schemas/providers/together.py
@@ -11,7 +11,7 @@ logger = get_logger(__name__)
 from pydantic import Field
 
 from letta.constants import MIN_CONTEXT_WINDOW
-from letta.errors import ErrorCode, LLMAuthenticationError
+from letta.errors import ErrorCode, LLMAuthenticationError, LLMPermissionDeniedError
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
@@ -99,5 +99,8 @@ class TogetherProvider(OpenAIProvider):
 
         try:
             await self.list_llm_models_async()
+        except (LLMAuthenticationError, LLMPermissionDeniedError):
+            # Re-raise specific LLM errors as-is
+            raise
         except Exception as e:
             raise LLMAuthenticationError(message=f"Failed to authenticate with Together: {e}", code=ErrorCode.UNAUTHENTICATED)
diff --git a/letta/server/rest_api/routers/v1/tools.py b/letta/server/rest_api/routers/v1/tools.py
index 6dc1dde3..2c656841 100644
--- a/letta/server/rest_api/routers/v1/tools.py
+++ b/letta/server/rest_api/routers/v1/tools.py
@@ -3,6 +3,7 @@ from collections.abc import AsyncGenerator
 from typing import Any, Dict, List, Literal, Optional, Union
 
 from fastapi import APIRouter, Body, Depends, HTTPException, Query, Request
+from fastmcp.exceptions import ToolError as FastMCPToolError
 from httpx import ConnectError, HTTPStatusError
 from pydantic import BaseModel, Field
 from starlette.responses import StreamingResponse
@@ -819,7 +820,10 @@ async def execute_mcp_tool(
         await client.connect_to_server()
 
         # Execute the tool
-        result, success = await client.execute_tool(tool_name, request.args)
+        try:
+            result, success = await client.execute_tool(tool_name, request.args)
+        except FastMCPToolError as e:
+            raise LettaInvalidArgumentError(f"Invalid arguments for MCP tool '{tool_name}': {str(e)}", argument_name="args")
 
         return {
             "result": result,
diff --git a/letta/services/conversation_manager.py b/letta/services/conversation_manager.py
index 101598fb..bce75ac5 100644
--- a/letta/services/conversation_manager.py
+++ b/letta/services/conversation_manager.py
@@ -12,7 +12,6 @@ from letta.orm.block import Block as BlockModel
 from letta.orm.blocks_conversations import BlocksConversations
 from letta.orm.conversation import Conversation as ConversationModel
 from letta.orm.conversation_messages import ConversationMessage as ConversationMessageModel
-from letta.orm.errors import NoResultFound
 from letta.orm.message import Message as MessageModel
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
@@ -22,6 +21,7 @@ from letta.schemas.letta_message import LettaMessage
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
+from letta.services.helpers.agent_manager_helper import validate_agent_exists_async
 from letta.utils import enforce_types
 
 
@@ -48,6 +48,8 @@ class ConversationManager:
             The created conversation with isolated_block_ids if any were created
         """
         async with db_registry.async_session() as session:
+            # Validate that the agent exists before creating the conversation
+            await validate_agent_exists_async(session, agent_id, actor)
             conversation = ConversationModel(
                 agent_id=agent_id,
                 summary=conversation_create.summary,
diff --git a/letta/services/tool_executor/mcp_tool_executor.py b/letta/services/tool_executor/mcp_tool_executor.py
index 3ea16161..b830dc01 100644
--- a/letta/services/tool_executor/mcp_tool_executor.py
+++ b/letta/services/tool_executor/mcp_tool_executor.py
@@ -65,12 +65,20 @@ class ExternalMCPToolExecutor(ToolExecutor):
             # Check if this is an expected MCP error (ToolError, McpError)
             # These are user-facing errors from the external MCP server (e.g., "No connected account found")
             # We handle them gracefully instead of letting them propagate as exceptions
-            if e.__class__.__name__ in MCP_EXPECTED_ERROR_CLASSES:
-                logger.info(f"MCP tool '{function_name}' returned expected error: {str(e)}")
+
+            # Handle ExceptionGroup wrapping (Python 3.11+ async TaskGroup can wrap exceptions)
+            exception_to_check = e
+            if hasattr(e, "exceptions") and e.exceptions:
+                # If it's an ExceptionGroup with a single wrapped exception, unwrap it
+                if len(e.exceptions) == 1:
+                    exception_to_check = e.exceptions[0]
+
+            if exception_to_check.__class__.__name__ in MCP_EXPECTED_ERROR_CLASSES:
+                logger.info(f"MCP tool '{function_name}' returned expected error: {str(exception_to_check)}")
                 error_message = get_friendly_error_msg(
                     function_name=function_name,
-                    exception_name=e.__class__.__name__,
-                    exception_message=str(e),
+                    exception_name=exception_to_check.__class__.__name__,
+                    exception_message=str(exception_to_check),
                 )
                 return ToolExecutionResult(
                     status="error",