From d48932bdb632ab2e74e797c4e2c58b5d6fbd1586 Mon Sep 17 00:00:00 2001
From: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Date: Thu, 5 Feb 2026 20:20:34 -0800
Subject: [PATCH] fix(core): sanitize Unicode surrogates in all LLM client
 requests (#9323)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Multiple OpenAI-compatible LLM clients (Azure, Deepseek, Groq, Together, XAI, ZAI)
and Anthropic-compatible clients (Anthropic, MiniMax, Google Vertex) were overriding
request_async/stream_async without calling sanitize_unicode_surrogates, causing
UnicodeEncodeError when message content contained lone UTF-16 surrogates.

Root cause: Child classes override parent methods but omit the sanitization step that
the base OpenAIClient includes. This allows corrupted Unicode (unpaired surrogates
from malformed emoji) to reach the httpx layer, which rejects it during UTF-8 encoding.

Fix: Import and call sanitize_unicode_surrogates in all overridden request methods.
Also removed duplicate sanitize_unicode_surrogates definition from openai_client.py
that shadowed the canonical implementation in letta.helpers.json_helpers.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

Issue-ID: 10c0f2e4-f87b-11f0-b91c-da7ad0900000
---
 letta/llm_api/anthropic_client.py     |  5 +++++
 letta/llm_api/azure_client.py         |  3 +++
 letta/llm_api/chatgpt_oauth_client.py |  5 +++++
 letta/llm_api/deepseek_client.py      |  5 +++++
 letta/llm_api/google_vertex_client.py |  6 +++++-
 letta/llm_api/groq_client.py          |  3 +++
 letta/llm_api/minimax_client.py       |  5 +++++
 letta/llm_api/openai_client.py        | 27 ---------------------------
 letta/llm_api/together_client.py      |  3 +++
 letta/llm_api/xai_client.py           |  5 +++++
 letta/llm_api/zai_client.py           |  5 +++++
 11 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index 383141d9..c645c2ce 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -29,6 +29,7 @@ from letta.errors import (
 )
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.decorators import deprecated
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -109,6 +110,8 @@ class AnthropicClient(LLMClientBase):
 
     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
         betas: list[str] = []
 
@@ -279,6 +282,8 @@ class AnthropicClient(LLMClientBase):
 
     @trace_method
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
         request_data["stream"] = True
 
diff --git a/letta/llm_api/azure_client.py b/letta/llm_api/azure_client.py
index 7ce10b56..3700c2a0 100644
--- a/letta/llm_api/azure_client.py
+++ b/letta/llm_api/azure_client.py
@@ -4,6 +4,7 @@ from typing import List, Optional, Tuple
 from openai import AsyncAzureOpenAI, AzureOpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -49,6 +50,8 @@ class AzureClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key, base_url, api_version = await self.get_byok_overrides_async(llm_config)
         if not api_key or not base_url or not api_version:
             api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
diff --git a/letta/llm_api/chatgpt_oauth_client.py b/letta/llm_api/chatgpt_oauth_client.py
index 86854c06..47e61eec 100644
--- a/letta/llm_api/chatgpt_oauth_client.py
+++ b/letta/llm_api/chatgpt_oauth_client.py
@@ -39,6 +39,7 @@ from letta.errors import (
     LLMServerError,
     LLMTimeoutError,
 )
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
@@ -356,6 +357,8 @@ class ChatGPTOAuthClient(LLMClientBase):
         Returns:
             Response data in OpenAI ChatCompletion format.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         _, creds = await self._get_provider_and_credentials_async(llm_config)
         headers = self._build_headers(creds)
 
@@ -550,6 +553,8 @@ class ChatGPTOAuthClient(LLMClientBase):
         Returns:
             Async generator yielding ResponseStreamEvent objects.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         _, creds = await self._get_provider_and_credentials_async(llm_config)
         headers = self._build_headers(creds)
 
diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py
index 0703445d..5b2b6ef0 100644
--- a/letta/llm_api/deepseek_client.py
+++ b/letta/llm_api/deepseek_client.py
@@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
@@ -97,6 +98,8 @@ class DeepseekClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
@@ -108,6 +111,8 @@ class DeepseekClient(OpenAIClient):
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
         response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index c4c11d3c..56ed0ae1 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -31,7 +31,7 @@ from letta.errors import (
     LLMUnprocessableEntityError,
 )
 from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.helpers.json_helpers import json_dumps, json_loads
+from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.log import get_logger
@@ -100,6 +100,8 @@ class GoogleVertexClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = self._get_client()
 
         # Gemini 2.5 models will often return MALFORMED_FUNCTION_CALL, force a retry
@@ -175,6 +177,8 @@ class GoogleVertexClient(LLMClientBase):
 
     @trace_method
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = self._get_client()
 
         try:
diff --git a/letta/llm_api/groq_client.py b/letta/llm_api/groq_client.py
index 34edf622..5f24669d 100644
--- a/letta/llm_api/groq_client.py
+++ b/letta/llm_api/groq_client.py
@@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -74,6 +75,8 @@ class GroqClient(OpenAIClient):
         """
         Performs underlying asynchronous request to Groq API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
diff --git a/letta/llm_api/minimax_client.py b/letta/llm_api/minimax_client.py
index 6029f460..79769942 100644
--- a/letta/llm_api/minimax_client.py
+++ b/letta/llm_api/minimax_client.py
@@ -4,6 +4,7 @@ import anthropic
 from anthropic import AsyncStream
 from anthropic.types.beta import BetaMessage, BetaRawMessageStreamEvent
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.anthropic_client import AnthropicClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
@@ -83,6 +84,8 @@ class MiniMaxClient(AnthropicClient):
 
         Uses beta messages API for compatibility with Anthropic streaming interfaces.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
 
         try:
@@ -105,6 +108,8 @@ class MiniMaxClient(AnthropicClient):
 
         Uses beta messages API for compatibility with Anthropic streaming interfaces.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
         request_data["stream"] = True
 
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 48201362..9bfc7c5d 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -67,33 +67,6 @@ from letta.settings import model_settings
 logger = get_logger(__name__)
 
 
-def sanitize_unicode_surrogates(obj: Any) -> Any:
-    """Recursively sanitize invalid Unicode surrogates in strings within nested data structures.
-
-    This fixes UnicodeEncodeError when the OpenAI SDK tries to encode requests containing
-    unpaired UTF-16 surrogates (e.g., '\ud83c' without its pair) which can occur in corrupted
-    emoji data or malformed Unicode sequences.
-
-    Args:
-        obj: The object to sanitize (dict, list, str, or other types)
-
-    Returns:
-        The sanitized object with invalid surrogates replaced by the Unicode replacement character
-    """
-    if isinstance(obj, dict):
-        return {k: sanitize_unicode_surrogates(v) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [sanitize_unicode_surrogates(item) for item in obj]
-    elif isinstance(obj, str):
-        try:
-            obj.encode("utf-8")
-            return obj
-        except UnicodeEncodeError:
-            return obj.encode("utf-8", errors="replace").decode("utf-8")
-    else:
-        return obj
-
-
 def is_openai_reasoning_model(model: str) -> bool:
     """Utility function to check if the model is a 'reasoner'"""
 
diff --git a/letta/llm_api/together_client.py b/letta/llm_api/together_client.py
index 98ebf768..8117746a 100644
--- a/letta/llm_api/together_client.py
+++ b/letta/llm_api/together_client.py
@@ -4,6 +4,7 @@ from typing import List
 from openai import AsyncOpenAI, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -34,6 +35,8 @@ class TogetherClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key, _, _ = await self.get_byok_overrides_async(llm_config)
 
         if not api_key:
diff --git a/letta/llm_api/xai_client.py b/letta/llm_api/xai_client.py
index 1085e5a6..07249320 100644
--- a/letta/llm_api/xai_client.py
+++ b/letta/llm_api/xai_client.py
@@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -59,6 +60,8 @@ class XAIClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
@@ -70,6 +73,8 @@ class XAIClient(OpenAIClient):
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
         response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
diff --git a/letta/llm_api/zai_client.py b/letta/llm_api/zai_client.py
index c7e3d059..e2625e8a 100644
--- a/letta/llm_api/zai_client.py
+++ b/letta/llm_api/zai_client.py
@@ -4,6 +4,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -80,6 +81,8 @@ class ZAIClient(OpenAIClient):
         """
         Performs underlying asynchronous request to Z.ai API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.zai_api_key
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
@@ -91,6 +94,8 @@ class ZAIClient(OpenAIClient):
         """
         Performs underlying asynchronous streaming request to Z.ai and returns the async stream iterator.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.zai_api_key
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
         response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(