diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 383141d9..c645c2ce 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -29,6 +29,7 @@ from letta.errors import ( ) from letta.helpers.datetime_helpers import get_utc_time_int from letta.helpers.decorators import deprecated +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs from letta.llm_api.llm_client_base import LLMClientBase @@ -109,6 +110,8 @@ class AnthropicClient(LLMClientBase): @trace_method async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict: + request_data = sanitize_unicode_surrogates(request_data) + client = await self._get_anthropic_client_async(llm_config, async_client=True) betas: list[str] = [] @@ -279,6 +282,8 @@ class AnthropicClient(LLMClientBase): @trace_method async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]: + request_data = sanitize_unicode_surrogates(request_data) + client = await self._get_anthropic_client_async(llm_config, async_client=True) request_data["stream"] = True diff --git a/letta/llm_api/azure_client.py b/letta/llm_api/azure_client.py index 7ce10b56..3700c2a0 100644 --- a/letta/llm_api/azure_client.py +++ b/letta/llm_api/azure_client.py @@ -4,6 +4,7 @@ from typing import List, Optional, Tuple from openai import AsyncAzureOpenAI, AzureOpenAI from openai.types.chat.chat_completion import ChatCompletion +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.openai_client import OpenAIClient from letta.otel.tracing import trace_method from letta.schemas.embedding_config import EmbeddingConfig @@ -49,6 +50,8 @@ class AzureClient(OpenAIClient): """ Performs underlying asynchronous request to OpenAI API and returns raw response dict. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key, base_url, api_version = await self.get_byok_overrides_async(llm_config) if not api_key or not base_url or not api_version: api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY") diff --git a/letta/llm_api/chatgpt_oauth_client.py b/letta/llm_api/chatgpt_oauth_client.py index 86854c06..47e61eec 100644 --- a/letta/llm_api/chatgpt_oauth_client.py +++ b/letta/llm_api/chatgpt_oauth_client.py @@ -39,6 +39,7 @@ from letta.errors import ( LLMServerError, LLMTimeoutError, ) +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.llm_client_base import LLMClientBase from letta.log import get_logger from letta.otel.tracing import trace_method @@ -356,6 +357,8 @@ class ChatGPTOAuthClient(LLMClientBase): Returns: Response data in OpenAI ChatCompletion format. """ + request_data = sanitize_unicode_surrogates(request_data) + _, creds = await self._get_provider_and_credentials_async(llm_config) headers = self._build_headers(creds) @@ -550,6 +553,8 @@ class ChatGPTOAuthClient(LLMClientBase): Returns: Async generator yielding ResponseStreamEvent objects. """ + request_data = sanitize_unicode_surrogates(request_data) + _, creds = await self._get_provider_and_credentials_async(llm_config) headers = self._build_headers(creds) diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py index 0703445d..5b2b6ef0 100644 --- a/letta/llm_api/deepseek_client.py +++ b/letta/llm_api/deepseek_client.py @@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI from openai.types.chat.chat_completion import ChatCompletion from openai.types.chat.chat_completion_chunk import ChatCompletionChunk +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.openai_client import OpenAIClient from letta.log import get_logger from letta.otel.tracing import trace_method @@ -97,6 +98,8 @@ class DeepseekClient(OpenAIClient): """ Performs underlying asynchronous request to OpenAI API and returns raw response dict. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY") client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint) @@ -108,6 +111,8 @@ class DeepseekClient(OpenAIClient): """ Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY") client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint) response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create( diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index c4c11d3c..56ed0ae1 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -31,7 +31,7 @@ from letta.errors import ( LLMUnprocessableEntityError, ) from letta.helpers.datetime_helpers import get_utc_time_int -from letta.helpers.json_helpers import json_dumps, json_loads +from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates from letta.llm_api.llm_client_base import LLMClientBase from letta.local_llm.json_parser import clean_json_string_extra_backslash from letta.log import get_logger @@ -100,6 +100,8 @@ class GoogleVertexClient(LLMClientBase): """ Performs underlying request to llm and returns raw response. """ + request_data = sanitize_unicode_surrogates(request_data) + client = self._get_client() # Gemini 2.5 models will often return MALFORMED_FUNCTION_CALL, force a retry @@ -175,6 +177,8 @@ class GoogleVertexClient(LLMClientBase): @trace_method async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]: + request_data = sanitize_unicode_surrogates(request_data) + client = self._get_client() try: diff --git a/letta/llm_api/groq_client.py b/letta/llm_api/groq_client.py index 34edf622..5f24669d 100644 --- a/letta/llm_api/groq_client.py +++ b/letta/llm_api/groq_client.py @@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI from openai.types.chat.chat_completion import ChatCompletion from openai.types.chat.chat_completion_chunk import ChatCompletionChunk +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.openai_client import OpenAIClient from letta.otel.tracing import trace_method from letta.schemas.embedding_config import EmbeddingConfig @@ -74,6 +75,8 @@ class GroqClient(OpenAIClient): """ Performs underlying asynchronous request to Groq API and returns raw response dict. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY") client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint) diff --git a/letta/llm_api/minimax_client.py b/letta/llm_api/minimax_client.py index 6029f460..79769942 100644 --- a/letta/llm_api/minimax_client.py +++ b/letta/llm_api/minimax_client.py @@ -4,6 +4,7 @@ import anthropic from anthropic import AsyncStream from anthropic.types.beta import BetaMessage, BetaRawMessageStreamEvent +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.anthropic_client import AnthropicClient from letta.log import get_logger from letta.otel.tracing import trace_method @@ -83,6 +84,8 @@ class MiniMaxClient(AnthropicClient): Uses beta messages API for compatibility with Anthropic streaming interfaces. """ + request_data = sanitize_unicode_surrogates(request_data) + client = await self._get_anthropic_client_async(llm_config, async_client=True) try: @@ -105,6 +108,8 @@ class MiniMaxClient(AnthropicClient): Uses beta messages API for compatibility with Anthropic streaming interfaces. """ + request_data = sanitize_unicode_surrogates(request_data) + client = await self._get_anthropic_client_async(llm_config, async_client=True) request_data["stream"] = True diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 48201362..9bfc7c5d 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -67,33 +67,6 @@ from letta.settings import model_settings logger = get_logger(__name__) -def sanitize_unicode_surrogates(obj: Any) -> Any: - """Recursively sanitize invalid Unicode surrogates in strings within nested data structures. - - This fixes UnicodeEncodeError when the OpenAI SDK tries to encode requests containing - unpaired UTF-16 surrogates (e.g., '\ud83c' without its pair) which can occur in corrupted - emoji data or malformed Unicode sequences. - - Args: - obj: The object to sanitize (dict, list, str, or other types) - - Returns: - The sanitized object with invalid surrogates replaced by the Unicode replacement character - """ - if isinstance(obj, dict): - return {k: sanitize_unicode_surrogates(v) for k, v in obj.items()} - elif isinstance(obj, list): - return [sanitize_unicode_surrogates(item) for item in obj] - elif isinstance(obj, str): - try: - obj.encode("utf-8") - return obj - except UnicodeEncodeError: - return obj.encode("utf-8", errors="replace").decode("utf-8") - else: - return obj - - def is_openai_reasoning_model(model: str) -> bool: """Utility function to check if the model is a 'reasoner'""" diff --git a/letta/llm_api/together_client.py b/letta/llm_api/together_client.py index 98ebf768..8117746a 100644 --- a/letta/llm_api/together_client.py +++ b/letta/llm_api/together_client.py @@ -4,6 +4,7 @@ from typing import List from openai import AsyncOpenAI, OpenAI from openai.types.chat.chat_completion import ChatCompletion +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.openai_client import OpenAIClient from letta.otel.tracing import trace_method from letta.schemas.embedding_config import EmbeddingConfig @@ -34,6 +35,8 @@ class TogetherClient(OpenAIClient): """ Performs underlying asynchronous request to OpenAI API and returns raw response dict. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key, _, _ = await self.get_byok_overrides_async(llm_config) if not api_key: diff --git a/letta/llm_api/xai_client.py b/letta/llm_api/xai_client.py index 1085e5a6..07249320 100644 --- a/letta/llm_api/xai_client.py +++ b/letta/llm_api/xai_client.py @@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI from openai.types.chat.chat_completion import ChatCompletion from openai.types.chat.chat_completion_chunk import ChatCompletionChunk +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.openai_client import OpenAIClient from letta.otel.tracing import trace_method from letta.schemas.embedding_config import EmbeddingConfig @@ -59,6 +60,8 @@ class XAIClient(OpenAIClient): """ Performs underlying asynchronous request to OpenAI API and returns raw response dict. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY") client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint) @@ -70,6 +73,8 @@ class XAIClient(OpenAIClient): """ Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY") client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint) response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create( diff --git a/letta/llm_api/zai_client.py b/letta/llm_api/zai_client.py index c7e3d059..e2625e8a 100644 --- a/letta/llm_api/zai_client.py +++ b/letta/llm_api/zai_client.py @@ -4,6 +4,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI from openai.types.chat.chat_completion import ChatCompletion from openai.types.chat.chat_completion_chunk import ChatCompletionChunk +from letta.helpers.json_helpers import sanitize_unicode_surrogates from letta.llm_api.openai_client import OpenAIClient from letta.otel.tracing import trace_method from letta.schemas.embedding_config import EmbeddingConfig @@ -80,6 +81,8 @@ class ZAIClient(OpenAIClient): """ Performs underlying asynchronous request to Z.ai API and returns raw response dict. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key = model_settings.zai_api_key client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint) @@ -91,6 +94,8 @@ class ZAIClient(OpenAIClient): """ Performs underlying asynchronous streaming request to Z.ai and returns the async stream iterator. """ + request_data = sanitize_unicode_surrogates(request_data) + api_key = model_settings.zai_api_key client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint) response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(