diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py
index 383141d9..c645c2ce 100644
--- a/letta/llm_api/anthropic_client.py
+++ b/letta/llm_api/anthropic_client.py
@@ -29,6 +29,7 @@ from letta.errors import (
 )
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.decorators import deprecated
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.anthropic_constants import ANTHROPIC_MAX_STRICT_TOOLS, ANTHROPIC_STRICT_MODE_ALLOWLIST
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -109,6 +110,8 @@ class AnthropicClient(LLMClientBase):
 
     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
         betas: list[str] = []
 
@@ -279,6 +282,8 @@ class AnthropicClient(LLMClientBase):
 
     @trace_method
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
         request_data["stream"] = True
 
diff --git a/letta/llm_api/azure_client.py b/letta/llm_api/azure_client.py
index 7ce10b56..3700c2a0 100644
--- a/letta/llm_api/azure_client.py
+++ b/letta/llm_api/azure_client.py
@@ -4,6 +4,7 @@ from typing import List, Optional, Tuple
 from openai import AsyncAzureOpenAI, AzureOpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -49,6 +50,8 @@ class AzureClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key, base_url, api_version = await self.get_byok_overrides_async(llm_config)
         if not api_key or not base_url or not api_version:
             api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
diff --git a/letta/llm_api/chatgpt_oauth_client.py b/letta/llm_api/chatgpt_oauth_client.py
index 86854c06..47e61eec 100644
--- a/letta/llm_api/chatgpt_oauth_client.py
+++ b/letta/llm_api/chatgpt_oauth_client.py
@@ -39,6 +39,7 @@ from letta.errors import (
     LLMServerError,
     LLMTimeoutError,
 )
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
@@ -356,6 +357,8 @@ class ChatGPTOAuthClient(LLMClientBase):
         Returns:
             Response data in OpenAI ChatCompletion format.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         _, creds = await self._get_provider_and_credentials_async(llm_config)
         headers = self._build_headers(creds)
 
@@ -550,6 +553,8 @@ class ChatGPTOAuthClient(LLMClientBase):
         Returns:
             Async generator yielding ResponseStreamEvent objects.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         _, creds = await self._get_provider_and_credentials_async(llm_config)
         headers = self._build_headers(creds)
 
diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py
index 0703445d..5b2b6ef0 100644
--- a/letta/llm_api/deepseek_client.py
+++ b/letta/llm_api/deepseek_client.py
@@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
@@ -97,6 +98,8 @@ class DeepseekClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
@@ -108,6 +111,8 @@ class DeepseekClient(OpenAIClient):
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.deepseek_api_key or os.environ.get("DEEPSEEK_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
         response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py
index c4c11d3c..56ed0ae1 100644
--- a/letta/llm_api/google_vertex_client.py
+++ b/letta/llm_api/google_vertex_client.py
@@ -31,7 +31,7 @@ from letta.errors import (
     LLMUnprocessableEntityError,
 )
 from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.helpers.json_helpers import json_dumps, json_loads
+from letta.helpers.json_helpers import json_dumps, json_loads, sanitize_unicode_surrogates
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.log import get_logger
@@ -100,6 +100,8 @@ class GoogleVertexClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = self._get_client()
 
         # Gemini 2.5 models will often return MALFORMED_FUNCTION_CALL, force a retry
@@ -175,6 +177,8 @@ class GoogleVertexClient(LLMClientBase):
 
     @trace_method
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = self._get_client()
 
         try:
diff --git a/letta/llm_api/groq_client.py b/letta/llm_api/groq_client.py
index 34edf622..5f24669d 100644
--- a/letta/llm_api/groq_client.py
+++ b/letta/llm_api/groq_client.py
@@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -74,6 +75,8 @@ class GroqClient(OpenAIClient):
         """
         Performs underlying asynchronous request to Groq API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.groq_api_key or os.environ.get("GROQ_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
diff --git a/letta/llm_api/minimax_client.py b/letta/llm_api/minimax_client.py
index 6029f460..79769942 100644
--- a/letta/llm_api/minimax_client.py
+++ b/letta/llm_api/minimax_client.py
@@ -4,6 +4,7 @@ import anthropic
 from anthropic import AsyncStream
 from anthropic.types.beta import BetaMessage, BetaRawMessageStreamEvent
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.anthropic_client import AnthropicClient
 from letta.log import get_logger
 from letta.otel.tracing import trace_method
@@ -83,6 +84,8 @@ class MiniMaxClient(AnthropicClient):
 
         Uses beta messages API for compatibility with Anthropic streaming interfaces.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
 
         try:
@@ -105,6 +108,8 @@ class MiniMaxClient(AnthropicClient):
 
         Uses beta messages API for compatibility with Anthropic streaming interfaces.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
         request_data["stream"] = True
 
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 48201362..9bfc7c5d 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -67,33 +67,6 @@ from letta.settings import model_settings
 logger = get_logger(__name__)
 
 
-def sanitize_unicode_surrogates(obj: Any) -> Any:
-    """Recursively sanitize invalid Unicode surrogates in strings within nested data structures.
-
-    This fixes UnicodeEncodeError when the OpenAI SDK tries to encode requests containing
-    unpaired UTF-16 surrogates (e.g., '\ud83c' without its pair) which can occur in corrupted
-    emoji data or malformed Unicode sequences.
-
-    Args:
-        obj: The object to sanitize (dict, list, str, or other types)
-
-    Returns:
-        The sanitized object with invalid surrogates replaced by the Unicode replacement character
-    """
-    if isinstance(obj, dict):
-        return {k: sanitize_unicode_surrogates(v) for k, v in obj.items()}
-    elif isinstance(obj, list):
-        return [sanitize_unicode_surrogates(item) for item in obj]
-    elif isinstance(obj, str):
-        try:
-            obj.encode("utf-8")
-            return obj
-        except UnicodeEncodeError:
-            return obj.encode("utf-8", errors="replace").decode("utf-8")
-    else:
-        return obj
-
-
 def is_openai_reasoning_model(model: str) -> bool:
     """Utility function to check if the model is a 'reasoner'"""
 
diff --git a/letta/llm_api/together_client.py b/letta/llm_api/together_client.py
index 98ebf768..8117746a 100644
--- a/letta/llm_api/together_client.py
+++ b/letta/llm_api/together_client.py
@@ -4,6 +4,7 @@ from typing import List
 from openai import AsyncOpenAI, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -34,6 +35,8 @@ class TogetherClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key, _, _ = await self.get_byok_overrides_async(llm_config)
 
         if not api_key:
diff --git a/letta/llm_api/xai_client.py b/letta/llm_api/xai_client.py
index 1085e5a6..07249320 100644
--- a/letta/llm_api/xai_client.py
+++ b/letta/llm_api/xai_client.py
@@ -5,6 +5,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -59,6 +60,8 @@ class XAIClient(OpenAIClient):
         """
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
@@ -70,6 +73,8 @@ class XAIClient(OpenAIClient):
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.xai_api_key or os.environ.get("XAI_API_KEY")
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
         response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
diff --git a/letta/llm_api/zai_client.py b/letta/llm_api/zai_client.py
index c7e3d059..e2625e8a 100644
--- a/letta/llm_api/zai_client.py
+++ b/letta/llm_api/zai_client.py
@@ -4,6 +4,7 @@ from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
+from letta.helpers.json_helpers import sanitize_unicode_surrogates
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -80,6 +81,8 @@ class ZAIClient(OpenAIClient):
         """
         Performs underlying asynchronous request to Z.ai API and returns raw response dict.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.zai_api_key
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
 
@@ -91,6 +94,8 @@ class ZAIClient(OpenAIClient):
         """
         Performs underlying asynchronous streaming request to Z.ai and returns the async stream iterator.
         """
+        request_data = sanitize_unicode_surrogates(request_data)
+
         api_key = model_settings.zai_api_key
         client = AsyncOpenAI(api_key=api_key, base_url=llm_config.model_endpoint)
         response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(