fix: sanitize control characters before sending to inference backends

Fireworks (via Synthetic Direct) chokes on raw ASCII control chars
(0x00-0x1F) in JSON payloads with "Unterminated string" errors.
The existing sanitize_unicode_surrogates only handles U+D800-DFFF.
Now we also strip control chars (preserving tab/newline/CR) at all
4 request paths — sync, async, and both streaming variants.
This commit is contained in:
Ani Tunturi
2026-03-21 20:23:56 -04:00
parent 2b3cfb1797
commit 08d3c26732
2 changed files with 36 additions and 1 deletions

View File

@@ -51,6 +51,37 @@ def sanitize_unicode_surrogates(value: Any) -> Any:
return value
# Translation table deleting C0 control characters (0x00-0x1F) except
# tab (0x09), newline (0x0A), and carriage return (0x0D). Built once at
# import time so each string is sanitized in a single C-level pass.
_CONTROL_CHAR_DELETE_TABLE = {
    codepoint: None
    for codepoint in range(0x20)
    if codepoint not in (0x09, 0x0A, 0x0D)
}


def sanitize_control_characters(value: Any) -> Any:
    """Recursively remove ASCII control characters (0x00-0x1F) from strings,
    preserving tab (0x09), newline (0x0A), and carriage return (0x0D).

    Some inference backends (e.g. Fireworks AI) perform strict JSON parsing on
    the request body and reject payloads containing unescaped control characters.
    Python's json.dumps will escape these, but certain proxy layers may
    double-parse or re-serialize in ways that expose the raw bytes.

    This function sanitizes:
    - Strings: strips control characters except whitespace (tab, newline, CR)
    - Dicts: recursively sanitizes all keys and values
    - Lists: recursively sanitizes all elements
    - Tuples: recursively sanitizes all elements (returned as a tuple)
    - Other types: returned as-is

    Args:
        value: Any JSON-like structure (str/dict/list/tuple/other).

    Returns:
        The same structure with control characters removed from all strings.
    """
    if isinstance(value, str):
        # One C-level pass; equivalent to filtering chars with ord < 0x20
        # except the allowed whitespace characters.
        return value.translate(_CONTROL_CHAR_DELETE_TABLE)
    elif isinstance(value, dict):
        # Keys can carry control chars too (e.g. malformed tool-call args).
        return {sanitize_control_characters(k): sanitize_control_characters(v) for k, v in value.items()}
    elif isinstance(value, list):
        return [sanitize_control_characters(item) for item in value]
    elif isinstance(value, tuple):
        return tuple(sanitize_control_characters(item) for item in value)
    else:
        return value
def sanitize_null_bytes(value: Any) -> Any:
"""Recursively remove null bytes (0x00) from strings.

View File

@@ -28,7 +28,7 @@ from letta.errors import (
LLMTimeoutError,
LLMUnprocessableEntityError,
)
from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.helpers.json_helpers import sanitize_control_characters, sanitize_unicode_surrogates
from letta.llm_api.error_utils import is_context_window_overflow_message, is_insufficient_credits_message
from letta.llm_api.helpers import (
add_inner_thoughts_to_functions,
@@ -669,6 +669,7 @@ class OpenAIClient(LLMClientBase):
"""
# Sanitize Unicode surrogates to prevent encoding errors
request_data = sanitize_unicode_surrogates(request_data)
request_data = sanitize_control_characters(request_data)
client = OpenAI(**self._prepare_client_kwargs(llm_config))
# Route based on payload shape: Responses uses 'input', Chat Completions uses 'messages'
@@ -694,6 +695,7 @@ class OpenAIClient(LLMClientBase):
"""
# Sanitize Unicode surrogates to prevent encoding errors
request_data = sanitize_unicode_surrogates(request_data)
request_data = sanitize_control_characters(request_data)
kwargs = await self._prepare_client_kwargs_async(llm_config)
client = AsyncOpenAI(**kwargs)
@@ -913,6 +915,7 @@ class OpenAIClient(LLMClientBase):
"""
# Sanitize Unicode surrogates to prevent encoding errors
request_data = sanitize_unicode_surrogates(request_data)
request_data = sanitize_control_characters(request_data)
kwargs = await self._prepare_client_kwargs_async(llm_config)
client = AsyncOpenAI(**kwargs)
@@ -947,6 +950,7 @@ class OpenAIClient(LLMClientBase):
"""
# Sanitize Unicode surrogates to prevent encoding errors
request_data = sanitize_unicode_surrogates(request_data)
request_data = sanitize_control_characters(request_data)
kwargs = await self._prepare_client_kwargs_async(llm_config)
client = AsyncOpenAI(**kwargs)