Multiple OpenAI-compatible LLM clients (Azure, Deepseek, Groq, Together, XAI, ZAI) and Anthropic-compatible clients (Anthropic, MiniMax, Google Vertex) were overriding request_async/stream_async without calling sanitize_unicode_surrogates, causing UnicodeEncodeError when message content contained lone UTF-16 surrogates.

Root cause: Child classes override parent methods but omit the sanitization step that the base OpenAIClient includes. This allows corrupted Unicode (unpaired surrogates from malformed emoji) to reach the httpx layer, which rejects it during UTF-8 encoding.

Fix: Import and call sanitize_unicode_surrogates in all overridden request methods. Also removed the duplicate sanitize_unicode_surrogates definition from openai_client.py that shadowed the canonical implementation in letta.helpers.json_helpers.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>
Issue-ID: 10c0f2e4-f87b-11f0-b91c-da7ad0900000
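The failure mode is easy to reproduce in plain Python. The minimal repro below is an illustration, not code from the patch; it shows the error httpx hits when a lone surrogate reaches UTF-8 encoding:

```python
# A lone high surrogate (half of a UTF-16 pair) cannot be encoded as UTF-8;
# this is the UnicodeEncodeError that surfaced inside the httpx layer.
lone_surrogate = "\ud83d"  # first half of the UTF-16 pair for 🐱 (U+1F431)

try:
    lone_surrogate.encode("utf-8")
except UnicodeEncodeError as e:
    print(e)  # 'utf-8' codec can't encode character '\ud83d' ...
```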
from typing import List, Optional, Union

import anthropic
from anthropic import AsyncStream
from anthropic.types.beta import BetaMessage, BetaRawMessageStreamEvent

from letta.helpers.json_helpers import sanitize_unicode_surrogates
from letta.llm_api.anthropic_client import AnthropicClient
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.settings import model_settings

logger = get_logger(__name__)

class MiniMaxClient(AnthropicClient):
    """
    MiniMax LLM client using the Anthropic-compatible API.

    Uses the beta messages API to ensure compatibility with Anthropic streaming interfaces.
    Temperature must be in the range (0.0, 1.0].
    Some Anthropic params are ignored: top_k, stop_sequences, service_tier, etc.

    Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api

    Note: We override client creation to always use llm_config.model_endpoint as base_url
    (required for BYOK, where provider_name is the user's custom name, not "minimax").
    We also override the request methods to avoid passing Anthropic-specific beta headers.
    """

    @trace_method
    def _get_anthropic_client(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create an Anthropic client configured for the MiniMax API."""
        api_key, _, _ = self.get_byok_overrides(llm_config)

        if not api_key:
            api_key = model_settings.minimax_api_key

        # Always use model_endpoint for base_url (works for both base and BYOK providers)
        base_url = llm_config.model_endpoint

        if async_client:
            return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries)
        return anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries)

    @trace_method
    async def _get_anthropic_client_async(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create an Anthropic client configured for the MiniMax API (async version)."""
        api_key, _, _ = await self.get_byok_overrides_async(llm_config)

        if not api_key:
            api_key = model_settings.minimax_api_key

        # Always use model_endpoint for base_url (works for both base and BYOK providers)
        base_url = llm_config.model_endpoint

        if async_client:
            return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries)
        return anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries)

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Synchronous request to the MiniMax API.

        Uses the beta messages API for compatibility with Anthropic streaming interfaces.
        """
        # Strip lone UTF-16 surrogates before the payload reaches httpx,
        # matching the sanitization applied in the async request methods.
        request_data = sanitize_unicode_surrogates(request_data)

        client = self._get_anthropic_client(llm_config, async_client=False)

        response: BetaMessage = client.beta.messages.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Asynchronous request to the MiniMax API.

        Uses the beta messages API for compatibility with Anthropic streaming interfaces.
        """
        # Strip lone UTF-16 surrogates (e.g. from malformed emoji) so httpx
        # does not raise UnicodeEncodeError during UTF-8 encoding.
        request_data = sanitize_unicode_surrogates(request_data)

        client = await self._get_anthropic_client_async(llm_config, async_client=True)

        try:
            response: BetaMessage = await client.beta.messages.create(**request_data)
            return response.model_dump()
        except ValueError as e:
            # Fall back to streaming if the API rejects non-streaming requests
            # (mirrors the behavior of the base Anthropic client).
            if "streaming is required" in str(e).lower():
                logger.warning(
                    "[MiniMax] Non-streaming request rejected. Falling back to streaming mode. Error: %s",
                    str(e),
                )
                return await self._request_via_streaming(request_data, llm_config, betas=[])
            raise

    @trace_method
    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
        """
        Asynchronous streaming request to the MiniMax API.

        Uses the beta messages API for compatibility with Anthropic streaming interfaces.
        """
        # Strip lone UTF-16 surrogates before the payload reaches httpx.
        request_data = sanitize_unicode_surrogates(request_data)

        client = await self._get_anthropic_client_async(llm_config, async_client=True)
        request_data["stream"] = True

        try:
            return await client.beta.messages.create(**request_data)
        except Exception as e:
            logger.error(f"Error streaming MiniMax request: {e}")
            raise

    @trace_method
    def build_request_data(
        self,
        agent_type: AgentType,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
        requires_subsequent_tool_call: bool = False,
        tool_return_truncation_chars: Optional[int] = None,
    ) -> dict:
        """
        Build request data for the MiniMax API.

        Inherits most logic from AnthropicClient, with MiniMax-specific adjustments:
        - Temperature must be in the range (0.0, 1.0]
        """
        data = super().build_request_data(
            agent_type,
            messages,
            llm_config,
            tools,
            force_tool_call,
            requires_subsequent_tool_call,
            tool_return_truncation_chars,
        )

        # MiniMax temperature range is (0.0, 1.0]; recommended value: 1
        if data.get("temperature") is not None:
            temp = data["temperature"]
            if temp <= 0:
                data["temperature"] = 0.01  # Minimum valid value (0 is exclusive)
                logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 0.01.")
            elif temp > 1.0:
                data["temperature"] = 1.0  # Maximum valid value
                logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 1.0.")

        # MiniMax silently ignores Anthropic-specific parameters (top_k,
        # stop_sequences, service_tier, etc.), so we leave them in place
        # rather than stripping them from the request.

        return data

    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
        """
        All MiniMax M2.x models support native interleaved thinking.

        Unlike Anthropic, where only certain models (Claude 3.7+) support extended thinking,
        all MiniMax models natively support thinking blocks without beta headers.
        """
        return True

    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
        """MiniMax models support all tool choice modes."""
        return False

    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
        """MiniMax doesn't currently advertise structured output support."""
        return False