From b34ad43691c1320792dccf65fef7438bc3c49e74 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Mon, 26 Jan 2026 20:13:03 -0800 Subject: [PATCH] feat: add minimax byok to ui (#9101) * fix: patch minimax * feat: add frontend changes for minimax * add logo, fix backend * better check for is minimax * more references fixed for minimax * start reverting unnecessary changes * revert backend changes, just ui * fix minimax fully * fix test * add key to deploy action --------- Co-authored-by: Ari Webb Co-authored-by: Ari Webb --- letta/adapters/letta_llm_stream_adapter.py | 2 +- letta/adapters/simple_llm_stream_adapter.py | 2 +- letta/llm_api/anthropic_client.py | 32 +++------- letta/llm_api/minimax_client.py | 69 +++++++++------- letta/schemas/llm_config.py | 10 +++ letta/services/streaming_service.py | 11 +++- tests/test_providers.py | 2 +- 7 files changed, 59 insertions(+), 69 deletions(-) diff --git a/letta/adapters/letta_llm_stream_adapter.py b/letta/adapters/letta_llm_stream_adapter.py index 5a37aebe..5e1fa7a6 100644 --- a/letta/adapters/letta_llm_stream_adapter.py +++ b/letta/adapters/letta_llm_stream_adapter.py @@ -62,7 +62,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): self.request_data = request_data # Instantiate streaming interface - if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]: + if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock, ProviderType.minimax]: self.interface = AnthropicStreamingInterface( use_assistant_message=use_assistant_message, put_inner_thoughts_in_kwarg=self.llm_config.put_inner_thoughts_in_kwargs, diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py index 3d033e88..a475c098 100644 --- a/letta/adapters/simple_llm_stream_adapter.py +++ b/letta/adapters/simple_llm_stream_adapter.py @@ -74,7 +74,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): cancellation_event = 
get_cancellation_event_for_run(self.run_id) if self.run_id else None # Instantiate streaming interface - if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]: + if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock, ProviderType.minimax]: # NOTE: different self.interface = SimpleAnthropicStreamingInterface( requires_approval_tools=requires_approval_tools, diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 7f133afb..8acd65dd 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -356,17 +356,9 @@ class AnthropicClient(LLMClientBase): ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]: api_key, _, _ = self.get_byok_overrides(llm_config) - # For MiniMax provider, use minimax_api_key from settings - if not api_key and llm_config.provider_name == "minimax": - api_key = model_settings.minimax_api_key - # For claude-pro-max provider, use OAuth Bearer token instead of api_key is_oauth_provider = llm_config.provider_name == "claude-pro-max" - # Only use custom base_url for MiniMax (Anthropic-compatible API) - # The Anthropic SDK adds /v1/messages internally, so we only override for non-Anthropic providers - base_url = llm_config.model_endpoint if llm_config.provider_name == "minimax" else None - if async_client: if api_key: if is_oauth_provider: @@ -378,8 +370,8 @@ class AnthropicClient(LLMClientBase): "anthropic-beta": "oauth-2025-04-20", }, ) - return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) - return anthropic.AsyncAnthropic(base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.AsyncAnthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries) + return anthropic.AsyncAnthropic(max_retries=model_settings.anthropic_max_retries) if api_key: if is_oauth_provider: @@ -391,8 +383,8 @@ class 
AnthropicClient(LLMClientBase): "anthropic-beta": "oauth-2025-04-20", }, ) - return anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) - return anthropic.Anthropic(base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(max_retries=model_settings.anthropic_max_retries) @trace_method async def _get_anthropic_client_async( @@ -400,17 +392,9 @@ class AnthropicClient(LLMClientBase): ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]: api_key, _, _ = await self.get_byok_overrides_async(llm_config) - # For MiniMax provider, use minimax_api_key from settings - if not api_key and llm_config.provider_name == "minimax": - api_key = model_settings.minimax_api_key - # For claude-pro-max provider, use OAuth Bearer token instead of api_key is_oauth_provider = llm_config.provider_name == "claude-pro-max" - # Only use custom base_url for MiniMax (Anthropic-compatible API) - # The Anthropic SDK adds /v1/messages internally, so we only override for non-Anthropic providers - base_url = llm_config.model_endpoint if llm_config.provider_name == "minimax" else None - if async_client: if api_key: if is_oauth_provider: @@ -422,8 +406,8 @@ class AnthropicClient(LLMClientBase): "anthropic-beta": "oauth-2025-04-20", }, ) - return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) - return anthropic.AsyncAnthropic(base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.AsyncAnthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries) + return anthropic.AsyncAnthropic(max_retries=model_settings.anthropic_max_retries) if api_key: if is_oauth_provider: @@ -435,8 +419,8 @@ class AnthropicClient(LLMClientBase): "anthropic-beta": "oauth-2025-04-20", }, ) - return 
anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) - return anthropic.Anthropic(base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(api_key=api_key, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(max_retries=model_settings.anthropic_max_retries) @trace_method def build_request_data( diff --git a/letta/llm_api/minimax_client.py b/letta/llm_api/minimax_client.py index 1c6d0081..6029f460 100644 --- a/letta/llm_api/minimax_client.py +++ b/letta/llm_api/minimax_client.py @@ -1,35 +1,33 @@ -import os from typing import List, Optional, Union import anthropic from anthropic import AsyncStream -from anthropic.types import Message as AnthropicMessage, RawMessageStreamEvent +from anthropic.types.beta import BetaMessage, BetaRawMessageStreamEvent from letta.llm_api.anthropic_client import AnthropicClient from letta.log import get_logger from letta.otel.tracing import trace_method -from letta.schemas.enums import AgentType +from letta.schemas.agent import AgentType from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as PydanticMessage from letta.settings import model_settings logger = get_logger(__name__) -# MiniMax Anthropic-compatible API base URL -MINIMAX_BASE_URL = "https://api.minimax.io/anthropic" - class MiniMaxClient(AnthropicClient): """ MiniMax LLM client using Anthropic-compatible API. - Key differences from AnthropicClient: - - Uses standard messages API (client.messages.create), NOT beta API - - Thinking blocks are natively supported without beta headers - - Temperature must be in range (0.0, 1.0] - - Some Anthropic params are ignored: top_k, stop_sequences, service_tier, etc. + Uses the beta messages API to ensure compatibility with Anthropic streaming interfaces. + Temperature must be in range (0.0, 1.0]. + Some Anthropic params are ignored: top_k, stop_sequences, service_tier, etc. 
Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api + + Note: We override client creation to always use llm_config.model_endpoint as base_url + (required for BYOK where provider_name is user's custom name, not "minimax"). + We also override request methods to avoid passing Anthropic-specific beta headers. """ @trace_method @@ -40,16 +38,14 @@ class MiniMaxClient(AnthropicClient): api_key, _, _ = self.get_byok_overrides(llm_config) if not api_key: - api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY") + api_key = model_settings.minimax_api_key + + # Always use model_endpoint for base_url (works for both base and BYOK providers) + base_url = llm_config.model_endpoint if async_client: - if api_key: - return anthropic.AsyncAnthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) - return anthropic.AsyncAnthropic(base_url=MINIMAX_BASE_URL) - - if api_key: - return anthropic.Anthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) - return anthropic.Anthropic(base_url=MINIMAX_BASE_URL) + return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) @trace_method async def _get_anthropic_client_async( @@ -59,29 +55,25 @@ class MiniMaxClient(AnthropicClient): api_key, _, _ = await self.get_byok_overrides_async(llm_config) if not api_key: - api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY") + api_key = model_settings.minimax_api_key + + # Always use model_endpoint for base_url (works for both base and BYOK providers) + base_url = llm_config.model_endpoint if async_client: - if api_key: - return anthropic.AsyncAnthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) - return anthropic.AsyncAnthropic(base_url=MINIMAX_BASE_URL) - - if api_key: - return anthropic.Anthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) - return 
anthropic.Anthropic(base_url=MINIMAX_BASE_URL) + return anthropic.AsyncAnthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) + return anthropic.Anthropic(api_key=api_key, base_url=base_url, max_retries=model_settings.anthropic_max_retries) @trace_method def request(self, request_data: dict, llm_config: LLMConfig) -> dict: """ Synchronous request to MiniMax API. - Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks. + Uses beta messages API for compatibility with Anthropic streaming interfaces. """ client = self._get_anthropic_client(llm_config, async_client=False) - # MiniMax uses client.messages.create() - NOT client.beta.messages.create() - # Thinking blocks are natively supported without beta headers - response: AnthropicMessage = client.messages.create(**request_data) + response: BetaMessage = client.beta.messages.create(**request_data) return response.model_dump() @trace_method @@ -89,14 +81,12 @@ class MiniMaxClient(AnthropicClient): """ Asynchronous request to MiniMax API. - Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks. + Uses beta messages API for compatibility with Anthropic streaming interfaces. 
""" client = await self._get_anthropic_client_async(llm_config, async_client=True) - # MiniMax uses client.messages.create() - NOT client.beta.messages.create() - # Thinking blocks are natively supported without beta headers try: - response: AnthropicMessage = await client.messages.create(**request_data) + response: BetaMessage = await client.beta.messages.create(**request_data) return response.model_dump() except ValueError as e: # Handle streaming fallback if needed (similar to Anthropic client) @@ -109,19 +99,17 @@ class MiniMaxClient(AnthropicClient): raise @trace_method - async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[RawMessageStreamEvent]: + async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]: """ Asynchronous streaming request to MiniMax API. - Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks. + Uses beta messages API for compatibility with Anthropic streaming interfaces. 
""" client = await self._get_anthropic_client_async(llm_config, async_client=True) request_data["stream"] = True - # MiniMax uses client.messages.create() - NOT client.beta.messages.create() - # No beta headers needed - thinking blocks are natively supported try: - return await client.messages.create(**request_data) + return await client.beta.messages.create(**request_data) except Exception as e: logger.error(f"Error streaming MiniMax request: {e}") raise e @@ -142,7 +130,6 @@ class MiniMaxClient(AnthropicClient): Inherits most logic from AnthropicClient, with MiniMax-specific adjustments: - Temperature must be in range (0.0, 1.0] - - Removes extended thinking params (natively supported) """ data = super().build_request_data( agent_type, diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 5705fc36..f440ff8b 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -402,6 +402,16 @@ class LLMConfig(BaseModel): temperature=self.temperature, reasoning=ChatGPTOAuthReasoning(reasoning_effort=self.reasoning_effort or "medium"), ) + elif self.model_endpoint_type == "minimax": + # MiniMax uses Anthropic-compatible API + thinking_type = "enabled" if self.enable_reasoner else "disabled" + return AnthropicModelSettings( + max_output_tokens=self.max_tokens or 4096, + temperature=self.temperature, + thinking=AnthropicThinking(type=thinking_type, budget_tokens=self.max_reasoning_tokens or 1024), + verbosity=self.verbosity, + strict=self.strict, + ) else: # If we don't know the model type, use the default Model schema return Model(max_output_tokens=self.max_tokens or 4096) diff --git a/letta/services/streaming_service.py b/letta/services/streaming_service.py index 563e1846..b5fe0d87 100644 --- a/letta/services/streaming_service.py +++ b/letta/services/streaming_service.py @@ -516,11 +516,20 @@ class StreamingService: "groq", "deepseek", "chatgpt_oauth", + "minimax", ] def _is_token_streaming_compatible(self, agent: AgentState) -> bool: 
"""Check if agent's model supports token-level streaming.""" - base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek", "zai", "chatgpt_oauth"] + base_compatible = agent.llm_config.model_endpoint_type in [ + "anthropic", + "openai", + "bedrock", + "deepseek", + "zai", + "chatgpt_oauth", + "minimax", + ] google_letta_v1 = agent.agent_type == AgentType.letta_v1_agent and agent.llm_config.model_endpoint_type in [ "google_ai", "google_vertex", diff --git a/tests/test_providers.py b/tests/test_providers.py index 3b013a35..508e23e6 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -155,7 +155,7 @@ async def test_minimax(): # All MiniMax models have 128K max output assert model.max_tokens == 128000 # MiniMax uses Anthropic-compatible API endpoint - assert model.model_endpoint_type == "anthropic" + assert model.model_endpoint_type == "minimax" @pytest.mark.skipif(model_settings.azure_api_key is None, reason="Only run if AZURE_API_KEY is set.")