diff --git a/fern/openapi.json b/fern/openapi.json index bfef32f8..5b5af81f 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -35719,6 +35719,7 @@ "koboldcpp", "vllm", "hugging-face", + "minimax", "mistral", "together", "bedrock", @@ -38206,6 +38207,7 @@ "koboldcpp", "vllm", "hugging-face", + "minimax", "mistral", "together", "bedrock", @@ -39822,6 +39824,7 @@ "hugging-face", "letta", "lmstudio_openai", + "minimax", "mistral", "ollama", "openai", diff --git a/letta/llm_api/llm_client.py b/letta/llm_api/llm_client.py index 805e4038..c10b67f5 100644 --- a/letta/llm_api/llm_client.py +++ b/letta/llm_api/llm_client.py @@ -93,6 +93,13 @@ class LLMClient: put_inner_thoughts_first=put_inner_thoughts_first, actor=actor, ) + case ProviderType.minimax: + from letta.llm_api.minimax_client import MiniMaxClient + + return MiniMaxClient( + put_inner_thoughts_first=put_inner_thoughts_first, + actor=actor, + ) case ProviderType.deepseek: from letta.llm_api.deepseek_client import DeepseekClient diff --git a/letta/llm_api/minimax_client.py b/letta/llm_api/minimax_client.py new file mode 100644 index 00000000..1c6d0081 --- /dev/null +++ b/letta/llm_api/minimax_client.py @@ -0,0 +1,188 @@ +import os +from typing import List, Optional, Union + +import anthropic +from anthropic import AsyncStream +from anthropic.types import Message as AnthropicMessage, RawMessageStreamEvent + +from letta.llm_api.anthropic_client import AnthropicClient +from letta.log import get_logger +from letta.otel.tracing import trace_method +from letta.schemas.enums import AgentType +from letta.schemas.llm_config import LLMConfig +from letta.schemas.message import Message as PydanticMessage +from letta.settings import model_settings + +logger = get_logger(__name__) + +# MiniMax Anthropic-compatible API base URL +MINIMAX_BASE_URL = "https://api.minimax.io/anthropic" + + +class MiniMaxClient(AnthropicClient): + """ + MiniMax LLM client using Anthropic-compatible API. 
+ + Key differences from AnthropicClient: + - Uses standard messages API (client.messages.create), NOT beta API + - Thinking blocks are natively supported without beta headers + - Temperature must be in range (0.0, 1.0] + - Some Anthropic params are ignored: top_k, stop_sequences, service_tier, etc. + + Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api + """ + + @trace_method + def _get_anthropic_client( + self, llm_config: LLMConfig, async_client: bool = False + ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]: + """Create Anthropic client configured for MiniMax API.""" + api_key, _, _ = self.get_byok_overrides(llm_config) + + if not api_key: + api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY") + + if async_client: + if api_key: + return anthropic.AsyncAnthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) + return anthropic.AsyncAnthropic(base_url=MINIMAX_BASE_URL) + + if api_key: + return anthropic.Anthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) + return anthropic.Anthropic(base_url=MINIMAX_BASE_URL) + + @trace_method + async def _get_anthropic_client_async( + self, llm_config: LLMConfig, async_client: bool = False + ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]: + """Create Anthropic client configured for MiniMax API (async version).""" + api_key, _, _ = await self.get_byok_overrides_async(llm_config) + + if not api_key: + api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY") + + if async_client: + if api_key: + return anthropic.AsyncAnthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) + return anthropic.AsyncAnthropic(base_url=MINIMAX_BASE_URL) + + if api_key: + return anthropic.Anthropic(api_key=api_key, base_url=MINIMAX_BASE_URL) + return anthropic.Anthropic(base_url=MINIMAX_BASE_URL) + + @trace_method + def request(self, request_data: dict, llm_config: LLMConfig) -> dict: + """ + Synchronous request to MiniMax API. 
+ + Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks. + """ + client = self._get_anthropic_client(llm_config, async_client=False) + + # MiniMax uses client.messages.create() - NOT client.beta.messages.create() + # Thinking blocks are natively supported without beta headers + response: AnthropicMessage = client.messages.create(**request_data) + return response.model_dump() + + @trace_method + async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict: + """ + Asynchronous request to MiniMax API. + + Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks. + """ + client = await self._get_anthropic_client_async(llm_config, async_client=True) + + # MiniMax uses client.messages.create() - NOT client.beta.messages.create() + # Thinking blocks are natively supported without beta headers + try: + response: AnthropicMessage = await client.messages.create(**request_data) + return response.model_dump() + except ValueError as e: + # Handle streaming fallback if needed (similar to Anthropic client) + if "streaming is required" in str(e).lower(): + logger.warning( + "[MiniMax] Non-streaming request rejected. Falling back to streaming mode. Error: %s", + str(e), + ) + return await self._request_via_streaming(request_data, llm_config, betas=[]) + raise + + @trace_method + async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[RawMessageStreamEvent]: + """ + Asynchronous streaming request to MiniMax API. + + Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks. 
+ """ + client = await self._get_anthropic_client_async(llm_config, async_client=True) + request_data["stream"] = True + + # MiniMax uses client.messages.create() - NOT client.beta.messages.create() + # No beta headers needed - thinking blocks are natively supported + try: + return await client.messages.create(**request_data) + except Exception as e: + logger.error(f"Error streaming MiniMax request: {e}") + raise e + + @trace_method + def build_request_data( + self, + agent_type: AgentType, + messages: List[PydanticMessage], + llm_config: LLMConfig, + tools: Optional[List[dict]] = None, + force_tool_call: Optional[str] = None, + requires_subsequent_tool_call: bool = False, + tool_return_truncation_chars: Optional[int] = None, + ) -> dict: + """ + Build request data for MiniMax API. + + Inherits most logic from AnthropicClient, with MiniMax-specific adjustments: + - Temperature is clamped into the valid range (0.0, 1.0] + - Other Anthropic-specific params are passed through unchanged (MiniMax ignores them) + """ + data = super().build_request_data( + agent_type, + messages, + llm_config, + tools, + force_tool_call, + requires_subsequent_tool_call, + tool_return_truncation_chars, + ) + + # MiniMax temperature range is (0.0, 1.0], recommended value: 1 + if data.get("temperature") is not None: + temp = data["temperature"] + if temp <= 0: + data["temperature"] = 0.01 # Minimum valid value (exclusive of 0) + logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 0.01.") + elif temp > 1.0: + data["temperature"] = 1.0 # Maximum valid value + logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 1.0.") + + # MiniMax silently ignores unsupported Anthropic-specific parameters (e.g. top_k, + # stop_sequences, service_tier), so leaving them in the payload causes no errors. + # Note: They are intentionally not stripped from the request here. + + return data + + def is_reasoning_model(self, llm_config: LLMConfig) -> bool: + """ + All MiniMax M2.x models support native interleaved thinking. 
+ + Unlike Anthropic where only certain models (Claude 3.7+) support extended thinking, + all MiniMax models natively support thinking blocks without beta headers. + """ + return True + + def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool: + """MiniMax models support all tool choice modes.""" + return False + + def supports_structured_output(self, llm_config: LLMConfig) -> bool: + """MiniMax doesn't currently advertise structured output support.""" + return False diff --git a/letta/schemas/enums.py b/letta/schemas/enums.py index 1c1f06a5..d4db5c18 100644 --- a/letta/schemas/enums.py +++ b/letta/schemas/enums.py @@ -63,6 +63,7 @@ class ProviderType(str, Enum): hugging_face = "hugging-face" letta = "letta" lmstudio_openai = "lmstudio_openai" + minimax = "minimax" mistral = "mistral" ollama = "ollama" openai = "openai" diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 5ce041f8..5705fc36 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -43,6 +43,7 @@ class LLMConfig(BaseModel): "koboldcpp", "vllm", "hugging-face", + "minimax", "mistral", "together", # completions endpoint "bedrock", diff --git a/letta/schemas/model.py b/letta/schemas/model.py index cb2926e9..4023c0a0 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -42,6 +42,7 @@ class Model(LLMConfig, ModelBase): "koboldcpp", "vllm", "hugging-face", + "minimax", "mistral", "together", "bedrock", diff --git a/letta/schemas/providers/__init__.py b/letta/schemas/providers/__init__.py index 230881a0..2790ba7e 100644 --- a/letta/schemas/providers/__init__.py +++ b/letta/schemas/providers/__init__.py @@ -12,13 +12,14 @@ from .google_vertex import GoogleVertexProvider from .groq import GroqProvider from .letta import LettaProvider from .lmstudio import LMStudioOpenAIProvider +from .minimax import MiniMaxProvider from .mistral import MistralProvider from .ollama import OllamaProvider from .openai import OpenAIProvider from .openrouter 
import OpenRouterProvider +from .sglang import SGLangProvider from .together import TogetherProvider from .vllm import VLLMProvider -from .sglang import SGLangProvider from .xai import XAIProvider from .zai import ZAIProvider @@ -41,6 +42,7 @@ __all__ = [ "GroqProvider", "LettaProvider", "LMStudioOpenAIProvider", + "MiniMaxProvider", "MistralProvider", "OllamaProvider", "OpenAIProvider", diff --git a/letta/schemas/providers/minimax.py b/letta/schemas/providers/minimax.py new file mode 100644 index 00000000..616cf11d --- /dev/null +++ b/letta/schemas/providers/minimax.py @@ -0,0 +1,87 @@ +from typing import Literal + +from pydantic import Field + +from letta.log import get_logger +from letta.schemas.enums import ProviderCategory, ProviderType +from letta.schemas.llm_config import LLMConfig +from letta.schemas.providers.base import Provider + +logger = get_logger(__name__) + +# MiniMax model specifications from official documentation +# https://platform.minimax.io/docs/guides/models-intro +MODEL_LIST = [ + { + "name": "MiniMax-M2.1", + "context_window": 200000, + "max_output": 128000, + "description": "Polyglot code mastery, precision code refactoring (~60 tps)", + }, + { + "name": "MiniMax-M2.1-lightning", + "context_window": 200000, + "max_output": 128000, + "description": "Same performance as M2.1, significantly faster (~100 tps)", + }, + { + "name": "MiniMax-M2", + "context_window": 200000, + "max_output": 128000, + "description": "Agentic capabilities, advanced reasoning", + }, +] + + +class MiniMaxProvider(Provider): + """ + MiniMax provider using Anthropic-compatible API. + + MiniMax models support native interleaved thinking without requiring beta headers. + The API uses the standard messages endpoint (not beta). 
+ + Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api + """ + + provider_type: Literal[ProviderType.minimax] = Field(ProviderType.minimax, description="The type of the provider.") + provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)") + api_key: str | None = Field(None, description="API key for the MiniMax API.", deprecated=True) + base_url: str = Field("https://api.minimax.io/anthropic", description="Base URL for the MiniMax Anthropic-compatible API.") + + def get_default_max_output_tokens(self, model_name: str) -> int: + """Get the default max output tokens for MiniMax models.""" + # All MiniMax models support 128K output tokens + return 128000 + + def get_model_context_window_size(self, model_name: str) -> int | None: + """Get the context window size for a MiniMax model.""" + # All current MiniMax models have 200K context window + for model in MODEL_LIST: + if model["name"] == model_name: + return model["context_window"] + # Default fallback + return 200000 + + async def list_llm_models_async(self) -> list[LLMConfig]: + """ + Return available MiniMax models. + + MiniMax doesn't have a models listing endpoint, so we use a hardcoded list. 
+ """ + configs = [] + for model in MODEL_LIST: + configs.append( + LLMConfig( + model=model["name"], + model_endpoint_type="minimax", + model_endpoint=self.base_url, + context_window=model["context_window"], + handle=self.get_handle(model["name"]), + max_tokens=model["max_output"], + # MiniMax models support native thinking, similar to Claude's extended thinking + put_inner_thoughts_in_kwargs=True, + provider_name=self.name, + provider_category=self.provider_category, + ) + ) + return configs diff --git a/letta/settings.py b/letta/settings.py index 10297603..83649312 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -150,6 +150,9 @@ class ModelSettings(BaseSettings): # groq groq_api_key: Optional[str] = None + # minimax + minimax_api_key: Optional[str] = None + # Bedrock aws_access_key_id: Optional[str] = None aws_secret_access_key: Optional[str] = None diff --git a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py index c329180a..4c1d71b4 100644 --- a/tests/integration_test_send_message.py +++ b/tests/integration_test_send_message.py @@ -189,6 +189,7 @@ all_configs = [ "openai-gpt-5.json", # TODO: GPT-5 disabled for now, it sends HiddenReasoningMessages which break the tests. 
"claude-4-5-sonnet.json", "gemini-2.5-pro.json", + "minimax-m2.1-lightning.json", ] reasoning_configs = [ @@ -243,6 +244,10 @@ def is_reasoner_model(model_handle: str, model_settings: dict) -> bool: elif provider_type in ["google_vertex", "google_ai"]: return model.startswith("gemini-2.5-flash") or model.startswith("gemini-2.5-pro") or model.startswith("gemini-3") + # MiniMax reasoning models (all M2.x models support native interleaved thinking) + elif provider_type == "minimax": + return model.startswith("MiniMax-M2") + return False diff --git a/tests/model_settings/minimax-m2.1-lightning.json b/tests/model_settings/minimax-m2.1-lightning.json new file mode 100644 index 00000000..7a7f03bd --- /dev/null +++ b/tests/model_settings/minimax-m2.1-lightning.json @@ -0,0 +1,9 @@ +{ + "handle": "minimax/MiniMax-M2.1-lightning", + "model_settings": { + "provider_type": "minimax", + "temperature": 1.0, + "max_output_tokens": 4096, + "parallel_tool_calls": false + } +} diff --git a/tests/test_minimax_client.py b/tests/test_minimax_client.py new file mode 100644 index 00000000..32ac1d61 --- /dev/null +++ b/tests/test_minimax_client.py @@ -0,0 +1,270 @@ +"""Unit tests for MiniMax client.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from letta.llm_api.minimax_client import MINIMAX_BASE_URL, MiniMaxClient +from letta.schemas.enums import AgentType +from letta.schemas.llm_config import LLMConfig + + +class TestMiniMaxClient: + """Tests for MiniMaxClient.""" + + def setup_method(self): + """Set up test fixtures.""" + self.client = MiniMaxClient(put_inner_thoughts_first=True) + self.llm_config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + ) + + def test_is_reasoning_model_always_true(self): + """All MiniMax models support native interleaved thinking.""" + assert self.client.is_reasoning_model(self.llm_config) is True + + # Test with different models + for 
model_name in ["MiniMax-M2.1", "MiniMax-M2.1-lightning", "MiniMax-M2"]: + config = LLMConfig( + model=model_name, + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + ) + assert self.client.is_reasoning_model(config) is True + + def test_requires_auto_tool_choice(self): + """MiniMax supports all tool choice modes.""" + assert self.client.requires_auto_tool_choice(self.llm_config) is False + + def test_supports_structured_output(self): + """MiniMax doesn't currently advertise structured output support.""" + assert self.client.supports_structured_output(self.llm_config) is False + + @patch("letta.llm_api.minimax_client.model_settings") + def test_get_anthropic_client_with_api_key(self, mock_settings): + """Test client creation with API key.""" + mock_settings.minimax_api_key = "test-api-key" + + with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic: + mock_anthropic.Anthropic.return_value = MagicMock() + + # Mock BYOK to return no override + self.client.get_byok_overrides = MagicMock(return_value=(None, None, None)) + + client = self.client._get_anthropic_client(self.llm_config, async_client=False) + + mock_anthropic.Anthropic.assert_called_once_with( + api_key="test-api-key", + base_url=MINIMAX_BASE_URL, + ) + + @patch("letta.llm_api.minimax_client.model_settings") + def test_get_anthropic_client_async(self, mock_settings): + """Test async client creation.""" + mock_settings.minimax_api_key = "test-api-key" + + with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic: + mock_anthropic.AsyncAnthropic.return_value = MagicMock() + + # Mock BYOK to return no override + self.client.get_byok_overrides = MagicMock(return_value=(None, None, None)) + + client = self.client._get_anthropic_client(self.llm_config, async_client=True) + + mock_anthropic.AsyncAnthropic.assert_called_once_with( + api_key="test-api-key", + base_url=MINIMAX_BASE_URL, + ) + + +class TestMiniMaxClientTemperatureClamping: + 
"""Tests for temperature clamping in build_request_data.""" + + def setup_method(self): + """Set up test fixtures.""" + self.client = MiniMaxClient(put_inner_thoughts_first=True) + self.llm_config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + temperature=0.7, + ) + + @patch.object(MiniMaxClient, "build_request_data") + def test_temperature_clamping_is_applied(self, mock_build): + """Verify build_request_data is called for temperature clamping.""" + # This is a basic test to ensure the method exists and can be called + mock_build.return_value = {"temperature": 0.7} + result = self.client.build_request_data( + agent_type=AgentType.letta_v1_agent, + messages=[], + llm_config=self.llm_config, + ) + mock_build.assert_called_once() + + def test_temperature_zero_clamped(self): + """Test that temperature=0 is clamped to 0.01.""" + config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + temperature=0, + ) + + # Mock the parent class method to return a basic dict + with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent: + mock_parent.return_value = {"temperature": 0, "model": "MiniMax-M2.1"} + + result = self.client.build_request_data( + agent_type=AgentType.letta_v1_agent, + messages=[], + llm_config=config, + ) + + # Temperature should be clamped to 0.01 + assert result["temperature"] == 0.01 + + def test_temperature_negative_clamped(self): + """Test that negative temperature is clamped to 0.01.""" + config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + temperature=-0.5, + ) + + with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent: + mock_parent.return_value = {"temperature": -0.5, "model": "MiniMax-M2.1"} + + result = self.client.build_request_data( + 
agent_type=AgentType.letta_v1_agent, + messages=[], + llm_config=config, + ) + + assert result["temperature"] == 0.01 + + def test_temperature_above_one_clamped(self): + """Test that temperature > 1.0 is clamped to 1.0.""" + config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + temperature=1.5, + ) + + with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent: + mock_parent.return_value = {"temperature": 1.5, "model": "MiniMax-M2.1"} + + result = self.client.build_request_data( + agent_type=AgentType.letta_v1_agent, + messages=[], + llm_config=config, + ) + + assert result["temperature"] == 1.0 + + def test_temperature_valid_not_modified(self): + """Test that valid temperature values are not modified.""" + config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + temperature=0.7, + ) + + with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent: + mock_parent.return_value = {"temperature": 0.7, "model": "MiniMax-M2.1"} + + result = self.client.build_request_data( + agent_type=AgentType.letta_v1_agent, + messages=[], + llm_config=config, + ) + + assert result["temperature"] == 0.7 + + +class TestMiniMaxClientUsesNonBetaAPI: + """Tests to verify MiniMax client uses non-beta API.""" + + def test_request_uses_messages_not_beta(self): + """Verify request() uses client.messages.create, not client.beta.messages.create.""" + client = MiniMaxClient(put_inner_thoughts_first=True) + llm_config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + ) + + with patch.object(client, "_get_anthropic_client") as mock_get_client: + mock_anthropic_client = MagicMock() + mock_response = MagicMock() + mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]} + 
mock_anthropic_client.messages.create.return_value = mock_response + mock_get_client.return_value = mock_anthropic_client + + result = client.request({"model": "MiniMax-M2.1"}, llm_config) + + # Verify messages.create was called (not beta.messages.create) + mock_anthropic_client.messages.create.assert_called_once() + # Verify beta was NOT accessed + assert not hasattr(mock_anthropic_client, "beta") or not mock_anthropic_client.beta.messages.create.called + + @pytest.mark.asyncio + async def test_request_async_uses_messages_not_beta(self): + """Verify request_async() uses client.messages.create, not client.beta.messages.create.""" + client = MiniMaxClient(put_inner_thoughts_first=True) + llm_config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + ) + + with patch.object(client, "_get_anthropic_client_async") as mock_get_client: + mock_anthropic_client = AsyncMock() + mock_response = MagicMock() + mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]} + mock_anthropic_client.messages.create.return_value = mock_response + mock_get_client.return_value = mock_anthropic_client + + result = await client.request_async({"model": "MiniMax-M2.1"}, llm_config) + + # Verify messages.create was called (not beta.messages.create) + mock_anthropic_client.messages.create.assert_called_once() + + @pytest.mark.asyncio + async def test_stream_async_uses_messages_not_beta(self): + """Verify stream_async() uses client.messages.create, not client.beta.messages.create.""" + client = MiniMaxClient(put_inner_thoughts_first=True) + llm_config = LLMConfig( + model="MiniMax-M2.1", + model_endpoint_type="minimax", + model_endpoint=MINIMAX_BASE_URL, + context_window=200000, + ) + + with patch.object(client, "_get_anthropic_client_async") as mock_get_client: + mock_anthropic_client = AsyncMock() + mock_stream = AsyncMock() + mock_anthropic_client.messages.create.return_value = 
mock_stream + mock_get_client.return_value = mock_anthropic_client + + result = await client.stream_async({"model": "MiniMax-M2.1"}, llm_config) + + # Verify messages.create was called (not beta.messages.create) + mock_anthropic_client.messages.create.assert_called_once() + # Verify stream=True was set + call_kwargs = mock_anthropic_client.messages.create.call_args[1] + assert call_kwargs.get("stream") is True diff --git a/tests/test_providers.py b/tests/test_providers.py index 6b66217e..21600682 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -11,6 +11,7 @@ from letta.schemas.providers import ( GoogleAIProvider, GoogleVertexProvider, GroqProvider, + MiniMaxProvider, OllamaProvider, OpenAIProvider, SGLangProvider, @@ -131,6 +132,32 @@ async def test_groq(): assert models[0].handle == f"{provider.name}/{models[0].model}" +@pytest.mark.asyncio +async def test_minimax(): + """Test MiniMax provider - uses hardcoded model list, no API key required.""" + provider = MiniMaxProvider(name="minimax") + models = await provider.list_llm_models_async() + + # Should have exactly 3 models: M2.1, M2.1-lightning, M2 + assert len(models) == 3 + + # Verify model properties + model_names = {m.model for m in models} + assert "MiniMax-M2.1" in model_names + assert "MiniMax-M2.1-lightning" in model_names + assert "MiniMax-M2" in model_names + + # Verify handle format + for model in models: + assert model.handle == f"{provider.name}/{model.model}" + # All MiniMax models have 200K context window + assert model.context_window == 200000 + # All MiniMax models have 128K max output + assert model.max_tokens == 128000 + # All use minimax endpoint type + assert model.model_endpoint_type == "minimax" + + @pytest.mark.skipif(model_settings.azure_api_key is None, reason="Only run if AZURE_API_KEY is set.") @pytest.mark.asyncio async def test_azure():