feat: add MiniMax provider support (#9095)

* feat: add MiniMax provider support

Add MiniMax as a new LLM provider using their Anthropic-compatible API.

Key implementation details:
- Uses standard messages API (not beta) - MiniMax supports thinking blocks natively
- Base URL: https://api.minimax.io/anthropic
- Models: MiniMax-M2.1, MiniMax-M2.1-lightning, MiniMax-M2 (all 200K context, 128K output)
- Temperature clamped to valid range (0.0, 1.0]
- All M2.x models treated as reasoning models (support interleaved thinking)

Files added:
- letta/schemas/providers/minimax.py - MiniMax provider schema
- letta/llm_api/minimax_client.py - Client extending AnthropicClient
- tests/test_minimax_client.py - Unit tests (13 tests)
- tests/model_settings/minimax-m2.1.json - Integration test config

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* chore: regenerate API spec with MiniMax provider

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* chore: use MiniMax-M2.1-lightning for CI tests

Switch to the faster/cheaper lightning model variant for integration tests.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* chore: add MINIMAX_API_KEY to deploy-core command

Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

* chore: regenerate web openapi spec with MiniMax provider

Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

🐾 Generated with [Letta Code](https://letta.com)

---------

Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>
This commit is contained in:
Sarah Wooders
2026-01-25 19:15:25 -08:00
committed by Caren Thomas
parent 221b4e6279
commit adab8cd9b5
13 changed files with 605 additions and 1 deletions

View File

@@ -35719,6 +35719,7 @@
"koboldcpp",
"vllm",
"hugging-face",
"minimax",
"mistral",
"together",
"bedrock",
@@ -38206,6 +38207,7 @@
"koboldcpp",
"vllm",
"hugging-face",
"minimax",
"mistral",
"together",
"bedrock",
@@ -39822,6 +39824,7 @@
"hugging-face",
"letta",
"lmstudio_openai",
"minimax",
"mistral",
"ollama",
"openai",

View File

@@ -93,6 +93,13 @@ class LLMClient:
put_inner_thoughts_first=put_inner_thoughts_first,
actor=actor,
)
case ProviderType.minimax:
from letta.llm_api.minimax_client import MiniMaxClient
return MiniMaxClient(
put_inner_thoughts_first=put_inner_thoughts_first,
actor=actor,
)
case ProviderType.deepseek:
from letta.llm_api.deepseek_client import DeepseekClient

View File

@@ -0,0 +1,188 @@
import os
from typing import List, Optional, Union
import anthropic
from anthropic import AsyncStream
from anthropic.types import Message as AnthropicMessage, RawMessageStreamEvent
from letta.llm_api.anthropic_client import AnthropicClient
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.enums import AgentType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.settings import model_settings
logger = get_logger(__name__)
# MiniMax Anthropic-compatible API base URL
MINIMAX_BASE_URL = "https://api.minimax.io/anthropic"
class MiniMaxClient(AnthropicClient):
    """
    MiniMax LLM client using Anthropic-compatible API.

    Key differences from AnthropicClient:
    - Uses standard messages API (client.messages.create), NOT beta API
    - Thinking blocks are natively supported without beta headers
    - Temperature must be in range (0.0, 1.0]
    - Some Anthropic params (top_k, stop_sequences, service_tier, etc.) are
      silently ignored by MiniMax, so they are intentionally left in requests.

    Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

    def _make_minimax_client(
        self, api_key: Optional[str], async_client: bool
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Build an (Async)Anthropic SDK client pointed at the MiniMax base URL.

        The api_key kwarg is only passed when a key was resolved, preserving the
        SDK's own credential resolution when no key is available.
        """
        kwargs: dict = {"base_url": MINIMAX_BASE_URL}
        if api_key:
            kwargs["api_key"] = api_key
        if async_client:
            return anthropic.AsyncAnthropic(**kwargs)
        return anthropic.Anthropic(**kwargs)

    @trace_method
    def _get_anthropic_client(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create an Anthropic client configured for MiniMax (sync BYOK lookup)."""
        api_key, _, _ = self.get_byok_overrides(llm_config)
        if not api_key:
            # Fall back to server settings, then the environment variable.
            api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY")
        return self._make_minimax_client(api_key, async_client)

    @trace_method
    async def _get_anthropic_client_async(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create an Anthropic client configured for MiniMax (async BYOK lookup)."""
        api_key, _, _ = await self.get_byok_overrides_async(llm_config)
        if not api_key:
            # Fall back to server settings, then the environment variable.
            api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY")
        return self._make_minimax_client(api_key, async_client)

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Synchronous request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = self._get_anthropic_client(llm_config, async_client=False)
        # MiniMax uses client.messages.create() - NOT client.beta.messages.create().
        # Thinking blocks are natively supported without beta headers.
        response: AnthropicMessage = client.messages.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Asynchronous request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        Falls back to a streaming request when the API insists on streaming mode.
        """
        client = await self._get_anthropic_client_async(llm_config, async_client=True)
        # MiniMax uses client.messages.create() - NOT client.beta.messages.create().
        # Thinking blocks are natively supported without beta headers.
        try:
            response: AnthropicMessage = await client.messages.create(**request_data)
            return response.model_dump()
        except ValueError as e:
            # Handle streaming fallback if needed (similar to Anthropic client).
            if "streaming is required" in str(e).lower():
                logger.warning(
                    "[MiniMax] Non-streaming request rejected. Falling back to streaming mode. Error: %s",
                    str(e),
                )
                # No beta features required, hence the empty betas list.
                return await self._request_via_streaming(request_data, llm_config, betas=[])
            raise

    @trace_method
    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[RawMessageStreamEvent]:
        """
        Asynchronous streaming request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = await self._get_anthropic_client_async(llm_config, async_client=True)
        request_data["stream"] = True
        # MiniMax uses client.messages.create() - NOT client.beta.messages.create().
        # No beta headers needed - thinking blocks are natively supported.
        try:
            return await client.messages.create(**request_data)
        except Exception as e:
            logger.error(f"Error streaming MiniMax request: {e}")
            raise e

    @trace_method
    def build_request_data(
        self,
        agent_type: AgentType,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
        requires_subsequent_tool_call: bool = False,
        tool_return_truncation_chars: Optional[int] = None,
    ) -> dict:
        """
        Build request data for MiniMax API.

        Inherits most logic from AnthropicClient, with one MiniMax-specific
        adjustment: temperature is clamped into MiniMax's valid (0.0, 1.0] range.
        Anthropic-only parameters are left untouched because MiniMax silently
        ignores them.
        """
        data = super().build_request_data(
            agent_type,
            messages,
            llm_config,
            tools,
            force_tool_call,
            requires_subsequent_tool_call,
            tool_return_truncation_chars,
        )
        # MiniMax temperature range is (0.0, 1.0], recommended value: 1
        if data.get("temperature") is not None:
            temp = data["temperature"]
            if temp <= 0:
                data["temperature"] = 0.01  # Minimum valid value (exclusive of 0)
                logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 0.01.")
            elif temp > 1.0:
                data["temperature"] = 1.0  # Maximum valid value
                logger.warning(f"[MiniMax] Temperature {temp} is invalid. Clamped to 1.0.")
        return data

    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
        """
        All MiniMax M2.x models support native interleaved thinking.

        Unlike Anthropic where only certain models (Claude 3.7+) support extended
        thinking, all MiniMax models natively support thinking blocks without
        beta headers.
        """
        return True

    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
        """MiniMax models support all tool choice modes."""
        return False

    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
        """MiniMax doesn't currently advertise structured output support."""
        return False

View File

@@ -63,6 +63,7 @@ class ProviderType(str, Enum):
hugging_face = "hugging-face"
letta = "letta"
lmstudio_openai = "lmstudio_openai"
minimax = "minimax"
mistral = "mistral"
ollama = "ollama"
openai = "openai"

View File

@@ -43,6 +43,7 @@ class LLMConfig(BaseModel):
"koboldcpp",
"vllm",
"hugging-face",
"minimax",
"mistral",
"together", # completions endpoint
"bedrock",

View File

@@ -42,6 +42,7 @@ class Model(LLMConfig, ModelBase):
"koboldcpp",
"vllm",
"hugging-face",
"minimax",
"mistral",
"together",
"bedrock",

View File

@@ -12,13 +12,14 @@ from .google_vertex import GoogleVertexProvider
from .groq import GroqProvider
from .letta import LettaProvider
from .lmstudio import LMStudioOpenAIProvider
from .minimax import MiniMaxProvider
from .mistral import MistralProvider
from .ollama import OllamaProvider
from .openai import OpenAIProvider
from .openrouter import OpenRouterProvider
from .sglang import SGLangProvider
from .together import TogetherProvider
from .vllm import VLLMProvider
from .sglang import SGLangProvider
from .xai import XAIProvider
from .zai import ZAIProvider
@@ -41,6 +42,7 @@ __all__ = [
"GroqProvider",
"LettaProvider",
"LMStudioOpenAIProvider",
"MiniMaxProvider",
"MistralProvider",
"OllamaProvider",
"OpenAIProvider",

View File

@@ -0,0 +1,87 @@
from typing import Literal
from pydantic import Field
from letta.log import get_logger
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.base import Provider
logger = get_logger(__name__)
# MiniMax model specifications from official documentation
# https://platform.minimax.io/docs/guides/models-intro
# Each entry describes one offered model:
#   name           - model identifier sent to the API
#   context_window - total context size in tokens
#   max_output     - maximum completion tokens
#   description    - human-readable summary (throughput figures are approximate)
MODEL_LIST = [
    {
        "name": "MiniMax-M2.1",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Polyglot code mastery, precision code refactoring (~60 tps)",
    },
    {
        "name": "MiniMax-M2.1-lightning",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Same performance as M2.1, significantly faster (~100 tps)",
    },
    {
        "name": "MiniMax-M2",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Agentic capabilities, advanced reasoning",
    },
]
class MiniMaxProvider(Provider):
    """
    MiniMax provider using Anthropic-compatible API.

    MiniMax models support native interleaved thinking without requiring beta headers.
    The API uses the standard messages endpoint (not beta).

    Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

    provider_type: Literal[ProviderType.minimax] = Field(ProviderType.minimax, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the MiniMax API.", deprecated=True)
    base_url: str = Field("https://api.minimax.io/anthropic", description="Base URL for the MiniMax Anthropic-compatible API.")

    def get_default_max_output_tokens(self, model_name: str) -> int:
        """Get the default max output tokens for a MiniMax model.

        Looks the model up in MODEL_LIST (consistent with
        get_model_context_window_size); all current models allow 128K output,
        which is also the fallback for unknown model names.
        """
        for model in MODEL_LIST:
            if model["name"] == model_name:
                return model["max_output"]
        # Default fallback: all current MiniMax models support 128K output tokens
        return 128000

    def get_model_context_window_size(self, model_name: str) -> int | None:
        """Get the context window size for a MiniMax model."""
        for model in MODEL_LIST:
            if model["name"] == model_name:
                return model["context_window"]
        # Default fallback: all current MiniMax models have a 200K context window
        return 200000

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """
        Return available MiniMax models.

        MiniMax doesn't have a models listing endpoint, so we use a hardcoded list.
        """
        return [
            LLMConfig(
                model=model["name"],
                model_endpoint_type="minimax",
                model_endpoint=self.base_url,
                context_window=model["context_window"],
                handle=self.get_handle(model["name"]),
                max_tokens=model["max_output"],
                # MiniMax models support native thinking, similar to Claude's extended
                # thinking. NOTE(review): inner thoughts in kwargs alongside native
                # thinking mirrors the original setting - confirm intended.
                put_inner_thoughts_in_kwargs=True,
                provider_name=self.name,
                provider_category=self.provider_category,
            )
            for model in MODEL_LIST
        ]

View File

@@ -150,6 +150,9 @@ class ModelSettings(BaseSettings):
# groq
groq_api_key: Optional[str] = None
# minimax
minimax_api_key: Optional[str] = None
# Bedrock
aws_access_key_id: Optional[str] = None
aws_secret_access_key: Optional[str] = None

View File

@@ -189,6 +189,7 @@ all_configs = [
"openai-gpt-5.json", # TODO: GPT-5 disabled for now, it sends HiddenReasoningMessages which break the tests.
"claude-4-5-sonnet.json",
"gemini-2.5-pro.json",
"minimax-m2.1-lightning.json",
]
reasoning_configs = [
@@ -243,6 +244,10 @@ def is_reasoner_model(model_handle: str, model_settings: dict) -> bool:
elif provider_type in ["google_vertex", "google_ai"]:
return model.startswith("gemini-2.5-flash") or model.startswith("gemini-2.5-pro") or model.startswith("gemini-3")
# MiniMax reasoning models (all M2.x models support native interleaved thinking)
elif provider_type == "minimax":
return model.startswith("MiniMax-M2")
return False

View File

@@ -0,0 +1,9 @@
{
"handle": "minimax/MiniMax-M2.1-lightning",
"model_settings": {
"provider_type": "minimax",
"temperature": 1.0,
"max_output_tokens": 4096,
"parallel_tool_calls": false
}
}

View File

@@ -0,0 +1,270 @@
"""Unit tests for MiniMax client."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from letta.llm_api.minimax_client import MINIMAX_BASE_URL, MiniMaxClient
from letta.schemas.enums import AgentType
from letta.schemas.llm_config import LLMConfig
class TestMiniMaxClient:
    """Tests for MiniMaxClient."""

    def setup_method(self):
        """Create a fresh client and baseline config before each test."""
        self.client = MiniMaxClient(put_inner_thoughts_first=True)
        self.llm_config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
        )

    def test_is_reasoning_model_always_true(self):
        """Every MiniMax model reports native interleaved-thinking support."""
        assert self.client.is_reasoning_model(self.llm_config) is True
        # Spot-check each offered model variant as well.
        for variant in ("MiniMax-M2.1", "MiniMax-M2.1-lightning", "MiniMax-M2"):
            cfg = LLMConfig(
                model=variant,
                model_endpoint_type="minimax",
                model_endpoint=MINIMAX_BASE_URL,
                context_window=200000,
            )
            assert self.client.is_reasoning_model(cfg) is True

    def test_requires_auto_tool_choice(self):
        """MiniMax supports all tool choice modes."""
        assert self.client.requires_auto_tool_choice(self.llm_config) is False

    def test_supports_structured_output(self):
        """MiniMax doesn't currently advertise structured output support."""
        assert self.client.supports_structured_output(self.llm_config) is False

    @patch("letta.llm_api.minimax_client.model_settings")
    def test_get_anthropic_client_with_api_key(self, mock_settings):
        """The sync client is built with the settings key and MiniMax base URL."""
        mock_settings.minimax_api_key = "test-api-key"
        with patch("letta.llm_api.minimax_client.anthropic") as anthropic_mod:
            anthropic_mod.Anthropic.return_value = MagicMock()
            # BYOK lookup yields no override, forcing the settings fallback.
            self.client.get_byok_overrides = MagicMock(return_value=(None, None, None))
            self.client._get_anthropic_client(self.llm_config, async_client=False)
            anthropic_mod.Anthropic.assert_called_once_with(
                api_key="test-api-key",
                base_url=MINIMAX_BASE_URL,
            )

    @patch("letta.llm_api.minimax_client.model_settings")
    def test_get_anthropic_client_async(self, mock_settings):
        """Requesting an async client builds AsyncAnthropic with key and base URL."""
        mock_settings.minimax_api_key = "test-api-key"
        with patch("letta.llm_api.minimax_client.anthropic") as anthropic_mod:
            anthropic_mod.AsyncAnthropic.return_value = MagicMock()
            # BYOK lookup yields no override, forcing the settings fallback.
            self.client.get_byok_overrides = MagicMock(return_value=(None, None, None))
            self.client._get_anthropic_client(self.llm_config, async_client=True)
            anthropic_mod.AsyncAnthropic.assert_called_once_with(
                api_key="test-api-key",
                base_url=MINIMAX_BASE_URL,
            )
class TestMiniMaxClientTemperatureClamping:
    """Tests for temperature clamping in build_request_data."""

    def setup_method(self):
        """Set up test fixtures."""
        self.client = MiniMaxClient(put_inner_thoughts_first=True)
        self.llm_config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
            temperature=0.7,
        )

    def _clamped_temperature(self, raw_temperature):
        """Run build_request_data with the parent stubbed to emit raw_temperature.

        Returns the temperature after MiniMax-specific post-processing.
        """
        config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
            temperature=raw_temperature,
        )
        # Stub the parent (AnthropicClient) so only the subclass's clamping runs.
        with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent:
            mock_parent.return_value = {"temperature": raw_temperature, "model": "MiniMax-M2.1"}
            result = self.client.build_request_data(
                agent_type=AgentType.letta_v1_agent,
                messages=[],
                llm_config=config,
            )
        return result["temperature"]

    def test_temperature_clamping_is_applied(self):
        """build_request_data delegates to the parent and passes valid temps through."""
        # Previously this test patched MiniMaxClient.build_request_data itself,
        # which only asserted that a mock was called - it exercised no real code.
        with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent:
            mock_parent.return_value = {"temperature": 0.7}
            result = self.client.build_request_data(
                agent_type=AgentType.letta_v1_agent,
                messages=[],
                llm_config=self.llm_config,
            )
            mock_parent.assert_called_once()
            assert result["temperature"] == 0.7

    def test_temperature_zero_clamped(self):
        """Temperature 0 is outside (0.0, 1.0] and is raised to 0.01."""
        assert self._clamped_temperature(0) == 0.01

    def test_temperature_negative_clamped(self):
        """Negative temperature is clamped up to 0.01."""
        assert self._clamped_temperature(-0.5) == 0.01

    def test_temperature_above_one_clamped(self):
        """Temperature above 1.0 is clamped down to 1.0."""
        assert self._clamped_temperature(1.5) == 1.0

    def test_temperature_valid_not_modified(self):
        """In-range temperature values pass through unchanged."""
        assert self._clamped_temperature(0.7) == 0.7
class TestMiniMaxClientUsesNonBetaAPI:
    """Tests to verify MiniMax client uses non-beta API."""

    @staticmethod
    def _make_config():
        """Baseline MiniMax LLMConfig shared by the tests below."""
        return LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
        )

    def test_request_uses_messages_not_beta(self):
        """Verify request() uses client.messages.create, not client.beta.messages.create."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = self._make_config()
        with patch.object(client, "_get_anthropic_client") as mock_get_client:
            mock_anthropic_client = MagicMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client
            client.request({"model": "MiniMax-M2.1"}, llm_config)
            # Verify messages.create was called and beta.messages.create was not.
            # (A hasattr() check is useless here: MagicMock auto-creates attributes,
            # so hasattr is always True - assert on the call record instead.)
            mock_anthropic_client.messages.create.assert_called_once()
            assert not mock_anthropic_client.beta.messages.create.called

    @pytest.mark.asyncio
    async def test_request_async_uses_messages_not_beta(self):
        """Verify request_async() uses client.messages.create, not client.beta.messages.create."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = self._make_config()
        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client
            await client.request_async({"model": "MiniMax-M2.1"}, llm_config)
            # Verify messages.create was called and beta.messages.create was not.
            mock_anthropic_client.messages.create.assert_called_once()
            assert not mock_anthropic_client.beta.messages.create.called

    @pytest.mark.asyncio
    async def test_stream_async_uses_messages_not_beta(self):
        """Verify stream_async() uses client.messages.create, not client.beta.messages.create."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = self._make_config()
        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_stream = AsyncMock()
            mock_anthropic_client.messages.create.return_value = mock_stream
            mock_get_client.return_value = mock_anthropic_client
            await client.stream_async({"model": "MiniMax-M2.1"}, llm_config)
            # Verify messages.create was called and beta.messages.create was not.
            mock_anthropic_client.messages.create.assert_called_once()
            assert not mock_anthropic_client.beta.messages.create.called
            # Verify stream=True was injected into the request payload.
            call_kwargs = mock_anthropic_client.messages.create.call_args[1]
            assert call_kwargs.get("stream") is True

View File

@@ -11,6 +11,7 @@ from letta.schemas.providers import (
GoogleAIProvider,
GoogleVertexProvider,
GroqProvider,
MiniMaxProvider,
OllamaProvider,
OpenAIProvider,
SGLangProvider,
@@ -131,6 +132,32 @@ async def test_groq():
assert models[0].handle == f"{provider.name}/{models[0].model}"
@pytest.mark.asyncio
async def test_minimax():
"""Test MiniMax provider - uses hardcoded model list, no API key required."""
provider = MiniMaxProvider(name="minimax")
models = await provider.list_llm_models_async()
# Should have exactly 3 models: M2.1, M2.1-lightning, M2
assert len(models) == 3
# Verify model properties
model_names = {m.model for m in models}
assert "MiniMax-M2.1" in model_names
assert "MiniMax-M2.1-lightning" in model_names
assert "MiniMax-M2" in model_names
# Verify handle format
for model in models:
assert model.handle == f"{provider.name}/{model.model}"
# All MiniMax models have 200K context window
assert model.context_window == 200000
# All MiniMax models have 128K max output
assert model.max_tokens == 128000
# All use minimax endpoint type
assert model.model_endpoint_type == "minimax"
@pytest.mark.skipif(model_settings.azure_api_key is None, reason="Only run if AZURE_API_KEY is set.")
@pytest.mark.asyncio
async def test_azure():