feat: add MiniMax provider support (#9095)
* feat: add MiniMax provider support Add MiniMax as a new LLM provider using their Anthropic-compatible API. Key implementation details: - Uses standard messages API (not beta) - MiniMax supports thinking blocks natively - Base URL: https://api.minimax.io/anthropic - Models: MiniMax-M2.1, MiniMax-M2.1-lightning, MiniMax-M2 (all 200K context, 128K output) - Temperature clamped to valid range (0.0, 1.0] - All M2.x models treated as reasoning models (support interleaved thinking) Files added: - letta/schemas/providers/minimax.py - MiniMax provider schema - letta/llm_api/minimax_client.py - Client extending AnthropicClient - tests/test_minimax_client.py - Unit tests (13 tests) - tests/model_settings/minimax-m2.1.json - Integration test config 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * chore: regenerate API spec with MiniMax provider 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * chore: use MiniMax-M2.1-lightning for CI tests Switch to the faster/cheaper lightning model variant for integration tests. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * chore: add MINIMAX_API_KEY to deploy-core command Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com> * chore: regenerate web openapi spec with MiniMax provider Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com> 🐾 Generated with [Letta Code](https://letta.com) --------- Co-authored-by: Letta <noreply@letta.com> Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>
This commit is contained in:
committed by
Caren Thomas
parent
221b4e6279
commit
adab8cd9b5
@@ -35719,6 +35719,7 @@
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together",
|
||||
"bedrock",
|
||||
@@ -38206,6 +38207,7 @@
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together",
|
||||
"bedrock",
|
||||
@@ -39822,6 +39824,7 @@
|
||||
"hugging-face",
|
||||
"letta",
|
||||
"lmstudio_openai",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"ollama",
|
||||
"openai",
|
||||
|
||||
@@ -93,6 +93,13 @@ class LLMClient:
|
||||
put_inner_thoughts_first=put_inner_thoughts_first,
|
||||
actor=actor,
|
||||
)
|
||||
case ProviderType.minimax:
|
||||
from letta.llm_api.minimax_client import MiniMaxClient
|
||||
|
||||
return MiniMaxClient(
|
||||
put_inner_thoughts_first=put_inner_thoughts_first,
|
||||
actor=actor,
|
||||
)
|
||||
case ProviderType.deepseek:
|
||||
from letta.llm_api.deepseek_client import DeepseekClient
|
||||
|
||||
|
||||
188
letta/llm_api/minimax_client.py
Normal file
188
letta/llm_api/minimax_client.py
Normal file
@@ -0,0 +1,188 @@
|
||||
import os
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import anthropic
|
||||
from anthropic import AsyncStream
|
||||
from anthropic.types import Message as AnthropicMessage, RawMessageStreamEvent
|
||||
|
||||
from letta.llm_api.anthropic_client import AnthropicClient
|
||||
from letta.log import get_logger
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.schemas.enums import AgentType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.message import Message as PydanticMessage
|
||||
from letta.settings import model_settings
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# MiniMax Anthropic-compatible API base URL
|
||||
MINIMAX_BASE_URL = "https://api.minimax.io/anthropic"
|
||||
|
||||
|
||||
class MiniMaxClient(AnthropicClient):
    """
    MiniMax LLM client using Anthropic-compatible API.

    Key differences from AnthropicClient:
    - Uses standard messages API (client.messages.create), NOT beta API
    - Thinking blocks are natively supported without beta headers
    - Temperature must be in range (0.0, 1.0]
    - Some Anthropic params are ignored: top_k, stop_sequences, service_tier, etc.
      (they are left in the request; MiniMax silently ignores them)

    Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

    def _build_sdk_client(
        self, api_key: Optional[str], async_client: bool
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Construct an (Async)Anthropic SDK client pointed at the MiniMax base URL.

        Consolidates the four near-identical construction branches that were
        previously duplicated across the sync/async getter methods.

        Args:
            api_key: BYOK override key, or None to fall back to settings/env.
            async_client: When True, build ``anthropic.AsyncAnthropic``.

        Returns:
            A configured Anthropic SDK client targeting MINIMAX_BASE_URL.
        """
        if not api_key:
            # No BYOK override: fall back to server settings, then the env var.
            api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY")
        client_cls = anthropic.AsyncAnthropic if async_client else anthropic.Anthropic
        if api_key:
            return client_cls(api_key=api_key, base_url=MINIMAX_BASE_URL)
        # Omit api_key entirely so the SDK applies its own key resolution.
        return client_cls(base_url=MINIMAX_BASE_URL)

    @trace_method
    def _get_anthropic_client(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create Anthropic client configured for MiniMax API."""
        api_key, _, _ = self.get_byok_overrides(llm_config)
        return self._build_sdk_client(api_key, async_client)

    @trace_method
    async def _get_anthropic_client_async(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create Anthropic client configured for MiniMax API (async BYOK lookup)."""
        api_key, _, _ = await self.get_byok_overrides_async(llm_config)
        return self._build_sdk_client(api_key, async_client)

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Synchronous request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = self._get_anthropic_client(llm_config, async_client=False)

        # MiniMax uses client.messages.create() - NOT client.beta.messages.create()
        # Thinking blocks are natively supported without beta headers
        response: AnthropicMessage = client.messages.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Asynchronous request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = await self._get_anthropic_client_async(llm_config, async_client=True)

        # MiniMax uses client.messages.create() - NOT client.beta.messages.create()
        try:
            response: AnthropicMessage = await client.messages.create(**request_data)
            return response.model_dump()
        except ValueError as e:
            # Handle streaming fallback if needed (similar to Anthropic client)
            if "streaming is required" in str(e).lower():
                logger.warning(
                    "[MiniMax] Non-streaming request rejected. Falling back to streaming mode. Error: %s",
                    str(e),
                )
                return await self._request_via_streaming(request_data, llm_config, betas=[])
            raise

    @trace_method
    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[RawMessageStreamEvent]:
        """
        Asynchronous streaming request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = await self._get_anthropic_client_async(llm_config, async_client=True)
        request_data["stream"] = True

        # MiniMax uses client.messages.create() - NOT client.beta.messages.create()
        try:
            return await client.messages.create(**request_data)
        except Exception as e:
            # Lazy %-args logging; bare `raise` preserves the original traceback.
            logger.error("Error streaming MiniMax request: %s", e)
            raise

    @trace_method
    def build_request_data(
        self,
        agent_type: AgentType,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
        requires_subsequent_tool_call: bool = False,
        tool_return_truncation_chars: Optional[int] = None,
    ) -> dict:
        """
        Build request data for MiniMax API.

        Inherits most logic from AnthropicClient, with one MiniMax-specific
        adjustment: the temperature is clamped into MiniMax's valid range
        (0.0, 1.0]. Anthropic-only parameters are left in the payload since
        MiniMax silently ignores them.
        """
        data = super().build_request_data(
            agent_type,
            messages,
            llm_config,
            tools,
            force_tool_call,
            requires_subsequent_tool_call,
            tool_return_truncation_chars,
        )

        # MiniMax temperature range is (0.0, 1.0], recommended value: 1
        temp = data.get("temperature")
        if temp is not None:
            if temp <= 0:
                data["temperature"] = 0.01  # Minimum valid value (exclusive of 0)
                logger.warning("[MiniMax] Temperature %s is invalid. Clamped to 0.01.", temp)
            elif temp > 1.0:
                data["temperature"] = 1.0  # Maximum valid value
                logger.warning("[MiniMax] Temperature %s is invalid. Clamped to 1.0.", temp)

        return data

    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
        """
        All MiniMax M2.x models support native interleaved thinking.

        Unlike Anthropic where only certain models (Claude 3.7+) support extended thinking,
        all MiniMax models natively support thinking blocks without beta headers.
        """
        return True

    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
        """MiniMax models support all tool choice modes."""
        return False

    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
        """MiniMax doesn't currently advertise structured output support."""
        return False
|
||||
@@ -63,6 +63,7 @@ class ProviderType(str, Enum):
|
||||
hugging_face = "hugging-face"
|
||||
letta = "letta"
|
||||
lmstudio_openai = "lmstudio_openai"
|
||||
minimax = "minimax"
|
||||
mistral = "mistral"
|
||||
ollama = "ollama"
|
||||
openai = "openai"
|
||||
|
||||
@@ -43,6 +43,7 @@ class LLMConfig(BaseModel):
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together", # completions endpoint
|
||||
"bedrock",
|
||||
|
||||
@@ -42,6 +42,7 @@ class Model(LLMConfig, ModelBase):
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together",
|
||||
"bedrock",
|
||||
|
||||
@@ -12,13 +12,14 @@ from .google_vertex import GoogleVertexProvider
|
||||
from .groq import GroqProvider
|
||||
from .letta import LettaProvider
|
||||
from .lmstudio import LMStudioOpenAIProvider
|
||||
from .minimax import MiniMaxProvider
|
||||
from .mistral import MistralProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .openai import OpenAIProvider
|
||||
from .openrouter import OpenRouterProvider
|
||||
from .sglang import SGLangProvider
|
||||
from .together import TogetherProvider
|
||||
from .vllm import VLLMProvider
|
||||
from .sglang import SGLangProvider
|
||||
from .xai import XAIProvider
|
||||
from .zai import ZAIProvider
|
||||
|
||||
@@ -41,6 +42,7 @@ __all__ = [
|
||||
"GroqProvider",
|
||||
"LettaProvider",
|
||||
"LMStudioOpenAIProvider",
|
||||
"MiniMaxProvider",
|
||||
"MistralProvider",
|
||||
"OllamaProvider",
|
||||
"OpenAIProvider",
|
||||
|
||||
87
letta/schemas/providers/minimax.py
Normal file
87
letta/schemas/providers/minimax.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.enums import ProviderCategory, ProviderType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.providers.base import Provider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# MiniMax model specifications from official documentation
|
||||
# https://platform.minimax.io/docs/guides/models-intro
|
||||
# Static catalog of MiniMax models: the MiniMax API exposes no model-listing
# endpoint, so provider code builds LLM configs from this hardcoded table.
# Specs taken from https://platform.minimax.io/docs/guides/models-intro
MODEL_LIST = [
    {
        # "name" is the model identifier accepted by the Anthropic-compatible API;
        # "context_window" and "max_output" are in tokens.
        "name": "MiniMax-M2.1",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Polyglot code mastery, precision code refactoring (~60 tps)",
    },
    {
        "name": "MiniMax-M2.1-lightning",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Same performance as M2.1, significantly faster (~100 tps)",
    },
    {
        "name": "MiniMax-M2",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Agentic capabilities, advanced reasoning",
    },
]
|
||||
|
||||
|
||||
class MiniMaxProvider(Provider):
    """
    MiniMax provider using Anthropic-compatible API.

    MiniMax models support native interleaved thinking without requiring beta headers.
    The API uses the standard messages endpoint (not beta).

    Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

    provider_type: Literal[ProviderType.minimax] = Field(ProviderType.minimax, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the MiniMax API.", deprecated=True)
    base_url: str = Field("https://api.minimax.io/anthropic", description="Base URL for the MiniMax Anthropic-compatible API.")

    def get_default_max_output_tokens(self, model_name: str) -> int:
        """Get the default max output tokens for MiniMax models."""
        # Every entry in MODEL_LIST caps output at 128K tokens.
        return 128000

    def get_model_context_window_size(self, model_name: str) -> int | None:
        """Get the context window size for a MiniMax model."""
        # Look the model up in the static spec table; every current model
        # shares a 200K window, which doubles as the fallback for unknown names.
        matches = (spec["context_window"] for spec in MODEL_LIST if spec["name"] == model_name)
        return next(matches, 200000)

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """
        Return available MiniMax models.

        MiniMax doesn't have a models listing endpoint, so we use a hardcoded list.
        """
        return [
            LLMConfig(
                model=spec["name"],
                model_endpoint_type="minimax",
                model_endpoint=self.base_url,
                context_window=spec["context_window"],
                handle=self.get_handle(spec["name"]),
                max_tokens=spec["max_output"],
                # MiniMax models support native thinking, similar to Claude's extended thinking
                put_inner_thoughts_in_kwargs=True,
                provider_name=self.name,
                provider_category=self.provider_category,
            )
            for spec in MODEL_LIST
        ]
|
||||
@@ -150,6 +150,9 @@ class ModelSettings(BaseSettings):
|
||||
# groq
|
||||
groq_api_key: Optional[str] = None
|
||||
|
||||
# minimax
|
||||
minimax_api_key: Optional[str] = None
|
||||
|
||||
# Bedrock
|
||||
aws_access_key_id: Optional[str] = None
|
||||
aws_secret_access_key: Optional[str] = None
|
||||
|
||||
@@ -189,6 +189,7 @@ all_configs = [
|
||||
"openai-gpt-5.json", # TODO: GPT-5 disabled for now, it sends HiddenReasoningMessages which break the tests.
|
||||
"claude-4-5-sonnet.json",
|
||||
"gemini-2.5-pro.json",
|
||||
"minimax-m2.1-lightning.json",
|
||||
]
|
||||
|
||||
reasoning_configs = [
|
||||
@@ -243,6 +244,10 @@ def is_reasoner_model(model_handle: str, model_settings: dict) -> bool:
|
||||
elif provider_type in ["google_vertex", "google_ai"]:
|
||||
return model.startswith("gemini-2.5-flash") or model.startswith("gemini-2.5-pro") or model.startswith("gemini-3")
|
||||
|
||||
# MiniMax reasoning models (all M2.x models support native interleaved thinking)
|
||||
elif provider_type == "minimax":
|
||||
return model.startswith("MiniMax-M2")
|
||||
|
||||
return False
|
||||
|
||||
|
||||
|
||||
9
tests/model_settings/minimax-m2.1-lightning.json
Normal file
9
tests/model_settings/minimax-m2.1-lightning.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"handle": "minimax/MiniMax-M2.1-lightning",
|
||||
"model_settings": {
|
||||
"provider_type": "minimax",
|
||||
"temperature": 1.0,
|
||||
"max_output_tokens": 4096,
|
||||
"parallel_tool_calls": false
|
||||
}
|
||||
}
|
||||
270
tests/test_minimax_client.py
Normal file
270
tests/test_minimax_client.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""Unit tests for MiniMax client."""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from letta.llm_api.minimax_client import MINIMAX_BASE_URL, MiniMaxClient
|
||||
from letta.schemas.enums import AgentType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
|
||||
|
||||
class TestMiniMaxClient:
    """Tests for MiniMaxClient capability flags and SDK client construction."""

    def setup_method(self):
        """Set up test fixtures."""
        self.client = MiniMaxClient(put_inner_thoughts_first=True)
        self.llm_config = self._config_for("MiniMax-M2.1")

    def _config_for(self, model_name: str) -> LLMConfig:
        """Build a minimal MiniMax LLMConfig for the given model name."""
        return LLMConfig(
            model=model_name,
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
        )

    def test_is_reasoning_model_always_true(self):
        """All MiniMax models support native interleaved thinking."""
        assert self.client.is_reasoning_model(self.llm_config) is True

        # Test with different models
        for model_name in ["MiniMax-M2.1", "MiniMax-M2.1-lightning", "MiniMax-M2"]:
            assert self.client.is_reasoning_model(self._config_for(model_name)) is True

    def test_requires_auto_tool_choice(self):
        """MiniMax supports all tool choice modes."""
        assert self.client.requires_auto_tool_choice(self.llm_config) is False

    def test_supports_structured_output(self):
        """MiniMax doesn't currently advertise structured output support."""
        assert self.client.supports_structured_output(self.llm_config) is False

    @patch("letta.llm_api.minimax_client.model_settings")
    def test_get_anthropic_client_with_api_key(self, mock_settings):
        """Sync SDK client is built with the settings API key and MiniMax base URL."""
        mock_settings.minimax_api_key = "test-api-key"

        with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic:
            mock_anthropic.Anthropic.return_value = MagicMock()

            # Mock BYOK to return no override so the settings key is used
            self.client.get_byok_overrides = MagicMock(return_value=(None, None, None))

            client = self.client._get_anthropic_client(self.llm_config, async_client=False)

            mock_anthropic.Anthropic.assert_called_once_with(
                api_key="test-api-key",
                base_url=MINIMAX_BASE_URL,
            )
            # Fixed: previously the returned client was never asserted on.
            assert client is mock_anthropic.Anthropic.return_value

    @patch("letta.llm_api.minimax_client.model_settings")
    def test_get_anthropic_client_async(self, mock_settings):
        """async_client=True on the sync getter builds an AsyncAnthropic SDK client."""
        # NOTE(review): despite the name, this exercises _get_anthropic_client's
        # async_client branch, not the coroutine _get_anthropic_client_async.
        mock_settings.minimax_api_key = "test-api-key"

        with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic:
            mock_anthropic.AsyncAnthropic.return_value = MagicMock()

            # Mock BYOK to return no override
            self.client.get_byok_overrides = MagicMock(return_value=(None, None, None))

            client = self.client._get_anthropic_client(self.llm_config, async_client=True)

            mock_anthropic.AsyncAnthropic.assert_called_once_with(
                api_key="test-api-key",
                base_url=MINIMAX_BASE_URL,
            )
            # Fixed: previously the returned client was never asserted on.
            assert client is mock_anthropic.AsyncAnthropic.return_value
|
||||
|
||||
|
||||
class TestMiniMaxClientTemperatureClamping:
    """Tests for temperature clamping in build_request_data."""

    def setup_method(self):
        """Set up test fixtures."""
        self.client = MiniMaxClient(put_inner_thoughts_first=True)

    def _build_with_temperature(self, temperature) -> dict:
        """Run the real build_request_data with the parent mocked to emit *temperature*.

        Mocking only the AnthropicClient parent (not MiniMaxClient's override)
        lets each test exercise the actual clamping logic.
        """
        config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
            temperature=temperature,
        )
        with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent:
            mock_parent.return_value = {"temperature": temperature, "model": "MiniMax-M2.1"}
            return self.client.build_request_data(
                agent_type=AgentType.letta_v1_agent,
                messages=[],
                llm_config=config,
            )

    def test_temperature_clamping_is_applied(self):
        """build_request_data always yields a temperature inside MiniMax's (0, 1] range."""
        # Fixed: this test previously mocked build_request_data itself, making
        # the assertion tautological; it now runs the real clamping code.
        for raw in (-0.5, 0, 0.5, 1.0, 2.0):
            clamped = self._build_with_temperature(raw)["temperature"]
            assert 0 < clamped <= 1.0

    def test_temperature_zero_clamped(self):
        """Test that temperature=0 is clamped to 0.01."""
        assert self._build_with_temperature(0)["temperature"] == 0.01

    def test_temperature_negative_clamped(self):
        """Test that negative temperature is clamped to 0.01."""
        assert self._build_with_temperature(-0.5)["temperature"] == 0.01

    def test_temperature_above_one_clamped(self):
        """Test that temperature > 1.0 is clamped to 1.0."""
        assert self._build_with_temperature(1.5)["temperature"] == 1.0

    def test_temperature_valid_not_modified(self):
        """Test that valid temperature values are not modified."""
        assert self._build_with_temperature(0.7)["temperature"] == 0.7
|
||||
|
||||
|
||||
class TestMiniMaxClientUsesNonBetaAPI:
    """Tests to verify MiniMax client uses non-beta API."""

    @staticmethod
    def _make_fixtures():
        """Return a fresh (client, llm_config) pair for a test."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
        )
        return client, llm_config

    def test_request_uses_messages_not_beta(self):
        """Verify request() uses client.messages.create and returns the dumped response."""
        client, llm_config = self._make_fixtures()

        with patch.object(client, "_get_anthropic_client") as mock_get_client:
            mock_anthropic_client = MagicMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client

            result = client.request({"model": "MiniMax-M2.1"}, llm_config)

            # Verify messages.create was called (not beta.messages.create)
            mock_anthropic_client.messages.create.assert_called_once()
            # Fixed: a MagicMock always has a `beta` attribute, so the previous
            # `not hasattr(...) or ...` left side was dead; check .called directly.
            assert not mock_anthropic_client.beta.messages.create.called
            # Fixed: previously `result` was computed but never asserted on.
            assert result == {"content": [{"type": "text", "text": "Hello"}]}

    @pytest.mark.asyncio
    async def test_request_async_uses_messages_not_beta(self):
        """Verify request_async() uses client.messages.create and returns the dumped response."""
        client, llm_config = self._make_fixtures()

        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client

            result = await client.request_async({"model": "MiniMax-M2.1"}, llm_config)

            # Verify messages.create was called (not beta.messages.create)
            mock_anthropic_client.messages.create.assert_called_once()
            # Fixed: previously `result` was computed but never asserted on.
            assert result == {"content": [{"type": "text", "text": "Hello"}]}

    @pytest.mark.asyncio
    async def test_stream_async_uses_messages_not_beta(self):
        """Verify stream_async() uses client.messages.create with stream=True."""
        client, llm_config = self._make_fixtures()

        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_stream = AsyncMock()
            mock_anthropic_client.messages.create.return_value = mock_stream
            mock_get_client.return_value = mock_anthropic_client

            result = await client.stream_async({"model": "MiniMax-M2.1"}, llm_config)

            # Verify messages.create was called (not beta.messages.create)
            mock_anthropic_client.messages.create.assert_called_once()
            # The raw SDK stream object is returned unchanged.
            assert result is mock_stream
            # Verify stream=True was set
            call_kwargs = mock_anthropic_client.messages.create.call_args[1]
            assert call_kwargs.get("stream") is True
|
||||
@@ -11,6 +11,7 @@ from letta.schemas.providers import (
|
||||
GoogleAIProvider,
|
||||
GoogleVertexProvider,
|
||||
GroqProvider,
|
||||
MiniMaxProvider,
|
||||
OllamaProvider,
|
||||
OpenAIProvider,
|
||||
SGLangProvider,
|
||||
@@ -131,6 +132,32 @@ async def test_groq():
|
||||
assert models[0].handle == f"{provider.name}/{models[0].model}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_minimax():
    """Test MiniMax provider - uses hardcoded model list, no API key required."""
    provider = MiniMaxProvider(name="minimax")
    models = await provider.list_llm_models_async()

    # Exactly the three published models: M2.1, M2.1-lightning, M2.
    assert len(models) == 3
    assert {m.model for m in models} == {
        "MiniMax-M2.1",
        "MiniMax-M2.1-lightning",
        "MiniMax-M2",
    }

    for entry in models:
        # Handle format is "<provider name>/<model name>".
        assert entry.handle == f"{provider.name}/{entry.model}"
        # Every MiniMax model shares a 200K context window and 128K max output.
        assert entry.context_window == 200000
        assert entry.max_tokens == 128000
        # All are served through the minimax endpoint type.
        assert entry.model_endpoint_type == "minimax"
|
||||
|
||||
|
||||
@pytest.mark.skipif(model_settings.azure_api_key is None, reason="Only run if AZURE_API_KEY is set.")
|
||||
@pytest.mark.asyncio
|
||||
async def test_azure():
|
||||
|
||||
Reference in New Issue
Block a user