feat: add MiniMax provider support (#9095)
* feat: add MiniMax provider support Add MiniMax as a new LLM provider using their Anthropic-compatible API. Key implementation details: - Uses standard messages API (not beta) - MiniMax supports thinking blocks natively - Base URL: https://api.minimax.io/anthropic - Models: MiniMax-M2.1, MiniMax-M2.1-lightning, MiniMax-M2 (all 200K context, 128K output) - Temperature clamped to valid range (0.0, 1.0] - All M2.x models treated as reasoning models (support interleaved thinking) Files added: - letta/schemas/providers/minimax.py - MiniMax provider schema - letta/llm_api/minimax_client.py - Client extending AnthropicClient - tests/test_minimax_client.py - Unit tests (13 tests) - tests/model_settings/minimax-m2.1.json - Integration test config 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * chore: regenerate API spec with MiniMax provider 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * chore: use MiniMax-M2.1-lightning for CI tests Switch to the faster/cheaper lightning model variant for integration tests. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * chore: add MINIMAX_API_KEY to deploy-core command Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com> * chore: regenerate web openapi spec with MiniMax provider Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com> 🐾 Generated with [Letta Code](https://letta.com) --------- Co-authored-by: Letta <noreply@letta.com> Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>
This commit is contained in:
committed by
Caren Thomas
parent
221b4e6279
commit
adab8cd9b5
@@ -35719,6 +35719,7 @@
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together",
|
||||
"bedrock",
|
||||
@@ -38206,6 +38207,7 @@
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together",
|
||||
"bedrock",
|
||||
@@ -39822,6 +39824,7 @@
|
||||
"hugging-face",
|
||||
"letta",
|
||||
"lmstudio_openai",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"ollama",
|
||||
"openai",
|
||||
|
||||
@@ -93,6 +93,13 @@ class LLMClient:
|
||||
put_inner_thoughts_first=put_inner_thoughts_first,
|
||||
actor=actor,
|
||||
)
|
||||
case ProviderType.minimax:
|
||||
from letta.llm_api.minimax_client import MiniMaxClient
|
||||
|
||||
return MiniMaxClient(
|
||||
put_inner_thoughts_first=put_inner_thoughts_first,
|
||||
actor=actor,
|
||||
)
|
||||
case ProviderType.deepseek:
|
||||
from letta.llm_api.deepseek_client import DeepseekClient
|
||||
|
||||
|
||||
188
letta/llm_api/minimax_client.py
Normal file
188
letta/llm_api/minimax_client.py
Normal file
@@ -0,0 +1,188 @@
|
||||
import os
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import anthropic
|
||||
from anthropic import AsyncStream
|
||||
from anthropic.types import Message as AnthropicMessage, RawMessageStreamEvent
|
||||
|
||||
from letta.llm_api.anthropic_client import AnthropicClient
|
||||
from letta.log import get_logger
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.schemas.enums import AgentType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.message import Message as PydanticMessage
|
||||
from letta.settings import model_settings
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# MiniMax Anthropic-compatible API base URL
|
||||
MINIMAX_BASE_URL = "https://api.minimax.io/anthropic"
|
||||
|
||||
|
||||
class MiniMaxClient(AnthropicClient):
    """
    MiniMax LLM client using Anthropic-compatible API.

    Key differences from AnthropicClient:
    - Uses standard messages API (client.messages.create), NOT beta API
    - Thinking blocks are natively supported without beta headers
    - Temperature must be in range (0.0, 1.0]
    - Some Anthropic params are ignored: top_k, stop_sequences, service_tier, etc.
      (they are left in the request; MiniMax silently ignores them)

    Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

    def _build_sdk_client(
        self, api_key: Optional[str], async_client: bool
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Construct an (Async)Anthropic SDK client pointed at the MiniMax base URL.

        Consolidates the four near-identical construction branches that were
        previously duplicated across the sync/async getter methods.

        Args:
            api_key: BYOK override key, or None to fall back to settings/env.
            async_client: When True, build ``anthropic.AsyncAnthropic``.

        Returns:
            A configured Anthropic SDK client targeting MINIMAX_BASE_URL.
        """
        if not api_key:
            # No BYOK override: fall back to server settings, then the env var.
            api_key = model_settings.minimax_api_key or os.environ.get("MINIMAX_API_KEY")
        client_cls = anthropic.AsyncAnthropic if async_client else anthropic.Anthropic
        if api_key:
            return client_cls(api_key=api_key, base_url=MINIMAX_BASE_URL)
        # Omit api_key entirely so the SDK applies its own key resolution.
        return client_cls(base_url=MINIMAX_BASE_URL)

    @trace_method
    def _get_anthropic_client(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create Anthropic client configured for MiniMax API."""
        api_key, _, _ = self.get_byok_overrides(llm_config)
        return self._build_sdk_client(api_key, async_client)

    @trace_method
    async def _get_anthropic_client_async(
        self, llm_config: LLMConfig, async_client: bool = False
    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Create Anthropic client configured for MiniMax API (async BYOK lookup)."""
        api_key, _, _ = await self.get_byok_overrides_async(llm_config)
        return self._build_sdk_client(api_key, async_client)

    @trace_method
    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Synchronous request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = self._get_anthropic_client(llm_config, async_client=False)

        # MiniMax uses client.messages.create() - NOT client.beta.messages.create()
        # Thinking blocks are natively supported without beta headers
        response: AnthropicMessage = client.messages.create(**request_data)
        return response.model_dump()

    @trace_method
    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
        """
        Asynchronous request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = await self._get_anthropic_client_async(llm_config, async_client=True)

        # MiniMax uses client.messages.create() - NOT client.beta.messages.create()
        try:
            response: AnthropicMessage = await client.messages.create(**request_data)
            return response.model_dump()
        except ValueError as e:
            # Handle streaming fallback if needed (similar to Anthropic client)
            if "streaming is required" in str(e).lower():
                logger.warning(
                    "[MiniMax] Non-streaming request rejected. Falling back to streaming mode. Error: %s",
                    str(e),
                )
                return await self._request_via_streaming(request_data, llm_config, betas=[])
            raise

    @trace_method
    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[RawMessageStreamEvent]:
        """
        Asynchronous streaming request to MiniMax API.

        Uses standard messages API (NOT beta) - MiniMax natively supports thinking blocks.
        """
        client = await self._get_anthropic_client_async(llm_config, async_client=True)
        request_data["stream"] = True

        # MiniMax uses client.messages.create() - NOT client.beta.messages.create()
        try:
            return await client.messages.create(**request_data)
        except Exception as e:
            # Lazy %-args logging; bare `raise` preserves the original traceback.
            logger.error("Error streaming MiniMax request: %s", e)
            raise

    @trace_method
    def build_request_data(
        self,
        agent_type: AgentType,
        messages: List[PydanticMessage],
        llm_config: LLMConfig,
        tools: Optional[List[dict]] = None,
        force_tool_call: Optional[str] = None,
        requires_subsequent_tool_call: bool = False,
        tool_return_truncation_chars: Optional[int] = None,
    ) -> dict:
        """
        Build request data for MiniMax API.

        Inherits most logic from AnthropicClient, with one MiniMax-specific
        adjustment: the temperature is clamped into MiniMax's valid range
        (0.0, 1.0]. Anthropic-only parameters are left in the payload since
        MiniMax silently ignores them.
        """
        data = super().build_request_data(
            agent_type,
            messages,
            llm_config,
            tools,
            force_tool_call,
            requires_subsequent_tool_call,
            tool_return_truncation_chars,
        )

        # MiniMax temperature range is (0.0, 1.0], recommended value: 1
        temp = data.get("temperature")
        if temp is not None:
            if temp <= 0:
                data["temperature"] = 0.01  # Minimum valid value (exclusive of 0)
                logger.warning("[MiniMax] Temperature %s is invalid. Clamped to 0.01.", temp)
            elif temp > 1.0:
                data["temperature"] = 1.0  # Maximum valid value
                logger.warning("[MiniMax] Temperature %s is invalid. Clamped to 1.0.", temp)

        return data

    def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
        """
        All MiniMax M2.x models support native interleaved thinking.

        Unlike Anthropic where only certain models (Claude 3.7+) support extended thinking,
        all MiniMax models natively support thinking blocks without beta headers.
        """
        return True

    def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
        """MiniMax models support all tool choice modes."""
        return False

    def supports_structured_output(self, llm_config: LLMConfig) -> bool:
        """MiniMax doesn't currently advertise structured output support."""
        return False
|
||||
@@ -63,6 +63,7 @@ class ProviderType(str, Enum):
|
||||
hugging_face = "hugging-face"
|
||||
letta = "letta"
|
||||
lmstudio_openai = "lmstudio_openai"
|
||||
minimax = "minimax"
|
||||
mistral = "mistral"
|
||||
ollama = "ollama"
|
||||
openai = "openai"
|
||||
|
||||
@@ -43,6 +43,7 @@ class LLMConfig(BaseModel):
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together", # completions endpoint
|
||||
"bedrock",
|
||||
|
||||
@@ -42,6 +42,7 @@ class Model(LLMConfig, ModelBase):
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
"hugging-face",
|
||||
"minimax",
|
||||
"mistral",
|
||||
"together",
|
||||
"bedrock",
|
||||
|
||||
@@ -12,13 +12,14 @@ from .google_vertex import GoogleVertexProvider
|
||||
from .groq import GroqProvider
|
||||
from .letta import LettaProvider
|
||||
from .lmstudio import LMStudioOpenAIProvider
|
||||
from .minimax import MiniMaxProvider
|
||||
from .mistral import MistralProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .openai import OpenAIProvider
|
||||
from .openrouter import OpenRouterProvider
|
||||
from .sglang import SGLangProvider
|
||||
from .together import TogetherProvider
|
||||
from .vllm import VLLMProvider
|
||||
from .sglang import SGLangProvider
|
||||
from .xai import XAIProvider
|
||||
from .zai import ZAIProvider
|
||||
|
||||
@@ -41,6 +42,7 @@ __all__ = [
|
||||
"GroqProvider",
|
||||
"LettaProvider",
|
||||
"LMStudioOpenAIProvider",
|
||||
"MiniMaxProvider",
|
||||
"MistralProvider",
|
||||
"OllamaProvider",
|
||||
"OpenAIProvider",
|
||||
|
||||
87
letta/schemas/providers/minimax.py
Normal file
87
letta/schemas/providers/minimax.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.enums import ProviderCategory, ProviderType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.providers.base import Provider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# MiniMax model specifications from official documentation
|
||||
# https://platform.minimax.io/docs/guides/models-intro
|
||||
# Static catalog of MiniMax models: the MiniMax API exposes no model-listing
# endpoint, so provider code builds LLM configs from this hardcoded table.
# Specs taken from https://platform.minimax.io/docs/guides/models-intro
MODEL_LIST = [
    {
        # "name" is the model identifier accepted by the Anthropic-compatible API;
        # "context_window" and "max_output" are in tokens.
        "name": "MiniMax-M2.1",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Polyglot code mastery, precision code refactoring (~60 tps)",
    },
    {
        "name": "MiniMax-M2.1-lightning",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Same performance as M2.1, significantly faster (~100 tps)",
    },
    {
        "name": "MiniMax-M2",
        "context_window": 200000,
        "max_output": 128000,
        "description": "Agentic capabilities, advanced reasoning",
    },
]
|
||||
|
||||
|
||||
class MiniMaxProvider(Provider):
    """
    MiniMax provider using Anthropic-compatible API.

    MiniMax models support native interleaved thinking without requiring beta headers.
    The API uses the standard messages endpoint (not beta).

    Documentation: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

    provider_type: Literal[ProviderType.minimax] = Field(ProviderType.minimax, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the MiniMax API.", deprecated=True)
    base_url: str = Field("https://api.minimax.io/anthropic", description="Base URL for the MiniMax Anthropic-compatible API.")

    def get_default_max_output_tokens(self, model_name: str) -> int:
        """Get the default max output tokens for MiniMax models."""
        # Every entry in MODEL_LIST caps output at 128K tokens.
        return 128000

    def get_model_context_window_size(self, model_name: str) -> int | None:
        """Get the context window size for a MiniMax model."""
        # Look the model up in the static spec table; every current model
        # shares a 200K window, which doubles as the fallback for unknown names.
        matches = (spec["context_window"] for spec in MODEL_LIST if spec["name"] == model_name)
        return next(matches, 200000)

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """
        Return available MiniMax models.

        MiniMax doesn't have a models listing endpoint, so we use a hardcoded list.
        """
        return [
            LLMConfig(
                model=spec["name"],
                model_endpoint_type="minimax",
                model_endpoint=self.base_url,
                context_window=spec["context_window"],
                handle=self.get_handle(spec["name"]),
                max_tokens=spec["max_output"],
                # MiniMax models support native thinking, similar to Claude's extended thinking
                put_inner_thoughts_in_kwargs=True,
                provider_name=self.name,
                provider_category=self.provider_category,
            )
            for spec in MODEL_LIST
        ]
|
||||
@@ -150,6 +150,9 @@ class ModelSettings(BaseSettings):
|
||||
# groq
|
||||
groq_api_key: Optional[str] = None
|
||||
|
||||
# minimax
|
||||
minimax_api_key: Optional[str] = None
|
||||
|
||||
# Bedrock
|
||||
aws_access_key_id: Optional[str] = None
|
||||
aws_secret_access_key: Optional[str] = None
|
||||
|
||||
@@ -189,6 +189,7 @@ all_configs = [
|
||||
"openai-gpt-5.json", # TODO: GPT-5 disabled for now, it sends HiddenReasoningMessages which break the tests.
|
||||
"claude-4-5-sonnet.json",
|
||||
"gemini-2.5-pro.json",
|
||||
"minimax-m2.1-lightning.json",
|
||||
]
|
||||
|
||||
reasoning_configs = [
|
||||
@@ -243,6 +244,10 @@ def is_reasoner_model(model_handle: str, model_settings: dict) -> bool:
|
||||
elif provider_type in ["google_vertex", "google_ai"]:
|
||||
return model.startswith("gemini-2.5-flash") or model.startswith("gemini-2.5-pro") or model.startswith("gemini-3")
|
||||
|
||||
# MiniMax reasoning models (all M2.x models support native interleaved thinking)
|
||||
elif provider_type == "minimax":
|
||||
return model.startswith("MiniMax-M2")
|
||||
|
||||
return False
|
||||
|
||||
|
||||
|
||||
9
tests/model_settings/minimax-m2.1-lightning.json
Normal file
9
tests/model_settings/minimax-m2.1-lightning.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"handle": "minimax/MiniMax-M2.1-lightning",
|
||||
"model_settings": {
|
||||
"provider_type": "minimax",
|
||||
"temperature": 1.0,
|
||||
"max_output_tokens": 4096,
|
||||
"parallel_tool_calls": false
|
||||
}
|
||||
}
|
||||
270
tests/test_minimax_client.py
Normal file
270
tests/test_minimax_client.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""Unit tests for MiniMax client."""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from letta.llm_api.minimax_client import MINIMAX_BASE_URL, MiniMaxClient
|
||||
from letta.schemas.enums import AgentType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
|
||||
|
||||
class TestMiniMaxClient:
    """Tests for MiniMaxClient capability flags and SDK client construction."""

    def setup_method(self):
        """Set up test fixtures."""
        self.client = MiniMaxClient(put_inner_thoughts_first=True)
        self.llm_config = self._config_for("MiniMax-M2.1")

    def _config_for(self, model_name: str) -> LLMConfig:
        """Build a minimal MiniMax LLMConfig for the given model name."""
        return LLMConfig(
            model=model_name,
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
        )

    def test_is_reasoning_model_always_true(self):
        """All MiniMax models support native interleaved thinking."""
        assert self.client.is_reasoning_model(self.llm_config) is True

        # Test with different models
        for model_name in ["MiniMax-M2.1", "MiniMax-M2.1-lightning", "MiniMax-M2"]:
            assert self.client.is_reasoning_model(self._config_for(model_name)) is True

    def test_requires_auto_tool_choice(self):
        """MiniMax supports all tool choice modes."""
        assert self.client.requires_auto_tool_choice(self.llm_config) is False

    def test_supports_structured_output(self):
        """MiniMax doesn't currently advertise structured output support."""
        assert self.client.supports_structured_output(self.llm_config) is False

    @patch("letta.llm_api.minimax_client.model_settings")
    def test_get_anthropic_client_with_api_key(self, mock_settings):
        """Sync SDK client is built with the settings API key and MiniMax base URL."""
        mock_settings.minimax_api_key = "test-api-key"

        with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic:
            mock_anthropic.Anthropic.return_value = MagicMock()

            # Mock BYOK to return no override so the settings key is used
            self.client.get_byok_overrides = MagicMock(return_value=(None, None, None))

            client = self.client._get_anthropic_client(self.llm_config, async_client=False)

            mock_anthropic.Anthropic.assert_called_once_with(
                api_key="test-api-key",
                base_url=MINIMAX_BASE_URL,
            )
            # Fixed: previously the returned client was never asserted on.
            assert client is mock_anthropic.Anthropic.return_value

    @patch("letta.llm_api.minimax_client.model_settings")
    def test_get_anthropic_client_async(self, mock_settings):
        """async_client=True on the sync getter builds an AsyncAnthropic SDK client."""
        # NOTE(review): despite the name, this exercises _get_anthropic_client's
        # async_client branch, not the coroutine _get_anthropic_client_async.
        mock_settings.minimax_api_key = "test-api-key"

        with patch("letta.llm_api.minimax_client.anthropic") as mock_anthropic:
            mock_anthropic.AsyncAnthropic.return_value = MagicMock()

            # Mock BYOK to return no override
            self.client.get_byok_overrides = MagicMock(return_value=(None, None, None))

            client = self.client._get_anthropic_client(self.llm_config, async_client=True)

            mock_anthropic.AsyncAnthropic.assert_called_once_with(
                api_key="test-api-key",
                base_url=MINIMAX_BASE_URL,
            )
            # Fixed: previously the returned client was never asserted on.
            assert client is mock_anthropic.AsyncAnthropic.return_value
|
||||
|
||||
|
||||
class TestMiniMaxClientTemperatureClamping:
    """Tests for temperature clamping in build_request_data."""

    def setup_method(self):
        """Set up test fixtures."""
        self.client = MiniMaxClient(put_inner_thoughts_first=True)

    def _build_with_temperature(self, temperature) -> dict:
        """Run the real build_request_data with the parent mocked to emit *temperature*.

        Mocking only the AnthropicClient parent (not MiniMaxClient's override)
        lets each test exercise the actual clamping logic.
        """
        config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
            temperature=temperature,
        )
        with patch.object(MiniMaxClient.__bases__[0], "build_request_data") as mock_parent:
            mock_parent.return_value = {"temperature": temperature, "model": "MiniMax-M2.1"}
            return self.client.build_request_data(
                agent_type=AgentType.letta_v1_agent,
                messages=[],
                llm_config=config,
            )

    def test_temperature_clamping_is_applied(self):
        """build_request_data always yields a temperature inside MiniMax's (0, 1] range."""
        # Fixed: this test previously mocked build_request_data itself, making
        # the assertion tautological; it now runs the real clamping code.
        for raw in (-0.5, 0, 0.5, 1.0, 2.0):
            clamped = self._build_with_temperature(raw)["temperature"]
            assert 0 < clamped <= 1.0

    def test_temperature_zero_clamped(self):
        """Test that temperature=0 is clamped to 0.01."""
        assert self._build_with_temperature(0)["temperature"] == 0.01

    def test_temperature_negative_clamped(self):
        """Test that negative temperature is clamped to 0.01."""
        assert self._build_with_temperature(-0.5)["temperature"] == 0.01

    def test_temperature_above_one_clamped(self):
        """Test that temperature > 1.0 is clamped to 1.0."""
        assert self._build_with_temperature(1.5)["temperature"] == 1.0

    def test_temperature_valid_not_modified(self):
        """Test that valid temperature values are not modified."""
        assert self._build_with_temperature(0.7)["temperature"] == 0.7
|
||||
|
||||
|
||||
class TestMiniMaxClientUsesNonBetaAPI:
    """Tests to verify MiniMax client uses non-beta API."""

    @staticmethod
    def _make_fixtures():
        """Return a fresh (client, llm_config) pair for a test."""
        client = MiniMaxClient(put_inner_thoughts_first=True)
        llm_config = LLMConfig(
            model="MiniMax-M2.1",
            model_endpoint_type="minimax",
            model_endpoint=MINIMAX_BASE_URL,
            context_window=200000,
        )
        return client, llm_config

    def test_request_uses_messages_not_beta(self):
        """Verify request() uses client.messages.create and returns the dumped response."""
        client, llm_config = self._make_fixtures()

        with patch.object(client, "_get_anthropic_client") as mock_get_client:
            mock_anthropic_client = MagicMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client

            result = client.request({"model": "MiniMax-M2.1"}, llm_config)

            # Verify messages.create was called (not beta.messages.create)
            mock_anthropic_client.messages.create.assert_called_once()
            # Fixed: a MagicMock always has a `beta` attribute, so the previous
            # `not hasattr(...) or ...` left side was dead; check .called directly.
            assert not mock_anthropic_client.beta.messages.create.called
            # Fixed: previously `result` was computed but never asserted on.
            assert result == {"content": [{"type": "text", "text": "Hello"}]}

    @pytest.mark.asyncio
    async def test_request_async_uses_messages_not_beta(self):
        """Verify request_async() uses client.messages.create and returns the dumped response."""
        client, llm_config = self._make_fixtures()

        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_response = MagicMock()
            mock_response.model_dump.return_value = {"content": [{"type": "text", "text": "Hello"}]}
            mock_anthropic_client.messages.create.return_value = mock_response
            mock_get_client.return_value = mock_anthropic_client

            result = await client.request_async({"model": "MiniMax-M2.1"}, llm_config)

            # Verify messages.create was called (not beta.messages.create)
            mock_anthropic_client.messages.create.assert_called_once()
            # Fixed: previously `result` was computed but never asserted on.
            assert result == {"content": [{"type": "text", "text": "Hello"}]}

    @pytest.mark.asyncio
    async def test_stream_async_uses_messages_not_beta(self):
        """Verify stream_async() uses client.messages.create with stream=True."""
        client, llm_config = self._make_fixtures()

        with patch.object(client, "_get_anthropic_client_async") as mock_get_client:
            mock_anthropic_client = AsyncMock()
            mock_stream = AsyncMock()
            mock_anthropic_client.messages.create.return_value = mock_stream
            mock_get_client.return_value = mock_anthropic_client

            result = await client.stream_async({"model": "MiniMax-M2.1"}, llm_config)

            # Verify messages.create was called (not beta.messages.create)
            mock_anthropic_client.messages.create.assert_called_once()
            # The raw SDK stream object is returned unchanged.
            assert result is mock_stream
            # Verify stream=True was set
            call_kwargs = mock_anthropic_client.messages.create.call_args[1]
            assert call_kwargs.get("stream") is True
|
||||
@@ -11,6 +11,7 @@ from letta.schemas.providers import (
|
||||
GoogleAIProvider,
|
||||
GoogleVertexProvider,
|
||||
GroqProvider,
|
||||
MiniMaxProvider,
|
||||
OllamaProvider,
|
||||
OpenAIProvider,
|
||||
SGLangProvider,
|
||||
@@ -131,6 +132,32 @@ async def test_groq():
|
||||
assert models[0].handle == f"{provider.name}/{models[0].model}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_minimax():
    """Test MiniMax provider - uses hardcoded model list, no API key required."""
    provider = MiniMaxProvider(name="minimax")
    models = await provider.list_llm_models_async()

    # Exactly the three published models: M2.1, M2.1-lightning, M2.
    assert len(models) == 3
    assert {m.model for m in models} == {
        "MiniMax-M2.1",
        "MiniMax-M2.1-lightning",
        "MiniMax-M2",
    }

    for entry in models:
        # Handle format is "<provider name>/<model name>".
        assert entry.handle == f"{provider.name}/{entry.model}"
        # Every MiniMax model shares a 200K context window and 128K max output.
        assert entry.context_window == 200000
        assert entry.max_tokens == 128000
        # All are served through the minimax endpoint type.
        assert entry.model_endpoint_type == "minimax"
|
||||
|
||||
|
||||
@pytest.mark.skipif(model_settings.azure_api_key is None, reason="Only run if AZURE_API_KEY is set.")
|
||||
@pytest.mark.asyncio
|
||||
async def test_azure():
|
||||
|
||||
Reference in New Issue
Block a user