letta-server/tests/test_providers.py
Sarah Wooders b34ad43691 feat: add minimax byok to ui (#9101)
* fix: patch minimax

* feat: add frontend changes for minimax

* add logo, fix backend

* better check for is minimax

* more references fixed for minimax

* start reverting unnecessary changes

* revert backend changes, just ui

* fix minimax fully

* fix test

* add key to deploy action

---------

Co-authored-by: Ari Webb <ari@letta.com>
Co-authored-by: Ari Webb <arijwebb@gmail.com>
2026-01-29 12:44:04 -08:00


from typing import Literal, Optional

import pytest

from letta.schemas.enums import AgentType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers import (
    AnthropicProvider,
    AzureProvider,
    DeepSeekProvider,
    GoogleAIProvider,
    GoogleVertexProvider,
    GroqProvider,
    MiniMaxProvider,
    OllamaProvider,
    OpenAIProvider,
    SGLangProvider,
    TogetherProvider,
    VLLMProvider,
    ZAIProvider,
)
from letta.schemas.secret import Secret
from letta.settings import model_settings


def test_openai():
    provider = OpenAIProvider(
        name="openai",
        api_key_enc=Secret.from_plaintext(model_settings.openai_api_key),
        base_url=model_settings.openai_api_base,
    )

    models = provider.list_llm_models()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = provider.list_embedding_models()
    assert len(embedding_models) > 0
    assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


@pytest.mark.asyncio
async def test_openai_async():
    provider = OpenAIProvider(
        name="openai",
        api_key_enc=Secret.from_plaintext(model_settings.openai_api_key),
        base_url=model_settings.openai_api_base,
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = await provider.list_embedding_models_async()
    assert len(embedding_models) > 0
    assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


@pytest.mark.asyncio
async def test_anthropic():
    provider = AnthropicProvider(
        name="anthropic",
        api_key_enc=Secret.from_plaintext(model_settings.anthropic_api_key),
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"


@pytest.mark.asyncio
async def test_googleai():
    api_key = model_settings.gemini_api_key
    assert api_key is not None

    provider = GoogleAIProvider(
        name="google_ai",
        api_key_enc=Secret.from_plaintext(api_key),
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = await provider.list_embedding_models_async()
    assert len(embedding_models) > 0
    assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


@pytest.mark.asyncio
async def test_google_vertex():
    provider = GoogleVertexProvider(
        name="google_vertex",
        google_cloud_project=model_settings.google_cloud_project,
        google_cloud_location=model_settings.google_cloud_location,
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = await provider.list_embedding_models_async()
    assert len(embedding_models) > 0
    assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


@pytest.mark.skipif(model_settings.deepseek_api_key is None, reason="Only run if DEEPSEEK_API_KEY is set.")
@pytest.mark.asyncio
async def test_deepseek():
    provider = DeepSeekProvider(name="deepseek", api_key_enc=Secret.from_plaintext(model_settings.deepseek_api_key))

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"


@pytest.mark.skipif(model_settings.zai_api_key is None, reason="Only run if ZAI_API_KEY is set.")
@pytest.mark.asyncio
async def test_zai():
    provider = ZAIProvider(
        name="zai",
        api_key_enc=Secret.from_plaintext(model_settings.zai_api_key),
        base_url=model_settings.zai_base_url,
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"


@pytest.mark.skipif(model_settings.groq_api_key is None, reason="Only run if GROQ_API_KEY is set.")
@pytest.mark.asyncio
async def test_groq():
    provider = GroqProvider(
        name="groq",
        api_key_enc=Secret.from_plaintext(model_settings.groq_api_key),
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"


@pytest.mark.asyncio
async def test_minimax():
    """Test MiniMax provider - uses hardcoded model list, no API key required."""
    provider = MiniMaxProvider(name="minimax")
    models = await provider.list_llm_models_async()

    # Should have exactly 3 models: M2.1, M2.1-lightning, M2
    assert len(models) == 3

    # Verify model properties
    model_names = {m.model for m in models}
    assert "MiniMax-M2.1" in model_names
    assert "MiniMax-M2.1-lightning" in model_names
    assert "MiniMax-M2" in model_names

    # Verify handle format
    for model in models:
        assert model.handle == f"{provider.name}/{model.model}"
        # All MiniMax models have a 200K context window
        assert model.context_window == 200000
        # All MiniMax models have a 128K max output
        assert model.max_tokens == 128000
        # MiniMax uses its own "minimax" endpoint type (an Anthropic-compatible API)
        assert model.model_endpoint_type == "minimax"
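

# A minimal companion sketch (an addition, not from the original file): since the
# MiniMax listing is hardcoded, handles can serve as stable lookup keys. This relies
# only on MiniMaxProvider.list_llm_models_async and the handle format asserted
# above; the test name itself is hypothetical.
@pytest.mark.asyncio
async def test_minimax_handle_lookup():
    provider = MiniMaxProvider(name="minimax")
    models = await provider.list_llm_models_async()

    # Handles follow the f"{provider.name}/{model.model}" convention
    by_handle = {m.handle: m for m in models}
    assert by_handle["minimax/MiniMax-M2"].context_window == 200000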


@pytest.mark.skipif(model_settings.azure_api_key is None, reason="Only run if AZURE_API_KEY is set.")
@pytest.mark.asyncio
async def test_azure():
    provider = AzureProvider(
        name="azure",
        api_key_enc=Secret.from_plaintext(model_settings.azure_api_key),
        base_url=model_settings.azure_base_url,
        api_version=model_settings.azure_api_version,
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = await provider.list_embedding_models_async()
    assert len(embedding_models) > 0
    assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


@pytest.mark.skipif(model_settings.together_api_key is None, reason="Only run if TOGETHER_API_KEY is set.")
@pytest.mark.asyncio
async def test_together():
    provider = TogetherProvider(
        name="together",
        api_key_enc=Secret.from_plaintext(model_settings.together_api_key),
        default_prompt_formatter=model_settings.default_prompt_formatter,
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0

    # Handle may be different from raw model name due to LLM_HANDLE_OVERRIDES
    assert models[0].handle.startswith(f"{provider.name}/")
    # Verify the handle is properly constructed via the get_handle method
    assert models[0].handle == provider.get_handle(models[0].model)

    # TODO: We don't have embedding models on Together for CI
    # embedding_models = provider.list_embedding_models()
    # assert len(embedding_models) > 0
    # assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


# ===== Local Models =====


@pytest.mark.skipif(model_settings.ollama_base_url is None, reason="Only run if OLLAMA_BASE_URL is set.")
@pytest.mark.asyncio
async def test_ollama():
    provider = OllamaProvider(
        name="ollama",
        base_url=model_settings.ollama_base_url,
        default_prompt_formatter=model_settings.default_prompt_formatter,
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = await provider.list_embedding_models_async()
    assert len(embedding_models) > 0
    assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


@pytest.mark.skipif(model_settings.vllm_api_base is None, reason="Only run if VLLM_API_BASE is set.")
@pytest.mark.asyncio
async def test_vllm():
    provider = VLLMProvider(name="vllm", base_url=model_settings.vllm_api_base)

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = await provider.list_embedding_models_async()
    assert len(embedding_models) == 0  # embedding models currently not supported by vLLM


@pytest.mark.skipif(model_settings.sglang_api_base is None, reason="Only run if SGLANG_API_BASE is set.")
@pytest.mark.asyncio
async def test_sglang():
    provider = SGLangProvider(name="sglang", base_url=model_settings.sglang_api_base)

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"

    embedding_models = await provider.list_embedding_models_async()
    assert len(embedding_models) == 0  # embedding models currently not supported by SGLang


# TODO: Add back in, difficulty adding this to CI properly, need boto credentials
# def test_anthropic_bedrock():
#     from letta.settings import model_settings
#
#     provider = AnthropicBedrockProvider(name="bedrock", aws_region=model_settings.aws_region)
#     models = provider.list_llm_models()
#     assert len(models) > 0
#     assert models[0].handle == f"{provider.name}/{models[0].model}"
#
#     embedding_models = provider.list_embedding_models()
#     assert len(embedding_models) > 0
#     assert embedding_models[0].handle == f"{provider.name}/{embedding_models[0].embedding_model}"


@pytest.mark.asyncio
async def test_custom_anthropic():
    provider = AnthropicProvider(
        name="custom_anthropic",
        api_key_enc=Secret.from_plaintext(model_settings.anthropic_api_key),
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    assert models[0].handle == f"{provider.name}/{models[0].model}"


def test_provider_context_window():
    """Test that providers implement context window methods correctly."""
    provider = OpenAIProvider(
        name="openai",
        api_key_enc=Secret.from_plaintext(model_settings.openai_api_key),
        base_url=model_settings.openai_api_base,
    )

    # Test the sync context window method (the async variant is tested below)
    context_window = provider.get_model_context_window("gpt-4")
    assert context_window is not None
    assert isinstance(context_window, int)
    assert context_window > 0


@pytest.mark.asyncio
async def test_provider_context_window_async():
    """Test that providers implement async context window methods correctly."""
    provider = OpenAIProvider(
        name="openai",
        api_key_enc=Secret.from_plaintext(model_settings.openai_api_key),
        base_url=model_settings.openai_api_base,
    )

    context_window = await provider.get_model_context_window_async("gpt-4")
    assert context_window is not None
    assert isinstance(context_window, int)
    assert context_window > 0


def test_provider_handle_generation():
    """Test that providers generate handles correctly."""
    provider = OpenAIProvider(
        name="test_openai",
        api_key_enc=Secret.from_plaintext("test_key"),
        base_url="https://api.openai.com/v1",
    )

    # Test LLM handle
    llm_handle = provider.get_handle("gpt-4")
    assert llm_handle == "test_openai/gpt-4"

    # Test embedding handle
    embedding_handle = provider.get_handle("text-embedding-ada-002", is_embedding=True)
    assert embedding_handle == "test_openai/text-embedding-ada-002"
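

# Hedged round-trip sketch (added for illustration; the test name is hypothetical):
# handles follow the "<provider name>/<model>" convention asserted throughout this
# file, so a handle can be split back into its parts, the same way the reasoning
# tests below do with handle.split("/").
def test_handle_round_trip():
    provider = OpenAIProvider(
        name="test_openai",
        api_key_enc=Secret.from_plaintext("test_key"),
        base_url="https://api.openai.com/v1",
    )

    handle = provider.get_handle("gpt-4")
    prefix, model = handle.split("/", 1)
    assert prefix == provider.name
    assert model == "gpt-4"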


def test_provider_casting():
    """Test that providers can be cast to their specific subtypes."""
    from letta.schemas.enums import ProviderCategory, ProviderType
    from letta.schemas.providers.base import Provider

    base_provider = Provider(
        name="test_provider",
        provider_type=ProviderType.openai,
        provider_category=ProviderCategory.base,
        api_key_enc=Secret.from_plaintext("test_key"),
        base_url="https://api.openai.com/v1",
    )

    cast_provider = base_provider.cast_to_subtype()
    assert isinstance(cast_provider, OpenAIProvider)
    assert cast_provider.name == "test_provider"
    assert cast_provider.api_key_enc.get_plaintext() == "test_key"
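

# Sketch of the same casting round-trip with field preservation (an illustrative
# addition, not from the original file): it assumes only the Provider fields and
# cast_to_subtype behavior exercised in test_provider_casting above, plus the
# assumption that base_url survives the cast like name and the API key do.
def test_provider_casting_preserves_fields():
    from letta.schemas.enums import ProviderCategory, ProviderType
    from letta.schemas.providers.base import Provider

    base_provider = Provider(
        name="test_provider",
        provider_type=ProviderType.openai,
        provider_category=ProviderCategory.base,
        api_key_enc=Secret.from_plaintext("test_key"),
        base_url="https://api.openai.com/v1",
    )

    cast_provider = base_provider.cast_to_subtype()
    # base_url should survive the cast along with name and key (assumed)
    assert cast_provider.base_url == "https://api.openai.com/v1"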


@pytest.mark.asyncio
async def test_provider_embedding_models_consistency():
    """Test that providers return consistent embedding model formats."""
    provider = OpenAIProvider(
        name="openai",
        api_key_enc=Secret.from_plaintext(model_settings.openai_api_key),
        base_url=model_settings.openai_api_base,
    )

    embedding_models = await provider.list_embedding_models_async()
    if embedding_models:  # Only test if the provider supports embedding models
        for model in embedding_models:
            assert hasattr(model, "embedding_model")
            assert hasattr(model, "embedding_endpoint_type")
            assert hasattr(model, "embedding_endpoint")
            assert hasattr(model, "embedding_dim")
            assert hasattr(model, "handle")
            assert model.handle.startswith(f"{provider.name}/")


@pytest.mark.asyncio
async def test_provider_llm_models_consistency():
    """Test that providers return consistent LLM model formats."""
    provider = OpenAIProvider(
        name="openai",
        api_key_enc=Secret.from_plaintext(model_settings.openai_api_key),
        base_url=model_settings.openai_api_base,
    )

    models = await provider.list_llm_models_async()
    assert len(models) > 0
    for model in models:
        assert hasattr(model, "model")
        assert hasattr(model, "model_endpoint_type")
        assert hasattr(model, "model_endpoint")
        assert hasattr(model, "context_window")
        assert hasattr(model, "handle")
        assert hasattr(model, "provider_name")
        assert hasattr(model, "provider_category")
        assert model.handle.startswith(f"{provider.name}/")
        assert model.provider_name == provider.name
        assert model.context_window > 0


@pytest.mark.parametrize(
    "handle, agent_type, reasoning, expected_enable_reasoner, expected_put_inner_thoughts_in_kwargs, expected_max_reasoning_tokens, expected_reasoning_effort",
    [
        # send message agent loop
        ("openai/gpt-4o-mini", AgentType.memgpt_v2_agent, True, True, True, 0, None),
        ("openai/gpt-4o-mini", AgentType.memgpt_v2_agent, False, False, False, 0, None),
        ("openai/o3-mini", AgentType.memgpt_v2_agent, True, True, False, 0, "medium"),
        ("openai/o3-mini", AgentType.memgpt_v2_agent, False, True, False, 0, "medium"),
        ("anthropic/claude-3.5-sonnet", AgentType.memgpt_v2_agent, True, True, True, 0, None),
        ("anthropic/claude-3.5-sonnet", AgentType.memgpt_v2_agent, False, False, False, 0, None),
        ("anthropic/claude-3-7-sonnet", AgentType.memgpt_v2_agent, True, True, False, 1024, None),
        ("anthropic/claude-3-7-sonnet", AgentType.memgpt_v2_agent, False, False, False, 0, None),
        ("anthropic/claude-sonnet-4", AgentType.memgpt_v2_agent, True, True, False, 1024, None),
        ("anthropic/claude-sonnet-4", AgentType.memgpt_v2_agent, False, False, False, 0, None),
        ("google_vertex/gemini-2.0-flash", AgentType.memgpt_v2_agent, True, True, True, 0, None),
        ("google_vertex/gemini-2.0-flash", AgentType.memgpt_v2_agent, False, False, False, 0, None),
        ("google_vertex/gemini-2.5-flash", AgentType.memgpt_v2_agent, True, True, True, 1024, None),
        ("google_vertex/gemini-2.5-flash", AgentType.memgpt_v2_agent, False, False, False, 0, None),
        ("google_vertex/gemini-2.5-pro", AgentType.memgpt_v2_agent, True, True, True, 1024, None),
        # letta_v1_agent loop
        ("openai/gpt-4o-mini", AgentType.letta_v1_agent, True, False, False, 0, None),
        ("openai/gpt-4o-mini", AgentType.letta_v1_agent, False, False, False, 0, None),
        ("openai/o3-mini", AgentType.letta_v1_agent, True, True, False, 0, "medium"),
        ("openai/o3-mini", AgentType.letta_v1_agent, False, True, False, 0, "medium"),
        ("anthropic/claude-3.5-sonnet", AgentType.letta_v1_agent, True, False, False, 0, None),
        ("anthropic/claude-3.5-sonnet", AgentType.letta_v1_agent, False, False, False, 0, None),
        ("anthropic/claude-3-7-sonnet", AgentType.letta_v1_agent, True, True, False, 1024, None),
        ("anthropic/claude-3-7-sonnet", AgentType.letta_v1_agent, False, False, False, 0, None),
        ("anthropic/claude-sonnet-4", AgentType.letta_v1_agent, True, True, False, 1024, None),
        ("anthropic/claude-sonnet-4", AgentType.letta_v1_agent, False, False, False, 0, None),
        ("google_vertex/gemini-2.0-flash", AgentType.letta_v1_agent, True, False, False, 0, None),
        ("google_vertex/gemini-2.0-flash", AgentType.letta_v1_agent, False, False, False, 0, None),
        ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, True, True, False, 1024, None),
        ("google_vertex/gemini-2.5-flash", AgentType.letta_v1_agent, False, False, False, 0, None),
        ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, True, True, False, 1024, None),
        # gemini-2.5-pro cannot disable reasoning, so reasoning=False keeps the same expectations
        ("google_vertex/gemini-2.5-pro", AgentType.letta_v1_agent, False, True, False, 1024, None),
    ],
)
def test_reasoning_toggle_by_provider(
    handle: str,
    agent_type: AgentType,
    reasoning: bool,
    expected_enable_reasoner: bool,
    expected_put_inner_thoughts_in_kwargs: bool,
    expected_max_reasoning_tokens: int,
    expected_reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]],
):
    model_endpoint_type, model = handle.split("/")
    config = LLMConfig(
        model_endpoint_type=model_endpoint_type,
        model=model,
        handle=handle,
        context_window=1024,
    )

    new_config = LLMConfig.apply_reasoning_setting_to_config(config, reasoning=reasoning, agent_type=agent_type)

    assert new_config.enable_reasoner == expected_enable_reasoner
    assert new_config.put_inner_thoughts_in_kwargs == expected_put_inner_thoughts_in_kwargs
    assert new_config.reasoning_effort == expected_reasoning_effort
    assert new_config.max_reasoning_tokens == expected_max_reasoning_tokens


def test_codex_default_reasoning_effort():
    """Test that gpt-5-codex defaults to 'medium' reasoning effort, not 'minimal'."""
    # Test with apply_reasoning_setting_to_config for the v2 agent
    config = LLMConfig(
        model="gpt-5-codex",
        model_endpoint_type="openai",
        context_window=272000,
    )

    # For the v2 agent with reasoning=True
    new_config = LLMConfig.apply_reasoning_setting_to_config(config, reasoning=True, agent_type=AgentType.memgpt_v2_agent)
    assert new_config.reasoning_effort == "medium", "gpt-5-codex should default to 'medium', not 'minimal'"

    # For the v2 agent with reasoning=False (reasoning still can't be disabled for reasoning models)
    new_config = LLMConfig.apply_reasoning_setting_to_config(config, reasoning=False, agent_type=AgentType.memgpt_v2_agent)
    assert new_config.reasoning_effort == "medium", "gpt-5-codex should default to 'medium', not 'minimal'"

    # For the v1 agent with reasoning=True
    new_config = LLMConfig.apply_reasoning_setting_to_config(config, reasoning=True, agent_type=AgentType.letta_v1_agent)
    assert new_config.reasoning_effort == "medium", "gpt-5-codex should default to 'medium', not 'minimal'"