Files
letta-server/letta/schemas/model.py
jnjpng a5bac26556 fix: add "max" to AnthropicModelSettings effort type (#9754)
* fix: add "max" to AnthropicModelSettings effort type

The effort field on AnthropicModelSettings only accepted
"low" | "medium" | "high", but the LLMConfig.effort field and the
Anthropic API both support "max" for Opus 4.6. This type mismatch
caused Pydantic validation to reject conversation updates that set
effort to "max" (mapped from xhigh in letta-code).

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta Code <noreply@letta.com>

* generate

---------

Co-authored-by: Letta Code <noreply@letta.com>
2026-03-03 18:34:15 -08:00

534 lines
25 KiB
Python

from typing import Annotated, Literal, Optional, Union
from pydantic import BaseModel, Field
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.response_format import ResponseFormatUnion
class ModelBase(BaseModel):
    """Common identity fields shared by LLM and embedding model schemas."""

    # Unique, human-readable identifier used to reference the model via the API.
    handle: str = Field(..., description="Unique handle for API reference (format: provider_display_name/model_display_name)")
    # The provider-side model identifier (e.g. what gets sent in API requests).
    name: str = Field(..., description="The actual model name used by the provider")
    display_name: str = Field(..., description="Display name for the model shown in UI")
    provider_type: ProviderType = Field(..., description="The type of the provider")
    provider_name: str = Field(..., description="The name of the provider")
    # Discriminator narrowed to "llm" / "embedding" in the concrete subclasses.
    model_type: Literal["llm", "embedding"] = Field(..., description="Type of model (llm or embedding)")
class Model(LLMConfig, ModelBase):
    """An LLM model as exposed by the API.

    Combines the new ``ModelBase`` identity fields (``handle``, ``name``,
    ``provider_type``, ...) with every legacy ``LLMConfig`` field. The legacy
    fields are redeclared here (marked ``deprecated``) so older clients that
    still read them keep working.
    """

    model_type: Literal["llm"] = Field("llm", description="Type of model (llm or embedding)")
    max_context_window: int = Field(..., description="The maximum context window for the model")
    # supports_token_streaming: Optional[bool] = Field(None, description="Whether token streaming is supported")
    # supports_tool_calling: Optional[bool] = Field(None, description="Whether tool calling is supported")

    # Deprecated fields from LLMConfig - use new field names instead
    model: str = Field(..., description="Deprecated: Use 'name' field instead. LLM model name.", deprecated=True)
    model_endpoint_type: Literal[
        "openai",
        "anthropic",
        "google_ai",
        "google_vertex",
        "azure",
        "groq",
        "ollama",
        "webui",
        "webui-legacy",
        "lmstudio",
        "lmstudio-legacy",
        "lmstudio-chatcompletions",
        "llamacpp",
        "koboldcpp",
        "vllm",
        "hugging-face",
        "minimax",
        "mistral",
        "together",
        "bedrock",
        "deepseek",
        "xai",
        "zai",
        "openrouter",
        "chatgpt_oauth",
    ] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True)
    context_window: int = Field(
        ..., description="Deprecated: Use 'max_context_window' field instead. The context window size for the model.", deprecated=True
    )
    # Additional deprecated LLMConfig fields - kept for backward compatibility
    model_endpoint: Optional[str] = Field(None, description="Deprecated: The endpoint for the model.", deprecated=True)
    model_wrapper: Optional[str] = Field(None, description="Deprecated: The wrapper for the model.", deprecated=True)
    put_inner_thoughts_in_kwargs: Optional[bool] = Field(
        True, description="Deprecated: Puts 'inner_thoughts' as a kwarg in the function call.", deprecated=True
    )
    temperature: float = Field(0.7, description="Deprecated: The temperature to use when generating text with the model.", deprecated=True)
    max_tokens: Optional[int] = Field(None, description="Deprecated: The maximum number of tokens to generate.", deprecated=True)
    enable_reasoner: bool = Field(
        True,
        description="Deprecated: Whether or not the model should use extended thinking if it is a 'reasoning' style model.",
        deprecated=True,
    )
    reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] = Field(
        None, description="Deprecated: The reasoning effort to use when generating text reasoning models.", deprecated=True
    )
    max_reasoning_tokens: int = Field(0, description="Deprecated: Configurable thinking budget for extended thinking.", deprecated=True)
    frequency_penalty: Optional[float] = Field(
        None,
        description="Deprecated: Positive values penalize new tokens based on their existing frequency in the text so far.",
        deprecated=True,
    )
    compatibility_type: Optional[Literal["gguf", "mlx"]] = Field(
        None, description="Deprecated: The framework compatibility type for the model.", deprecated=True
    )
    verbosity: Optional[Literal["low", "medium", "high"]] = Field(
        None, description="Deprecated: Soft control for how verbose model output should be.", deprecated=True
    )
    tier: Optional[str] = Field(None, description="Deprecated: The cost tier for the model (cloud only).", deprecated=True)
    parallel_tool_calls: Optional[bool] = Field(
        False, description="Deprecated: If set to True, enables parallel tool calling.", deprecated=True
    )
    provider_category: Optional[ProviderCategory] = Field(
        None, description="Deprecated: The provider category for the model.", deprecated=True
    )

    @classmethod
    def from_llm_config(cls, llm_config: "LLMConfig") -> "Model":
        """Create a Model instance from an LLMConfig.

        New identity fields are derived from the config, falling back to
        "<provider_name>/<model>" for the handle and to the model name /
        endpoint type when display_name / provider_name are unset. All
        deprecated fields are copied through verbatim for backward
        compatibility.
        """
        return cls(
            # New fields
            handle=llm_config.handle or f"{llm_config.provider_name}/{llm_config.model}",
            name=llm_config.model,
            display_name=llm_config.display_name or llm_config.model,
            provider_type=llm_config.model_endpoint_type,
            provider_name=llm_config.provider_name or llm_config.model_endpoint_type,
            model_type="llm",
            max_context_window=llm_config.context_window,
            # Deprecated fields (copy from LLMConfig for backward compatibility)
            model=llm_config.model,
            model_endpoint_type=llm_config.model_endpoint_type,
            model_endpoint=llm_config.model_endpoint,
            model_wrapper=llm_config.model_wrapper,
            context_window=llm_config.context_window,
            put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
            temperature=llm_config.temperature,
            max_tokens=llm_config.max_tokens,
            enable_reasoner=llm_config.enable_reasoner,
            reasoning_effort=llm_config.reasoning_effort,
            max_reasoning_tokens=llm_config.max_reasoning_tokens,
            effort=llm_config.effort,
            frequency_penalty=llm_config.frequency_penalty,
            compatibility_type=llm_config.compatibility_type,
            verbosity=llm_config.verbosity,
            tier=llm_config.tier,
            parallel_tool_calls=llm_config.parallel_tool_calls,
            provider_category=llm_config.provider_category,
        )

    @property
    def model_settings_schema(self) -> Optional[dict]:
        """Returns the JSON schema for the ModelSettings class corresponding to this model's provider.

        Returns None when the provider has no dedicated ModelSettings class
        (e.g. ollama, vllm, and other local/OpenAI-compatible endpoints).
        """
        PROVIDER_SETTINGS_MAP = {
            ProviderType.openai: OpenAIModelSettings,
            ProviderType.anthropic: AnthropicModelSettings,
            ProviderType.google_ai: GoogleAIModelSettings,
            ProviderType.google_vertex: GoogleVertexModelSettings,
            ProviderType.azure: AzureModelSettings,
            ProviderType.xai: XAIModelSettings,
            ProviderType.zai: ZAIModelSettings,
            ProviderType.groq: GroqModelSettings,
            ProviderType.deepseek: DeepseekModelSettings,
            ProviderType.together: TogetherModelSettings,
            ProviderType.bedrock: BedrockModelSettings,
            ProviderType.openrouter: OpenRouterModelSettings,
            # Fix: chatgpt_oauth is in ModelSettingsUnion and the endpoint-type
            # literal, but was missing here, so its settings schema was
            # silently reported as None.
            ProviderType.chatgpt_oauth: ChatGPTOAuthModelSettings,
        }
        settings_class = PROVIDER_SETTINGS_MAP.get(self.provider_type)
        return settings_class.model_json_schema() if settings_class else None
class EmbeddingModel(EmbeddingConfig, ModelBase):
    """An embedding model as exposed by the API.

    Combines the new ``ModelBase`` identity fields with every legacy
    ``EmbeddingConfig`` field. The legacy fields are redeclared here (marked
    ``deprecated``) so older clients that still read them keep working.
    """

    model_type: Literal["embedding"] = Field("embedding", description="Type of model (llm or embedding)")
    embedding_dim: int = Field(..., description="The dimension of the embedding")
    # Deprecated fields from EmbeddingConfig - use new field names instead
    embedding_model: str = Field(..., description="Deprecated: Use 'name' field instead. Embedding model name.", deprecated=True)
    embedding_endpoint_type: Literal[
        "openai",
        "anthropic",
        "bedrock",
        "google_ai",
        "google_vertex",
        "azure",
        "groq",
        "ollama",
        "webui",
        "webui-legacy",
        "lmstudio",
        "lmstudio-legacy",
        "llamacpp",
        "koboldcpp",
        "vllm",
        "hugging-face",
        "mistral",
        "together",
        "pinecone",
    ] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the embedding model.", deprecated=True)
    # Additional deprecated EmbeddingConfig fields - kept for backward compatibility
    embedding_endpoint: Optional[str] = Field(None, description="Deprecated: The endpoint for the model.", deprecated=True)
    embedding_chunk_size: Optional[int] = Field(300, description="Deprecated: The chunk size of the embedding.", deprecated=True)
    batch_size: int = Field(32, description="Deprecated: The maximum batch size for processing embeddings.", deprecated=True)
    azure_endpoint: Optional[str] = Field(None, description="Deprecated: The Azure endpoint for the model.", deprecated=True)
    azure_version: Optional[str] = Field(None, description="Deprecated: The Azure version for the model.", deprecated=True)
    azure_deployment: Optional[str] = Field(None, description="Deprecated: The Azure deployment for the model.", deprecated=True)

    @classmethod
    def from_embedding_config(cls, embedding_config: "EmbeddingConfig") -> "EmbeddingModel":
        """Create an EmbeddingModel instance from an EmbeddingConfig.

        New identity fields are derived from the config (the handle falls back
        to "<endpoint_type>/<model>"; display/provider names fall back to the
        model name and endpoint type). Deprecated fields are copied through
        verbatim for backward compatibility.
        """
        return cls(
            # New fields
            handle=embedding_config.handle or f"{embedding_config.embedding_endpoint_type}/{embedding_config.embedding_model}",
            name=embedding_config.embedding_model,
            display_name=embedding_config.embedding_model,
            provider_type=embedding_config.embedding_endpoint_type,
            provider_name=embedding_config.embedding_endpoint_type,
            model_type="embedding",
            embedding_dim=embedding_config.embedding_dim,
            # Deprecated fields (copy from EmbeddingConfig for backward compatibility)
            embedding_model=embedding_config.embedding_model,
            embedding_endpoint_type=embedding_config.embedding_endpoint_type,
            embedding_endpoint=embedding_config.embedding_endpoint,
            embedding_chunk_size=embedding_config.embedding_chunk_size,
            batch_size=embedding_config.batch_size,
            azure_endpoint=embedding_config.azure_endpoint,
            azure_version=embedding_config.azure_version,
            azure_deployment=embedding_config.azure_deployment,
        )
class ModelSettings(BaseModel):
    """Schema for defining settings for a model.

    Base class for the per-provider *ModelSettings classes below; holds the
    knobs common to all providers.
    """

    # model: str = Field(..., description="The name of the model.")
    max_output_tokens: int = Field(4096, description="The maximum number of tokens the model can generate.")
    parallel_tool_calls: bool = Field(True, description="Whether to enable parallel tool calling.")
class OpenAIReasoning(BaseModel):
    """Reasoning configuration for OpenAI reasoning-capable models."""

    reasoning_effort: Literal["none", "minimal", "low", "medium", "high", "xhigh"] = Field(
        "minimal", description="The reasoning effort to use when generating text reasoning models"
    )
    # TODO: implement support for this
    # summary: Optional[Literal["auto", "detailed"]] = Field(
    #     None, description="The reasoning summary level to use when generating text reasoning models"
    # )
class OpenAIModelSettings(ModelSettings):
    """OpenAI model configuration."""

    provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    # NOTE(review): the default is a shared OpenAIReasoning instance rather than a
    # default_factory — presumably safe because pydantic copies defaults per
    # instance; confirm before mutating nested defaults in place.
    reasoning: OpenAIReasoning = Field(OpenAIReasoning(reasoning_effort="high"), description="The reasoning configuration for the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
    # OpenAI supports strict mode for tool calling - defaults to True
    strict: bool = Field(
        True,
        description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
    )
    # TODO: implement support for these
    # reasoning_summary: Optional[Literal["none", "short", "detailed"]] = Field(
    #     None, description="The reasoning summary level to use when generating text reasoning models"
    # )
    # max_tool_calls: int = Field(10, description="The maximum number of tool calls the model can make.")
    # parallel_tool_calls: bool = Field(False, description="Whether the model supports parallel tool calls.")
    # top_logprobs: int = Field(10, description="The number of top logprobs to return.")
    # top_p: float = Field(1.0, description="The top-p value to use when generating text.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "reasoning_effort": self.reasoning.reasoning_effort,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": self.strict,
        }
# "thinking": {
# "type": "enabled",
# "budget_tokens": 10000
# }
class AnthropicThinking(BaseModel):
    """Extended-thinking configuration for Anthropic models (mirrors the API's `thinking` payload)."""

    type: Literal["enabled", "disabled"] = Field("enabled", description="The type of thinking to use.")
    budget_tokens: int = Field(1024, description="The maximum number of tokens the model can use for extended thinking.")
class AnthropicModelSettings(ModelSettings):
    """Anthropic model configuration."""

    provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
    temperature: float = Field(1.0, description="The temperature of the model.")
    thinking: AnthropicThinking = Field(
        AnthropicThinking(type="enabled", budget_tokens=1024), description="The thinking configuration for the model."
    )
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
    # gpt-5 models only
    # NOTE(review): a GPT-5 verbosity knob on the Anthropic settings class looks
    # carried over from OpenAIModelSettings — confirm it is intentional.
    verbosity: Optional[Literal["low", "medium", "high"]] = Field(
        None,
        description="Soft control for how verbose model output should be, used for GPT-5 models.",
    )
    # Effort parameter for Opus 4.5, Opus 4.6, and Sonnet 4.6
    # "max" must stay in this literal: LLMConfig.effort and the Anthropic API
    # accept it for Opus 4.6 (see commit a5bac26556).
    effort: Optional[Literal["low", "medium", "high", "max"]] = Field(
        None,
        description="Effort level for supported Anthropic models (controls token spending). 'max' is only available on Opus 4.6. Not setting this gives similar performance to 'high'.",
    )
    # Anthropic supports strict mode for tool calling - defaults to False
    strict: bool = Field(
        False,
        description="Enable strict mode for tool calling. When true, tool outputs are guaranteed to match JSON schemas.",
    )
    # TODO: implement support for these
    # top_k: Optional[int] = Field(None, description="The number of top tokens to return.")
    # top_p: Optional[float] = Field(None, description="The top-p value to use when generating text.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            # Flatten the nested thinking config into the two legacy fields.
            "extended_thinking": self.thinking.type == "enabled",
            "max_reasoning_tokens": self.thinking.budget_tokens,
            "verbosity": self.verbosity,
            "parallel_tool_calls": self.parallel_tool_calls,
            "effort": self.effort,
            "response_format": self.response_format,
            "strict": self.strict,
        }
class GeminiThinkingConfig(BaseModel):
    """Thinking configuration for Google Gemini models."""

    include_thoughts: bool = Field(True, description="Whether to include thoughts in the model's response.")
    thinking_budget: int = Field(1024, description="The thinking budget for the model.")
class GoogleAIModelSettings(ModelSettings):
    """Google AI (Gemini API) model configuration."""

    provider_type: Literal[ProviderType.google_ai] = Field(ProviderType.google_ai, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    thinking_config: GeminiThinkingConfig = Field(
        GeminiThinkingConfig(include_thoughts=True, thinking_budget=1024), description="The thinking configuration for the model."
    )
    response_schema: Optional[ResponseFormatUnion] = Field(None, description="The response schema for the model.")
    # Overrides the base-class default of 4096.
    max_output_tokens: int = Field(65536, description="The maximum number of tokens the model can generate.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            # Budget only applies when thoughts are included; otherwise report 0.
            "max_reasoning_tokens": self.thinking_config.thinking_budget if self.thinking_config.include_thoughts else 0,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Google AI does not support strict mode
        }
class GoogleVertexModelSettings(GoogleAIModelSettings):
    """Google Vertex AI model configuration (same settings as Google AI, different provider type)."""

    provider_type: Literal[ProviderType.google_vertex] = Field(ProviderType.google_vertex, description="The type of the provider.")
class AzureModelSettings(ModelSettings):
    """Azure OpenAI model configuration (OpenAI-compatible)."""

    provider_type: Literal[ProviderType.azure] = Field(ProviderType.azure, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Azure does not support strict mode
        }
class XAIModelSettings(ModelSettings):
    """xAI model configuration (OpenAI-compatible)."""

    provider_type: Literal[ProviderType.xai] = Field(ProviderType.xai, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # xAI does not support strict mode
        }
class ZAIThinking(BaseModel):
    """Thinking configuration for ZAI GLM-4.5+ models."""

    type: Literal["enabled", "disabled"] = Field("enabled", description="Whether thinking is enabled or disabled.")
    clear_thinking: bool = Field(False, description="If False, preserved thinking is used (recommended for agents).")
class ZAIModelSettings(ModelSettings):
    """Z.ai (ZhipuAI) model configuration (OpenAI-compatible)."""

    provider_type: Literal[ProviderType.zai] = Field(ProviderType.zai, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")
    thinking: ZAIThinking = Field(
        ZAIThinking(type="enabled", clear_thinking=False), description="The thinking configuration for GLM-4.5+ models."
    )

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # ZAI does not support strict mode
            "extended_thinking": self.thinking.type == "enabled",
        }
class GroqModelSettings(ModelSettings):
    """Groq model configuration (OpenAI-compatible)."""

    provider_type: Literal[ProviderType.groq] = Field(ProviderType.groq, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Groq does not support strict mode
        }
class DeepseekModelSettings(ModelSettings):
    """Deepseek model configuration (OpenAI-compatible)."""

    provider_type: Literal[ProviderType.deepseek] = Field(ProviderType.deepseek, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Deepseek does not support strict mode
        }
class TogetherModelSettings(ModelSettings):
    """Together AI model configuration (OpenAI-compatible)."""

    provider_type: Literal[ProviderType.together] = Field(ProviderType.together, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Together does not support strict mode
        }
class BedrockModelSettings(ModelSettings):
    """AWS Bedrock model configuration."""

    provider_type: Literal[ProviderType.bedrock] = Field(ProviderType.bedrock, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # Bedrock does not support strict mode
        }
class OpenRouterModelSettings(ModelSettings):
    """OpenRouter model configuration (OpenAI-compatible)."""

    provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    response_format: Optional[ResponseFormatUnion] = Field(None, description="The response format for the model.")

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "response_format": self.response_format,
            "parallel_tool_calls": self.parallel_tool_calls,
            "strict": False,  # OpenRouter does not support strict mode
        }
class ChatGPTOAuthReasoning(BaseModel):
    """Reasoning configuration for ChatGPT OAuth models (GPT-5.x, o-series)."""

    # NOTE(review): unlike OpenAIReasoning, this literal omits "minimal" —
    # presumably the ChatGPT backend does not accept it; confirm.
    reasoning_effort: Literal["none", "low", "medium", "high", "xhigh"] = Field(
        "medium", description="The reasoning effort level for GPT-5.x and o-series models."
    )
class ChatGPTOAuthModelSettings(ModelSettings):
    """ChatGPT OAuth model configuration (uses ChatGPT backend API)."""

    provider_type: Literal[ProviderType.chatgpt_oauth] = Field(ProviderType.chatgpt_oauth, description="The type of the provider.")
    temperature: float = Field(0.7, description="The temperature of the model.")
    reasoning: ChatGPTOAuthReasoning = Field(
        ChatGPTOAuthReasoning(reasoning_effort="medium"), description="The reasoning configuration for the model."
    )

    def _to_legacy_config_params(self) -> dict:
        """Map these settings onto the legacy LLMConfig parameter names."""
        return {
            "temperature": self.temperature,
            "max_tokens": self.max_output_tokens,
            "reasoning_effort": self.reasoning.reasoning_effort,
            "parallel_tool_calls": self.parallel_tool_calls,
        }
# Discriminated union over every per-provider settings class; pydantic selects
# the concrete class from the "provider_type" field when validating payloads.
ModelSettingsUnion = Annotated[
    Union[
        OpenAIModelSettings,
        AnthropicModelSettings,
        GoogleAIModelSettings,
        GoogleVertexModelSettings,
        AzureModelSettings,
        XAIModelSettings,
        ZAIModelSettings,
        GroqModelSettings,
        DeepseekModelSettings,
        TogetherModelSettings,
        BedrockModelSettings,
        OpenRouterModelSettings,
        ChatGPTOAuthModelSettings,
    ],
    Field(discriminator="provider_type"),
]