672 lines
30 KiB
Python
672 lines
30 KiB
Python
from typing import TYPE_CHECKING, Literal, Optional
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
|
|
from letta.constants import LETTA_MODEL_ENDPOINT
|
|
from letta.errors import LettaInvalidArgumentError
|
|
from letta.log import get_logger
|
|
from letta.schemas.enums import AgentType, ProviderCategory
|
|
from letta.schemas.response_format import ResponseFormatUnion
|
|
|
|
if TYPE_CHECKING:
|
|
from letta.schemas.model import ModelSettings
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
class LLMConfig(BaseModel):
|
|
"""Configuration for Language Model (LLM) connection and generation parameters.
|
|
|
|
.. deprecated::
|
|
LLMConfig is deprecated and should not be used as an input or return type in API calls.
|
|
Use the schemas in letta.schemas.model (ModelSettings, OpenAIModelSettings, etc.) instead.
|
|
For conversion, use the _to_model_settings() method or Model._from_llm_config() method.
|
|
"""
|
|
|
|
model: str = Field(..., description="LLM model name. ")
|
|
display_name: Optional[str] = Field(None, description="A human-friendly display name for the model.")
|
|
|
|
model_endpoint_type: Literal[
|
|
"openai",
|
|
"anthropic",
|
|
"google_ai",
|
|
"google_vertex",
|
|
"azure",
|
|
"groq",
|
|
"ollama",
|
|
"webui",
|
|
"webui-legacy",
|
|
"lmstudio",
|
|
"lmstudio-legacy",
|
|
"lmstudio-chatcompletions",
|
|
"llamacpp",
|
|
"koboldcpp",
|
|
"vllm",
|
|
"hugging-face",
|
|
"minimax",
|
|
"mistral",
|
|
"together", # completions endpoint
|
|
"bedrock",
|
|
"deepseek",
|
|
"xai",
|
|
"zai",
|
|
"openrouter",
|
|
"chatgpt_oauth",
|
|
] = Field(..., description="The endpoint type for the model.")
|
|
model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
|
|
provider_name: Optional[str] = Field(None, description="The provider name for the model.")
|
|
provider_category: Optional[ProviderCategory] = Field(None, description="The provider category for the model.")
|
|
model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
|
|
context_window: int = Field(..., description="The context window size for the model.")
|
|
put_inner_thoughts_in_kwargs: Optional[bool] = Field(
|
|
False,
|
|
description="Puts 'inner_thoughts' as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.",
|
|
)
|
|
handle: Optional[str] = Field(None, description="The handle for this config, in the format provider/model-name.")
|
|
temperature: float = Field(
|
|
1.0,
|
|
description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
|
|
)
|
|
max_tokens: Optional[int] = Field(
|
|
None,
|
|
description="The maximum number of tokens to generate. If not set, the model will use its default value.",
|
|
)
|
|
enable_reasoner: bool = Field(
|
|
True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
|
|
)
|
|
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] = Field(
|
|
None,
|
|
description="The reasoning effort to use when generating text reasoning models",
|
|
)
|
|
max_reasoning_tokens: int = Field(
|
|
0,
|
|
description="Configurable thinking budget for extended thinking. Used for enable_reasoner and also for Google Vertex models like Gemini 2.5 Flash. Minimum value is 1024 when used with enable_reasoner.",
|
|
)
|
|
effort: Optional[Literal["low", "medium", "high", "max"]] = Field(
|
|
None,
|
|
description="The effort level for Anthropic models that support it (Opus 4.5, Opus 4.6). Controls token spending and thinking behavior. Not setting this gives similar performance to 'high'.",
|
|
)
|
|
frequency_penalty: Optional[float] = Field(
|
|
None,  # Can also default to 0.0?
|
|
description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
|
|
)
|
|
compatibility_type: Optional[Literal["gguf", "mlx"]] = Field(None, description="The framework compatibility type for the model.")
|
|
verbosity: Optional[Literal["low", "medium", "high"]] = Field(
|
|
None,
|
|
description="Soft control for how verbose model output should be, used for GPT-5 models.",
|
|
)
|
|
tier: Optional[str] = Field(None, description="The cost tier for the model (cloud only).")
|
|
|
|
# FIXME hack to silence pydantic protected namespace warning
|
|
model_config = ConfigDict(protected_namespaces=())
|
|
parallel_tool_calls: Optional[bool] = Field(
|
|
False,
|
|
description="Deprecated: Use model_settings to configure parallel tool calls instead. If set to True, enables parallel tool calling. Defaults to False.",
|
|
deprecated=True,
|
|
)
|
|
response_format: Optional[ResponseFormatUnion] = Field(
|
|
None,
|
|
description="The response format for the model's output. Supports text, json_object, and json_schema (structured outputs). Can be set via model_settings.",
|
|
)
|
|
strict: bool = Field(
|
|
False,
|
|
description="Enable strict mode for tool calling. When true, tool schemas include strict: true and additionalProperties: false, guaranteeing tool outputs match JSON schemas.",
|
|
)
|
|
return_logprobs: bool = Field(
|
|
False,
|
|
description="Whether to return log probabilities of the output tokens. Useful for RL training.",
|
|
)
|
|
top_logprobs: Optional[int] = Field(
|
|
None,
|
|
description="Number of most likely tokens to return at each position (0-20). Requires return_logprobs=True.",
|
|
)
|
|
return_token_ids: bool = Field(
|
|
False,
|
|
description="Whether to return token IDs for all LLM generations via SGLang native endpoint. "
|
|
"Required for multi-turn RL training with loss masking. Only works with SGLang provider.",
|
|
)
|
|
|
|
@model_validator(mode="before")
@classmethod
def set_model_specific_defaults(cls, values):
    """
    Fill in model-specific defaults for max_tokens, context_window and verbosity.

    Runs before field validation, so ``values`` is the raw input. Only keys
    that are missing or ``None`` are populated; explicitly supplied values are
    never overwritten. The numbers match the presets built by ``default_config``.

    Args:
        values: Raw input for the model. Anything that is not a dict is
            passed through untouched.

    Returns:
        The same ``values`` object (updated in place when it is a dict).
    """
    # A "before" validator receives whatever the caller passed, which is not
    # guaranteed to be a dict.
    if not isinstance(values, dict):
        return values

    model = values.get("model")
    # A missing or non-string name is left for field validation to report,
    # instead of failing here on .startswith().
    if not isinstance(model, str):
        return values

    # Prefix match, so this covers gpt-5, gpt-5.1, gpt-5.2, ...
    is_gpt5_family = model.startswith("gpt-5")

    if values.get("max_tokens") is None:
        if is_gpt5_family:
            values["max_tokens"] = 16384
        elif model == "gpt-4.1":
            values["max_tokens"] = 8192
        # Any other model keeps max_tokens unset (the field default is None).

    if values.get("context_window") is None:
        if is_gpt5_family:
            values["context_window"] = 272000
        elif model == "gpt-4.1":
            values["context_window"] = 256000
        elif model in ("gpt-4o", "gpt-4o-mini"):
            values["context_window"] = 128000
        elif model == "gpt-4":
            values["context_window"] = 8192

    if is_gpt5_family and values.get("verbosity") is None:
        values["verbosity"] = "medium"

    return values
|
|
|
|
@model_validator(mode="before")
@classmethod
def set_default_enable_reasoner(cls, values):
    """
    Placeholder hook for deriving ``enable_reasoner`` from the model name.

    Currently a no-op: the input is returned unchanged, so ``enable_reasoner``
    keeps whatever the caller supplied (or the field default).
    """
    # NOTE: this is really only applicable for models that can toggle reasoning on-and-off, like 3.7
    # We can also use this field to identify if a model is a "reasoning" model (o1/o3, etc.) if we want,
    # e.g. by forcing enable_reasoner=True and put_inner_thoughts_in_kwargs=False for those names.
    return values
|
|
|
|
@model_validator(mode="before")
@classmethod
def set_default_put_inner_thoughts(cls, values):
    """
    Normalize ``put_inner_thoughts_in_kwargs`` based on the model.

    Rules, applied in order:
      * a missing / ``None`` value becomes ``False``;
      * models recognised by ``is_openai_reasoning_model`` are forced to ``False``;
      * Claude 3.7 Sonnet, Claude Sonnet 4*, Claude Opus 4* and Claude Haiku 4.5
        models on the ``anthropic`` or ``bedrock`` endpoint types are forced
        to ``False``.

    Any other explicitly supplied value is left as given.

    Args:
        values: Raw input for the model. Anything that is not a dict is
            passed through untouched.

    Returns:
        The same ``values`` object (updated in place when it is a dict).
    """
    # A "before" validator receives whatever the caller passed, which is not
    # guaranteed to be a dict.
    if not isinstance(values, dict):
        return values

    model = values.get("model")
    # A missing or non-string name is left for field validation to report.
    if not isinstance(model, str):
        return values

    if values.get("put_inner_thoughts_in_kwargs") is None:
        values["put_inner_thoughts_in_kwargs"] = False

    # Function-level import kept from the original
    # (presumably avoids an import cycle -- TODO confirm).
    from letta.llm_api.openai_client import is_openai_reasoning_model

    # "claude-opus-4" is a prefix of the 4-5 and 4-6 releases, so they need no
    # separate entries.
    native_thinking_prefixes = (
        "claude-3-7-sonnet",
        "claude-sonnet-4",
        "claude-opus-4",
        "claude-haiku-4-5",
    )
    served_by_anthropic = values.get("model_endpoint_type") in ("anthropic", "bedrock")

    # Setting this flag to True for these models would enable "double-think",
    # so it is always switched off for them.
    if is_openai_reasoning_model(model) or (served_by_anthropic and model.startswith(native_thinking_prefixes)):
        values["put_inner_thoughts_in_kwargs"] = False

    return values
|
|
|
|
@model_validator(mode="before")
@classmethod
def validate_codex_reasoning_effort(cls, values):
    """
    Reject ``reasoning_effort="minimal"`` for models that do not support it.

    Which models are affected is decided by
    ``does_not_support_minimal_reasoning`` (gpt-5-codex models, per the
    original description); those models need at least 'low'.

    Args:
        values: Raw input for the model. Anything that is not a dict is
            passed through untouched.

    Returns:
        ``values`` unchanged.

    Raises:
        LettaInvalidArgumentError: If the model does not support 'minimal'
            reasoning effort and 'minimal' was requested.
    """
    # A "before" validator receives whatever the caller passed, which is not
    # guaranteed to be a dict.
    if not isinstance(values, dict):
        return values

    from letta.llm_api.openai_client import does_not_support_minimal_reasoning

    # Cheap check first: nothing to validate unless 'minimal' was requested.
    if values.get("reasoning_effort") != "minimal":
        return values

    model = values.get("model")
    if model and does_not_support_minimal_reasoning(model):
        raise LettaInvalidArgumentError(
            f"Model '{model}' does not support 'minimal' reasoning effort. Please use 'low', 'medium', or 'high' instead."
        )
    return values
|
|
|
|
@classmethod
def default_config(cls, model_name: str):
    """
    Convenience constructor that builds a preset `LLMConfig` from a model name.

    Args:
        model_name (str): One of "gpt-4", "gpt-4o-mini", "gpt-4o", "gpt-4.1",
            "gpt-5", "gpt-5.1", "gpt-5.2" or "letta".

    Returns:
        A new config populated with the preset values for that name.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Settings shared by every preset that talks to the OpenAI API directly.
    openai_common = {
        "model_endpoint_type": "openai",
        "model_endpoint": "https://api.openai.com/v1",
        "model_wrapper": None,
    }
    # The GPT-5 family shares everything except the default reasoning effort.
    gpt5_common = {"context_window": 272000, "verbosity": "medium", "max_tokens": 16384}

    openai_overrides = {
        "gpt-4": {"context_window": 8192, "put_inner_thoughts_in_kwargs": True},
        "gpt-4o-mini": {"context_window": 128000},
        "gpt-4o": {"context_window": 128000},
        "gpt-4.1": {"context_window": 256000, "max_tokens": 8192},
        "gpt-5": {**gpt5_common, "reasoning_effort": "minimal"},
        "gpt-5.1": {**gpt5_common, "reasoning_effort": "none"},
        "gpt-5.2": {**gpt5_common, "reasoning_effort": "none"},
    }
    presets = {name: {"model": name, **openai_common, **extra} for name, extra in openai_overrides.items()}

    # "letta" is an alias: the configured model name differs from the lookup key,
    # and model_wrapper is deliberately not passed (left at the field default).
    presets["letta"] = {
        "model": "memgpt-openai",
        "model_endpoint_type": "openai",
        "model_endpoint": LETTA_MODEL_ENDPOINT,
        "context_window": 30000,
    }

    try:
        preset = presets[model_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable input, which the old if/elif chain also
        # reported as "not supported".
        raise ValueError(f"Model {model_name} not supported.") from None
    return cls(**preset)
|
|
|
|
def pretty_print(self) -> str:
    """
    Return a short one-line description of this config.

    The model name comes first, followed by a `` [type=...]`` tag and an
    `` [ip=...]`` tag whenever the endpoint type / endpoint are truthy.
    """
    pieces = [f"{self.model}"]
    if self.model_endpoint_type:
        pieces.append(f" [type={self.model_endpoint_type}]")
    if self.model_endpoint:
        pieces.append(f" [ip={self.model_endpoint}]")
    return "".join(pieces)
|
|
|
|
def _to_model_settings(self) -> "ModelSettings":
    """
    Translate this legacy config into the matching settings schema from
    letta.schemas.model (OpenAIModelSettings, AnthropicModelSettings, ...).

    Counterpart of the _to_legacy_config_params() methods in model.py.
    Endpoint types without a dedicated schema fall back to the base
    ``ModelSettings`` carrying only ``max_output_tokens``.
    """
    from letta.schemas.model import (
        AnthropicModelSettings,
        AnthropicThinking,
        AzureModelSettings,
        BedrockModelSettings,
        ChatGPTOAuthModelSettings,
        ChatGPTOAuthReasoning,
        DeepseekModelSettings,
        GeminiThinkingConfig,
        GoogleAIModelSettings,
        GoogleVertexModelSettings,
        GroqModelSettings,
        ModelSettings,
        OpenAIModelSettings,
        OpenAIReasoning,
        OpenRouterModelSettings,
        TogetherModelSettings,
        XAIModelSettings,
        ZAIModelSettings,
    )

    provider = self.model_endpoint_type
    sampling_temperature = self.temperature
    # Every provider except Gemini falls back to 4096 output tokens when unset.
    token_cap = self.max_tokens or 4096
    thinking_switch = "enabled" if self.enable_reasoner else "disabled"

    if provider == "openai":
        return OpenAIModelSettings(
            max_output_tokens=token_cap,
            temperature=sampling_temperature,
            reasoning=OpenAIReasoning(reasoning_effort=self.reasoning_effort or "minimal"),
            strict=self.strict,
        )

    # MiniMax uses an Anthropic-compatible API, so both map to the same schema.
    if provider in ("anthropic", "minimax"):
        return AnthropicModelSettings(
            max_output_tokens=token_cap,
            temperature=sampling_temperature,
            thinking=AnthropicThinking(type=thinking_switch, budget_tokens=self.max_reasoning_tokens or 1024),
            verbosity=self.verbosity,
            strict=self.strict,
        )

    # The two Gemini endpoint types differ only in the schema class.
    if provider in ("google_ai", "google_vertex"):
        gemini_settings_cls = GoogleAIModelSettings if provider == "google_ai" else GoogleVertexModelSettings
        budget = self.max_reasoning_tokens
        return gemini_settings_cls(
            max_output_tokens=self.max_tokens or 65536,
            temperature=sampling_temperature,
            thinking_config=GeminiThinkingConfig(include_thoughts=budget > 0, thinking_budget=budget or 1024),
        )

    if provider == "zai":
        from letta.schemas.model import ZAIThinking

        return ZAIModelSettings(
            max_output_tokens=token_cap,
            temperature=sampling_temperature,
            thinking=ZAIThinking(type=thinking_switch, clear_thinking=False),
        )

    if provider == "chatgpt_oauth":
        return ChatGPTOAuthModelSettings(
            max_output_tokens=token_cap,
            temperature=sampling_temperature,
            reasoning=ChatGPTOAuthReasoning(reasoning_effort=self.reasoning_effort or "medium"),
        )

    # Providers whose schema is built from max_output_tokens + temperature only.
    plain_settings = {
        "azure": AzureModelSettings,
        "xai": XAIModelSettings,
        "groq": GroqModelSettings,
        "deepseek": DeepseekModelSettings,
        "together": TogetherModelSettings,
        "bedrock": BedrockModelSettings,
        "openrouter": OpenRouterModelSettings,
    }
    settings_cls = plain_settings.get(provider)
    if settings_cls is not None:
        return settings_cls(max_output_tokens=token_cap, temperature=sampling_temperature)

    # If we don't know the model type, use the base ModelSettings schema
    return ModelSettings(max_output_tokens=token_cap)
|
|
|
|
@classmethod
def is_openai_reasoning_model(cls, config: "LLMConfig") -> bool:
    """
    Tell whether ``config`` targets an OpenAI reasoning model.

    Requires the ``openai`` endpoint type; the model-name check itself is
    delegated to ``letta.llm_api.openai_client.is_openai_reasoning_model``.
    """
    from letta.llm_api.openai_client import is_openai_reasoning_model as name_is_reasoning_model

    if config.model_endpoint_type != "openai":
        return False
    return name_is_reasoning_model(config.model)
|
|
|
|
@classmethod
def is_anthropic_reasoning_model(cls, config: "LLMConfig") -> bool:
    """
    Tell whether ``config`` targets a Claude reasoning model served through
    the ``anthropic`` or ``bedrock`` endpoint types.

    Matching is by model-name prefix.
    """
    if config.model_endpoint_type not in ("anthropic", "bedrock"):
        return False
    # "claude-opus-4" is a prefix of the 4-5 and 4-6 releases, so those need
    # no separate entries.
    reasoning_prefixes = (
        "claude-opus-4",
        "claude-sonnet-4",
        "claude-3-7-sonnet",
        "claude-haiku-4-5",
    )
    return config.model.startswith(reasoning_prefixes)
|
|
|
|
@classmethod
def is_google_vertex_reasoning_model(cls, config: "LLMConfig") -> bool:
    """
    Tell whether ``config`` is a Gemini 2.5 Flash / Pro model (by name
    prefix) on the ``google_vertex`` endpoint type.
    """
    if config.model_endpoint_type != "google_vertex":
        return False
    return config.model.startswith(("gemini-2.5-flash", "gemini-2.5-pro"))
|
|
|
|
@classmethod
def is_google_ai_reasoning_model(cls, config: "LLMConfig") -> bool:
    """
    Tell whether ``config`` is a Gemini 2.5 Flash / Pro model (by name
    prefix) on the ``google_ai`` endpoint type.
    """
    if config.model_endpoint_type != "google_ai":
        return False
    return config.model.startswith(("gemini-2.5-flash", "gemini-2.5-pro"))
|
|
|
|
@classmethod
def is_zai_reasoning_model(cls, config: "LLMConfig") -> bool:
    """
    Tell whether ``config`` is a GLM 4.5 / 4.6 / 4.7 / 5 model (by name
    prefix) on the ``zai`` endpoint type.
    """
    if config.model_endpoint_type != "zai":
        return False
    glm_prefixes = ("glm-4.5", "glm-4.6", "glm-4.7", "glm-5")
    return config.model.startswith(glm_prefixes)
|
|
|
|
@classmethod
def is_openrouter_reasoning_model(cls, config: "LLMConfig") -> bool:
    """Check if this is an OpenRouter model that supports reasoning.

    OpenRouter model names include provider prefix, e.g.:
    - anthropic/claude-sonnet-4
    - openai/o3-mini
    - moonshotai/kimi-k2-thinking
    - deepseek/deepseek-r1

    The lower-cased model name is searched for any of a fixed set of
    substrings; one hit is enough.
    """
    if config.model_endpoint_type != "openrouter":
        return False

    reasoning_markers = (
        # OpenAI reasoning models
        "/o1",
        "/o3",
        "/o4",
        "/gpt-5",
        # Anthropic Claude reasoning models
        "claude-3-7-sonnet",
        "claude-sonnet-4",
        "claude-opus-4",
        "claude-haiku-4",
        # Google Gemini reasoning models
        "gemini",
        # ZAI GLM reasoning models
        "glm-4.5",
        "glm-4.6",
        "glm-4.7",
        "glm-5",
        # DeepSeek reasoning models
        "deepseek-r1",
        "deepseek-reasoner",
        # Moonshot Kimi reasoning models
        "kimi",
    )
    lowered_name = config.model.lower()
    return any(marker in lowered_name for marker in reasoning_markers)
|
|
|
|
@classmethod
def supports_verbosity(cls, config: "LLMConfig") -> bool:
    """Tell whether verbosity control applies: ``openai`` endpoint type and a model name starting with "gpt-5"."""
    if config.model_endpoint_type != "openai":
        return False
    return config.model.startswith("gpt-5")
|
|
|
|
@classmethod
def apply_reasoning_setting_to_config(cls, config: "LLMConfig", reasoning: bool, agent_type: Optional["AgentType"] = None):
    """
    Normalize the reasoning-related flags on ``config`` based on the requested
    "reasoning" setting, model capabilities, and optionally the agent type.

    Args:
        config: Config to normalize. It is modified in place.
        reasoning: Whether the caller wants reasoning enabled.
        agent_type: Optional agent type. ``AgentType.letta_v1_agent`` selects
            the stricter policy described below.

    Returns:
        The same ``config`` object.

    For AgentType.letta_v1_agent (put_inner_thoughts_in_kwargs is always False):
    - OpenAI reasoning models: reasoner forced on; missing reasoning_effort /
      verbosity defaults are filled in
    - Anthropic reasoning models, and Gemini 2.5 Flash on google_ai /
      google_vertex: toggle honored
    - ZAI and OpenRouter reasoning models: toggle honored
    - Models named gemini-2.5-pro* / gemini-3*: reasoner forced on
    - All others: reasoner off and max_reasoning_tokens reset to 0

    For every other agent type:
    - reasoning=False: OpenAI reasoning models stay on (effort "none" where
      supported, otherwise a warning is logged); gemini-2.5-pro* / gemini-3*
      stay on with inner thoughts in kwargs; everything else is switched off
    - reasoning=True: reasoner on; Anthropic, ZAI, OpenRouter and OpenAI
      reasoning models get put_inner_thoughts_in_kwargs=False, while Gemini 2.5
      models and all remaining models get True
    """
    from letta.llm_api.openai_client import does_not_support_minimal_reasoning, supports_none_reasoning_effort

    model = config.model
    is_gpt5_family = model.startswith("gpt-5")
    # Gemini 2.5 Pro and Gemini 3: not possible to disable reasoning.
    is_always_on_gemini = model.startswith(("gemini-2.5-pro", "gemini-3"))

    def default_openai_effort() -> str:
        """Pick the reasoning effort used when the caller did not set one."""
        if supports_none_reasoning_effort(model):
            return "none"
        # Models rejected by does_not_support_minimal_reasoning (Codex) cannot use "minimal".
        if is_gpt5_family and not does_not_support_minimal_reasoning(model):
            return "minimal"
        return "medium"

    def fill_openai_defaults() -> None:
        """Populate reasoning_effort (all models) and verbosity (GPT-5 only) when unset."""
        if config.reasoning_effort is None:
            config.reasoning_effort = default_openai_effort()
        if is_gpt5_family and config.verbosity is None:
            config.verbosity = "medium"

    def ensure_reasoning_budget() -> None:
        """Give the thinking budget its 1024-token default when it is still 0."""
        if config.max_reasoning_tokens == 0:
            config.max_reasoning_tokens = 1024

    def fill_claude_effort_default() -> None:
        """Default ``effort`` to "medium" for Claude Opus 4.5 / 4.6 and Sonnet 4.6."""
        effort_models = ("claude-opus-4-5", "claude-opus-4-6", "claude-sonnet-4-6")
        if model.startswith(effort_models) and config.effort is None:
            config.effort = "medium"

    # V1 agent policy: do not allow simulated reasoning for non-native models
    if agent_type == AgentType.letta_v1_agent:
        # OpenAI native reasoning models: always on
        if cls.is_openai_reasoning_model(config):
            config.put_inner_thoughts_in_kwargs = False
            config.enable_reasoner = True
            fill_openai_defaults()
            return config

        # Anthropic reasoning models and Gemini 2.5 Flash: toggle honored
        is_toggleable_gemini = (
            cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config)
        ) and not is_always_on_gemini
        if cls.is_anthropic_reasoning_model(config) or is_toggleable_gemini:
            config.enable_reasoner = bool(reasoning)
            config.put_inner_thoughts_in_kwargs = False
            if config.enable_reasoner:
                ensure_reasoning_budget()
            fill_claude_effort_default()
            return config

        # ZAI GLM and OpenRouter reasoning models: toggle honored
        if cls.is_zai_reasoning_model(config) or cls.is_openrouter_reasoning_model(config):
            config.enable_reasoner = bool(reasoning)
            config.put_inner_thoughts_in_kwargs = False
            return config

        if is_always_on_gemini:
            config.put_inner_thoughts_in_kwargs = False
            config.enable_reasoner = True
            ensure_reasoning_budget()
            return config

        # Everything else: disabled (no inner_thoughts-in-kwargs simulation)
        config.put_inner_thoughts_in_kwargs = False
        config.enable_reasoner = False
        config.max_reasoning_tokens = 0
        return config

    if not reasoning:
        if cls.is_openai_reasoning_model(config):
            config.put_inner_thoughts_in_kwargs = False
            config.enable_reasoner = True
            if supports_none_reasoning_effort(model):
                # These models can actually disable reasoning via "none" effort,
                # so any previously set effort is overridden.
                config.reasoning_effort = "none"
            else:
                logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models")
            fill_openai_defaults()
        elif is_always_on_gemini:
            logger.warning(f"Reasoning cannot be disabled for {config.model} model")
            # Handle as non-reasoner until we support summary
            config.put_inner_thoughts_in_kwargs = True
            config.enable_reasoner = True
            ensure_reasoning_budget()
        else:
            config.put_inner_thoughts_in_kwargs = False
            config.enable_reasoner = False
        return config

    config.enable_reasoner = True
    if cls.is_anthropic_reasoning_model(config):
        config.put_inner_thoughts_in_kwargs = False
        ensure_reasoning_budget()
        fill_claude_effort_default()
    elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config):
        # Handle as non-reasoner until we support summary
        config.put_inner_thoughts_in_kwargs = True
        ensure_reasoning_budget()
    elif cls.is_zai_reasoning_model(config) or cls.is_openrouter_reasoning_model(config):
        config.put_inner_thoughts_in_kwargs = False
    elif cls.is_openai_reasoning_model(config):
        config.put_inner_thoughts_in_kwargs = False
        fill_openai_defaults()
    else:
        # No native reasoning: simulate it through inner thoughts in kwargs.
        config.put_inner_thoughts_in_kwargs = True

    return config
|