feat: enable gpt5.1 models [LET-6178] (#6175)
* hack at gpt51
* revert package lock
* first hack
* default context window

---------

Co-authored-by: Ari Webb <ari@letta.com>
This commit is contained in:
@@ -17020,7 +17020,13 @@
|
||||
},
|
||||
"reasoning_effort": {
|
||||
"type": "string",
|
||||
"enum": ["minimal", "low", "medium", "high"],
|
||||
"enum": [
|
||||
"none",
|
||||
"minimal",
|
||||
"low",
|
||||
"medium",
|
||||
"high"
|
||||
],
|
||||
"nullable": true
|
||||
},
|
||||
"per_file_view_window_char_limit": {
|
||||
@@ -28619,7 +28625,7 @@
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["minimal", "low", "medium", "high"]
|
||||
"enum": ["none", "minimal", "low", "medium", "high"]
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -30729,7 +30735,7 @@
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["minimal", "low", "medium", "high"]
|
||||
"enum": ["none", "minimal", "low", "medium", "high"]
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -31000,7 +31006,7 @@
|
||||
"properties": {
|
||||
"reasoning_effort": {
|
||||
"type": "string",
|
||||
"enum": ["minimal", "low", "medium", "high"],
|
||||
"enum": ["none", "minimal", "low", "medium", "high"],
|
||||
"title": "Reasoning Effort",
|
||||
"description": "The reasoning effort to use when generating text reasoning models",
|
||||
"default": "minimal"
|
||||
|
||||
@@ -239,6 +239,11 @@ LLM_MAX_TOKENS = {
|
||||
"gpt-5-nano": 272000,
|
||||
"gpt-5-nano-2025-08-07": 272000,
|
||||
"gpt-5-codex": 272000,
|
||||
# gpt-5.1
|
||||
"gpt-5.1": 272000,
|
||||
"gpt-5.1-2025-11-13": 272000,
|
||||
"gpt-5.1-codex": 272000,
|
||||
"gpt-5.1-codex-mini": 272000,
|
||||
# reasoners
|
||||
"o1": 200000,
|
||||
# "o1-pro": 200000, # responses API only
|
||||
|
||||
@@ -72,6 +72,14 @@ def does_not_support_minimal_reasoning(model: str) -> bool:
|
||||
return "codex" in model.lower()
|
||||
|
||||
|
||||
def supports_none_reasoning_effort(model: str) -> bool:
    """Report whether ``model`` accepts the 'none' reasoning effort level.

    The decision is a case-sensitive prefix match: any model name that
    begins with "gpt-5.1" qualifies (currently only the GPT-5.1 family
    is treated as supporting 'none').

    Args:
        model: The model name to inspect.

    Returns:
        True when the name starts with "gpt-5.1", otherwise False.
    """
    none_capable_prefix = "gpt-5.1"
    has_prefix = model.startswith(none_capable_prefix)
    return has_prefix
|
||||
|
||||
|
||||
def is_openai_5_model(model: str) -> bool:
    """Report whether ``model`` is a '5' model.

    The check is a case-sensitive prefix match on "gpt-5", so names such
    as "gpt-5", "gpt-5-nano" and "gpt-5.1" all qualify.

    Args:
        model: The model name to inspect.

    Returns:
        True when the name starts with "gpt-5", otherwise False.
    """
    five_series_prefix = "gpt-5"
    matches = model.startswith(five_series_prefix)
    return matches
|
||||
@@ -337,7 +345,8 @@ class OpenAIClient(LLMClientBase):
|
||||
data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity)
|
||||
|
||||
# Add reasoning effort control for reasoning models
|
||||
if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
|
||||
# Only set reasoning if effort is not "none" (GPT-5.1 uses "none" to disable reasoning)
|
||||
if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none":
|
||||
# data.reasoning_effort = llm_config.reasoning_effort
|
||||
data.reasoning = Reasoning(
|
||||
effort=llm_config.reasoning_effort,
|
||||
@@ -481,7 +490,8 @@ class OpenAIClient(LLMClientBase):
|
||||
data.verbosity = llm_config.verbosity
|
||||
|
||||
# Add reasoning effort control for reasoning models
|
||||
if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
|
||||
# Only set reasoning_effort if it's not "none" (GPT-5.1 uses "none" to disable reasoning)
|
||||
if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none":
|
||||
data.reasoning_effort = llm_config.reasoning_effort
|
||||
|
||||
if llm_config.frequency_penalty is not None:
|
||||
|
||||
@@ -69,7 +69,7 @@ class LLMConfig(BaseModel):
|
||||
enable_reasoner: bool = Field(
|
||||
True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
|
||||
)
|
||||
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
|
||||
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field(
|
||||
None,
|
||||
description="The reasoning effort to use when generating text reasoning models",
|
||||
)
|
||||
@@ -105,7 +105,7 @@ class LLMConfig(BaseModel):
|
||||
|
||||
# Set max_tokens defaults based on model
|
||||
if values.get("max_tokens") is None:
|
||||
if model == "gpt-5":
|
||||
if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1
|
||||
values["max_tokens"] = 16384
|
||||
elif model == "gpt-4.1":
|
||||
values["max_tokens"] = 8192
|
||||
@@ -113,8 +113,8 @@ class LLMConfig(BaseModel):
|
||||
|
||||
# Set context_window defaults if not provided
|
||||
if values.get("context_window") is None:
|
||||
if model == "gpt-5":
|
||||
values["context_window"] = 128000
|
||||
if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1
|
||||
values["context_window"] = 272000
|
||||
elif model == "gpt-4.1":
|
||||
values["context_window"] = 256000
|
||||
elif model == "gpt-4o" or model == "gpt-4o-mini":
|
||||
@@ -123,7 +123,7 @@ class LLMConfig(BaseModel):
|
||||
values["context_window"] = 8192
|
||||
|
||||
# Set verbosity defaults for GPT-5 models
|
||||
if model == "gpt-5" and values.get("verbosity") is None:
|
||||
if model.startswith("gpt-5") and values.get("verbosity") is None:
|
||||
values["verbosity"] = "medium"
|
||||
|
||||
return values
|
||||
@@ -239,11 +239,22 @@ class LLMConfig(BaseModel):
|
||||
model_endpoint_type="openai",
|
||||
model_endpoint="https://api.openai.com/v1",
|
||||
model_wrapper=None,
|
||||
context_window=128000,
|
||||
context_window=272000,
|
||||
reasoning_effort="minimal",
|
||||
verbosity="medium",
|
||||
max_tokens=16384,
|
||||
)
|
||||
elif model_name == "gpt-5.1":
|
||||
return cls(
|
||||
model="gpt-5.1",
|
||||
model_endpoint_type="openai",
|
||||
model_endpoint="https://api.openai.com/v1",
|
||||
model_wrapper=None,
|
||||
context_window=272000, # Same as GPT-5
|
||||
reasoning_effort="none", # Default to "none" for GPT-5.1
|
||||
verbosity="medium",
|
||||
max_tokens=16384,
|
||||
)
|
||||
elif model_name == "letta":
|
||||
return cls(
|
||||
model="memgpt-openai",
|
||||
@@ -388,7 +399,7 @@ class LLMConfig(BaseModel):
|
||||
- Google Gemini (2.5 family): force disabled until native reasoning supported
|
||||
- All others: disabled (no simulated reasoning via kwargs)
|
||||
"""
|
||||
from letta.llm_api.openai_client import does_not_support_minimal_reasoning
|
||||
from letta.llm_api.openai_client import does_not_support_minimal_reasoning, supports_none_reasoning_effort
|
||||
|
||||
# V1 agent policy: do not allow simulated reasoning for non-native models
|
||||
if agent_type is not None and agent_type == AgentType.letta_v1_agent:
|
||||
@@ -397,8 +408,11 @@ class LLMConfig(BaseModel):
|
||||
config.put_inner_thoughts_in_kwargs = False
|
||||
config.enable_reasoner = True
|
||||
if config.reasoning_effort is None:
|
||||
# GPT-5.1 models default to "none" reasoning effort (their unique feature)
|
||||
if supports_none_reasoning_effort(config.model):
|
||||
config.reasoning_effort = "none" # Always default to "none" for GPT-5.1
|
||||
# Codex models cannot use "minimal" reasoning effort
|
||||
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
|
||||
elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
|
||||
config.reasoning_effort = "minimal"
|
||||
else:
|
||||
config.reasoning_effort = "medium"
|
||||
@@ -433,16 +447,22 @@ class LLMConfig(BaseModel):
|
||||
|
||||
if not reasoning:
|
||||
if cls.is_openai_reasoning_model(config):
|
||||
logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models")
|
||||
config.put_inner_thoughts_in_kwargs = False
|
||||
config.enable_reasoner = True
|
||||
if config.reasoning_effort is None:
|
||||
# GPT-5 models default to minimal, others to medium
|
||||
# Codex models cannot use "minimal" reasoning effort
|
||||
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
|
||||
config.reasoning_effort = "minimal"
|
||||
else:
|
||||
config.reasoning_effort = "medium"
|
||||
# GPT-5.1 models can actually disable reasoning using "none" effort
|
||||
if supports_none_reasoning_effort(config.model):
|
||||
config.put_inner_thoughts_in_kwargs = False
|
||||
config.enable_reasoner = True
|
||||
config.reasoning_effort = "none"
|
||||
else:
|
||||
logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models")
|
||||
config.put_inner_thoughts_in_kwargs = False
|
||||
config.enable_reasoner = True
|
||||
if config.reasoning_effort is None:
|
||||
# GPT-5 models default to minimal, others to medium
|
||||
# Codex models cannot use "minimal" reasoning effort
|
||||
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
|
||||
config.reasoning_effort = "minimal"
|
||||
else:
|
||||
config.reasoning_effort = "medium"
|
||||
# Set verbosity for GPT-5 models
|
||||
if config.model.startswith("gpt-5") and config.verbosity is None:
|
||||
config.verbosity = "medium"
|
||||
@@ -471,9 +491,12 @@ class LLMConfig(BaseModel):
|
||||
elif cls.is_openai_reasoning_model(config):
|
||||
config.put_inner_thoughts_in_kwargs = False
|
||||
if config.reasoning_effort is None:
|
||||
# GPT-5.1 models default to "none" even when reasoning is enabled
|
||||
if supports_none_reasoning_effort(config.model):
|
||||
config.reasoning_effort = "none" # Default to "none" for GPT-5.1
|
||||
# GPT-5 models default to minimal, others to medium
|
||||
# Codex models cannot use "minimal" reasoning effort
|
||||
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
|
||||
elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
|
||||
config.reasoning_effort = "minimal"
|
||||
else:
|
||||
config.reasoning_effort = "medium"
|
||||
|
||||
@@ -65,7 +65,7 @@ class Model(LLMConfig, ModelBase):
|
||||
description="Deprecated: Whether or not the model should use extended thinking if it is a 'reasoning' style model.",
|
||||
deprecated=True,
|
||||
)
|
||||
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
|
||||
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field(
|
||||
None, description="Deprecated: The reasoning effort to use when generating text reasoning models.", deprecated=True
|
||||
)
|
||||
max_reasoning_tokens: int = Field(0, description="Deprecated: Configurable thinking budget for extended thinking.", deprecated=True)
|
||||
@@ -209,7 +209,7 @@ class ModelSettings(BaseModel):
|
||||
|
||||
|
||||
class OpenAIReasoning(BaseModel):
|
||||
reasoning_effort: Literal["minimal", "low", "medium", "high"] = Field(
|
||||
reasoning_effort: Literal["none", "minimal", "low", "medium", "high"] = Field(
|
||||
"minimal", description="The reasoning effort to use when generating text reasoning models"
|
||||
)
|
||||
|
||||
|
||||
@@ -136,7 +136,9 @@ class ChatCompletionRequest(BaseModel):
|
||||
parallel_tool_calls: Optional[bool] = None
|
||||
instructions: Optional[str] = None
|
||||
verbosity: Optional[Literal["low", "medium", "high"]] = None # For verbosity control in GPT-5 models
|
||||
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = None # For reasoning effort control in reasoning models
|
||||
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = (
|
||||
None # For reasoning effort control in reasoning models
|
||||
)
|
||||
|
||||
# function-calling related
|
||||
tools: Optional[List[Tool]] = None
|
||||
|
||||
@@ -367,7 +367,7 @@ def test_reasoning_toggle_by_provider(
|
||||
expected_enable_reasoner: bool,
|
||||
expected_put_inner_thoughts_in_kwargs: bool,
|
||||
expected_max_reasoning_tokens: int,
|
||||
expected_reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]],
|
||||
expected_reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]],
|
||||
):
|
||||
model_endpoint_type, model = handle.split("/")
|
||||
config = LLMConfig(
|
||||
|
||||
Reference in New Issue
Block a user