feat: enable gpt5.1 models [LET-6178] (#6175)

* hack at gpt51

* revert package lock

* first hack

* default context window

---------

Co-authored-by: Ari Webb <ari@letta.com>
This commit is contained in:
Ari Webb
2025-11-18 14:59:16 -08:00
committed by Caren Thomas
parent 7ce33c4f22
commit 7380eaec13
7 changed files with 75 additions and 29 deletions

View File

@@ -17020,7 +17020,13 @@
},
"reasoning_effort": {
"type": "string",
"enum": ["minimal", "low", "medium", "high"],
"enum": [
"none",
"minimal",
"low",
"medium",
"high"
],
"nullable": true
},
"per_file_view_window_char_limit": {
@@ -28619,7 +28625,7 @@
"anyOf": [
{
"type": "string",
"enum": ["minimal", "low", "medium", "high"]
"enum": ["none", "minimal", "low", "medium", "high"]
},
{
"type": "null"
@@ -30729,7 +30735,7 @@
"anyOf": [
{
"type": "string",
"enum": ["minimal", "low", "medium", "high"]
"enum": ["none", "minimal", "low", "medium", "high"]
},
{
"type": "null"
@@ -31000,7 +31006,7 @@
"properties": {
"reasoning_effort": {
"type": "string",
"enum": ["minimal", "low", "medium", "high"],
"enum": ["none", "minimal", "low", "medium", "high"],
"title": "Reasoning Effort",
"description": "The reasoning effort to use when generating text reasoning models",
"default": "minimal"

View File

@@ -239,6 +239,11 @@ LLM_MAX_TOKENS = {
"gpt-5-nano": 272000,
"gpt-5-nano-2025-08-07": 272000,
"gpt-5-codex": 272000,
# gpt-5.1
"gpt-5.1": 272000,
"gpt-5.1-2025-11-13": 272000,
"gpt-5.1-codex": 272000,
"gpt-5.1-codex-mini": 272000,
# reasoners
"o1": 200000,
# "o1-pro": 200000, # responses API only

View File

@@ -72,6 +72,14 @@ def does_not_support_minimal_reasoning(model: str) -> bool:
return "codex" in model.lower()
def supports_none_reasoning_effort(model: str) -> bool:
"""Check if the model supports 'none' reasoning effort.
Currently, only GPT-5.1 models support the 'none' reasoning effort level.
"""
return model.startswith("gpt-5.1")
def is_openai_5_model(model: str) -> bool:
"""Utility function to check if the model belongs to the GPT-5 family (e.g. gpt-5, gpt-5.1)"""
return model.startswith("gpt-5")
@@ -337,7 +345,8 @@ class OpenAIClient(LLMClientBase):
data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity)
# Add reasoning effort control for reasoning models
if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
# Only set reasoning if effort is not "none" (GPT-5.1 uses "none" to disable reasoning)
if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none":
# data.reasoning_effort = llm_config.reasoning_effort
data.reasoning = Reasoning(
effort=llm_config.reasoning_effort,
@@ -481,7 +490,8 @@ class OpenAIClient(LLMClientBase):
data.verbosity = llm_config.verbosity
# Add reasoning effort control for reasoning models
if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
# Only set reasoning_effort if it's not "none" (GPT-5.1 uses "none" to disable reasoning)
if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none":
data.reasoning_effort = llm_config.reasoning_effort
if llm_config.frequency_penalty is not None:

View File

@@ -69,7 +69,7 @@ class LLMConfig(BaseModel):
enable_reasoner: bool = Field(
True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
)
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field(
None,
description="The reasoning effort to use when generating text reasoning models",
)
@@ -105,7 +105,7 @@ class LLMConfig(BaseModel):
# Set max_tokens defaults based on model
if values.get("max_tokens") is None:
if model == "gpt-5":
if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1
values["max_tokens"] = 16384
elif model == "gpt-4.1":
values["max_tokens"] = 8192
@@ -113,8 +113,8 @@ class LLMConfig(BaseModel):
# Set context_window defaults if not provided
if values.get("context_window") is None:
if model == "gpt-5":
values["context_window"] = 128000
if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1
values["context_window"] = 272000
elif model == "gpt-4.1":
values["context_window"] = 256000
elif model == "gpt-4o" or model == "gpt-4o-mini":
@@ -123,7 +123,7 @@ class LLMConfig(BaseModel):
values["context_window"] = 8192
# Set verbosity defaults for GPT-5 models
if model == "gpt-5" and values.get("verbosity") is None:
if model.startswith("gpt-5") and values.get("verbosity") is None:
values["verbosity"] = "medium"
return values
@@ -239,11 +239,22 @@ class LLMConfig(BaseModel):
model_endpoint_type="openai",
model_endpoint="https://api.openai.com/v1",
model_wrapper=None,
context_window=128000,
context_window=272000,
reasoning_effort="minimal",
verbosity="medium",
max_tokens=16384,
)
elif model_name == "gpt-5.1":
return cls(
model="gpt-5.1",
model_endpoint_type="openai",
model_endpoint="https://api.openai.com/v1",
model_wrapper=None,
context_window=272000, # Same as GPT-5
reasoning_effort="none", # Default to "none" for GPT-5.1
verbosity="medium",
max_tokens=16384,
)
elif model_name == "letta":
return cls(
model="memgpt-openai",
@@ -388,7 +399,7 @@ class LLMConfig(BaseModel):
- Google Gemini (2.5 family): force disabled until native reasoning supported
- All others: disabled (no simulated reasoning via kwargs)
"""
from letta.llm_api.openai_client import does_not_support_minimal_reasoning
from letta.llm_api.openai_client import does_not_support_minimal_reasoning, supports_none_reasoning_effort
# V1 agent policy: do not allow simulated reasoning for non-native models
if agent_type is not None and agent_type == AgentType.letta_v1_agent:
@@ -397,8 +408,11 @@ class LLMConfig(BaseModel):
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
if config.reasoning_effort is None:
# GPT-5.1 models default to "none" reasoning effort (only GPT-5.1 supports this level)
if supports_none_reasoning_effort(config.model):
config.reasoning_effort = "none" # Always default to "none" for GPT-5.1
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"
@@ -433,16 +447,22 @@ class LLMConfig(BaseModel):
if not reasoning:
if cls.is_openai_reasoning_model(config):
logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models")
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
if config.reasoning_effort is None:
# GPT-5 models default to minimal, others to medium
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"
# GPT-5.1 models can actually disable reasoning using "none" effort
if supports_none_reasoning_effort(config.model):
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
config.reasoning_effort = "none"
else:
logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models")
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
if config.reasoning_effort is None:
# GPT-5 models default to minimal, others to medium
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"
# Set verbosity for GPT-5 models
if config.model.startswith("gpt-5") and config.verbosity is None:
config.verbosity = "medium"
@@ -471,9 +491,12 @@ class LLMConfig(BaseModel):
elif cls.is_openai_reasoning_model(config):
config.put_inner_thoughts_in_kwargs = False
if config.reasoning_effort is None:
# GPT-5.1 models default to "none" even when reasoning is enabled
if supports_none_reasoning_effort(config.model):
config.reasoning_effort = "none" # Default to "none" for GPT-5.1
# GPT-5 models default to minimal, others to medium
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"

View File

@@ -65,7 +65,7 @@ class Model(LLMConfig, ModelBase):
description="Deprecated: Whether or not the model should use extended thinking if it is a 'reasoning' style model.",
deprecated=True,
)
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field(
None, description="Deprecated: The reasoning effort to use when generating text reasoning models.", deprecated=True
)
max_reasoning_tokens: int = Field(0, description="Deprecated: Configurable thinking budget for extended thinking.", deprecated=True)
@@ -209,7 +209,7 @@ class ModelSettings(BaseModel):
class OpenAIReasoning(BaseModel):
reasoning_effort: Literal["minimal", "low", "medium", "high"] = Field(
reasoning_effort: Literal["none", "minimal", "low", "medium", "high"] = Field(
"minimal", description="The reasoning effort to use when generating text reasoning models"
)

View File

@@ -136,7 +136,9 @@ class ChatCompletionRequest(BaseModel):
parallel_tool_calls: Optional[bool] = None
instructions: Optional[str] = None
verbosity: Optional[Literal["low", "medium", "high"]] = None # For verbosity control in GPT-5 models
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = None # For reasoning effort control in reasoning models
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = (
None # For reasoning effort control in reasoning models
)
# function-calling related
tools: Optional[List[Tool]] = None

View File

@@ -367,7 +367,7 @@ def test_reasoning_toggle_by_provider(
expected_enable_reasoner: bool,
expected_put_inner_thoughts_in_kwargs: bool,
expected_max_reasoning_tokens: int,
expected_reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]],
expected_reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]],
):
model_endpoint_type, model = handle.split("/")
config = LLMConfig(