diff --git a/fern/openapi.json b/fern/openapi.json index 83797320..99cf0c3e 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -17020,7 +17020,13 @@ }, "reasoning_effort": { "type": "string", - "enum": ["minimal", "low", "medium", "high"], + "enum": [ + "none", + "minimal", + "low", + "medium", + "high" + ], "nullable": true }, "per_file_view_window_char_limit": { @@ -28619,7 +28625,7 @@ "anyOf": [ { "type": "string", - "enum": ["minimal", "low", "medium", "high"] + "enum": ["none", "minimal", "low", "medium", "high"] }, { "type": "null" @@ -30729,7 +30735,7 @@ "anyOf": [ { "type": "string", - "enum": ["minimal", "low", "medium", "high"] + "enum": ["none", "minimal", "low", "medium", "high"] }, { "type": "null" @@ -31000,7 +31006,7 @@ "properties": { "reasoning_effort": { "type": "string", - "enum": ["minimal", "low", "medium", "high"], + "enum": ["none", "minimal", "low", "medium", "high"], "title": "Reasoning Effort", "description": "The reasoning effort to use when generating text reasoning models", "default": "minimal" diff --git a/letta/constants.py b/letta/constants.py index 392bba54..ba105e8f 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -239,6 +239,11 @@ LLM_MAX_TOKENS = { "gpt-5-nano": 272000, "gpt-5-nano-2025-08-07": 272000, "gpt-5-codex": 272000, + # gpt-5.1 + "gpt-5.1": 272000, + "gpt-5.1-2025-11-13": 272000, + "gpt-5.1-codex": 272000, + "gpt-5.1-codex-mini": 272000, # reasoners "o1": 200000, # "o1-pro": 200000, # responses API only diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index 14dee8a8..232eb6d0 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -72,6 +72,14 @@ def does_not_support_minimal_reasoning(model: str) -> bool: return "codex" in model.lower() +def supports_none_reasoning_effort(model: str) -> bool: + """Check if the model supports 'none' reasoning effort. + + Currently, only GPT-5.1 models support the 'none' reasoning effort level. + """ + return model.startswith("gpt-5.1") + + def is_openai_5_model(model: str) -> bool: """Utility function to check if the model is a '5' model""" return model.startswith("gpt-5") @@ -337,7 +345,8 @@ class OpenAIClient(LLMClientBase): data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity) # Add reasoning effort control for reasoning models - if is_openai_reasoning_model(model) and llm_config.reasoning_effort: + # Only set reasoning if effort is not "none" (GPT-5.1 uses "none" to disable reasoning) + if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none": # data.reasoning_effort = llm_config.reasoning_effort data.reasoning = Reasoning( effort=llm_config.reasoning_effort, @@ -481,7 +490,8 @@ class OpenAIClient(LLMClientBase): data.verbosity = llm_config.verbosity # Add reasoning effort control for reasoning models - if is_openai_reasoning_model(model) and llm_config.reasoning_effort: + # Only set reasoning_effort if it's not "none" (GPT-5.1 uses "none" to disable reasoning) + if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none": data.reasoning_effort = llm_config.reasoning_effort if llm_config.frequency_penalty is not None: diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index a0e59c8b..338cb477 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -69,7 +69,7 @@ class LLMConfig(BaseModel): enable_reasoner: bool = Field( True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model" ) - reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field( + reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field( None, description="The reasoning effort to use when generating text reasoning models", ) @@ -105,7 +105,7 @@ class LLMConfig(BaseModel): # Set max_tokens defaults based on model if values.get("max_tokens") is None: - if model == "gpt-5": + if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1 values["max_tokens"] = 16384 elif model == "gpt-4.1": values["max_tokens"] = 8192 @@ -113,8 +113,8 @@ class LLMConfig(BaseModel): # Set context_window defaults if not provided if values.get("context_window") is None: - if model == "gpt-5": - values["context_window"] = 128000 + if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1 + values["context_window"] = 272000 elif model == "gpt-4.1": values["context_window"] = 256000 elif model == "gpt-4o" or model == "gpt-4o-mini": @@ -123,7 +123,7 @@ class LLMConfig(BaseModel): values["context_window"] = 8192 # Set verbosity defaults for GPT-5 models - if model == "gpt-5" and values.get("verbosity") is None: + if model.startswith("gpt-5") and values.get("verbosity") is None: values["verbosity"] = "medium" return values @@ -239,11 +239,22 @@ class LLMConfig(BaseModel): model_endpoint_type="openai", model_endpoint="https://api.openai.com/v1", model_wrapper=None, - context_window=128000, + context_window=272000, reasoning_effort="minimal", verbosity="medium", max_tokens=16384, ) + elif model_name == "gpt-5.1": + return cls( + model="gpt-5.1", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + model_wrapper=None, + context_window=272000, # Same as GPT-5 + reasoning_effort="none", # Default to "none" for GPT-5.1 + verbosity="medium", + max_tokens=16384, + ) elif model_name == "letta": return cls( model="memgpt-openai", @@ -388,7 +399,7 @@ class LLMConfig(BaseModel): - Google Gemini (2.5 family): force disabled until native reasoning supported - All others: disabled (no simulated reasoning via kwargs) """ - from letta.llm_api.openai_client import does_not_support_minimal_reasoning + from letta.llm_api.openai_client import does_not_support_minimal_reasoning, supports_none_reasoning_effort # V1 agent policy: do not allow simulated reasoning for non-native models if agent_type is not None and agent_type == AgentType.letta_v1_agent: @@ -397,8 +408,11 @@ class LLMConfig(BaseModel): config.put_inner_thoughts_in_kwargs = False config.enable_reasoner = True if config.reasoning_effort is None: + # GPT-5.1 models default to "none" reasoning effort (their unique feature) + if supports_none_reasoning_effort(config.model): + config.reasoning_effort = "none" # Always default to "none" for GPT-5.1 # Codex models cannot use "minimal" reasoning effort - if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model): + elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model): config.reasoning_effort = "minimal" else: config.reasoning_effort = "medium" @@ -433,16 +447,22 @@ class LLMConfig(BaseModel): if not reasoning: if cls.is_openai_reasoning_model(config): - logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models") - config.put_inner_thoughts_in_kwargs = False - config.enable_reasoner = True - if config.reasoning_effort is None: - # GPT-5 models default to minimal, others to medium - # Codex models cannot use "minimal" reasoning effort - if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model): - config.reasoning_effort = "minimal" - else: - config.reasoning_effort = "medium" + # GPT-5.1 models can actually disable reasoning using "none" effort + if supports_none_reasoning_effort(config.model): + config.put_inner_thoughts_in_kwargs = False + config.enable_reasoner = True + config.reasoning_effort = "none" + else: + logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models") + config.put_inner_thoughts_in_kwargs = False + config.enable_reasoner = True + if config.reasoning_effort is None: + # GPT-5 models default to minimal, others to medium + # Codex models cannot use "minimal" reasoning effort + if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model): + config.reasoning_effort = "minimal" + else: + config.reasoning_effort = "medium" # Set verbosity for GPT-5 models if config.model.startswith("gpt-5") and config.verbosity is None: config.verbosity = "medium" @@ -471,9 +491,12 @@ class LLMConfig(BaseModel): elif cls.is_openai_reasoning_model(config): config.put_inner_thoughts_in_kwargs = False if config.reasoning_effort is None: + # GPT-5.1 models default to "none" even when reasoning is enabled + if supports_none_reasoning_effort(config.model): + config.reasoning_effort = "none" # Default to "none" for GPT-5.1 # GPT-5 models default to minimal, others to medium # Codex models cannot use "minimal" reasoning effort - if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model): + elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model): config.reasoning_effort = "minimal" else: config.reasoning_effort = "medium" diff --git a/letta/schemas/model.py b/letta/schemas/model.py index 3b8839cb..63731e70 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -65,7 +65,7 @@ class Model(LLMConfig, ModelBase): description="Deprecated: Whether or not the model should use extended thinking if it is a 'reasoning' style model.", deprecated=True, ) - reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field( + reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field( None, description="Deprecated: The reasoning effort to use when generating text reasoning models.", deprecated=True ) max_reasoning_tokens: int = Field(0, description="Deprecated: Configurable thinking budget for extended thinking.", deprecated=True) @@ -209,7 +209,7 @@ class ModelSettings(BaseModel): class OpenAIReasoning(BaseModel): - reasoning_effort: Literal["minimal", "low", "medium", "high"] = Field( + reasoning_effort: Literal["none", "minimal", "low", "medium", "high"] = Field( "minimal", description="The reasoning effort to use when generating text reasoning models" ) diff --git a/letta/schemas/openai/chat_completion_request.py b/letta/schemas/openai/chat_completion_request.py index 35ddf702..89c67e6b 100644 --- a/letta/schemas/openai/chat_completion_request.py +++ b/letta/schemas/openai/chat_completion_request.py @@ -136,7 +136,9 @@ class ChatCompletionRequest(BaseModel): parallel_tool_calls: Optional[bool] = None instructions: Optional[str] = None verbosity: Optional[Literal["low", "medium", "high"]] = None # For verbosity control in GPT-5 models - reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = None # For reasoning effort control in reasoning models + reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = ( + None # For reasoning effort control in reasoning models + ) # function-calling related tools: Optional[List[Tool]] = None diff --git a/tests/test_providers.py b/tests/test_providers.py index 14e491cd..e1062d6b 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -367,7 +367,7 @@ def test_reasoning_toggle_by_provider( expected_enable_reasoner: bool, expected_put_inner_thoughts_in_kwargs: bool, expected_max_reasoning_tokens: int, - expected_reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]], + expected_reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]], ): model_endpoint_type, model = handle.split("/") config = LLMConfig(