feat: enable gpt5.1 models [LET-6178] (#6175)

* hack at gpt51

* revert package lock

* first hack

* default context window

---------

Co-authored-by: Ari Webb <ari@letta.com>
This commit is contained in:
Ari Webb
2025-11-18 14:59:16 -08:00
committed by Caren Thomas
parent 7ce33c4f22
commit 7380eaec13
7 changed files with 75 additions and 29 deletions

View File

@@ -17020,7 +17020,13 @@
},
"reasoning_effort": {
"type": "string",
"enum": ["minimal", "low", "medium", "high"],
"enum": [
"none",
"minimal",
"low",
"medium",
"high"
],
"nullable": true
},
"per_file_view_window_char_limit": {
@@ -28619,7 +28625,7 @@
"anyOf": [
{
"type": "string",
"enum": ["minimal", "low", "medium", "high"]
"enum": ["none", "minimal", "low", "medium", "high"]
},
{
"type": "null"
@@ -30729,7 +30735,7 @@
"anyOf": [
{
"type": "string",
"enum": ["minimal", "low", "medium", "high"]
"enum": ["none", "minimal", "low", "medium", "high"]
},
{
"type": "null"
@@ -31000,7 +31006,7 @@
"properties": {
"reasoning_effort": {
"type": "string",
"enum": ["minimal", "low", "medium", "high"],
"enum": ["none", "minimal", "low", "medium", "high"],
"title": "Reasoning Effort",
"description": "The reasoning effort to use when generating text reasoning models",
"default": "minimal"

View File

@@ -239,6 +239,11 @@ LLM_MAX_TOKENS = {
"gpt-5-nano": 272000,
"gpt-5-nano-2025-08-07": 272000,
"gpt-5-codex": 272000,
# gpt-5.1
"gpt-5.1": 272000,
"gpt-5.1-2025-11-13": 272000,
"gpt-5.1-codex": 272000,
"gpt-5.1-codex-mini": 272000,
# reasoners
"o1": 200000,
# "o1-pro": 200000, # responses API only

View File

@@ -72,6 +72,14 @@ def does_not_support_minimal_reasoning(model: str) -> bool:
return "codex" in model.lower()
def supports_none_reasoning_effort(model: str) -> bool:
"""Check if the model supports 'none' reasoning effort.
Currently, only GPT-5.1 models support the 'none' reasoning effort level.
"""
return model.startswith("gpt-5.1")
def is_openai_5_model(model: str) -> bool:
"""Utility function to check if the model belongs to the GPT-5 family (e.g. gpt-5, gpt-5.1)"""
return model.startswith("gpt-5")
@@ -337,7 +345,8 @@ class OpenAIClient(LLMClientBase):
data.text = ResponseTextConfigParam(verbosity=llm_config.verbosity)
# Add reasoning effort control for reasoning models
if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
# Only set reasoning if effort is not "none" (GPT-5.1 uses "none" to disable reasoning)
if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none":
# data.reasoning_effort = llm_config.reasoning_effort
data.reasoning = Reasoning(
effort=llm_config.reasoning_effort,
@@ -481,7 +490,8 @@ class OpenAIClient(LLMClientBase):
data.verbosity = llm_config.verbosity
# Add reasoning effort control for reasoning models
if is_openai_reasoning_model(model) and llm_config.reasoning_effort:
# Only set reasoning_effort if it's not "none" (GPT-5.1 uses "none" to disable reasoning)
if is_openai_reasoning_model(model) and llm_config.reasoning_effort and llm_config.reasoning_effort != "none":
data.reasoning_effort = llm_config.reasoning_effort
if llm_config.frequency_penalty is not None:

View File

@@ -69,7 +69,7 @@ class LLMConfig(BaseModel):
enable_reasoner: bool = Field(
True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
)
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field(
None,
description="The reasoning effort to use when generating text reasoning models",
)
@@ -105,7 +105,7 @@ class LLMConfig(BaseModel):
# Set max_tokens defaults based on model
if values.get("max_tokens") is None:
if model == "gpt-5":
if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1
values["max_tokens"] = 16384
elif model == "gpt-4.1":
values["max_tokens"] = 8192
@@ -113,8 +113,8 @@ class LLMConfig(BaseModel):
# Set context_window defaults if not provided
if values.get("context_window") is None:
if model == "gpt-5":
values["context_window"] = 128000
if model.startswith("gpt-5"): # Covers both gpt-5 and gpt-5.1
values["context_window"] = 272000
elif model == "gpt-4.1":
values["context_window"] = 256000
elif model == "gpt-4o" or model == "gpt-4o-mini":
@@ -123,7 +123,7 @@ class LLMConfig(BaseModel):
values["context_window"] = 8192
# Set verbosity defaults for GPT-5 models
if model == "gpt-5" and values.get("verbosity") is None:
if model.startswith("gpt-5") and values.get("verbosity") is None:
values["verbosity"] = "medium"
return values
@@ -239,11 +239,22 @@ class LLMConfig(BaseModel):
model_endpoint_type="openai",
model_endpoint="https://api.openai.com/v1",
model_wrapper=None,
context_window=128000,
context_window=272000,
reasoning_effort="minimal",
verbosity="medium",
max_tokens=16384,
)
elif model_name == "gpt-5.1":
return cls(
model="gpt-5.1",
model_endpoint_type="openai",
model_endpoint="https://api.openai.com/v1",
model_wrapper=None,
context_window=272000, # Same as GPT-5
reasoning_effort="none", # Default to "none" for GPT-5.1
verbosity="medium",
max_tokens=16384,
)
elif model_name == "letta":
return cls(
model="memgpt-openai",
@@ -388,7 +399,7 @@ class LLMConfig(BaseModel):
- Google Gemini (2.5 family): force disabled until native reasoning supported
- All others: disabled (no simulated reasoning via kwargs)
"""
from letta.llm_api.openai_client import does_not_support_minimal_reasoning
from letta.llm_api.openai_client import does_not_support_minimal_reasoning, supports_none_reasoning_effort
# V1 agent policy: do not allow simulated reasoning for non-native models
if agent_type is not None and agent_type == AgentType.letta_v1_agent:
@@ -397,8 +408,11 @@ class LLMConfig(BaseModel):
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
if config.reasoning_effort is None:
# GPT-5.1 models default to "none" reasoning effort (only GPT-5.1 supports this level)
if supports_none_reasoning_effort(config.model):
config.reasoning_effort = "none" # Always default to "none" for GPT-5.1
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"
@@ -433,16 +447,22 @@ class LLMConfig(BaseModel):
if not reasoning:
if cls.is_openai_reasoning_model(config):
logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models")
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
if config.reasoning_effort is None:
# GPT-5 models default to minimal, others to medium
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"
# GPT-5.1 models can actually disable reasoning using "none" effort
if supports_none_reasoning_effort(config.model):
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
config.reasoning_effort = "none"
else:
logger.warning("Reasoning cannot be disabled for OpenAI o1/o3/gpt-5 models")
config.put_inner_thoughts_in_kwargs = False
config.enable_reasoner = True
if config.reasoning_effort is None:
# GPT-5 models default to minimal, others to medium
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"
# Set verbosity for GPT-5 models
if config.model.startswith("gpt-5") and config.verbosity is None:
config.verbosity = "medium"
@@ -471,9 +491,12 @@ class LLMConfig(BaseModel):
elif cls.is_openai_reasoning_model(config):
config.put_inner_thoughts_in_kwargs = False
if config.reasoning_effort is None:
# GPT-5.1 models default to "none" even when reasoning is enabled
if supports_none_reasoning_effort(config.model):
config.reasoning_effort = "none" # Default to "none" for GPT-5.1
# GPT-5 models default to minimal, others to medium
# Codex models cannot use "minimal" reasoning effort
if config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
elif config.model.startswith("gpt-5") and not does_not_support_minimal_reasoning(config.model):
config.reasoning_effort = "minimal"
else:
config.reasoning_effort = "medium"

View File

@@ -65,7 +65,7 @@ class Model(LLMConfig, ModelBase):
description="Deprecated: Whether or not the model should use extended thinking if it is a 'reasoning' style model.",
deprecated=True,
)
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = Field(
None, description="Deprecated: The reasoning effort to use when generating text reasoning models.", deprecated=True
)
max_reasoning_tokens: int = Field(0, description="Deprecated: Configurable thinking budget for extended thinking.", deprecated=True)
@@ -209,7 +209,7 @@ class ModelSettings(BaseModel):
class OpenAIReasoning(BaseModel):
reasoning_effort: Literal["minimal", "low", "medium", "high"] = Field(
reasoning_effort: Literal["none", "minimal", "low", "medium", "high"] = Field(
"minimal", description="The reasoning effort to use when generating text reasoning models"
)

View File

@@ -136,7 +136,9 @@ class ChatCompletionRequest(BaseModel):
parallel_tool_calls: Optional[bool] = None
instructions: Optional[str] = None
verbosity: Optional[Literal["low", "medium", "high"]] = None # For verbosity control in GPT-5 models
reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = None # For reasoning effort control in reasoning models
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]] = (
None # For reasoning effort control in reasoning models
)
# function-calling related
tools: Optional[List[Tool]] = None

View File

@@ -367,7 +367,7 @@ def test_reasoning_toggle_by_provider(
expected_enable_reasoner: bool,
expected_put_inner_thoughts_in_kwargs: bool,
expected_max_reasoning_tokens: int,
expected_reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]],
expected_reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high"]],
):
model_endpoint_type, model = handle.split("/")
config = LLMConfig(