fix: patch o1 and o3-mini (#1657)

2025-04-10 13:50:38 -07:00
parent 63a6c6b499
commit 1453ba47c3
3 changed files with 15 additions and 3 deletions
--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -135,7 +135,7 @@ def build_openai_chat_completions_request(
            tool_choice=tool_choice,
            user=str(user_id),
            max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
        )
    else:
        data = ChatCompletionRequest(
@@ -145,7 +145,7 @@ def build_openai_chat_completions_request(
            function_call=function_call,
            user=str(user_id),
            max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
        )
        # https://platform.openai.com/docs/guides/text-generation/json-mode
        # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
@@ -168,7 +168,6 @@ def build_openai_chat_completions_request(
                tool.function = FunctionSchema(**structured_output_version)
            except ValueError as e:
                warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
-
    return data


@@ -488,4 +487,6 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
    #         except ValueError as e:
    #             warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")

+    if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
+        data.pop("parallel_tool_calls", None)
    return data
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -74,6 +74,13 @@ class LLMConfig(BaseModel):
    # FIXME hack to silence pydantic protected namespace warning
    model_config = ConfigDict(protected_namespaces=())

+    @model_validator(mode="before")
+    @classmethod
+    def set_default_enable_reasoner(cls, values):  # force enable_reasoner=True when the model name contains "o3-mini" or "o1"
+        if isinstance(values, dict) and any(name in (values.get("model") or "") for name in ("o3-mini", "o1")):
+            values["enable_reasoner"] = True  # overrides whatever value the caller supplied for these models
+        return values  # non-dict input and a missing/None model pass through untouched for normal field validation
+
    @model_validator(mode="before")
    @classmethod
    def set_default_put_inner_thoughts(cls, values):
@@ -100,6 +107,9 @@ class LLMConfig(BaseModel):
                logger.warning("max_tokens must be greater than max_reasoning_tokens (thinking budget)")
            if self.put_inner_thoughts_in_kwargs:
                logger.warning("Extended thinking is not compatible with put_inner_thoughts_in_kwargs")
+        elif self.max_reasoning_tokens and not self.enable_reasoner:
+            logger.warning("model will not use reasoning unless enable_reasoner is set to True")
+
        return self

    @classmethod
--- a/letta/schemas/openai/chat_completion_request.py
+++ b/letta/schemas/openai/chat_completion_request.py
@@ -133,6 +133,7 @@ class ChatCompletionRequest(BaseModel):
    temperature: Optional[float] = 1
    top_p: Optional[float] = 1
    user: Optional[str] = None  # unique ID of the end-user (for monitoring)
+    parallel_tool_calls: Optional[bool] = False

    # function-calling related
    tools: Optional[List[Tool]] = None