diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py
index 7e1499d9..6179d544 100644
--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -135,7 +135,7 @@ def build_openai_chat_completions_request(
             tool_choice=tool_choice,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
         )
     else:
         data = ChatCompletionRequest(
@@ -145,7 +145,7 @@ def build_openai_chat_completions_request(
             function_call=function_call,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature,
+            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
         )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
     # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
@@ -168,7 +168,6 @@ def build_openai_chat_completions_request(
                     tool.function = FunctionSchema(**structured_output_version)
                 except ValueError as e:
                     warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
-
     return data
 
 
@@ -488,4 +487,6 @@ def prepare_openai_payload(chat_completion_request: ChatCompletionRequest):
     #         except ValueError as e:
     #             warnings.warn(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
 
+    if "o3-mini" in chat_completion_request.model or "o1" in chat_completion_request.model:
+        data.pop("parallel_tool_calls", None)
     return data
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index e5c24540..d4493dee 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -74,6 +74,13 @@ class LLMConfig(BaseModel):
     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())
 
+    @model_validator(mode="before")
+    @classmethod
+    def set_default_enable_reasoner(cls, values):
+        if any(openai_reasoner_model in values.get("model", "") for openai_reasoner_model in ["o3-mini", "o1"]):
+            values["enable_reasoner"] = True
+        return values
+
     @model_validator(mode="before")
     @classmethod
     def set_default_put_inner_thoughts(cls, values):
@@ -100,6 +107,9 @@ class LLMConfig(BaseModel):
                 logger.warning("max_tokens must be greater than max_reasoning_tokens (thinking budget)")
             if self.put_inner_thoughts_in_kwargs:
                 logger.warning("Extended thinking is not compatible with put_inner_thoughts_in_kwargs")
+        elif self.max_reasoning_tokens and not self.enable_reasoner:
+            logger.warning("model will not use reasoning unless enable_reasoner is set to True")
+
         return self
 
     @classmethod
diff --git a/letta/schemas/openai/chat_completion_request.py b/letta/schemas/openai/chat_completion_request.py
index 4592ec1c..92dcb2fa 100644
--- a/letta/schemas/openai/chat_completion_request.py
+++ b/letta/schemas/openai/chat_completion_request.py
@@ -133,6 +133,7 @@ class ChatCompletionRequest(BaseModel):
     temperature: Optional[float] = 1
     top_p: Optional[float] = 1
     user: Optional[str] = None  # unique ID of the end-user (for monitoring)
+    parallel_tool_calls: Optional[bool] = False
 
     # function-calling related
     tools: Optional[List[Tool]] = None