From 2199d8fdda42926c4173291bc20a1807b02b0bda Mon Sep 17 00:00:00 2001
From: Shangyin Tan
Date: Sat, 24 May 2025 21:34:18 -0700
Subject: [PATCH] fix: do not pass temperature to request if model is oai
 reasoning model (#2189)

Co-authored-by: Charles Packer
---
 letta/interfaces/openai_streaming_interface.py | 3 ++-
 letta/llm_api/openai.py                        | 4 ++--
 letta/llm_api/openai_client.py                 | 6 +++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py
index eea1b3b2..77ec570d 100644
--- a/letta/interfaces/openai_streaming_interface.py
+++ b/letta/interfaces/openai_streaming_interface.py
@@ -58,9 +58,10 @@ class OpenAIStreamingInterface:
 
     def get_tool_call_object(self) -> ToolCall:
         """Useful for agent loop"""
+        function_name = self.last_flushed_function_name if self.last_flushed_function_name else self.function_name_buffer
         return ToolCall(
             id=self.letta_tool_message_id,
-            function=FunctionCall(arguments=self.current_function_arguments, name=self.last_flushed_function_name),
+            function=FunctionCall(arguments=self.current_function_arguments, name=function_name),
         )
 
     async def process(self, stream: AsyncStream[ChatCompletionChunk]) -> AsyncGenerator[LettaMessage, None]:
diff --git a/letta/llm_api/openai.py b/letta/llm_api/openai.py
index 045ab1f4..fe3b77cc 100644
--- a/letta/llm_api/openai.py
+++ b/letta/llm_api/openai.py
@@ -226,7 +226,7 @@ def build_openai_chat_completions_request(
             tool_choice=tool_choice,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature if supports_temperature_param(model) else None,
+            temperature=llm_config.temperature if supports_temperature_param(model) else 1.0,
             reasoning_effort=llm_config.reasoning_effort,
         )
     else:
@@ -237,7 +237,7 @@ def build_openai_chat_completions_request(
             function_call=function_call,
             user=str(user_id),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=1.0 if llm_config.enable_reasoner else llm_config.temperature,
+            temperature=llm_config.temperature if supports_temperature_param(model) else 1.0,
             reasoning_effort=llm_config.reasoning_effort,
         )
     # https://platform.openai.com/docs/guides/text-generation/json-mode
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index d144d03c..3872d851 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -41,7 +41,7 @@
 def is_openai_reasoning_model(model: str) -> bool:
     """Utility function to check if the model is a 'reasoner'"""
 
     # NOTE: needs to be updated with new model releases
-    is_reasoning = model.startswith("o1") or model.startswith("o3")
+    is_reasoning = model.startswith("o1") or model.startswith("o3") or model.startswith("o4")
     return is_reasoning
 
 
@@ -187,9 +187,9 @@ class OpenAIClient(LLMClientBase):
             tool_choice=tool_choice,
             user=str(),
             max_completion_tokens=llm_config.max_tokens,
-            temperature=llm_config.temperature if supports_temperature_param(model) else None,
+            # NOTE: the reasoners that don't support temperature require 1.0, not None
+            temperature=llm_config.temperature if supports_temperature_param(model) else 1.0,
         )
-
         # always set user id for openai requests
         if self.actor:
             data.user = self.actor.id