From 08da1a64bb554f2578a6ab77147f3fab04c85b7f Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Mon, 13 Oct 2025 15:14:40 -0700 Subject: [PATCH] feat: parse `reasoning_content` from OAI proxies (eg. vLLM / OpenRouter) (#5372) * reasonig_content support * fix * comment * fix * rm comment --------- Co-authored-by: Charles Packer --- .../interfaces/openai_streaming_interface.py | 42 +++++++++++++++++-- letta/llm_api/openai_client.py | 18 ++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py index 5d6ad42d..0cd7a244 100644 --- a/letta/interfaces/openai_streaming_interface.py +++ b/letta/interfaces/openai_streaming_interface.py @@ -43,6 +43,7 @@ from letta.schemas.letta_message import ( ) from letta.schemas.letta_message_content import ( OmittedReasoningContent, + ReasoningContent, SummarizedReasoningContent, SummarizedReasoningContentPart, TextContent, @@ -532,20 +533,31 @@ class SimpleOpenAIStreamingInterface: self.requires_approval_tools = requires_approval_tools - def get_content(self) -> list[TextContent | OmittedReasoningContent]: + def get_content(self) -> list[TextContent | OmittedReasoningContent | ReasoningContent]: shown_omitted = False concat_content = "" merged_messages = [] + reasoning_content = [] + for msg in self.content_messages: if isinstance(msg, HiddenReasoningMessage) and not shown_omitted: merged_messages.append(OmittedReasoningContent()) shown_omitted = True + elif isinstance(msg, ReasoningMessage): + reasoning_content.append(msg.reasoning) elif isinstance(msg, AssistantMessage): if isinstance(msg.content, list): concat_content += "".join([c.text for c in msg.content]) else: concat_content += msg.content - merged_messages.append(TextContent(text=concat_content)) + + if reasoning_content: + combined_reasoning = "".join(reasoning_content) + merged_messages.append(ReasoningContent(is_native=True, reasoning=combined_reasoning, signature=None)) + + if concat_content: + merged_messages.append(TextContent(text=concat_content)) + return merged_messages def get_tool_call_object(self) -> ToolCall: @@ -674,9 +686,33 @@ class SimpleOpenAIStreamingInterface: ) self.content_messages.append(assistant_msg) prev_message_type = assistant_msg.message_type - message_index += 1 # Increment for the next message + message_index += 1 yield assistant_msg + if ( + hasattr(chunk, "choices") + and len(chunk.choices) > 0 + and hasattr(chunk.choices[0], "delta") + and hasattr(chunk.choices[0].delta, "reasoning_content") + ): + delta = chunk.choices[0].delta + reasoning_content = getattr(delta, "reasoning_content", None) + if reasoning_content is not None and reasoning_content != "": + reasoning_msg = ReasoningMessage( + id=self.letta_message_id, + date=datetime.now(timezone.utc).isoformat(), + otid=Message.generate_otid_from_id(self.letta_message_id, message_index), + source="reasoner_model", + reasoning=reasoning_content, + signature=None, + run_id=self.run_id, + step_id=self.step_id, + ) + self.content_messages.append(reasoning_msg) + prev_message_type = reasoning_msg.message_type + message_index += 1 + yield reasoning_msg + if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0: tool_call = message_delta.tool_calls[0] diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py index b09c96e9..0e8b8e39 100644 --- a/letta/llm_api/openai_client.py +++ b/letta/llm_api/openai_client.py @@ -649,6 +649,24 @@ class OpenAIClient(LLMClientBase): # We just need to instantiate the Pydantic model for validation and type safety. chat_completion_response = ChatCompletionResponse(**response_data) chat_completion_response = self._fix_truncated_json_response(chat_completion_response) + + # Parse reasoning_content from vLLM/OpenRouter/OpenAI proxies that return this field + # This handles cases where the proxy returns .reasoning_content in the response + if ( + chat_completion_response.choices + and len(chat_completion_response.choices) > 0 + and chat_completion_response.choices[0].message + and not chat_completion_response.choices[0].message.reasoning_content + ): + if "choices" in response_data and len(response_data["choices"]) > 0: + choice_data = response_data["choices"][0] + if "message" in choice_data and "reasoning_content" in choice_data["message"]: + reasoning_content = choice_data["message"]["reasoning_content"] + if reasoning_content: + chat_completion_response.choices[0].message.reasoning_content = reasoning_content + + chat_completion_response.choices[0].message.reasoning_content_signature = None + # Unpack inner thoughts if they were embedded in function arguments if llm_config.put_inner_thoughts_in_kwargs: chat_completion_response = unpack_all_inner_thoughts_from_kwargs(