From 08da1a64bb554f2578a6ab77147f3fab04c85b7f Mon Sep 17 00:00:00 2001
From: Kevin Lin <klin5061@gmail.com>
Date: Mon, 13 Oct 2025 15:14:40 -0700
Subject: [PATCH] feat: parse `reasoning_content` from OAI proxies (e.g. vLLM /
 OpenRouter) (#5372)

* reasoning_content support

* fix

* comment

* fix

* rm comment

---------

Co-authored-by: Charles Packer <packercharles@gmail.com>
---
 .../interfaces/openai_streaming_interface.py  | 42 +++++++++++++++++--
 letta/llm_api/openai_client.py                | 18 ++++++++
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py
index 5d6ad42d..0cd7a244 100644
--- a/letta/interfaces/openai_streaming_interface.py
+++ b/letta/interfaces/openai_streaming_interface.py
@@ -43,6 +43,7 @@ from letta.schemas.letta_message import (
 )
 from letta.schemas.letta_message_content import (
     OmittedReasoningContent,
+    ReasoningContent,
     SummarizedReasoningContent,
     SummarizedReasoningContentPart,
     TextContent,
@@ -532,20 +533,31 @@ class SimpleOpenAIStreamingInterface:
 
         self.requires_approval_tools = requires_approval_tools
 
-    def get_content(self) -> list[TextContent | OmittedReasoningContent]:
+    def get_content(self) -> list[TextContent | OmittedReasoningContent | ReasoningContent]:
         shown_omitted = False
         concat_content = ""
         merged_messages = []
+        reasoning_content = []
+
         for msg in self.content_messages:
             if isinstance(msg, HiddenReasoningMessage) and not shown_omitted:
                 merged_messages.append(OmittedReasoningContent())
                 shown_omitted = True
+            elif isinstance(msg, ReasoningMessage):
+                reasoning_content.append(msg.reasoning)
             elif isinstance(msg, AssistantMessage):
                 if isinstance(msg.content, list):
                     concat_content += "".join([c.text for c in msg.content])
                 else:
                     concat_content += msg.content
-        merged_messages.append(TextContent(text=concat_content))
+
+        if reasoning_content:
+            combined_reasoning = "".join(reasoning_content)
+            merged_messages.append(ReasoningContent(is_native=True, reasoning=combined_reasoning, signature=None))
+
+        if concat_content:
+            merged_messages.append(TextContent(text=concat_content))
+
         return merged_messages
 
     def get_tool_call_object(self) -> ToolCall:
@@ -674,9 +686,33 @@ class SimpleOpenAIStreamingInterface:
                 )
                 self.content_messages.append(assistant_msg)
                 prev_message_type = assistant_msg.message_type
-                message_index += 1  # Increment for the next message
+                message_index += 1
                 yield assistant_msg
 
+            if (
+                hasattr(chunk, "choices")
+                and len(chunk.choices) > 0
+                and hasattr(chunk.choices[0], "delta")
+                and hasattr(chunk.choices[0].delta, "reasoning_content")
+            ):
+                delta = chunk.choices[0].delta
+                reasoning_content = getattr(delta, "reasoning_content", None)
+                if reasoning_content is not None and reasoning_content != "":
+                    reasoning_msg = ReasoningMessage(
+                        id=self.letta_message_id,
+                        date=datetime.now(timezone.utc).isoformat(),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        source="reasoner_model",
+                        reasoning=reasoning_content,
+                        signature=None,
+                        run_id=self.run_id,
+                        step_id=self.step_id,
+                    )
+                    self.content_messages.append(reasoning_msg)
+                    prev_message_type = reasoning_msg.message_type
+                    message_index += 1
+                    yield reasoning_msg
+
             if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
                 tool_call = message_delta.tool_calls[0]
 
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index b09c96e9..0e8b8e39 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -649,6 +649,24 @@ class OpenAIClient(LLMClientBase):
         # We just need to instantiate the Pydantic model for validation and type safety.
         chat_completion_response = ChatCompletionResponse(**response_data)
         chat_completion_response = self._fix_truncated_json_response(chat_completion_response)
+
+        # Some OpenAI-compatible backends (e.g. vLLM, OpenRouter, other proxies) return `reasoning_content` on the message.
+        # If the parsed model has no reasoning_content on the first choice, backfill it from the raw response_data.
+        if (
+            chat_completion_response.choices
+            and len(chat_completion_response.choices) > 0
+            and chat_completion_response.choices[0].message
+            and not chat_completion_response.choices[0].message.reasoning_content
+        ):
+            if "choices" in response_data and len(response_data["choices"]) > 0:
+                choice_data = response_data["choices"][0]
+                if "message" in choice_data and "reasoning_content" in choice_data["message"]:
+                    reasoning_content = choice_data["message"]["reasoning_content"]
+                    if reasoning_content:
+                        chat_completion_response.choices[0].message.reasoning_content = reasoning_content
+
+                        chat_completion_response.choices[0].message.reasoning_content_signature = None
+
         # Unpack inner thoughts if they were embedded in function arguments
         if llm_config.put_inner_thoughts_in_kwargs:
             chat_completion_response = unpack_all_inner_thoughts_from_kwargs(