From 82e5d70807917043b98dc8fd3c85cc0198f72a28 Mon Sep 17 00:00:00 2001
From: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Date: Wed, 17 Dec 2025 20:51:52 -0500
Subject: [PATCH] fix: prevent empty reasoning messages in streaming interfaces
 (#7207)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: prevent empty reasoning messages in streaming interfaces

Prevents empty "Thinking..." indicators from appearing in clients by
filtering out reasoning messages with no content at the source.

Changes:
- Gemini: Don't emit ReasoningMessage when only thought_signature exists
- Gemini: Only emit reasoning content if text is non-empty
- Anthropic: Don't emit ReasoningMessage for BetaSignatureDelta
- Anthropic: Only emit reasoning content if thinking text is non-empty

This fixes the issue where providers send signature metadata separately
from the actual thinking content, causing empty reasoning blocks to appear
in the UI after responses complete.

Affects: Gemini reasoning, Anthropic extended thinking

👾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: handle Anthropic thinking signature correctly

- Only include 'signature' in the Anthropic message payload when it is set, i.e. not None or empty (fixes BadRequestError).
- Capture and attach 'signature' to ReasoningMessage in streaming interface.

* fix(anthropic): attach signature to last reasoning message in stream

---------

Co-authored-by: Letta <noreply@letta.com>
---
 ..._parallel_tool_call_streaming_interface.py | 56 +++++++++----------
 .../interfaces/gemini_streaming_interface.py  | 48 +++++++---------
 letta/schemas/message.py                      | 28 +++++-----
 3 files changed, 61 insertions(+), 71 deletions(-)

diff --git a/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py b/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py
index 58d84517..ffe6ac63 100644
--- a/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py
+++ b/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py
@@ -88,6 +88,7 @@ class SimpleAnthropicStreamingInterface:
         self.tool_call_name = None
         self.accumulated_tool_call_args = ""
         self.previous_parse = {}
+        self.thinking_signature = None
 
         # usage trackers
         self.input_tokens = 0
@@ -426,20 +427,23 @@ class SimpleAnthropicStreamingInterface:
                         f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
                     )
 
-                if prev_message_type and prev_message_type != "reasoning_message":
-                    message_index += 1
-                reasoning_message = ReasoningMessage(
-                    id=self.letta_message_id,
-                    source="reasoner_model",
-                    reasoning=delta.thinking,
-                    date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                    run_id=self.run_id,
-                    step_id=self.step_id,
-                )
-                self.reasoning_messages.append(reasoning_message)
-                prev_message_type = reasoning_message.message_type
-                yield reasoning_message
+                # Only emit reasoning message if we have actual content
+                if delta.thinking and delta.thinking.strip():
+                    if prev_message_type and prev_message_type != "reasoning_message":
+                        message_index += 1
+                    reasoning_message = ReasoningMessage(
+                        id=self.letta_message_id,
+                        source="reasoner_model",
+                        reasoning=delta.thinking,
+                        signature=self.thinking_signature,
+                        date=datetime.now(timezone.utc).isoformat(),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        run_id=self.run_id,
+                        step_id=self.step_id,
+                    )
+                    self.reasoning_messages.append(reasoning_message)
+                    prev_message_type = reasoning_message.message_type
+                    yield reasoning_message
 
             elif isinstance(delta, BetaSignatureDelta):
                 # Safety check
@@ -448,21 +452,15 @@ class SimpleAnthropicStreamingInterface:
                         f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
                     )
 
-                if prev_message_type and prev_message_type != "reasoning_message":
-                    message_index += 1
-                reasoning_message = ReasoningMessage(
-                    id=self.letta_message_id,
-                    source="reasoner_model",
-                    reasoning="",
-                    date=datetime.now(timezone.utc).isoformat(),
-                    signature=delta.signature,
-                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                    run_id=self.run_id,
-                    step_id=self.step_id,
-                )
-                self.reasoning_messages.append(reasoning_message)
-                prev_message_type = reasoning_message.message_type
-                yield reasoning_message
+                # Store signature but don't emit empty reasoning message
+                # The stored value is passed to any ReasoningMessage built from later thinking deltas
+                self.thinking_signature = delta.signature
+
+                # Update the last reasoning message with the signature so it gets persisted
+                if self.reasoning_messages:
+                    last_msg = self.reasoning_messages[-1]
+                    if isinstance(last_msg, ReasoningMessage):
+                        last_msg.signature = delta.signature
 
         elif isinstance(event, BetaRawMessageStartEvent):
             self.message_id = event.message.id
diff --git a/letta/interfaces/gemini_streaming_interface.py b/letta/interfaces/gemini_streaming_interface.py
index 15b84c50..91fbb502 100644
--- a/letta/interfaces/gemini_streaming_interface.py
+++ b/letta/interfaces/gemini_streaming_interface.py
@@ -224,40 +224,32 @@ class SimpleGeminiStreamingInterface:
                 # NOTE: the thought_signature comes on the Part with the function_call
                 thought_signature = part.thought_signature
                 self.thinking_signature = base64.b64encode(thought_signature).decode("utf-8")
-                if prev_message_type and prev_message_type != "reasoning_message":
-                    message_index += 1
-                yield ReasoningMessage(
-                    id=self.letta_message_id,
-                    date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                    source="reasoner_model",
-                    reasoning="",
-                    signature=self.thinking_signature,
-                )
-                prev_message_type = "reasoning_message"
+                # Don't emit empty reasoning message - signature will be attached to actual reasoning content
 
             # Thinking summary content part (bool means text is thought part)
             if part.thought:
                 reasoning_summary = part.text
-                if prev_message_type and prev_message_type != "reasoning_message":
-                    message_index += 1
-                yield ReasoningMessage(
-                    id=self.letta_message_id,
-                    date=datetime.now(timezone.utc).isoformat(),
-                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
-                    source="reasoner_model",
-                    reasoning=reasoning_summary,
-                    run_id=self.run_id,
-                    step_id=self.step_id,
-                )
-                prev_message_type = "reasoning_message"
-                self.content_parts.append(
-                    ReasoningContent(
-                        is_native=True,
+                # Only emit reasoning message if we have actual content
+                if reasoning_summary and reasoning_summary.strip():
+                    if prev_message_type and prev_message_type != "reasoning_message":
+                        message_index += 1
+                    yield ReasoningMessage(
+                        id=self.letta_message_id,
+                        date=datetime.now(timezone.utc).isoformat(),
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                        source="reasoner_model",
                         reasoning=reasoning_summary,
-                        signature=self.thinking_signature,
+                        run_id=self.run_id,
+                        step_id=self.step_id,
+                    )
+                    prev_message_type = "reasoning_message"
+                    self.content_parts.append(
+                        ReasoningContent(
+                            is_native=True,
+                            reasoning=reasoning_summary,
+                            signature=self.thinking_signature,
+                        )
                     )
-                )
 
             # Plain text content part
             elif part.text:
diff --git a/letta/schemas/message.py b/letta/schemas/message.py
index 904b64a0..d09360dc 100644
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -1639,13 +1639,13 @@ class Message(BaseMessage):
                         # TextContent, ImageContent, ToolCallContent, ToolReturnContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent
                         if isinstance(content_part, ReasoningContent):
                             if current_model == self.model:
-                                content.append(
-                                    {
-                                        "type": "thinking",
-                                        "thinking": content_part.reasoning,
-                                        "signature": content_part.signature,
-                                    }
-                                )
+                                block = {
+                                    "type": "thinking",
+                                    "thinking": content_part.reasoning,
+                                }
+                                if content_part.signature:
+                                    block["signature"] = content_part.signature
+                                content.append(block)
                         elif isinstance(content_part, RedactedReasoningContent):
                             if current_model == self.model:
                                 content.append(
@@ -1671,13 +1671,13 @@ class Message(BaseMessage):
                     for content_part in self.content:
                         if isinstance(content_part, ReasoningContent):
                             if current_model == self.model:
-                                content.append(
-                                    {
-                                        "type": "thinking",
-                                        "thinking": content_part.reasoning,
-                                        "signature": content_part.signature,
-                                    }
-                                )
+                                block = {
+                                    "type": "thinking",
+                                    "thinking": content_part.reasoning,
+                                }
+                                if content_part.signature:
+                                    block["signature"] = content_part.signature
+                                content.append(block)
                         if isinstance(content_part, RedactedReasoningContent):
                             if current_model == self.model:
                                 content.append(