From 82e5d70807917043b98dc8fd3c85cc0198f72a28 Mon Sep 17 00:00:00 2001
From: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Date: Wed, 17 Dec 2025 20:51:52 -0500
Subject: [PATCH] fix: prevent empty reasoning messages in streaming interfaces (#7207)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: prevent empty reasoning messages in streaming interfaces

Prevents empty "Thinking..." indicators from appearing in clients by filtering out reasoning messages with no content at the source.

Changes:
- Gemini: Don't emit ReasoningMessage when only thought_signature exists
- Gemini: Only emit reasoning content if text is non-empty
- Anthropic: Don't emit ReasoningMessage for BetaSignatureDelta
- Anthropic: Only emit reasoning content if thinking text is non-empty

This fixes the issue where providers send signature metadata before actual thinking content, causing empty reasoning blocks to appear in the UI after responses complete.

Affects: Gemini reasoning, Anthropic extended thinking

👾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta

* fix: handle Anthropic thinking signature correctly

- Only include 'signature' in Anthropic message payload if it is not None (fixes BadRequestError).
- Capture and attach 'signature' to ReasoningMessage in streaming interface.
* fix(anthropic): attach signature to last reasoning message in stream --------- Co-authored-by: Letta --- ..._parallel_tool_call_streaming_interface.py | 56 +++++++++---------- .../interfaces/gemini_streaming_interface.py | 48 +++++++--------- letta/schemas/message.py | 28 +++++----- 3 files changed, 61 insertions(+), 71 deletions(-) diff --git a/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py b/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py index 58d84517..ffe6ac63 100644 --- a/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +++ b/letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py @@ -88,6 +88,7 @@ class SimpleAnthropicStreamingInterface: self.tool_call_name = None self.accumulated_tool_call_args = "" self.previous_parse = {} + self.thinking_signature = None # usage trackers self.input_tokens = 0 @@ -426,20 +427,23 @@ class SimpleAnthropicStreamingInterface: f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}" ) - if prev_message_type and prev_message_type != "reasoning_message": - message_index += 1 - reasoning_message = ReasoningMessage( - id=self.letta_message_id, - source="reasoner_model", - reasoning=delta.thinking, - date=datetime.now(timezone.utc).isoformat(), - otid=Message.generate_otid_from_id(self.letta_message_id, message_index), - run_id=self.run_id, - step_id=self.step_id, - ) - self.reasoning_messages.append(reasoning_message) - prev_message_type = reasoning_message.message_type - yield reasoning_message + # Only emit reasoning message if we have actual content + if delta.thinking and delta.thinking.strip(): + if prev_message_type and prev_message_type != "reasoning_message": + message_index += 1 + reasoning_message = ReasoningMessage( + id=self.letta_message_id, + source="reasoner_model", + reasoning=delta.thinking, + signature=self.thinking_signature, + date=datetime.now(timezone.utc).isoformat(), + 
otid=Message.generate_otid_from_id(self.letta_message_id, message_index), + run_id=self.run_id, + step_id=self.step_id, + ) + self.reasoning_messages.append(reasoning_message) + prev_message_type = reasoning_message.message_type + yield reasoning_message elif isinstance(delta, BetaSignatureDelta): # Safety check @@ -448,21 +452,15 @@ class SimpleAnthropicStreamingInterface: f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}" ) - if prev_message_type and prev_message_type != "reasoning_message": - message_index += 1 - reasoning_message = ReasoningMessage( - id=self.letta_message_id, - source="reasoner_model", - reasoning="", - date=datetime.now(timezone.utc).isoformat(), - signature=delta.signature, - otid=Message.generate_otid_from_id(self.letta_message_id, message_index), - run_id=self.run_id, - step_id=self.step_id, - ) - self.reasoning_messages.append(reasoning_message) - prev_message_type = reasoning_message.message_type - yield reasoning_message + # Store signature but don't emit empty reasoning message + # Signature will be attached when actual thinking content arrives + self.thinking_signature = delta.signature + + # Update the last reasoning message with the signature so it gets persisted + if self.reasoning_messages: + last_msg = self.reasoning_messages[-1] + if isinstance(last_msg, ReasoningMessage): + last_msg.signature = delta.signature elif isinstance(event, BetaRawMessageStartEvent): self.message_id = event.message.id diff --git a/letta/interfaces/gemini_streaming_interface.py b/letta/interfaces/gemini_streaming_interface.py index 15b84c50..91fbb502 100644 --- a/letta/interfaces/gemini_streaming_interface.py +++ b/letta/interfaces/gemini_streaming_interface.py @@ -224,40 +224,32 @@ class SimpleGeminiStreamingInterface: # NOTE: the thought_signature comes on the Part with the function_call thought_signature = part.thought_signature self.thinking_signature = 
base64.b64encode(thought_signature).decode("utf-8") - if prev_message_type and prev_message_type != "reasoning_message": - message_index += 1 - yield ReasoningMessage( - id=self.letta_message_id, - date=datetime.now(timezone.utc).isoformat(), - otid=Message.generate_otid_from_id(self.letta_message_id, message_index), - source="reasoner_model", - reasoning="", - signature=self.thinking_signature, - ) - prev_message_type = "reasoning_message" + # Don't emit empty reasoning message - signature will be attached to actual reasoning content # Thinking summary content part (bool means text is thought part) if part.thought: reasoning_summary = part.text - if prev_message_type and prev_message_type != "reasoning_message": - message_index += 1 - yield ReasoningMessage( - id=self.letta_message_id, - date=datetime.now(timezone.utc).isoformat(), - otid=Message.generate_otid_from_id(self.letta_message_id, message_index), - source="reasoner_model", - reasoning=reasoning_summary, - run_id=self.run_id, - step_id=self.step_id, - ) - prev_message_type = "reasoning_message" - self.content_parts.append( - ReasoningContent( - is_native=True, + # Only emit reasoning message if we have actual content + if reasoning_summary and reasoning_summary.strip(): + if prev_message_type and prev_message_type != "reasoning_message": + message_index += 1 + yield ReasoningMessage( + id=self.letta_message_id, + date=datetime.now(timezone.utc).isoformat(), + otid=Message.generate_otid_from_id(self.letta_message_id, message_index), + source="reasoner_model", reasoning=reasoning_summary, - signature=self.thinking_signature, + run_id=self.run_id, + step_id=self.step_id, + ) + prev_message_type = "reasoning_message" + self.content_parts.append( + ReasoningContent( + is_native=True, + reasoning=reasoning_summary, + signature=self.thinking_signature, + ) ) - ) # Plain text content part elif part.text: diff --git a/letta/schemas/message.py b/letta/schemas/message.py index 904b64a0..d09360dc 100644 --- 
a/letta/schemas/message.py +++ b/letta/schemas/message.py @@ -1639,13 +1639,13 @@ class Message(BaseMessage): # TextContent, ImageContent, ToolCallContent, ToolReturnContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent if isinstance(content_part, ReasoningContent): if current_model == self.model: - content.append( - { - "type": "thinking", - "thinking": content_part.reasoning, - "signature": content_part.signature, - } - ) + block = { + "type": "thinking", + "thinking": content_part.reasoning, + } + if content_part.signature: + block["signature"] = content_part.signature + content.append(block) elif isinstance(content_part, RedactedReasoningContent): if current_model == self.model: content.append( @@ -1671,13 +1671,13 @@ class Message(BaseMessage): for content_part in self.content: if isinstance(content_part, ReasoningContent): if current_model == self.model: - content.append( - { - "type": "thinking", - "thinking": content_part.reasoning, - "signature": content_part.signature, - } - ) + block = { + "type": "thinking", + "thinking": content_part.reasoning, + } + if content_part.signature: + block["signature"] = content_part.signature + content.append(block) if isinstance(content_part, RedactedReasoningContent): if current_model == self.model: content.append(