fix: patch bug w/ extended thinking mode involving text leaking into reasoning (#4341)

* fix: patch for bad native reasoning behavior w/ sonnet * fix: cleanup * fix: cleanup * fix: another prompt tune for less flaking
2025-09-01 20:26:24 -07:00
parent f3df0433ae
commit e741f84add
2 changed files with 8 additions and 0 deletions
--- a/letta/interfaces/anthropic_streaming_interface.py
+++ b/letta/interfaces/anthropic_streaming_interface.py
@@ -289,6 +289,13 @@ class AnthropicStreamingInterface:
                if not self.anthropic_mode == EventMode.TEXT:
                    raise RuntimeError(f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}")

+                # With native thinking, a single response can unexpectedly contain all of:
+                # [reasoning, text, tool_call]
+                # In that case, drop the text delta (ignore it) instead of processing it further.
+                # TODO: rework this to support sending messages without a tool call
+                if not self.put_inner_thoughts_in_kwarg:
+                    return
+
                # Combine buffer with current text to handle tags split across chunks
                combined_text = self.partial_tag_buffer + delta.text

--- a/tests/integration_test_send_message.py
+++ b/tests/integration_test_send_message.py
@@ -136,6 +136,7 @@ USER_MESSAGE_ROLL_DICE_LONG_THINKING: List[MessageCreate] = [
            "Explain the concept of randomness and how true random number generation works. "
            "End with some interesting facts about polyhedral dice and their history in gaming. "
            "Remember, make your response detailed and at least 800 characters long."
+            "Absolutely do NOT violate this order of operations: (1) Think / reason, (2) Roll die, (3) Think / reason, (4) Call send_message tool."
        ),
        otid=USER_MESSAGE_OTID,
    )