From f20fdc73d1779d203de54f1e7507ef4602c708d5 Mon Sep 17 00:00:00 2001 From: Kian Jones <11655409+kianjones9@users.noreply.github.com> Date: Fri, 6 Feb 2026 17:23:06 -0800 Subject: [PATCH] fix(core): preserve Gemini thought_signature on function calls in non-streaming path (#9351) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(core): preserve Gemini thought_signature on function calls in non-streaming path The Google Gemini API requires thought_signature to be echoed back on function call parts in multi-turn conversations. In the non-streaming request path, the signature was only captured for subsequent function calls (else branch) but dropped for the first/only function call (if branch) in convert_response_to_chat_completion. This caused 400 INVALID_ARGUMENT errors on the next turn. Additionally, when no ReasoningContent existed to carry the signature (e.g. Gemini 2.5 Flash with include_thoughts=False), the signature was lost in the adapter layer. The signature is now carried on the TextContent built from the message content instead. Datadog: https://us5.datadoghq.com/error-tracking/issue/17c4b114-d596-11f0-bcd6-da7ad0900000 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * fix(core): preserve Gemini thought_signature in non-temporal agent path Carry reasoning_content_signature on TextContent in letta_agent.py at both locations where the message content is used as the reasoning fallback (same fix already applied to the adapter and temporal activity paths). 
Co-authored-by: Kian Jones 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta --------- Co-authored-by: Letta Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> --- letta/adapters/letta_llm_request_adapter.py | 8 +++++++- letta/adapters/simple_llm_request_adapter.py | 7 ++++++- letta/agents/letta_agent.py | 12 ++++++++++-- letta/llm_api/google_vertex_client.py | 3 +++ 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/letta/adapters/letta_llm_request_adapter.py b/letta/adapters/letta_llm_request_adapter.py index 17c3a77f..8ea95680 100644 --- a/letta/adapters/letta_llm_request_adapter.py +++ b/letta/adapters/letta_llm_request_adapter.py @@ -66,7 +66,13 @@ class LettaLLMRequestAdapter(LettaLLMAdapter): self.reasoning_content = [OmittedReasoningContent()] elif self.chat_completions_response.choices[0].message.content: # Reasoning placed into content for legacy reasons - self.reasoning_content = [TextContent(text=self.chat_completions_response.choices[0].message.content)] + # Carry thought_signature on TextContent when ReasoningContent doesn't exist to hold it + self.reasoning_content = [ + TextContent( + text=self.chat_completions_response.choices[0].message.content, + signature=self.chat_completions_response.choices[0].message.reasoning_content_signature, + ) + ] else: # logger.info("No reasoning content found.") self.reasoning_content = None diff --git a/letta/adapters/simple_llm_request_adapter.py b/letta/adapters/simple_llm_request_adapter.py index 7cf5b260..7cec9472 100644 --- a/letta/adapters/simple_llm_request_adapter.py +++ b/letta/adapters/simple_llm_request_adapter.py @@ -81,7 +81,12 @@ class SimpleLLMRequestAdapter(LettaLLMRequestAdapter): if self.chat_completions_response.choices[0].message.content: # NOTE: big difference - 'content' goes into 'content' # Reasoning placed into content for legacy reasons - self.content = [TextContent(text=self.chat_completions_response.choices[0].message.content)] + # 
Carry thought_signature on TextContent when ReasoningContent doesn't exist to hold it + # (e.g. Gemini 2.5 Flash with include_thoughts=False still returns thought_signature) + orphan_sig = ( + self.chat_completions_response.choices[0].message.reasoning_content_signature if not self.reasoning_content else None + ) + self.content = [TextContent(text=self.chat_completions_response.choices[0].message.content, signature=orphan_sig)] else: self.content = None diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py index 246f6d3f..98e56205 100644 --- a/letta/agents/letta_agent.py +++ b/letta/agents/letta_agent.py @@ -370,8 +370,12 @@ class LettaAgent(BaseAgent): elif response.choices[0].message.omitted_reasoning_content: reasoning = [OmittedReasoningContent()] elif response.choices[0].message.content: + # Carry thought_signature on TextContent when ReasoningContent doesn't exist to hold it reasoning = [ - TextContent(text=response.choices[0].message.content) + TextContent( + text=response.choices[0].message.content, + signature=response.choices[0].message.reasoning_content_signature, + ) ] # reasoning placed into content for legacy reasons else: self.logger.info("No reasoning content found.") @@ -703,8 +707,12 @@ class LettaAgent(BaseAgent): ) ] elif response.choices[0].message.content: + # Carry thought_signature on TextContent when ReasoningContent doesn't exist to hold it reasoning = [ - TextContent(text=response.choices[0].message.content) + TextContent( + text=response.choices[0].message.content, + signature=response.choices[0].message.reasoning_content_signature, + ) ] # reasoning placed into content for legacy reasons elif response.choices[0].message.omitted_reasoning_content: reasoning = [OmittedReasoningContent()] diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 56ed0ae1..ccbee0ca 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -593,6 +593,9 @@ class 
GoogleVertexClient(LLMClientBase): content=inner_thoughts, tool_calls=[tool_call], ) + if response_message.thought_signature: + thought_signature = base64.b64encode(response_message.thought_signature).decode("utf-8") + openai_response_message.reasoning_content_signature = thought_signature else: openai_response_message.content = inner_thoughts if openai_response_message.tool_calls is None: