From aa7093c585750bec81bc7ebc84d3172e31d3161b Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Mon, 3 Nov 2025 18:19:41 -0800
Subject: [PATCH] fix: patch hole in the fallback summarizer where we weren't
 actually truncating (#5919)

* fix: patch hole in the fallback summarizer where we weren't actually truncating

* fix: remove no-op

* chore: comment

* fix: simplify the new fallback

* fix: properly handle images in summarizer payload
---
 letta/schemas/message.py                |   9 +-
 letta/services/summarizer/summarizer.py | 125 +++++++++++++++++++++---
 2 files changed, 121 insertions(+), 13 deletions(-)

diff --git a/letta/schemas/message.py b/letta/schemas/message.py
index 57c18d66..a3461d6f 100644
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -1117,9 +1117,14 @@ class Message(BaseMessage):
             text_content = "[Image Here]"
         # Otherwise, check if we have TextContent and multiple other parts
         elif self.content and len(self.content) > 1:
-            text = [content for content in self.content if isinstance(content, TextContent)]
+            text_parts = [content for content in self.content if isinstance(content, TextContent)]
             # assert len(text) == 1, f"multiple text content parts found in a single message: {self.content}"
-            text_content = "\n\n".join([t.text for t in text])
+            text_content = "\n\n".join([t.text for t in text_parts])
+            # Summarizer transcripts use this OpenAI-style dict; include a compact image placeholder
+            image_count = len([c for c in self.content if isinstance(c, ImageContent)])
+            if image_count > 0:
+                placeholder = "[Image omitted]" if image_count == 1 else f"[{image_count} images omitted]"
+                text_content = (text_content + (" " if text_content else "")) + placeholder
             parse_content_parts = True
         else:
             text_content = None
diff --git a/letta/services/summarizer/summarizer.py b/letta/services/summarizer/summarizer.py
index 25a18dd4..cd8e7235 100644
--- a/letta/services/summarizer/summarizer.py
+++ b/letta/services/summarizer/summarizer.py
@@ -300,15 +300,60 @@ class Summarizer:
         return [all_in_context_messages[0]] + updated_in_context_messages, True
 
 
-def simple_formatter(messages: List[Message], include_system: bool = False) -> str:
-    """Go from an OpenAI-style list of messages to a concatenated string"""
+def simple_formatter(
+    messages: List[Message],
+    include_system: bool = False,
+    tool_return_truncation_chars: int | None = None,
+) -> str:
+    """Serialize messages to OpenAI-style dicts and join them as one JSON object per line.
+
+    System messages are skipped unless `include_system`; `tool_return_truncation_chars` is forwarded to the serializer.
+    """
 
     parsed_messages = Message.to_openai_dicts_from_list(
-        [message for message in messages if message.role != MessageRole.system or include_system]
+        [message for message in messages if message.role != MessageRole.system or include_system],
+        tool_return_truncation_chars=tool_return_truncation_chars,
     )
     return "\n".join(json.dumps(msg) for msg in parsed_messages)
 
 
+def middle_truncate_text(
+    text: str,
+    budget_chars: int,
+    head_frac: float = 0.3,
+    tail_frac: float = 0.3,
+) -> tuple[str, int]:
+    """Middle-truncate `text` so the result, marker included, fits `budget_chars`.
+
+    Keeps roughly `head_frac` / `tail_frac` of the budget (fractions are relative
+    to the budget, not to the text length) from the start and end of `text` and
+    replaces the middle with a marker line stating how many chars were cut.
+
+    Returns (truncated_text, dropped_char_count). Text already within budget, or
+    a non-positive budget, yields the input unchanged with a count of 0.
+    """
+    if budget_chars <= 0 or len(text) <= budget_chars:
+        return text, 0
+
+    template = "\n[TRUNCATED: dropped {} middle chars due to context budget]\n"
+    # Reserve room for the widest marker we could emit (dropped <= len(text)).
+    room = budget_chars - len(template.format(len(text)))
+    if room < 0:
+        # Even the bare marker does not fit: hard-cut so the budget still holds.
+        return text[:budget_chars], len(text) - budget_chars
+
+    head_len = min(room, max(0, int(budget_chars * head_frac)))
+    # Head is sized first; the tail only gets what is left of the room after it.
+    tail_len = min(room - head_len, max(0, int(budget_chars * tail_frac)))
+
+    head = text[:head_len]
+    tail = text[len(text) - tail_len :]  # explicit start: text[-0:] would be the whole text
+    # Count after clamping so the marker and the return value match what was cut.
+    dropped = len(text) - head_len - tail_len
+
+    return head + template.format(dropped) + tail, dropped
+
+
 def build_summary_request_text(retain_count: int, evicted_messages: List[str], in_context_messages: List[str]) -> str:
     parts: List[str] = []
     if retain_count == 0:
@@ -376,6 +421,8 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor:
 
     # Prepare the messages payload to send to the LLM
     system_prompt = gpt_summarize.SYSTEM
+    # Build the initial transcript without clamping to preserve fidelity
+    # TODO proactively clip here?
     summary_transcript = simple_formatter(messages)
 
     if include_ack:
@@ -403,26 +450,77 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor:
         try:
             raise llm_client.handle_llm_error(e)
         except ContextWindowExceededError as context_error:
-            logger.warning(
-                f"Context window exceeded during summarization, falling back to truncated tool returns. Original error: {context_error}"
+            logger.warning(f"Context window exceeded during summarization. Applying clamping fallbacks. Original error: {context_error}")
+
+            # Fallback A: rebuild transcript with clamped tool returns to shrink payload
+            summary_transcript = simple_formatter(
+                messages,
+                tool_return_truncation_chars=TOOL_RETURN_TRUNCATION_CHARS,
             )
             logger.debug(f"Full summarization payload: {request_data}")
 
-            # Fallback: rebuild request with truncated tool returns
+            if include_ack:
+                input_messages = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
+                    {"role": "user", "content": summary_transcript},
+                ]
+            else:
+                input_messages = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": summary_transcript},
+                ]
+            input_messages_obj = [simple_message_wrapper(msg) for msg in input_messages]
+
             request_data = llm_client.build_request_data(
                 AgentType.letta_v1_agent,
                 input_messages_obj,
                 summarizer_llm_config,
                 tools=[],
-                tool_return_truncation_chars=TOOL_RETURN_TRUNCATION_CHARS,
             )
 
             try:
                 response_data = await llm_client.request_async(request_data, summarizer_llm_config)
-            except Exception as fallback_error:
-                logger.error(f"Fallback summarization also failed: {fallback_error}")
-                logger.debug(f"Full fallback summarization payload: {request_data}")
-                raise llm_client.handle_llm_error(fallback_error)
+            except Exception as fallback_error_a:
+                # Fallback B: hard-truncate the user transcript to fit a conservative char budget
+                logger.warning(f"Clamped tool returns still overflowed ({fallback_error_a}). Falling back to transcript truncation.")
+
+                # Compute a conservative char budget for the transcript based on context window
+                try:
+                    budget_chars = int(summarizer_llm_config.context_window * 0.6 * 4)
+                except Exception:
+                    budget_chars = 48000
+
+                overhead = len(system_prompt) + (len(MESSAGE_SUMMARY_REQUEST_ACK) if include_ack else 0) + 1024
+                budget_chars = max(2000, budget_chars - overhead)
+
+                truncated_transcript, _ = middle_truncate_text(summary_transcript, budget_chars=budget_chars, head_frac=0.3, tail_frac=0.3)
+
+                if include_ack:
+                    input_messages = [
+                        {"role": "system", "content": system_prompt},
+                        {"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
+                        {"role": "user", "content": truncated_transcript},
+                    ]
+                else:
+                    input_messages = [
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": truncated_transcript},
+                    ]
+                input_messages_obj = [simple_message_wrapper(msg) for msg in input_messages]
+
+                request_data = llm_client.build_request_data(
+                    AgentType.letta_v1_agent,
+                    input_messages_obj,
+                    summarizer_llm_config,
+                    tools=[],
+                )
+                try:
+                    response_data = await llm_client.request_async(request_data, summarizer_llm_config)
+                except Exception as fallback_error_b:
+                    logger.error(f"Transcript truncation fallback also failed: {fallback_error_b}. Propagating error.")
+                    logger.debug(f"Full fallback summarization payload: {request_data}")
+                    raise llm_client.handle_llm_error(fallback_error_b)
 
     response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, summarizer_llm_config)
     if response.choices[0].message.content is None:
@@ -465,6 +563,11 @@ def format_transcript(messages: List[Message], include_system: bool = False) ->
                 continue
 
             text = "".join(c.text for c in msg.content if isinstance(c, TextContent)).strip()
+            # Append a compact placeholder for any images
+            image_count = len([c for c in msg.content if isinstance(c, ImageContent)])
+            if image_count > 0:
+                placeholder = "[Image omitted]" if image_count == 1 else f"[{image_count} images omitted]"
+                text = (text + (" " if text else "")) + placeholder
 
         # 2) Otherwise, try extracting from function calls
         elif msg.tool_calls: