From a7639a53eb964b841334c456ff8982ebf5e28201 Mon Sep 17 00:00:00 2001
From: Sarah Wooders <sarahwooders@gmail.com>
Date: Wed, 17 Dec 2025 21:55:07 -0800
Subject: [PATCH] fix: fix summary message return for compaction (#7402)

---
 fern/openapi.json                          | 10 +++-------
 letta/agents/letta_agent_v3.py             |  8 ++++----
 letta/server/rest_api/routers/v1/agents.py | 12 +++++++++---
 tests/integration_test_summarizer.py       | 20 ++++++++++----------
 4 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/fern/openapi.json b/fern/openapi.json
index 34b62e94..ad21b85d 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -24669,9 +24669,9 @@
       },
       "CompactionResponse": {
         "properties": {
-          "summary_message": {
+          "summary": {
             "type": "string",
-            "title": "Summary Message"
+            "title": "Summary"
           },
           "num_messages_before": {
             "type": "integer",
@@ -24683,11 +24683,7 @@
           }
         },
         "type": "object",
-        "required": [
-          "summary_message",
-          "num_messages_before",
-          "num_messages_after"
-        ],
+        "required": ["summary", "num_messages_before", "num_messages_after"],
         "title": "CompactionResponse"
       },
       "CompactionSettings-Input": {
diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py
index 91fc4d25..397fd88f 100644
--- a/letta/agents/letta_agent_v3.py
+++ b/letta/agents/letta_agent_v3.py
@@ -684,7 +684,7 @@ class LettaAgentV3(LettaAgentV2):
                             # checkpoint summarized messages
                             # TODO: might want to delay this checkpoint in case of corrupated state
                             try:
-                                summary_message, messages = await self.compact(
+                                summary_message, messages, _ = await self.compact(
                                     messages, trigger_threshold=self.agent_state.llm_config.context_window
                                 )
                                 self.logger.info("Summarization succeeded, continuing to retry LLM request")
@@ -795,7 +795,7 @@ class LettaAgentV3(LettaAgentV2):
                 self.logger.info(
                     f"Context window exceeded (current: {self.context_token_estimate}, threshold: {self.agent_state.llm_config.context_window}), trying to compact messages"
                 )
-                summary_message, messages = await self.compact(messages, trigger_threshold=self.agent_state.llm_config.context_window)
+                summary_message, messages, _ = await self.compact(messages, trigger_threshold=self.agent_state.llm_config.context_window)
                 # TODO: persist + return the summary message
                 # TODO: convert this to a SummaryMessage
                 self.response_messages.append(summary_message)
@@ -1334,7 +1334,7 @@ class LettaAgentV3(LettaAgentV2):
     @trace_method
     async def compact(
         self, messages, trigger_threshold: Optional[int] = None, compaction_settings: Optional["CompactionSettings"] = None
-    ) -> Message:
+    ) -> tuple[Message, list[Message], str]:
         """Compact the current in-context messages for this agent.
 
         Compaction uses a summarizer LLM configuration derived from
@@ -1470,7 +1470,7 @@ class LettaAgentV3(LettaAgentV2):
         if len(compacted_messages) > 1:
             final_messages += compacted_messages[1:]
 
-        return summary_message_obj, final_messages
+        return summary_message_obj, final_messages, summary
 
     @staticmethod
     def _build_summarizer_llm_config(
diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py
index e621e12b..47de5d42 100644
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -2100,7 +2100,7 @@ class CompactionRequest(BaseModel):
 
 
 class CompactionResponse(BaseModel):
-    summary_message: str
+    summary: str
     num_messages_before: int
     num_messages_after: int
 
@@ -2138,7 +2138,7 @@ async def summarize_messages(
         in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
         compaction_settings = request.compaction_settings if request else None
         num_messages_before = len(in_context_messages)
-        summary_message, messages = await agent_loop.compact(
+        summary_message, messages, summary = await agent_loop.compact(
             messages=in_context_messages,
             compaction_settings=compaction_settings,
         )
@@ -2146,8 +2146,14 @@ async def summarize_messages(
 
         # update the agent state
         await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
+        logger.info(f"Summarized {num_messages_before} messages to {num_messages_after}")
+        if num_messages_before <= num_messages_after:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Summarization failed to reduce the number of messages. You may need to use a different CompactionSettings (e.g. using `all` mode).",
+            )
         return CompactionResponse(
-            summary_message=summary_message,
+            summary=summary,
             num_messages_before=num_messages_before,
             num_messages_after=num_messages_after,
         )
diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py
index c2a7441f..39998108 100644
--- a/tests/integration_test_summarizer.py
+++ b/tests/integration_test_summarizer.py
@@ -184,9 +184,9 @@ async def run_summarization(server: SyncServer, agent_state, in_context_messages
     agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
 
     # Run summarization with force parameter
-    summary_message, messages = await agent_loop.compact(messages=in_context_messages)
+    summary_message, messages, summary = await agent_loop.compact(messages=in_context_messages)
 
-    return summary_message, messages
+    return summary_message, messages, summary
 
 
 # ======================================================================================================================
@@ -219,7 +219,7 @@ async def test_summarize_empty_message_buffer(server: SyncServer, actor, llm_con
 
     # Run summarization - this may fail with empty buffer, which is acceptable behavior
     try:
-        summary, result = await run_summarization(server, agent_state, in_context_messages, actor)
+        summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor)
         # If it succeeds, verify result
         assert isinstance(result, list)
 
@@ -312,7 +312,7 @@ async def test_summarize_initialization_messages_only(server: SyncServer, actor,
 
     # Run summarization - force=True with system messages only may fail
     try:
-        summary, result = await run_summarization(server, agent_state, in_context_messages, actor, force=True)
+        summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor, force=True)
 
         # Verify result
         assert isinstance(result, list)
@@ -368,7 +368,7 @@ async def test_summarize_small_conversation(server: SyncServer, actor, llm_confi
     # Run summarization with force=True
     # Note: force=True with clear=True can be very aggressive and may fail on small message sets
     try:
-        summary, result = await run_summarization(server, agent_state, in_context_messages, actor, force=True)
+        summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor, force=True)
 
         # Verify result
         assert isinstance(result, list)
@@ -461,7 +461,7 @@ async def test_summarize_large_tool_calls(server: SyncServer, actor, llm_config:
     assert total_content_size > 40000, f"Expected large messages, got {total_content_size} chars"
 
     # Run summarization
-    summary, result = await run_summarization(server, agent_state, in_context_messages, actor)
+    summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor)
 
     # Verify result
     assert isinstance(result, list)
@@ -565,7 +565,7 @@ async def test_summarize_multiple_large_tool_calls(server: SyncServer, actor, ll
     assert total_content_size > 40000, f"Expected large messages, got {total_content_size} chars"
 
     # Run summarization
-    summary, result = await run_summarization(server, agent_state, in_context_messages, actor)
+    summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor)
 
     # Verify result
     assert isinstance(result, list)
@@ -725,7 +725,7 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
 
     agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
 
-    summary, result = await agent_loop.compact(messages=in_context_messages)
+    summary, result, _ = await agent_loop.compact(messages=in_context_messages)
 
     assert isinstance(result, list)
 
@@ -823,7 +823,7 @@ async def test_v3_compact_uses_compaction_settings_model_and_model_settings(serv
     # Patch simple_summary so we don't hit the real LLM and can inspect llm_config
     with patch.object(summarizer_all, "simple_summary", new=fake_simple_summary):
         agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
-        summary_msg, compacted = await agent_loop.compact(messages=in_context_messages)
+        summary_msg, compacted, _ = await agent_loop.compact(messages=in_context_messages)
 
     assert summary_msg is not None
     assert "value" in captured_llm_config
@@ -911,7 +911,7 @@ async def test_v3_summarize_hard_eviction_when_still_over_threshold(
 
         caplog.set_level("ERROR")
 
-        summary, result = await agent_loop.compact(
+        summary, result, _ = await agent_loop.compact(
             messages=in_context_messages,
             trigger_threshold=context_limit,
         )