From a7639a53eb964b841334c456ff8982ebf5e28201 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Wed, 17 Dec 2025 21:55:07 -0800 Subject: [PATCH] fix: fix summary message return for compaction (#7402) --- fern/openapi.json | 10 +++------- letta/agents/letta_agent_v3.py | 8 ++++---- letta/server/rest_api/routers/v1/agents.py | 12 +++++++++--- tests/integration_test_summarizer.py | 20 ++++++++++---------- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/fern/openapi.json b/fern/openapi.json index 34b62e94..ad21b85d 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -24669,9 +24669,9 @@ }, "CompactionResponse": { "properties": { - "summary_message": { + "summary": { "type": "string", - "title": "Summary Message" + "title": "Summary" }, "num_messages_before": { "type": "integer", @@ -24683,11 +24683,7 @@ } }, "type": "object", - "required": [ - "summary_message", - "num_messages_before", - "num_messages_after" - ], + "required": ["summary", "num_messages_before", "num_messages_after"], "title": "CompactionResponse" }, "CompactionSettings-Input": { diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 91fc4d25..397fd88f 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -684,7 +684,7 @@ class LettaAgentV3(LettaAgentV2): # checkpoint summarized messages # TODO: might want to delay this checkpoint in case of corrupated state try: - summary_message, messages = await self.compact( + summary_message, messages, _ = await self.compact( messages, trigger_threshold=self.agent_state.llm_config.context_window ) self.logger.info("Summarization succeeded, continuing to retry LLM request") @@ -795,7 +795,7 @@ class LettaAgentV3(LettaAgentV2): self.logger.info( f"Context window exceeded (current: {self.context_token_estimate}, threshold: {self.agent_state.llm_config.context_window}), trying to compact messages" ) - summary_message, messages = await self.compact(messages, trigger_threshold=self.agent_state.llm_config.context_window) + summary_message, messages, _ = await self.compact(messages, trigger_threshold=self.agent_state.llm_config.context_window) # TODO: persist + return the summary message # TODO: convert this to a SummaryMessage self.response_messages.append(summary_message) @@ -1334,7 +1334,7 @@ class LettaAgentV3(LettaAgentV2): @trace_method async def compact( self, messages, trigger_threshold: Optional[int] = None, compaction_settings: Optional["CompactionSettings"] = None - ) -> Message: + ) -> tuple[Message, list[Message], str]: """Compact the current in-context messages for this agent. Compaction uses a summarizer LLM configuration derived from @@ -1470,7 +1470,7 @@ class LettaAgentV3(LettaAgentV2): if len(compacted_messages) > 1: final_messages += compacted_messages[1:] - return summary_message_obj, final_messages + return summary_message_obj, final_messages, summary @staticmethod def _build_summarizer_llm_config( diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index e621e12b..47de5d42 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -2100,7 +2100,7 @@ class CompactionRequest(BaseModel): class CompactionResponse(BaseModel): - summary_message: str + summary: str num_messages_before: int num_messages_after: int @@ -2138,7 +2138,7 @@ async def summarize_messages( in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor) compaction_settings = request.compaction_settings if request else None num_messages_before = len(in_context_messages) - summary_message, messages = await agent_loop.compact( + summary_message, messages, summary = await agent_loop.compact( messages=in_context_messages, compaction_settings=compaction_settings, ) @@ -2146,8 +2146,14 @@ async def summarize_messages( # update the agent state await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages) + logger.info(f"Summarized {num_messages_before} messages to {num_messages_after}") + if num_messages_before <= num_messages_after: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Summarization failed to reduce the number of messages. You may need to use a different CompactionSettings (e.g. using `all` mode).", + ) return CompactionResponse( - summary_message=summary_message, + summary=summary, num_messages_before=num_messages_before, num_messages_after=num_messages_after, ) diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py index c2a7441f..39998108 100644 --- a/tests/integration_test_summarizer.py +++ b/tests/integration_test_summarizer.py @@ -184,9 +184,9 @@ async def run_summarization(server: SyncServer, agent_state, in_context_messages agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor) # Run summarization with force parameter - summary_message, messages = await agent_loop.compact(messages=in_context_messages) + summary_message, messages, summary = await agent_loop.compact(messages=in_context_messages) - return summary_message, messages + return summary_message, messages, summary # ====================================================================================================================== @@ -219,7 +219,7 @@ async def test_summarize_empty_message_buffer(server: SyncServer, actor, llm_con # Run summarization - this may fail with empty buffer, which is acceptable behavior try: - summary, result = await run_summarization(server, agent_state, in_context_messages, actor) + summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor) # If it succeeds, verify result assert isinstance(result, list) @@ -312,7 +312,7 @@ async def test_summarize_initialization_messages_only(server: SyncServer, actor, # Run summarization - force=True with system messages only may fail try: - summary, result = await run_summarization(server, agent_state, in_context_messages, actor, force=True) + summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor, force=True) # Verify result assert isinstance(result, list) @@ -368,7 +368,7 @@ async def test_summarize_small_conversation(server: SyncServer, actor, llm_confi # Run summarization with force=True # Note: force=True with clear=True can be very aggressive and may fail on small message sets try: - summary, result = await run_summarization(server, agent_state, in_context_messages, actor, force=True) + summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor, force=True) # Verify result assert isinstance(result, list) @@ -461,7 +461,7 @@ async def test_summarize_large_tool_calls(server: SyncServer, actor, llm_config: assert total_content_size > 40000, f"Expected large messages, got {total_content_size} chars" # Run summarization - summary, result = await run_summarization(server, agent_state, in_context_messages, actor) + summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor) # Verify result assert isinstance(result, list) @@ -565,7 +565,7 @@ async def test_summarize_multiple_large_tool_calls(server: SyncServer, actor, ll assert total_content_size > 40000, f"Expected large messages, got {total_content_size} chars" # Run summarization - summary, result = await run_summarization(server, agent_state, in_context_messages, actor) + summary, result, _ = await run_summarization(server, agent_state, in_context_messages, actor) # Verify result assert isinstance(result, list) @@ -725,7 +725,7 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor) - summary, result = await agent_loop.compact(messages=in_context_messages) + summary, result, _ = await agent_loop.compact(messages=in_context_messages) assert isinstance(result, list) @@ -823,7 +823,7 @@ async def test_v3_compact_uses_compaction_settings_model_and_model_settings(serv # Patch simple_summary so we don't hit the real LLM and can inspect llm_config with patch.object(summarizer_all, "simple_summary", new=fake_simple_summary): agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor) - summary_msg, compacted = await agent_loop.compact(messages=in_context_messages) + summary_msg, compacted, _ = await agent_loop.compact(messages=in_context_messages) assert summary_msg is not None assert "value" in captured_llm_config @@ -911,7 +911,7 @@ async def test_v3_summarize_hard_eviction_when_still_over_threshold( caplog.set_level("ERROR") - summary, result = await agent_loop.compact( + summary, result, _ = await agent_loop.compact( messages=in_context_messages, trigger_threshold=context_limit, )