From bd9f3aca9b0560cd2acb3a9ec7044083d7b0cb69 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Sun, 14 Dec 2025 21:06:46 -0800 Subject: [PATCH] fix: fix `prompt_acknowledgement` usage and update summarization prompts (#7012) --- fern/openapi.json | 26 +++--- letta/agents/letta_agent_v3.py | 4 +- letta/prompts/summarizer_prompt.py | 50 +++++++++++ letta/services/summarizer/summarizer.py | 4 + .../services/summarizer/summarizer_config.py | 37 ++------ tests/integration_test_summarizer.py | 86 ++++++++----------- tests/managers/test_agent_manager.py | 7 +- 7 files changed, 117 insertions(+), 97 deletions(-) create mode 100644 letta/prompts/summarizer_prompt.py diff --git a/fern/openapi.json b/fern/openapi.json index 335c832d..62e31a06 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -24372,12 +24372,14 @@ "prompt": { "type": "string", "title": "Prompt", - "description": "The prompt to use for summarization." + "description": "The prompt to use for summarization.", + "default": "You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:\n\n1. Task or conversational overview\nThe user's core request and success criteria you are currently working on.\nAny clarifications or constraints they specified.\nAny details about the topic of messages that originated the current conversation or task.\n\n2. Current State\nWhat has been completed or discussed so far\nFiles created, modified, or analyzed (with paths if relevant)\nResources explored or referenced (with URLs if relevant)\nWhat has been discussed or explored so far with the user\n\n3. 
Next Steps\nThe next actions or steps you would have taken, if you were to continue the conversation or task.\n\nKeep your summary less than 100 words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output." }, "prompt_acknowledgement": { - "type": "string", + "type": "boolean", "title": "Prompt Acknowledgement", - "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)." + "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).", + "default": false }, "clip_chars": { "anyOf": [ @@ -24402,12 +24404,11 @@ "sliding_window_percentage": { "type": "number", "title": "Sliding Window Percentage", - "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).", - "default": 0.3 + "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)." } }, "type": "object", - "required": ["model", "prompt", "prompt_acknowledgement"], + "required": ["model"], "title": "CompactionSettings", "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." }, @@ -24479,12 +24480,14 @@ "prompt": { "type": "string", "title": "Prompt", - "description": "The prompt to use for summarization." + "description": "The prompt to use for summarization.", + "default": "You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. 
Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:\n\n1. Task or conversational overview\nThe user's core request and success criteria you are currently working on.\nAny clarifications or constraints they specified.\nAny details about the topic of messages that originated the current conversation or task.\n\n2. Current State\nWhat has been completed or discussed so far\nFiles created, modified, or analyzed (with paths if relevant)\nResources explored or referenced (with URLs if relevant)\nWhat has been discussed or explored so far with the user\n\n3. Next Steps\nThe next actions or steps you would have taken, if you were to continue the conversation or task.\n\nKeep your summary less than 100 words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output." }, "prompt_acknowledgement": { - "type": "string", + "type": "boolean", "title": "Prompt Acknowledgement", - "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)." + "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).", + "default": false }, "clip_chars": { "anyOf": [ @@ -24509,12 +24512,11 @@ "sliding_window_percentage": { "type": "number", "title": "Sliding Window Percentage", - "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).", - "default": 0.3 + "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)." } }, "type": "object", - "required": ["model", "prompt", "prompt_acknowledgement"], + "required": ["model"], "title": "CompactionSettings", "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). 
Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." }, diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 2c37c208..ff8ce044 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -48,7 +48,7 @@ from letta.server.rest_api.utils import ( ) from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema from letta.services.summarizer.summarizer_all import summarize_all -from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_compaction_settings +from letta.services.summarizer.summarizer_config import CompactionSettings from letta.services.summarizer.summarizer_sliding_window import ( count_tokens, summarize_via_sliding_window, @@ -1351,7 +1351,7 @@ class LettaAgentV3(LettaAgentV2): llm_cfg = self.agent_state.llm_config handle = llm_cfg.handle or f"{llm_cfg.model_endpoint_type}/{llm_cfg.model}" - summarizer_config = get_default_compaction_settings(handle) + summarizer_config = CompactionSettings(model=handle) # Build the LLMConfig used for summarization summarizer_llm_config = self._build_summarizer_llm_config( diff --git a/letta/prompts/summarizer_prompt.py b/letta/prompts/summarizer_prompt.py new file mode 100644 index 00000000..f6ddc48a --- /dev/null +++ b/letta/prompts/summarizer_prompt.py @@ -0,0 +1,50 @@ +ANTHROPIC_SUMMARY_PROMPT = """You have been working on the task described above but have not yet completed it. Write a continuation summary that will allow you (or another instance of yourself) to resume work efficiently in a future context window where the conversation history will be replaced with this summary. Your summary should be structured, concise, and actionable. Include: + +1. Task Overview +The user's core request and success criteria +Any clarifications or constraints they specified + +2. 
Current State +What has been completed so far +Files created, modified, or analyzed (with paths if relevant) +Key outputs or artifacts produced + +3. Important Discoveries +Technical constraints or requirements uncovered +Decisions made and their rationale +Errors encountered and how they were resolved +What approaches were tried that didn't work (and why) + +4. Next Steps +Specific actions needed to complete the task +Any blockers or open questions to resolve +Priority order if multiple steps remain + +5. Context to Preserve +User preferences or style requirements +Domain-specific details that aren't obvious +Any promises made to the user + +Write the summary from the perspective of the AI (use the first person from the perspective of the AI). Be concise but complete—err on the side of including information that would prevent duplicate work or repeated mistakes. Write in a way that enables immediate resumption of the task. + +Only output the summary, do NOT include anything else in your output. +""" + +WORD_LIMIT = 100 +SHORTER_SUMMARY_PROMPT = f"""You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include: + +1. Task or conversational overview +The user's core request and success criteria you are currently working on. +Any clarifications or constraints they specified. +Any details about the topic of messages that originated the current conversation or task. + +2. Current State +What has been completed or discussed so far +Files created, modified, or analyzed (with paths if relevant) +Resources explored or referenced (with URLs if relevant) +What has been discussed or explored so far with the user + +3. 
Next Steps +The next actions or steps you would have taken, if you were to continue the conversation or task. + +Keep your summary less than {WORD_LIMIT} words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output.""" diff --git a/letta/services/summarizer/summarizer.py b/letta/services/summarizer/summarizer.py index f82200e7..4f35b33a 100644 --- a/letta/services/summarizer/summarizer.py +++ b/letta/services/summarizer/summarizer.py @@ -436,12 +436,14 @@ async def simple_summary( summary_transcript = simple_formatter(messages) if include_ack: + logger.info(f"Summarizing with ACK for model {llm_config.model}") input_messages = [ {"role": "system", "content": system_prompt}, {"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK}, {"role": "user", "content": summary_transcript}, ] else: + logger.info(f"Summarizing without ACK for model {llm_config.model}") input_messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": summary_transcript}, @@ -473,12 +475,14 @@ async def simple_summary( logger.info(f"Full summarization payload: {request_data}") if include_ack: + logger.info(f"Fallback summarization with ACK for model {llm_config.model}") input_messages = [ {"role": "system", "content": system_prompt}, {"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK}, {"role": "user", "content": summary_transcript}, ] else: + logger.info(f"Fallback summarization without ACK for model {llm_config.model}") input_messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": summary_transcript}, diff --git a/letta/services/summarizer/summarizer_config.py b/letta/services/summarizer/summarizer_config.py index cbc40885..0a5f7878 100644 --- a/letta/services/summarizer/summarizer_config.py +++ b/letta/services/summarizer/summarizer_config.py @@ -2,7 +2,9 @@ from typing import Literal from pydantic import BaseModel, Field +from letta.prompts.summarizer_prompt import 
ANTHROPIC_SUMMARY_PROMPT, SHORTER_SUMMARY_PROMPT from letta.schemas.model import ModelSettingsUnion +from letta.settings import summarizer_settings class CompactionSettings(BaseModel): @@ -26,9 +28,9 @@ class CompactionSettings(BaseModel): description="Optional model settings used to override defaults for the summarizer model.", ) - prompt: str = Field(default=..., description="The prompt to use for summarization.") - prompt_acknowledgement: str = Field( - default=..., description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)." + prompt: str = Field(default=SHORTER_SUMMARY_PROMPT, description="The prompt to use for summarization.") + prompt_acknowledgement: bool = Field( + default=False, description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)." ) clip_chars: int | None = Field( default=2000, description="The maximum length of the summary in characters. If none, no clipping is performed." @@ -36,31 +38,6 @@ class CompactionSettings(BaseModel): mode: Literal["all", "sliding_window"] = Field(default="sliding_window", description="The type of summarization technique use.") sliding_window_percentage: float = Field( - default=0.3, description="The percentage of the context window to keep post-summarization (only used in sliding window mode)." - ) - - -def get_default_compaction_settings(model_handle: str) -> CompactionSettings: - """Build a default :class:`CompactionSettings` from a model handle. - - Args: - model_handle: The model handle to use for summarization - (format: provider/model-name). - - Returns: - A :class:`CompactionSettings` populated with sane defaults. 
- """ - - from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK - from letta.prompts import gpt_summarize - from letta.settings import summarizer_settings - - return CompactionSettings( - mode="sliding_window", - model=model_handle, - model_settings=None, - prompt=gpt_summarize.SYSTEM, - prompt_acknowledgement=MESSAGE_SUMMARY_REQUEST_ACK, - clip_chars=2000, - sliding_window_percentage=summarizer_settings.partial_evict_summarizer_percentage, + default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage, + description="The percentage of the context window to keep post-summarization (only used in sliding window mode).", ) diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py index 2fcaecb7..8736b8e7 100644 --- a/tests/integration_test_summarizer.py +++ b/tests/integration_test_summarizer.py @@ -618,12 +618,12 @@ async def test_summarize_multiple_large_tool_calls(server: SyncServer, actor, ll # # ====================================================================================================================== -# CompactionSettings Mode Tests (with pytest.patch) - Using LettaAgentV3 +# CompactionSettings Mode Tests - Using LettaAgentV3 # ====================================================================================================================== from unittest.mock import patch -from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_compaction_settings +from letta.services.summarizer.summarizer_config import CompactionSettings # Test both summarizer modes: "all" summarizes entire history, "sliding_window" keeps recent messages SUMMARIZER_CONFIG_MODES: list[Literal["all", "sliding_window"]] = ["all", "sliding_window"] @@ -674,54 +674,44 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon # Persist the new messages new_letta_messages = await server.message_manager.create_many_messages_async(new_letta_messages, actor=actor) - # Create a 
custom CompactionSettings with the desired mode - def mock_get_default_compaction_settings(llm_config_inner): - config = get_default_compaction_settings(llm_config_inner) - # Override the mode - return CompactionSettings( - model=config.model, - prompt=config.prompt, - prompt_acknowledgement=config.prompt_acknowledgement, - clip_chars=config.clip_chars, - mode=mode, - sliding_window_percentage=config.sliding_window_percentage, - ) + # Override compaction settings directly on the agent state + handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}" + agent_state.compaction_settings = CompactionSettings(model=handle, mode=mode) - with patch("letta.agents.letta_agent_v3.get_default_compaction_settings", mock_get_default_compaction_settings): - agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor) + agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor) - summary, result = await agent_loop.compact(messages=in_context_messages) + summary, result = await agent_loop.compact(messages=in_context_messages) - assert isinstance(result, list) + assert isinstance(result, list) - # Verify that the result contains valid messages - for msg in result: - assert hasattr(msg, "role") - assert hasattr(msg, "content") + # Verify that the result contains valid messages + for msg in result: + assert hasattr(msg, "role") + assert hasattr(msg, "content") - print() - print(f"RESULTS {mode} ======") - for msg in result: - print(f"MSG: {msg}") + print() + print(f"RESULTS {mode} ======") + for msg in result: + print(f"MSG: {msg}") - print() + print() - if mode == "all": - # For "all" mode, V3 keeps: - # 1. System prompt - # 2. A single user summary message (system_alert JSON) - # and no remaining historical messages. 
- assert len(result) == 2, f"Expected 2 messages for 'all' mode (system + summary), got {len(result)}" - assert result[0].role == MessageRole.system - assert result[1].role == MessageRole.user - else: - # For "sliding_window" mode, result should include: - # 1. System prompt - # 2. User summary message - # 3+. Recent user/assistant messages inside the window. - assert len(result) > 2, f"Expected >2 messages for 'sliding_window' mode, got {len(result)}" - assert result[0].role == MessageRole.system - assert result[1].role == MessageRole.user + if mode == "all": + # For "all" mode, V3 keeps: + # 1. System prompt + # 2. A single user summary message (system_alert JSON) + # and no remaining historical messages. + assert len(result) == 2, f"Expected 2 messages for 'all' mode (system + summary), got {len(result)}" + assert result[0].role == MessageRole.system + assert result[1].role == MessageRole.user + else: + # For "sliding_window" mode, result should include: + # 1. System prompt + # 2. User summary message + # 3+. Recent user/assistant messages inside the window. + assert len(result) > 2, f"Expected >2 messages for 'sliding_window' mode, got {len(result)}" + assert result[0].role == MessageRole.system + assert result[1].role == MessageRole.user @pytest.mark.asyncio @@ -773,7 +763,7 @@ async def test_v3_compact_uses_compaction_settings_model_and_model_settings(serv model=summarizer_handle, model_settings=summarizer_model_settings, prompt="You are a summarizer.", - prompt_acknowledgement="ack", + prompt_acknowledgement=True, clip_chars=2000, mode="all", sliding_window_percentage=0.3, @@ -927,13 +917,13 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server: This test uses the real token counter (via create_token_counter) to verify the sliding window logic works with actual token counting. 
""" - from letta.services.summarizer.summarizer_config import get_default_compaction_settings + from letta.services.summarizer.summarizer_config import CompactionSettings from letta.services.summarizer.summarizer_sliding_window import summarize_via_sliding_window # Create a real summarizer config using the default factory # Override sliding_window_percentage to 0.3 for this test handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}" - summarizer_config = get_default_compaction_settings(handle) + summarizer_config = CompactionSettings(model=handle) summarizer_config.sliding_window_percentage = 0.3 # Create 65 messages (similar to the failing case in the bug report) @@ -1479,11 +1469,11 @@ async def test_summarize_all(server: SyncServer, actor, llm_config: LLMConfig): summarizing the entire conversation into a single summary string. """ from letta.services.summarizer.summarizer_all import summarize_all - from letta.services.summarizer.summarizer_config import get_default_compaction_settings + from letta.services.summarizer.summarizer_config import CompactionSettings # Create a summarizer config with "all" mode handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}" - summarizer_config = get_default_compaction_settings(handle) + summarizer_config = CompactionSettings(model=handle) summarizer_config.mode = "all" # Create test messages - a simple conversation diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index a325af5a..9c3f5c51 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -337,7 +337,6 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat model=summarizer_handle, model_settings=summarizer_model_settings, prompt="You are a summarizer.", - prompt_acknowledgement="ack", clip_chars=2000, mode="all", sliding_window_percentage=0.3, @@ -713,7 +712,6 @@ async def 
test_create_agent_with_compaction_settings(server: SyncServer, default model="openai/gpt-4o-mini", model_settings=model_settings, prompt="Custom summarization prompt", - prompt_acknowledgement="Acknowledged", clip_chars=1500, mode="all", sliding_window_percentage=0.5, @@ -742,7 +740,6 @@ async def test_create_agent_with_compaction_settings(server: SyncServer, default assert created_agent.compaction_settings.clip_chars == 1500 assert created_agent.compaction_settings.sliding_window_percentage == 0.5 assert created_agent.compaction_settings.prompt == "Custom summarization prompt" - assert created_agent.compaction_settings.prompt_acknowledgement == "Acknowledged" # Clean up await server.agent_manager.delete_agent_async(agent_id=created_agent.id, actor=default_user) @@ -764,7 +761,7 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv model="openai/gpt-4o-mini", model_settings=model_settings, prompt="Updated summarization prompt", - prompt_acknowledgement="Updated acknowledgement", + prompt_acknowledgement=False, clip_chars=3000, mode="sliding_window", sliding_window_percentage=0.4, @@ -783,7 +780,7 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv assert updated_agent.compaction_settings.clip_chars == 3000 assert updated_agent.compaction_settings.sliding_window_percentage == 0.4 assert updated_agent.compaction_settings.prompt == "Updated summarization prompt" - assert updated_agent.compaction_settings.prompt_acknowledgement == "Updated acknowledgement" + assert updated_agent.compaction_settings.prompt_acknowledgement == False @pytest.mark.asyncio