fix: fix prompt_acknowledgement usage and update summarization prompts (#7012)

This commit is contained in:
Sarah Wooders
2025-12-14 21:06:46 -08:00
committed by Caren Thomas
parent 812bfd16dd
commit bd9f3aca9b
7 changed files with 117 additions and 97 deletions

View File

@@ -24372,12 +24372,14 @@
"prompt": {
"type": "string",
"title": "Prompt",
"description": "The prompt to use for summarization."
"description": "The prompt to use for summarization.",
"default": "You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:\n\n1. Task or conversational overview\nThe user's core request and success criteria you are currently working on.\nAny clarifications or constraints they specified.\nAny details about the topic of messages that originated the current conversation or task.\n\n2. Current State\nWhat has been completed or discussed so far\nFiles created, modified, or analyzed (with paths if relevant)\nResources explored or referenced (with URLs if relevant)\nWhat has been discussed or explored so far with the user\n\n3. Next Steps\nThe next actions or steps you would have taken, if you were to continue the conversation or task.\n\nKeep your summary less than 100 words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output."
},
"prompt_acknowledgement": {
"type": "string",
"type": "boolean",
"title": "Prompt Acknowledgement",
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).",
"default": false
},
"clip_chars": {
"anyOf": [
@@ -24402,12 +24404,11 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).",
"default": 0.3
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
}
},
"type": "object",
"required": ["model", "prompt", "prompt_acknowledgement"],
"required": ["model"],
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
@@ -24479,12 +24480,14 @@
"prompt": {
"type": "string",
"title": "Prompt",
"description": "The prompt to use for summarization."
"description": "The prompt to use for summarization.",
"default": "You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:\n\n1. Task or conversational overview\nThe user's core request and success criteria you are currently working on.\nAny clarifications or constraints they specified.\nAny details about the topic of messages that originated the current conversation or task.\n\n2. Current State\nWhat has been completed or discussed so far\nFiles created, modified, or analyzed (with paths if relevant)\nResources explored or referenced (with URLs if relevant)\nWhat has been discussed or explored so far with the user\n\n3. Next Steps\nThe next actions or steps you would have taken, if you were to continue the conversation or task.\n\nKeep your summary less than 100 words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output."
},
"prompt_acknowledgement": {
"type": "string",
"type": "boolean",
"title": "Prompt Acknowledgement",
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).",
"default": false
},
"clip_chars": {
"anyOf": [
@@ -24509,12 +24512,11 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).",
"default": 0.3
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
}
},
"type": "object",
"required": ["model", "prompt", "prompt_acknowledgement"],
"required": ["model"],
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},

View File

@@ -48,7 +48,7 @@ from letta.server.rest_api.utils import (
)
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
from letta.services.summarizer.summarizer_sliding_window import (
count_tokens,
summarize_via_sliding_window,
@@ -1351,7 +1351,7 @@ class LettaAgentV3(LettaAgentV2):
llm_cfg = self.agent_state.llm_config
handle = llm_cfg.handle or f"{llm_cfg.model_endpoint_type}/{llm_cfg.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config = CompactionSettings(model=handle)
# Build the LLMConfig used for summarization
summarizer_llm_config = self._build_summarizer_llm_config(

View File

@@ -0,0 +1,50 @@
ANTHROPIC_SUMMARY_PROMPT = """You have been working on the task described above but have not yet completed it. Write a continuation summary that will allow you (or another instance of yourself) to resume work efficiently in a future context window where the conversation history will be replaced with this summary. Your summary should be structured, concise, and actionable. Include:
1. Task Overview
The user's core request and success criteria
Any clarifications or constraints they specified
2. Current State
What has been completed so far
Files created, modified, or analyzed (with paths if relevant)
Key outputs or artifacts produced
3. Important Discoveries
Technical constraints or requirements uncovered
Decisions made and their rationale
Errors encountered and how they were resolved
What approaches were tried that didn't work (and why)
4. Next Steps
Specific actions needed to complete the task
Any blockers or open questions to resolve
Priority order if multiple steps remain
5. Context to Preserve
User preferences or style requirements
Domain-specific details that aren't obvious
Any promises made to the user
Write the summary from the perspective of the AI (use the first person from the perspective of the AI). Be concise but complete—err on the side of including information that would prevent duplicate work or repeated mistakes. Write in a way that enables immediate resumption of the task.
Only output the summary, do NOT include anything else in your output.
"""
WORD_LIMIT = 100
SHORTER_SUMMARY_PROMPT = f"""You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:
1. Task or conversational overview
The user's core request and success criteria you are currently working on.
Any clarifications or constraints they specified.
Any details about the topic of messages that originated the current conversation or task.
2. Current State
What has been completed or discussed so far
Files created, modified, or analyzed (with paths if relevant)
Resources explored or referenced (with URLs if relevant)
What has been discussed or explored so far with the user
3. Next Steps
The next actions or steps you would have taken, if you were to continue the conversation or task.
Keep your summary less than {WORD_LIMIT} words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output."""

View File

@@ -436,12 +436,14 @@ async def simple_summary(
summary_transcript = simple_formatter(messages)
if include_ack:
logger.info(f"Summarizing with ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
{"role": "user", "content": summary_transcript},
]
else:
logger.info(f"Summarizing without ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": summary_transcript},
@@ -473,12 +475,14 @@ async def simple_summary(
logger.info(f"Full summarization payload: {request_data}")
if include_ack:
logger.info(f"Fallback summarization with ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
{"role": "user", "content": summary_transcript},
]
else:
logger.info(f"Fallback summarization without ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": summary_transcript},

View File

@@ -2,7 +2,9 @@ from typing import Literal
from pydantic import BaseModel, Field
from letta.prompts.summarizer_prompt import ANTHROPIC_SUMMARY_PROMPT, SHORTER_SUMMARY_PROMPT
from letta.schemas.model import ModelSettingsUnion
from letta.settings import summarizer_settings
class CompactionSettings(BaseModel):
@@ -26,9 +28,9 @@ class CompactionSettings(BaseModel):
description="Optional model settings used to override defaults for the summarizer model.",
)
prompt: str = Field(default=..., description="The prompt to use for summarization.")
prompt_acknowledgement: str = Field(
default=..., description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
prompt: str = Field(default=SHORTER_SUMMARY_PROMPT, description="The prompt to use for summarization.")
prompt_acknowledgement: bool = Field(
default=False, description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
)
clip_chars: int | None = Field(
default=2000, description="The maximum length of the summary in characters. If none, no clipping is performed."
@@ -36,31 +38,6 @@ class CompactionSettings(BaseModel):
mode: Literal["all", "sliding_window"] = Field(default="sliding_window", description="The type of summarization technique use.")
sliding_window_percentage: float = Field(
default=0.3, description="The percentage of the context window to keep post-summarization (only used in sliding window mode)."
)
def get_default_compaction_settings(model_handle: str) -> CompactionSettings:
"""Build a default :class:`CompactionSettings` from a model handle.
Args:
model_handle: The model handle to use for summarization
(format: provider/model-name).
Returns:
A :class:`CompactionSettings` populated with sane defaults.
"""
from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
from letta.prompts import gpt_summarize
from letta.settings import summarizer_settings
return CompactionSettings(
mode="sliding_window",
model=model_handle,
model_settings=None,
prompt=gpt_summarize.SYSTEM,
prompt_acknowledgement=MESSAGE_SUMMARY_REQUEST_ACK,
clip_chars=2000,
sliding_window_percentage=summarizer_settings.partial_evict_summarizer_percentage,
default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage,
description="The percentage of the context window to keep post-summarization (only used in sliding window mode).",
)

View File

@@ -618,12 +618,12 @@ async def test_summarize_multiple_large_tool_calls(server: SyncServer, actor, ll
#
# ======================================================================================================================
# CompactionSettings Mode Tests (with pytest.patch) - Using LettaAgentV3
# CompactionSettings Mode Tests - Using LettaAgentV3
# ======================================================================================================================
from unittest.mock import patch
from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
# Test both summarizer modes: "all" summarizes entire history, "sliding_window" keeps recent messages
SUMMARIZER_CONFIG_MODES: list[Literal["all", "sliding_window"]] = ["all", "sliding_window"]
@@ -674,54 +674,44 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
# Persist the new messages
new_letta_messages = await server.message_manager.create_many_messages_async(new_letta_messages, actor=actor)
# Create a custom CompactionSettings with the desired mode
def mock_get_default_compaction_settings(llm_config_inner):
config = get_default_compaction_settings(llm_config_inner)
# Override the mode
return CompactionSettings(
model=config.model,
prompt=config.prompt,
prompt_acknowledgement=config.prompt_acknowledgement,
clip_chars=config.clip_chars,
mode=mode,
sliding_window_percentage=config.sliding_window_percentage,
)
# Override compaction settings directly on the agent state
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
agent_state.compaction_settings = CompactionSettings(model=handle, mode=mode)
with patch("letta.agents.letta_agent_v3.get_default_compaction_settings", mock_get_default_compaction_settings):
agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
summary, result = await agent_loop.compact(messages=in_context_messages)
summary, result = await agent_loop.compact(messages=in_context_messages)
assert isinstance(result, list)
assert isinstance(result, list)
# Verify that the result contains valid messages
for msg in result:
assert hasattr(msg, "role")
assert hasattr(msg, "content")
# Verify that the result contains valid messages
for msg in result:
assert hasattr(msg, "role")
assert hasattr(msg, "content")
print()
print(f"RESULTS {mode} ======")
for msg in result:
print(f"MSG: {msg}")
print()
print(f"RESULTS {mode} ======")
for msg in result:
print(f"MSG: {msg}")
print()
print()
if mode == "all":
# For "all" mode, V3 keeps:
# 1. System prompt
# 2. A single user summary message (system_alert JSON)
# and no remaining historical messages.
assert len(result) == 2, f"Expected 2 messages for 'all' mode (system + summary), got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
else:
# For "sliding_window" mode, result should include:
# 1. System prompt
# 2. User summary message
# 3+. Recent user/assistant messages inside the window.
assert len(result) > 2, f"Expected >2 messages for 'sliding_window' mode, got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
if mode == "all":
# For "all" mode, V3 keeps:
# 1. System prompt
# 2. A single user summary message (system_alert JSON)
# and no remaining historical messages.
assert len(result) == 2, f"Expected 2 messages for 'all' mode (system + summary), got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
else:
# For "sliding_window" mode, result should include:
# 1. System prompt
# 2. User summary message
# 3+. Recent user/assistant messages inside the window.
assert len(result) > 2, f"Expected >2 messages for 'sliding_window' mode, got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
@pytest.mark.asyncio
@@ -773,7 +763,7 @@ async def test_v3_compact_uses_compaction_settings_model_and_model_settings(serv
model=summarizer_handle,
model_settings=summarizer_model_settings,
prompt="You are a summarizer.",
prompt_acknowledgement="ack",
prompt_acknowledgement=True,
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
@@ -927,13 +917,13 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server:
This test uses the real token counter (via create_token_counter) to verify
the sliding window logic works with actual token counting.
"""
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
from letta.services.summarizer.summarizer_sliding_window import summarize_via_sliding_window
# Create a real summarizer config using the default factory
# Override sliding_window_percentage to 0.3 for this test
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config = CompactionSettings(model=handle)
summarizer_config.sliding_window_percentage = 0.3
# Create 65 messages (similar to the failing case in the bug report)
@@ -1479,11 +1469,11 @@ async def test_summarize_all(server: SyncServer, actor, llm_config: LLMConfig):
summarizing the entire conversation into a single summary string.
"""
from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
# Create a summarizer config with "all" mode
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config = CompactionSettings(model=handle)
summarizer_config.mode = "all"
# Create test messages - a simple conversation

View File

@@ -337,7 +337,6 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
model=summarizer_handle,
model_settings=summarizer_model_settings,
prompt="You are a summarizer.",
prompt_acknowledgement="ack",
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
@@ -713,7 +712,6 @@ async def test_create_agent_with_compaction_settings(server: SyncServer, default
model="openai/gpt-4o-mini",
model_settings=model_settings,
prompt="Custom summarization prompt",
prompt_acknowledgement="Acknowledged",
clip_chars=1500,
mode="all",
sliding_window_percentage=0.5,
@@ -742,7 +740,6 @@ async def test_create_agent_with_compaction_settings(server: SyncServer, default
assert created_agent.compaction_settings.clip_chars == 1500
assert created_agent.compaction_settings.sliding_window_percentage == 0.5
assert created_agent.compaction_settings.prompt == "Custom summarization prompt"
assert created_agent.compaction_settings.prompt_acknowledgement == "Acknowledged"
# Clean up
await server.agent_manager.delete_agent_async(agent_id=created_agent.id, actor=default_user)
@@ -764,7 +761,7 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
model="openai/gpt-4o-mini",
model_settings=model_settings,
prompt="Updated summarization prompt",
prompt_acknowledgement="Updated acknowledgement",
prompt_acknowledgement=False,
clip_chars=3000,
mode="sliding_window",
sliding_window_percentage=0.4,
@@ -783,7 +780,7 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
assert updated_agent.compaction_settings.clip_chars == 3000
assert updated_agent.compaction_settings.sliding_window_percentage == 0.4
assert updated_agent.compaction_settings.prompt == "Updated summarization prompt"
assert updated_agent.compaction_settings.prompt_acknowledgement == "Updated acknowledgement"
assert updated_agent.compaction_settings.prompt_acknowledgement == False
@pytest.mark.asyncio