fix: fix prompt_acknowledgement usage and update summarization prompts (#7012)

This commit is contained in:
Sarah Wooders
2025-12-14 21:06:46 -08:00
committed by Caren Thomas
parent 812bfd16dd
commit bd9f3aca9b
7 changed files with 117 additions and 97 deletions

View File

@@ -24372,12 +24372,14 @@
"prompt": {
"type": "string",
"title": "Prompt",
"description": "The prompt to use for summarization."
"description": "The prompt to use for summarization.",
"default": "You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:\n\n1. Task or conversational overview\nThe user's core request and success criteria you are currently working on.\nAny clarifications or constraints they specified.\nAny details about the topic of messages that originated the current conversation or task.\n\n2. Current State\nWhat has been completed or discussed so far\nFiles created, modified, or analyzed (with paths if relevant)\nResources explored or referenced (with URLs if relevant)\nWhat has been discussed or explored so far with the user\n\n3. Next Steps\nThe next actions or steps you would have taken, if you were to continue the conversation or task.\n\nKeep your summary less than 100 words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output."
},
"prompt_acknowledgement": {
"type": "string",
"type": "boolean",
"title": "Prompt Acknowledgement",
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).",
"default": false
},
"clip_chars": {
"anyOf": [
@@ -24402,12 +24404,11 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).",
"default": 0.3
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
}
},
"type": "object",
"required": ["model", "prompt", "prompt_acknowledgement"],
"required": ["model"],
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
@@ -24479,12 +24480,14 @@
"prompt": {
"type": "string",
"title": "Prompt",
"description": "The prompt to use for summarization."
"description": "The prompt to use for summarization.",
"default": "You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:\n\n1. Task or conversational overview\nThe user's core request and success criteria you are currently working on.\nAny clarifications or constraints they specified.\nAny details about the topic of messages that originated the current conversation or task.\n\n2. Current State\nWhat has been completed or discussed so far\nFiles created, modified, or analyzed (with paths if relevant)\nResources explored or referenced (with URLs if relevant)\nWhat has been discussed or explored so far with the user\n\n3. Next Steps\nThe next actions or steps you would have taken, if you were to continue the conversation or task.\n\nKeep your summary less than 100 words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output."
},
"prompt_acknowledgement": {
"type": "string",
"type": "boolean",
"title": "Prompt Acknowledgement",
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs).",
"default": false
},
"clip_chars": {
"anyOf": [
@@ -24509,12 +24512,11 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).",
"default": 0.3
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
}
},
"type": "object",
"required": ["model", "prompt", "prompt_acknowledgement"],
"required": ["model"],
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},

View File

@@ -48,7 +48,7 @@ from letta.server.rest_api.utils import (
)
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
from letta.services.summarizer.summarizer_sliding_window import (
count_tokens,
summarize_via_sliding_window,
@@ -1351,7 +1351,7 @@ class LettaAgentV3(LettaAgentV2):
llm_cfg = self.agent_state.llm_config
handle = llm_cfg.handle or f"{llm_cfg.model_endpoint_type}/{llm_cfg.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config = CompactionSettings(model=handle)
# Build the LLMConfig used for summarization
summarizer_llm_config = self._build_summarizer_llm_config(

View File

@@ -0,0 +1,50 @@
ANTHROPIC_SUMMARY_PROMPT = """You have been working on the task described above but have not yet completed it. Write a continuation summary that will allow you (or another instance of yourself) to resume work efficiently in a future context window where the conversation history will be replaced with this summary. Your summary should be structured, concise, and actionable. Include:
1. Task Overview
The user's core request and success criteria
Any clarifications or constraints they specified
2. Current State
What has been completed so far
Files created, modified, or analyzed (with paths if relevant)
Key outputs or artifacts produced
3. Important Discoveries
Technical constraints or requirements uncovered
Decisions made and their rationale
Errors encountered and how they were resolved
What approaches were tried that didn't work (and why)
4. Next Steps
Specific actions needed to complete the task
Any blockers or open questions to resolve
Priority order if multiple steps remain
5. Context to Preserve
User preferences or style requirements
Domain-specific details that aren't obvious
Any promises made to the user
Write the summary from the perspective of the AI (use the first person from the perspective of the AI). Be concise but complete—err on the side of including information that would prevent duplicate work or repeated mistakes. Write in a way that enables immediate resumption of the task.
Only output the summary, do NOT include anything else in your output.
"""
WORD_LIMIT = 100
SHORTER_SUMMARY_PROMPT = f"""You have been interacting with a human user, and are in the middle of a conversation or a task. Write a summary that will allow you (or another instance of yourself) to resume without disruption, even after the conversation history is replaced with this summary. Your summary should be structured, concise, and actionable (if you are in the middle of a task). Include:
1. Task or conversational overview
The user's core request and success criteria you are currently working on.
Any clarifications or constraints they specified.
Any details about the topic of messages that originated the current conversation or task.
2. Current State
What has been completed or discussed so far
Files created, modified, or analyzed (with paths if relevant)
Resources explored or referenced (with URLs if relevant)
What has been discussed or explored so far with the user
3. Next Steps
The next actions or steps you would have taken, if you were to continue the conversation or task.
Keep your summary less than {WORD_LIMIT} words, do NOT exceed this word limit. Only output the summary, do NOT include anything else in your output."""

View File

@@ -436,12 +436,14 @@ async def simple_summary(
summary_transcript = simple_formatter(messages)
if include_ack:
logger.info(f"Summarizing with ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
{"role": "user", "content": summary_transcript},
]
else:
logger.info(f"Summarizing without ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": summary_transcript},
@@ -473,12 +475,14 @@ async def simple_summary(
logger.info(f"Full summarization payload: {request_data}")
if include_ack:
logger.info(f"Fallback summarization with ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "assistant", "content": MESSAGE_SUMMARY_REQUEST_ACK},
{"role": "user", "content": summary_transcript},
]
else:
logger.info(f"Fallback summarization without ACK for model {llm_config.model}")
input_messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": summary_transcript},

View File

@@ -2,7 +2,9 @@ from typing import Literal
from pydantic import BaseModel, Field
from letta.prompts.summarizer_prompt import ANTHROPIC_SUMMARY_PROMPT, SHORTER_SUMMARY_PROMPT
from letta.schemas.model import ModelSettingsUnion
from letta.settings import summarizer_settings
class CompactionSettings(BaseModel):
@@ -26,9 +28,9 @@ class CompactionSettings(BaseModel):
description="Optional model settings used to override defaults for the summarizer model.",
)
prompt: str = Field(default=..., description="The prompt to use for summarization.")
prompt_acknowledgement: str = Field(
default=..., description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
prompt: str = Field(default=SHORTER_SUMMARY_PROMPT, description="The prompt to use for summarization.")
prompt_acknowledgement: bool = Field(
default=False, description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
)
clip_chars: int | None = Field(
default=2000, description="The maximum length of the summary in characters. If none, no clipping is performed."
@@ -36,31 +38,6 @@ class CompactionSettings(BaseModel):
mode: Literal["all", "sliding_window"] = Field(default="sliding_window", description="The type of summarization technique use.")
sliding_window_percentage: float = Field(
default=0.3, description="The percentage of the context window to keep post-summarization (only used in sliding window mode)."
)
def get_default_compaction_settings(model_handle: str) -> CompactionSettings:
"""Build a default :class:`CompactionSettings` from a model handle.
Args:
model_handle: The model handle to use for summarization
(format: provider/model-name).
Returns:
A :class:`CompactionSettings` populated with sane defaults.
"""
from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
from letta.prompts import gpt_summarize
from letta.settings import summarizer_settings
return CompactionSettings(
mode="sliding_window",
model=model_handle,
model_settings=None,
prompt=gpt_summarize.SYSTEM,
prompt_acknowledgement=MESSAGE_SUMMARY_REQUEST_ACK,
clip_chars=2000,
sliding_window_percentage=summarizer_settings.partial_evict_summarizer_percentage,
default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage,
description="The percentage of the context window to keep post-summarization (only used in sliding window mode).",
)

View File

@@ -618,12 +618,12 @@ async def test_summarize_multiple_large_tool_calls(server: SyncServer, actor, ll
#
# ======================================================================================================================
# CompactionSettings Mode Tests (with pytest.patch) - Using LettaAgentV3
# CompactionSettings Mode Tests - Using LettaAgentV3
# ======================================================================================================================
from unittest.mock import patch
from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
# Test both summarizer modes: "all" summarizes entire history, "sliding_window" keeps recent messages
SUMMARIZER_CONFIG_MODES: list[Literal["all", "sliding_window"]] = ["all", "sliding_window"]
@@ -674,54 +674,44 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
# Persist the new messages
new_letta_messages = await server.message_manager.create_many_messages_async(new_letta_messages, actor=actor)
# Create a custom CompactionSettings with the desired mode
def mock_get_default_compaction_settings(llm_config_inner):
config = get_default_compaction_settings(llm_config_inner)
# Override the mode
return CompactionSettings(
model=config.model,
prompt=config.prompt,
prompt_acknowledgement=config.prompt_acknowledgement,
clip_chars=config.clip_chars,
mode=mode,
sliding_window_percentage=config.sliding_window_percentage,
)
# Override compaction settings directly on the agent state
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
agent_state.compaction_settings = CompactionSettings(model=handle, mode=mode)
with patch("letta.agents.letta_agent_v3.get_default_compaction_settings", mock_get_default_compaction_settings):
agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
summary, result = await agent_loop.compact(messages=in_context_messages)
summary, result = await agent_loop.compact(messages=in_context_messages)
assert isinstance(result, list)
assert isinstance(result, list)
# Verify that the result contains valid messages
for msg in result:
assert hasattr(msg, "role")
assert hasattr(msg, "content")
# Verify that the result contains valid messages
for msg in result:
assert hasattr(msg, "role")
assert hasattr(msg, "content")
print()
print(f"RESULTS {mode} ======")
for msg in result:
print(f"MSG: {msg}")
print()
print(f"RESULTS {mode} ======")
for msg in result:
print(f"MSG: {msg}")
print()
print()
if mode == "all":
# For "all" mode, V3 keeps:
# 1. System prompt
# 2. A single user summary message (system_alert JSON)
# and no remaining historical messages.
assert len(result) == 2, f"Expected 2 messages for 'all' mode (system + summary), got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
else:
# For "sliding_window" mode, result should include:
# 1. System prompt
# 2. User summary message
# 3+. Recent user/assistant messages inside the window.
assert len(result) > 2, f"Expected >2 messages for 'sliding_window' mode, got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
if mode == "all":
# For "all" mode, V3 keeps:
# 1. System prompt
# 2. A single user summary message (system_alert JSON)
# and no remaining historical messages.
assert len(result) == 2, f"Expected 2 messages for 'all' mode (system + summary), got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
else:
# For "sliding_window" mode, result should include:
# 1. System prompt
# 2. User summary message
# 3+. Recent user/assistant messages inside the window.
assert len(result) > 2, f"Expected >2 messages for 'sliding_window' mode, got {len(result)}"
assert result[0].role == MessageRole.system
assert result[1].role == MessageRole.user
@pytest.mark.asyncio
@@ -773,7 +763,7 @@ async def test_v3_compact_uses_compaction_settings_model_and_model_settings(serv
model=summarizer_handle,
model_settings=summarizer_model_settings,
prompt="You are a summarizer.",
prompt_acknowledgement="ack",
prompt_acknowledgement=True,
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
@@ -927,13 +917,13 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server:
This test uses the real token counter (via create_token_counter) to verify
the sliding window logic works with actual token counting.
"""
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
from letta.services.summarizer.summarizer_sliding_window import summarize_via_sliding_window
# Create a real summarizer config using the default factory
# Override sliding_window_percentage to 0.3 for this test
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config = CompactionSettings(model=handle)
summarizer_config.sliding_window_percentage = 0.3
# Create 65 messages (similar to the failing case in the bug report)
@@ -1479,11 +1469,11 @@ async def test_summarize_all(server: SyncServer, actor, llm_config: LLMConfig):
summarizing the entire conversation into a single summary string.
"""
from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
from letta.services.summarizer.summarizer_config import CompactionSettings
# Create a summarizer config with "all" mode
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config = CompactionSettings(model=handle)
summarizer_config.mode = "all"
# Create test messages - a simple conversation

View File

@@ -337,7 +337,6 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
model=summarizer_handle,
model_settings=summarizer_model_settings,
prompt="You are a summarizer.",
prompt_acknowledgement="ack",
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
@@ -713,7 +712,6 @@ async def test_create_agent_with_compaction_settings(server: SyncServer, default
model="openai/gpt-4o-mini",
model_settings=model_settings,
prompt="Custom summarization prompt",
prompt_acknowledgement="Acknowledged",
clip_chars=1500,
mode="all",
sliding_window_percentage=0.5,
@@ -742,7 +740,6 @@ async def test_create_agent_with_compaction_settings(server: SyncServer, default
assert created_agent.compaction_settings.clip_chars == 1500
assert created_agent.compaction_settings.sliding_window_percentage == 0.5
assert created_agent.compaction_settings.prompt == "Custom summarization prompt"
assert created_agent.compaction_settings.prompt_acknowledgement == "Acknowledged"
# Clean up
await server.agent_manager.delete_agent_async(agent_id=created_agent.id, actor=default_user)
@@ -764,7 +761,7 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
model="openai/gpt-4o-mini",
model_settings=model_settings,
prompt="Updated summarization prompt",
prompt_acknowledgement="Updated acknowledgement",
prompt_acknowledgement=False,
clip_chars=3000,
mode="sliding_window",
sliding_window_percentage=0.4,
@@ -783,7 +780,7 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
assert updated_agent.compaction_settings.clip_chars == 3000
assert updated_agent.compaction_settings.sliding_window_percentage == 0.4
assert updated_agent.compaction_settings.prompt == "Updated summarization prompt"
assert updated_agent.compaction_settings.prompt_acknowledgement == "Updated acknowledgement"
assert updated_agent.compaction_settings.prompt_acknowledgement == False
@pytest.mark.asyncio