Fix sliding window cutoff logic (#9261)

* fix sliding window cutoff calculations to use agent instead of summarizer config

* allow approval messages with tool_calls as valid cutoffs, prevent approval pairs from being split

* update tests with updated sliding window parameters

---------

Co-authored-by: Amy Guan <amy@letta.com>
This commit is contained in:
amysguan
2026-02-03 22:05:02 -08:00
committed by Caren Thomas
parent b89387bf38
commit 16c96cc3c0
4 changed files with 25 additions and 10 deletions

View File

@@ -192,6 +192,7 @@ async def compact_messages(
summary, compacted_messages = await summarize_via_sliding_window( summary, compacted_messages = await summarize_via_sliding_window(
actor=actor, actor=actor,
llm_config=summarizer_llm_config, llm_config=summarizer_llm_config,
agent_llm_config=agent_llm_config,
summarizer_config=summarizer_config, summarizer_config=summarizer_config,
in_context_messages=messages, in_context_messages=messages,
agent_id=agent_id, agent_id=agent_id,

View File

@@ -93,8 +93,10 @@ async def count_tokens_with_tools(
async def summarize_via_sliding_window( async def summarize_via_sliding_window(
# Required to tag LLM calls # Required to tag LLM calls
actor: User, actor: User,
# Actual summarization configuration # LLM config for the summarizer model (used to generate the summary)
llm_config: LLMConfig, llm_config: LLMConfig,
# LLM config for the agent model (used to determine context window cutoff for eviction)
agent_llm_config: LLMConfig,
summarizer_config: CompactionSettings, summarizer_config: CompactionSettings,
in_context_messages: List[Message], in_context_messages: List[Message],
# Telemetry context # Telemetry context
@@ -107,10 +109,10 @@ async def summarize_via_sliding_window(
If the total tokens is greater than the context window limit (or force=True), If the total tokens is greater than the context window limit (or force=True),
then summarize and rearrange the in-context messages (with the summary in front). then summarize and rearrange the in-context messages (with the summary in front).
Finding the summarization cutoff point (target of final post-summarize count is N% of configured context window): Finding the summarization cutoff point (target of final post-summarize count is N% of agent's context window):
1. Start at a message index cutoff (1-N%) 1. Start at a message index cutoff (1-N%)
2. Count tokens with system prompt, prior summary (if it exists), and messages past cutoff point (messages[0] + messages[cutoff:]) 2. Count tokens with system prompt, prior summary (if it exists), and messages past cutoff point (messages[0] + messages[cutoff:])
3. Is count(post_sum_messages) <= N% of configured context window? 3. Is count(post_sum_messages) <= N% of agent's context window?
3a. Yes -> create new summary with [prior summary, cutoff:], and safety truncate summary with char count 3a. Yes -> create new summary with [prior summary, cutoff:], and safety truncate summary with char count
3b. No -> increment cutoff by 10%, and repeat 3b. No -> increment cutoff by 10%, and repeat
@@ -134,16 +136,24 @@ async def summarize_via_sliding_window(
eviction_percentage = summarizer_config.sliding_window_percentage eviction_percentage = summarizer_config.sliding_window_percentage
assert summarizer_config.sliding_window_percentage <= 1.0, "Sliding window percentage must be less than or equal to 1.0" assert summarizer_config.sliding_window_percentage <= 1.0, "Sliding window percentage must be less than or equal to 1.0"
assistant_message_index = None assistant_message_index = None
approx_token_count = llm_config.context_window
# valid_cutoff_roles = {MessageRole.assistant, MessageRole.approval} goal_tokens = (1 - summarizer_config.sliding_window_percentage) * agent_llm_config.context_window
valid_cutoff_roles = {MessageRole.assistant} approx_token_count = agent_llm_config.context_window
# allow approvals to be cutoffs (for headless agents) but ensure proper grouping with tool calls
def is_valid_cutoff(message: Message):
if message.role == MessageRole.assistant:
return True
if message.role == MessageRole.approval:
return message.tool_calls is not None and len(message.tool_calls) > 0
return False
# simple version: summarize(in_context[1:round(summarizer_config.sliding_window_percentage * len(in_context_messages))]) # simple version: summarize(in_context[1:round(summarizer_config.sliding_window_percentage * len(in_context_messages))])
# this evicts 30% of the messages (via summarization) and keeps the remaining 70% # this evicts 30% of the messages (via summarization) and keeps the remaining 70%
# problem: we need the cutoff point to be an assistant message, so will grow the cutoff point until we find an assistant message # problem: we need the cutoff point to be an assistant message, so will grow the cutoff point until we find an assistant message
# also need to grow the cutoff point until the token count is less than the target token count # also need to grow the cutoff point until the token count is less than the target token count
while approx_token_count >= (1 - summarizer_config.sliding_window_percentage) * llm_config.context_window and eviction_percentage < 1.0: while approx_token_count >= goal_tokens and eviction_percentage < 1.0:
# more eviction percentage # more eviction percentage
eviction_percentage += 0.10 eviction_percentage += 0.10
@@ -155,7 +165,7 @@ async def summarize_via_sliding_window(
( (
i i
for i in reversed(range(1, message_cutoff_index + 1)) for i in reversed(range(1, message_cutoff_index + 1))
if i < len(in_context_messages) and in_context_messages[i].role in valid_cutoff_roles if i < len(in_context_messages) and is_valid_cutoff(in_context_messages[i])
), ),
None, None,
) )
@@ -166,9 +176,9 @@ async def summarize_via_sliding_window(
# update token count # update token count
logger.info(f"Attempting to compact messages index 1:{assistant_message_index} messages") logger.info(f"Attempting to compact messages index 1:{assistant_message_index} messages")
post_summarization_buffer = [system_prompt] + in_context_messages[assistant_message_index:] post_summarization_buffer = [system_prompt] + in_context_messages[assistant_message_index:]
approx_token_count = await count_tokens(actor, llm_config, post_summarization_buffer) approx_token_count = await count_tokens(actor, agent_llm_config, post_summarization_buffer)
logger.info( logger.info(
f"Compacting messages index 1:{assistant_message_index} messages resulted in {approx_token_count} tokens, goal is {(1 - summarizer_config.sliding_window_percentage) * llm_config.context_window}" f"Compacting messages index 1:{assistant_message_index} messages resulted in {approx_token_count} tokens, goal is {goal_tokens}"
) )
if assistant_message_index is None or eviction_percentage >= 1.0: if assistant_message_index is None or eviction_percentage >= 1.0:
@@ -202,6 +212,8 @@ async def summarize_via_sliding_window(
}, },
) )
logger.info(f"\n==================\nSummary message string: {summary_message_str[:100]}\n==================\n")
if summarizer_config.clip_chars is not None and len(summary_message_str) > summarizer_config.clip_chars: if summarizer_config.clip_chars is not None and len(summary_message_str) > summarizer_config.clip_chars:
logger.warning(f"Summary length {len(summary_message_str)} exceeds clip length {summarizer_config.clip_chars}. Truncating.") logger.warning(f"Summary length {len(summary_message_str)} exceeds clip length {summarizer_config.clip_chars}. Truncating.")
summary_message_str = summary_message_str[: summarizer_config.clip_chars] + "... [summary truncated to fit]" summary_message_str = summary_message_str[: summarizer_config.clip_chars] + "... [summary truncated to fit]"

View File

@@ -1157,6 +1157,7 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server:
summary, remaining_messages = await summarize_via_sliding_window( summary, remaining_messages = await summarize_via_sliding_window(
actor=actor, actor=actor,
llm_config=llm_config, llm_config=llm_config,
agent_llm_config=llm_config, # case where agent and summarizer have same config
summarizer_config=summarizer_config, summarizer_config=summarizer_config,
in_context_messages=messages, in_context_messages=messages,
) )

View File

@@ -210,6 +210,7 @@ class TestSummarizeSlidingWindowTelemetryContext:
await summarizer_sliding_window.summarize_via_sliding_window( await summarizer_sliding_window.summarize_via_sliding_window(
actor=mock_actor, actor=mock_actor,
llm_config=mock_llm_config, llm_config=mock_llm_config,
agent_llm_config=mock_llm_config, # case where agent and summarizer have same config
summarizer_config=mock_compaction_settings, summarizer_config=mock_compaction_settings,
in_context_messages=mock_messages, in_context_messages=mock_messages,
agent_id=agent_id, agent_id=agent_id,