Add modes self and self_sliding_window for prompt caching (#9372)

* add self compaction method with proper caching (pass in tools, don't refresh sys prompt beforehand) + sliding fallback

* updated prompts for self compaction

* add tests for self, self_sliding_window modes and w/o refresh messages before compaction

* add cache logging to summarization

* better handling to prevent agent from continuing convo on self modes

* if mode changes via summarize endpoint, will use default prompt for the new mode

---------

Co-authored-by: Amy Guan <amy@letta.com>
This commit is contained in:
amysguan
2026-02-24 10:15:36 -08:00
committed by Caren Thomas
parent 47d55362a4
commit 47b0c87ebe
15 changed files with 1065 additions and 223 deletions

View File

@@ -31151,7 +31151,7 @@
}
],
"title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
"description": "Model handle to use for sliding_window/all summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
},
"model_settings": {
"anyOf": [
@@ -31256,7 +31256,12 @@
},
"mode": {
"type": "string",
"enum": ["all", "sliding_window", "self"],
"enum": [
"all",
"sliding_window",
"self_compact_all",
"self_compact_sliding_window"
],
"title": "Mode",
"description": "The type of summarization technique to use.",
"default": "sliding_window"
@@ -31264,12 +31269,12 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
"description": "The percentage of the context window to keep post-summarization (only used in sliding window modes)."
}
},
"type": "object",
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
"description": "Configuration for conversation compaction / summarization.\n\nPer-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
"CompactionSettings-Output": {
"properties": {
@@ -31283,7 +31288,7 @@
}
],
"title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
"description": "Model handle to use for sliding_window/all summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
},
"model_settings": {
"anyOf": [
@@ -31388,7 +31393,12 @@
},
"mode": {
"type": "string",
"enum": ["all", "sliding_window", "self"],
"enum": [
"all",
"sliding_window",
"self_compact_all",
"self_compact_sliding_window"
],
"title": "Mode",
"description": "The type of summarization technique to use.",
"default": "sliding_window"
@@ -31396,12 +31406,12 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
"description": "The percentage of the context window to keep post-summarization (only used in sliding window modes)."
}
},
"type": "object",
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
"description": "Configuration for conversation compaction / summarization.\n\nPer-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
"CompactionStats": {
"properties": {