Add modes self and self_sliding_window for prompt caching (#9372)

* add self compaction method with proper caching (pass in tools, don't refresh sys prompt beforehand) + sliding fallback

* updated prompts for self compaction

* add tests for self, self_sliding_window modes and w/o refresh messages before compaction

* add cache logging to summarization

* better handling to prevent agent from continuing convo on self modes

* if mode changes via summarize endpoint, will use default prompt for the new mode

---------

Co-authored-by: Amy Guan <amy@letta.com>
This commit is contained in:
amysguan
2026-02-24 10:15:36 -08:00
committed by Caren Thomas
parent 47d55362a4
commit 47b0c87ebe
15 changed files with 1065 additions and 223 deletions

View File

@@ -31151,7 +31151,7 @@
}
],
"title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
"description": "Model handle to use for sliding_window/all summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
},
"model_settings": {
"anyOf": [
@@ -31256,7 +31256,12 @@
},
"mode": {
"type": "string",
"enum": ["all", "sliding_window", "self"],
"enum": [
"all",
"sliding_window",
"self_compact_all",
"self_compact_sliding_window"
],
"title": "Mode",
"description": "The type of summarization technique to use.",
"default": "sliding_window"
@@ -31264,12 +31269,12 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
"description": "The percentage of the context window to keep post-summarization (only used in sliding window modes)."
}
},
"type": "object",
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
"description": "Configuration for conversation compaction / summarization.\n\nPer-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
"CompactionSettings-Output": {
"properties": {
@@ -31283,7 +31288,7 @@
}
],
"title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
"description": "Model handle to use for sliding_window/all summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
},
"model_settings": {
"anyOf": [
@@ -31388,7 +31393,12 @@
},
"mode": {
"type": "string",
"enum": ["all", "sliding_window", "self"],
"enum": [
"all",
"sliding_window",
"self_compact_all",
"self_compact_sliding_window"
],
"title": "Mode",
"description": "The type of summarization technique to use.",
"default": "sliding_window"
@@ -31396,12 +31406,12 @@
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode)."
"description": "The percentage of the context window to keep post-summarization (only used in sliding window modes)."
}
},
"type": "object",
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
"description": "Configuration for conversation compaction / summarization.\n\nPer-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
"CompactionStats": {
"properties": {