From 33969d71906e6c9a946fadb6606b9b3a2ffee26d Mon Sep 17 00:00:00 2001
From: amysguan <64990783+amysguan@users.noreply.github.com>
Date: Thu, 19 Feb 2026 15:31:13 -0800
Subject: [PATCH] Default to lightweight compaction model instead of agent's
 model (#9488)

---------

Co-authored-by: Amy Guan <amy@letta.com>
---
 fern/openapi.json                             | 28 ++++--
 letta/server/rest_api/routers/v1/agents.py    | 16 ++-
 letta/services/agent_manager.py               | 30 +++++-
 letta/services/summarizer/compact.py          | 31 +++---
 .../services/summarizer/summarizer_config.py  | 39 +++++---
 tests/managers/test_agent_manager.py          | 98 +++++++++++++++++--
 6 files changed, 202 insertions(+), 40 deletions(-)

diff --git a/fern/openapi.json b/fern/openapi.json
index da3d576d..f9638a4c 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -30600,9 +30600,16 @@
       "CompactionSettings-Input": {
         "properties": {
           "model": {
-            "type": "string",
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
             "title": "Model",
-            "description": "Model handle to use for summarization (format: provider/model-name)."
+            "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
           },
           "model_settings": {
             "anyOf": [
@@ -30707,7 +30714,7 @@
           },
           "mode": {
             "type": "string",
-            "enum": ["all", "sliding_window"],
+            "enum": ["all", "sliding_window", "self"],
             "title": "Mode",
             "description": "The type of summarization technique use.",
             "default": "sliding_window"
@@ -30719,16 +30726,22 @@
           }
         },
         "type": "object",
-        "required": ["model"],
         "title": "CompactionSettings",
         "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
       },
       "CompactionSettings-Output": {
         "properties": {
           "model": {
-            "type": "string",
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
             "title": "Model",
-            "description": "Model handle to use for summarization (format: provider/model-name)."
+            "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
           },
           "model_settings": {
             "anyOf": [
@@ -30833,7 +30846,7 @@
           },
           "mode": {
             "type": "string",
-            "enum": ["all", "sliding_window"],
+            "enum": ["all", "sliding_window", "self"],
             "title": "Mode",
             "description": "The type of summarization technique use.",
             "default": "sliding_window"
@@ -30845,7 +30858,6 @@
           }
         },
         "type": "object",
-        "required": ["model"],
         "title": "CompactionSettings",
         "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
       },
diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py
index 4cc28540..f4b49865 100644
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -2389,7 +2389,21 @@ async def summarize_messages(
 
     agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
     in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
-    compaction_settings = request.compaction_settings if request else None
+    # Merge request compaction_settings with agent's settings (request overrides agent)
+    if agent.compaction_settings and request and request.compaction_settings:
+        # Only fields explicitly set in the request (model_fields_set) override the agent's
+        # stored settings; defaults auto-filled on the request model are ignored.
+        changed_fields = request.compaction_settings.model_fields_set
+        overrides = {field: getattr(request.compaction_settings, field) for field in changed_fields}
+        # Compare against the agent's ORIGINAL mode before merging; after the merge both modes are always equal
+        mode_changed = "mode" in changed_fields and overrides["mode"] != agent.compaction_settings.mode
+        if mode_changed and "prompt" not in changed_fields:
+            # Ex: previously was sliding_window, now is all -> drop the stored prompt so the default for the new mode is used
+            overrides["prompt"] = None
+        # model_copy leaves agent.compaction_settings untouched; set_mode_specific_prompt only fills a missing prompt
+        compaction_settings = agent.compaction_settings.model_copy(update=overrides).set_mode_specific_prompt()
+    else:
+        compaction_settings = (request and request.compaction_settings) or agent.compaction_settings
     num_messages_before = len(in_context_messages)
     summary_message, messages, summary = await agent_loop.compact(
         messages=in_context_messages,
diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py
index 6bbc2b59..b167d1db 100644
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -489,6 +489,34 @@ class AgentManager:
                 if tool_rules:
                     check_supports_structured_output(model=agent_create.llm_config.model, tool_rules=tool_rules)
 
+                # Update agent's compaction settings with defaults if needed
+                from letta.schemas.enums import ProviderType
+                from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model
+
+                effective_compaction_settings = agent_create.compaction_settings
+                # Use provider_name if set, otherwise fall back to model_endpoint_type
+                provider_name = agent_create.llm_config.provider_name or agent_create.llm_config.model_endpoint_type
+
+                # Convert to ProviderType enum to get default summarizer model
+                try:
+                    default_model = get_default_summarizer_model(provider_type=ProviderType(provider_name))
+                except (ValueError, TypeError):  # unknown provider
+                    default_model = None
+
+                # Fall back to the agent's own model as a handle (provider/model-name), the format CompactionSettings.model expects
+                if not default_model:
+                    default_model = agent_create.llm_config.handle or f"{agent_create.llm_config.model_endpoint_type}/{agent_create.llm_config.model}"
+
+                if effective_compaction_settings is None:
+                    # If no settings provided, INITIALIZE with default model
+                    effective_compaction_settings = CompactionSettings(model=default_model)
+                elif effective_compaction_settings.model is None:
+                    # If settings provided but no model, UPDATE with default model
+                    effective_compaction_settings = effective_compaction_settings.model_copy(update={"model": default_model})
+
+                # Will set mode-specific default prompt if no prompt is provided
+                effective_compaction_settings = effective_compaction_settings.set_mode_specific_prompt()
+
                 new_agent = AgentModel(
                     name=agent_create.name,
                     system=derive_system_message(
@@ -499,7 +527,7 @@ class AgentManager:
                     agent_type=agent_create.agent_type,
                     llm_config=agent_create.llm_config,
                     embedding_config=agent_create.embedding_config,
-                    compaction_settings=agent_create.compaction_settings,
+                    compaction_settings=effective_compaction_settings,
                     organization_id=actor.organization_id,
                     description=agent_create.description,
                     metadata_=agent_create.metadata,
diff --git a/letta/services/summarizer/compact.py b/letta/services/summarizer/compact.py
index dc340a6e..f3431fca 100644
--- a/letta/services/summarizer/compact.py
+++ b/letta/services/summarizer/compact.py
@@ -13,7 +13,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.tool import Tool
 from letta.schemas.user import User
 from letta.services.summarizer.summarizer_all import summarize_all
-from letta.services.summarizer.summarizer_config import CompactionSettings
+from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model
 from letta.services.summarizer.summarizer_sliding_window import (
     count_tokens,
     count_tokens_with_tools,
@@ -54,7 +54,21 @@ async def build_summarizer_llm_config(
     Returns:
         LLMConfig configured for summarization.
     """
-    # If no summarizer model handle is provided, fall back to the agent's config
+    from letta.schemas.enums import ProviderType
+
+    # If no summarizer model specified, look up the lightweight default for the agent's provider (provider_name, else model_endpoint_type)
+    if not summarizer_config.model:
+        provider_name = agent_llm_config.provider_name or agent_llm_config.model_endpoint_type
+        try:
+            provider_type = ProviderType(provider_name)
+            default_model = get_default_summarizer_model(provider_type=provider_type)
+            if default_model:
+                # Work on a copy so the caller's settings object is not modified
+                summarizer_config = summarizer_config.model_copy(update={"model": default_model})
+        except (ValueError, TypeError):
+            pass  # provider_name is not a known ProviderType - will fall back to agent's model below
+
+    # If still no model after defaults, use agent's model
     if not summarizer_config.model:
         return agent_llm_config
 
@@ -71,7 +85,6 @@ async def build_summarizer_llm_config(
         # Check if the summarizer's provider matches the agent's provider
         # If they match, we can safely use the agent's config as a base
         # If they don't match, we need to load the default config for the new provider
-        from letta.schemas.enums import ProviderType
 
         provider_matches = False
         try:
@@ -158,19 +171,11 @@ async def compact_messages(
         CompactResult containing the summary message, compacted messages, summary text,
         and updated context token estimate.
     """
-    # Determine compaction settings
-    if compaction_settings is not None:
-        summarizer_config = compaction_settings
-    elif agent_model_handle is not None:
-        summarizer_config = CompactionSettings(model=agent_model_handle)
-    else:
-        # Fall back to deriving from llm_config
-        handle = agent_llm_config.handle or f"{agent_llm_config.model_endpoint_type}/{agent_llm_config.model}"
-        summarizer_config = CompactionSettings(model=handle)
+    summarizer_config = (compaction_settings or CompactionSettings()).model_copy().set_mode_specific_prompt()  # copy, then fill the mode's default prompt if unset
 
     # Build the LLMConfig used for summarization
     summarizer_llm_config = await build_summarizer_llm_config(
-        agent_llm_config=agent_llm_config,
+        agent_llm_config=agent_llm_config,  # used to set default compaction model
         summarizer_config=summarizer_config,
         actor=actor,
     )
diff --git a/letta/services/summarizer/summarizer_config.py b/letta/services/summarizer/summarizer_config.py
index a2de7372..18e7203b 100644
--- a/letta/services/summarizer/summarizer_config.py
+++ b/letta/services/summarizer/summarizer_config.py
@@ -1,12 +1,32 @@
 from typing import Literal
 
-from pydantic import BaseModel, Field, model_validator
+from pydantic import BaseModel, Field
 
 from letta.prompts.summarizer_prompt import ALL_PROMPT, SLIDING_PROMPT
+from letta.schemas.enums import ProviderType
 from letta.schemas.model import ModelSettingsUnion
 from letta.settings import summarizer_settings
 
 
+def get_default_summarizer_model(provider_type: ProviderType) -> str | None:
+    """Return the default summarizer model handle (provider/model-name) for the provider type, or None if no default is defined."""
+    summarizer_defaults = {
+        ProviderType.anthropic: "anthropic/claude-haiku-4-5",
+        ProviderType.openai: "openai/gpt-5-mini",
+        ProviderType.google_ai: "google_ai/gemini-2.0-flash",
+    }
+    return summarizer_defaults.get(provider_type)
+
+
+def get_default_prompt_for_mode(mode: Literal["all", "sliding_window", "self"]) -> str:
+    """Get the default prompt for a given compaction mode.
+    Returns ALL_PROMPT for "all" and SLIDING_PROMPT for every other mode; called by CompactionSettings.set_mode_specific_prompt."""
+    if mode == "all":
+        return ALL_PROMPT
+    else:  # sliding_window, and "self" (which has no dedicated prompt here)
+        return SLIDING_PROMPT
+
+
 class CompactionSettings(BaseModel):
     """Configuration for conversation compaction / summarization.
 
@@ -16,10 +36,10 @@ class CompactionSettings(BaseModel):
     """
 
     # Summarizer model handle (provider/model-name).
-    # This is required whenever compaction_settings is provided.
-    model: str = Field(
-        ...,
-        description="Model handle to use for summarization (format: provider/model-name).",
+    # If None, uses lightweight provider-specific defaults (e.g., haiku for Anthropic, gpt-5-mini for OpenAI).
+    model: str | None = Field(
+        default=None,
+        description="Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults.",
     )
 
     # Optional provider-specific model settings for the summarizer model
@@ -36,18 +56,15 @@ class CompactionSettings(BaseModel):
         default=50000, description="The maximum length of the summary in characters. If none, no clipping is performed."
     )
 
-    mode: Literal["all", "sliding_window"] = Field(default="sliding_window", description="The type of summarization technique use.")
+    mode: Literal["all", "sliding_window", "self"] = Field(default="sliding_window", description="The type of summarization technique use.")
     sliding_window_percentage: float = Field(
         default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage,
         description="The percentage of the context window to keep post-summarization (only used in sliding window mode).",
     )
 
-    @model_validator(mode="after")
+    # Called upon agent creation and if mode is changed in summarize endpoint request
     def set_mode_specific_prompt(self):
         """Set mode-specific default prompt if none provided."""
         if self.prompt is None:
-            if self.mode == "all":
-                self.prompt = ALL_PROMPT
-            else:  # sliding_window
-                self.prompt = SLIDING_PROMPT
+            self.prompt = get_default_prompt_for_mode(self.mode)
         return self
diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py
index 07d24289..073a5d34 100644
--- a/tests/managers/test_agent_manager.py
+++ b/tests/managers/test_agent_manager.py
@@ -261,8 +261,8 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
     base_llm_config = LLMConfig.default_config("gpt-4o-mini")
     assert base_llm_config.model == "gpt-4o-mini"
 
-    # Configure compaction to use a different summarizer model
-    summarizer_handle = "openai/gpt-5-mini"
+    # Configure compaction to use a different summarizer model (!= default openai summarizer model)
+    summarizer_handle = "openai/gpt-5-nano"
     summarizer_model_settings = OpenAIModelSettings(
         max_output_tokens=1234,
         temperature=0.1,
@@ -354,12 +354,101 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
 
     # Summarizer config should use the handle/model from compaction_settings
     assert summarizer_llm_config.handle == summarizer_handle
-    assert summarizer_llm_config.model == "gpt-5-mini"
+    assert summarizer_llm_config.model == "gpt-5-nano"
     # And should reflect overrides from model_settings
     assert summarizer_llm_config.max_tokens == 1234
     assert summarizer_llm_config.temperature == 0.1
 
 
+@pytest.mark.asyncio
+async def test_create_agent_sets_default_compaction_model_anthropic(server: SyncServer, default_user):
+    """When no compaction_settings are provided for an Anthropic agent, the default haiku summarizer model should be set."""
+    from letta.schemas.agent import CreateAgent
+
+    await server.init_async(init_with_default_org_and_user=True)
+
+    # Upsert base tools
+    await server.tool_manager.upsert_base_tools_async(actor=default_user)
+
+    # Create agent without compaction_settings using Anthropic LLM
+    agent = await server.create_agent_async(
+        CreateAgent(
+            name="test-default-compaction-anthropic",
+            model="anthropic/claude-sonnet-4-5-20250929",
+            # No compaction_settings
+        ),
+        actor=default_user,
+    )
+
+    # Settings must be initialized on creation and carry the Anthropic default handle
+    assert agent.compaction_settings is not None
+    assert agent.compaction_settings.model == "anthropic/claude-haiku-4-5"
+
+
+@pytest.mark.asyncio
+async def test_create_agent_sets_default_compaction_model_openai(server: SyncServer, default_user):
+    """When no compaction_settings are provided for an OpenAI agent, the default gpt-5-mini summarizer model should be set."""
+    from letta.schemas.agent import CreateAgent
+
+    await server.init_async(init_with_default_org_and_user=True)
+
+    # Upsert base tools
+    await server.tool_manager.upsert_base_tools_async(actor=default_user)
+
+    # Create agent without compaction_settings using OpenAI LLM
+    agent = await server.create_agent_async(
+        CreateAgent(
+            name="test-default-compaction-openai",
+            model="openai/gpt-4o-mini",
+            # No compaction_settings
+        ),
+        actor=default_user,
+    )
+
+    # Settings must be initialized on creation and carry the OpenAI default handle
+    assert agent.compaction_settings is not None
+    assert agent.compaction_settings.model == "openai/gpt-5-mini"
+
+
+@pytest.mark.asyncio
+async def test_create_agent_preserves_compaction_settings_when_model_set(server: SyncServer, default_user):
+    """When compaction_settings.model is already set, it should not be overwritten."""
+    from letta.schemas.agent import CreateAgent
+    from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
+    from letta.services.summarizer.summarizer_config import CompactionSettings
+
+    await server.init_async(init_with_default_org_and_user=True)
+
+    # Upsert base tools
+    await server.tool_manager.upsert_base_tools_async(actor=default_user)
+
+    summarizer_handle = "openai/gpt-4o-mini"  # full handle (provider/model-name), different from the openai default
+
+    summarizer_config = CompactionSettings(
+        model=summarizer_handle,
+        model_settings=OpenAIModelSettings(max_output_tokens=1234, temperature=0.1, reasoning=OpenAIReasoning(reasoning_effort="high")),
+        prompt="You are a summarizer.",
+        clip_chars=2000,
+        mode="all",
+        sliding_window_percentage=0.3,
+    )
+
+    # Create agent with explicit compaction_settings model
+    agent = await server.create_agent_async(
+        CreateAgent(
+            name="test-preserve-compaction",
+            model="openai/gpt-5.2-codex",
+            compaction_settings=summarizer_config,
+        ),
+        actor=default_user,
+    )
+
+    # Should preserve the custom model, not override with gpt-5-mini default
+    assert agent.compaction_settings is not None
+    assert agent.compaction_settings.model == summarizer_handle
+    assert agent.compaction_settings.mode == "all"
+
+
 @pytest.mark.asyncio
 async def test_calculate_multi_agent_tools(set_letta_environment):
     """Test that calculate_multi_agent_tools excludes local-only tools in production."""
@@ -687,9 +776,6 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
     """Test that an agent's compaction_settings can be updated"""
     agent, _ = comprehensive_test_agent_fixture
 
-    # Verify initial state (should be None or default)
-    assert agent.compaction_settings is None
-
     # Create new compaction settings
     llm_config = LLMConfig.default_config("gpt-4o-mini")
     model_settings = llm_config._to_model_settings()