Default to lightweight compaction model instead of agent's model (#9488)

---------

Co-authored-by: Amy Guan <amy@letta.com>
This commit is contained in:
amysguan
2026-02-19 15:31:13 -08:00
committed by Caren Thomas
parent eb4a0daabd
commit 33969d7190
6 changed files with 202 additions and 40 deletions

View File

@@ -30600,9 +30600,16 @@
"CompactionSettings-Input": { "CompactionSettings-Input": {
"properties": { "properties": {
"model": { "model": {
"type": "string", "anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model", "title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name)." "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
}, },
"model_settings": { "model_settings": {
"anyOf": [ "anyOf": [
@@ -30707,7 +30714,7 @@
}, },
"mode": { "mode": {
"type": "string", "type": "string",
"enum": ["all", "sliding_window"], "enum": ["all", "sliding_window", "self"],
"title": "Mode", "title": "Mode",
"description": "The type of summarization technique use.", "description": "The type of summarization technique use.",
"default": "sliding_window" "default": "sliding_window"
@@ -30719,16 +30726,22 @@
} }
}, },
"type": "object", "type": "object",
"required": ["model"],
"title": "CompactionSettings", "title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
}, },
"CompactionSettings-Output": { "CompactionSettings-Output": {
"properties": { "properties": {
"model": { "model": {
"type": "string", "anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model", "title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name)." "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
}, },
"model_settings": { "model_settings": {
"anyOf": [ "anyOf": [
@@ -30833,7 +30846,7 @@
}, },
"mode": { "mode": {
"type": "string", "type": "string",
"enum": ["all", "sliding_window"], "enum": ["all", "sliding_window", "self"],
"title": "Mode", "title": "Mode",
"description": "The type of summarization technique use.", "description": "The type of summarization technique use.",
"default": "sliding_window" "default": "sliding_window"
@@ -30845,7 +30858,6 @@
} }
}, },
"type": "object", "type": "object",
"required": ["model"],
"title": "CompactionSettings", "title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
}, },

View File

@@ -2389,7 +2389,21 @@ async def summarize_messages(
agent_loop = LettaAgentV3(agent_state=agent, actor=actor) agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor) in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
compaction_settings = request.compaction_settings if request else None # Merge request compaction_settings with agent's settings (request overrides agent)
if agent.compaction_settings and request and request.compaction_settings:
# Start with agent's settings, override with new values from request
# Use model_fields_set to get the fields that were changed in the request (want to ignore the defaults that get set automatically)
compaction_settings = agent.compaction_settings
changed_fields = request.compaction_settings.model_fields_set
for field in changed_fields:
setattr(compaction_settings, field, getattr(request.compaction_settings, field))
# If mode changed from agent's original settings and prompt not explicitly set in request, then use the default prompt for the new mode
# Ex: previously was sliding_window, now is all, so we need to use the default prompt for all mode
if "mode" in changed_fields and compaction_settings.mode != request.compaction_settings.mode:
compaction_settings = compaction_settings.set_mode_specific_prompt()
else:
compaction_settings = (request and request.compaction_settings) or agent.compaction_settings
num_messages_before = len(in_context_messages) num_messages_before = len(in_context_messages)
summary_message, messages, summary = await agent_loop.compact( summary_message, messages, summary = await agent_loop.compact(
messages=in_context_messages, messages=in_context_messages,

View File

@@ -489,6 +489,34 @@ class AgentManager:
if tool_rules: if tool_rules:
check_supports_structured_output(model=agent_create.llm_config.model, tool_rules=tool_rules) check_supports_structured_output(model=agent_create.llm_config.model, tool_rules=tool_rules)
# Update agent's compaction settings with defaults if needed
from letta.schemas.enums import ProviderType
from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model
effective_compaction_settings = agent_create.compaction_settings
# Use provider_name if set, otherwise fall back to model_endpoint_type
provider_name = agent_create.llm_config.provider_name or agent_create.llm_config.model_endpoint_type
# Convert to ProviderType enum to get default summarizer model
try:
default_model = get_default_summarizer_model(provider_type=ProviderType(provider_name))
except (ValueError, TypeError): # unknown provider
default_model = None
# Use agent's model as fallback
if not default_model:
default_model = agent_create.llm_config.model
if effective_compaction_settings is None:
# If no settings provided, INITIALIZE with default model
effective_compaction_settings = CompactionSettings(model=default_model)
elif effective_compaction_settings is not None and effective_compaction_settings.model is None:
# If settings provided but no model, UPDATE with default model
effective_compaction_settings = effective_compaction_settings.model_copy(update={"model": default_model})
# Will set mode-specific default prompt if no prompt is provided
effective_compaction_settings = effective_compaction_settings.set_mode_specific_prompt()
new_agent = AgentModel( new_agent = AgentModel(
name=agent_create.name, name=agent_create.name,
system=derive_system_message( system=derive_system_message(
@@ -499,7 +527,7 @@ class AgentManager:
agent_type=agent_create.agent_type, agent_type=agent_create.agent_type,
llm_config=agent_create.llm_config, llm_config=agent_create.llm_config,
embedding_config=agent_create.embedding_config, embedding_config=agent_create.embedding_config,
compaction_settings=agent_create.compaction_settings, compaction_settings=effective_compaction_settings,
organization_id=actor.organization_id, organization_id=actor.organization_id,
description=agent_create.description, description=agent_create.description,
metadata_=agent_create.metadata, metadata_=agent_create.metadata,

View File

@@ -13,7 +13,7 @@ from letta.schemas.message import Message, MessageCreate
from letta.schemas.tool import Tool from letta.schemas.tool import Tool
from letta.schemas.user import User from letta.schemas.user import User
from letta.services.summarizer.summarizer_all import summarize_all from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import CompactionSettings from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model
from letta.services.summarizer.summarizer_sliding_window import ( from letta.services.summarizer.summarizer_sliding_window import (
count_tokens, count_tokens,
count_tokens_with_tools, count_tokens_with_tools,
@@ -54,7 +54,21 @@ async def build_summarizer_llm_config(
Returns: Returns:
LLMConfig configured for summarization. LLMConfig configured for summarization.
""" """
# If no summarizer model handle is provided, fall back to the agent's config from letta.schemas.enums import ProviderType
# If no summarizer model specified, use lightweight provider-specific defaults
if not summarizer_config.model:
provider_name = agent_llm_config.provider_name or agent_llm_config.model_endpoint_type
try:
provider_type = ProviderType(provider_name)
default_model = get_default_summarizer_model(provider_type=provider_type)
if default_model:
# Use default model
summarizer_config = summarizer_config.model_copy(update={"model": default_model})
except (ValueError, TypeError):
pass # Unknown provider - will fall back to agent's model below
# If still no model after defaults, use agent's model
if not summarizer_config.model: if not summarizer_config.model:
return agent_llm_config return agent_llm_config
@@ -71,7 +85,6 @@ async def build_summarizer_llm_config(
# Check if the summarizer's provider matches the agent's provider # Check if the summarizer's provider matches the agent's provider
# If they match, we can safely use the agent's config as a base # If they match, we can safely use the agent's config as a base
# If they don't match, we need to load the default config for the new provider # If they don't match, we need to load the default config for the new provider
from letta.schemas.enums import ProviderType
provider_matches = False provider_matches = False
try: try:
@@ -158,19 +171,11 @@ async def compact_messages(
CompactResult containing the summary message, compacted messages, summary text, CompactResult containing the summary message, compacted messages, summary text,
and updated context token estimate. and updated context token estimate.
""" """
# Determine compaction settings summarizer_config = compaction_settings if compaction_settings else CompactionSettings()
if compaction_settings is not None:
summarizer_config = compaction_settings
elif agent_model_handle is not None:
summarizer_config = CompactionSettings(model=agent_model_handle)
else:
# Fall back to deriving from llm_config
handle = agent_llm_config.handle or f"{agent_llm_config.model_endpoint_type}/{agent_llm_config.model}"
summarizer_config = CompactionSettings(model=handle)
# Build the LLMConfig used for summarization # Build the LLMConfig used for summarization
summarizer_llm_config = await build_summarizer_llm_config( summarizer_llm_config = await build_summarizer_llm_config(
agent_llm_config=agent_llm_config, agent_llm_config=agent_llm_config, # used to set default compaction model
summarizer_config=summarizer_config, summarizer_config=summarizer_config,
actor=actor, actor=actor,
) )

View File

@@ -1,12 +1,32 @@
from typing import Literal from typing import Literal
from pydantic import BaseModel, Field, model_validator from pydantic import BaseModel, Field
from letta.prompts.summarizer_prompt import ALL_PROMPT, SLIDING_PROMPT from letta.prompts.summarizer_prompt import ALL_PROMPT, SLIDING_PROMPT
from letta.schemas.enums import ProviderType
from letta.schemas.model import ModelSettingsUnion from letta.schemas.model import ModelSettingsUnion
from letta.settings import summarizer_settings from letta.settings import summarizer_settings
def get_default_summarizer_model(provider_type: ProviderType) -> str | None:
"""Get default model for summarization for given provider type."""
summarizer_defaults = {
ProviderType.anthropic: "anthropic/claude-haiku-4-5",
ProviderType.openai: "openai/gpt-5-mini",
ProviderType.google_ai: "google_ai/gemini-2.0-flash",
}
return summarizer_defaults.get(provider_type)
def get_default_prompt_for_mode(mode: Literal["all", "sliding_window"]) -> str:
"""Get the default prompt for a given compaction mode.
Also used in /summarize endpoint if mode is changed and prompt is not explicitly set."""
if mode == "all":
return ALL_PROMPT
else: # sliding_window
return SLIDING_PROMPT
class CompactionSettings(BaseModel): class CompactionSettings(BaseModel):
"""Configuration for conversation compaction / summarization. """Configuration for conversation compaction / summarization.
@@ -16,10 +36,10 @@ class CompactionSettings(BaseModel):
""" """
# Summarizer model handle (provider/model-name). # Summarizer model handle (provider/model-name).
# This is required whenever compaction_settings is provided. # If None, uses lightweight provider-specific defaults (e.g., haiku for Anthropic, gpt-5-mini for OpenAI).
model: str = Field( model: str | None = Field(
..., default=None,
description="Model handle to use for summarization (format: provider/model-name).", description="Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults.",
) )
# Optional provider-specific model settings for the summarizer model # Optional provider-specific model settings for the summarizer model
@@ -36,18 +56,15 @@ class CompactionSettings(BaseModel):
default=50000, description="The maximum length of the summary in characters. If none, no clipping is performed." default=50000, description="The maximum length of the summary in characters. If none, no clipping is performed."
) )
mode: Literal["all", "sliding_window"] = Field(default="sliding_window", description="The type of summarization technique use.") mode: Literal["all", "sliding_window", "self"] = Field(default="sliding_window", description="The type of summarization technique use.")
sliding_window_percentage: float = Field( sliding_window_percentage: float = Field(
default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage, default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage,
description="The percentage of the context window to keep post-summarization (only used in sliding window mode).", description="The percentage of the context window to keep post-summarization (only used in sliding window mode).",
) )
@model_validator(mode="after") # Called upon agent creation and if mode is changed in summarize endpoint request
def set_mode_specific_prompt(self): def set_mode_specific_prompt(self):
"""Set mode-specific default prompt if none provided.""" """Set mode-specific default prompt if none provided."""
if self.prompt is None: if self.prompt is None:
if self.mode == "all": self.prompt = get_default_prompt_for_mode(self.mode)
self.prompt = ALL_PROMPT
else: # sliding_window
self.prompt = SLIDING_PROMPT
return self return self

View File

@@ -261,8 +261,8 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
base_llm_config = LLMConfig.default_config("gpt-4o-mini") base_llm_config = LLMConfig.default_config("gpt-4o-mini")
assert base_llm_config.model == "gpt-4o-mini" assert base_llm_config.model == "gpt-4o-mini"
# Configure compaction to use a different summarizer model # Configure compaction to use a different summarizer model (!= default openai summarizer model)
summarizer_handle = "openai/gpt-5-mini" summarizer_handle = "openai/gpt-5-nano"
summarizer_model_settings = OpenAIModelSettings( summarizer_model_settings = OpenAIModelSettings(
max_output_tokens=1234, max_output_tokens=1234,
temperature=0.1, temperature=0.1,
@@ -354,12 +354,101 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
# Summarizer config should use the handle/model from compaction_settings # Summarizer config should use the handle/model from compaction_settings
assert summarizer_llm_config.handle == summarizer_handle assert summarizer_llm_config.handle == summarizer_handle
assert summarizer_llm_config.model == "gpt-5-mini" assert summarizer_llm_config.model == "gpt-5-nano"
# And should reflect overrides from model_settings # And should reflect overrides from model_settings
assert summarizer_llm_config.max_tokens == 1234 assert summarizer_llm_config.max_tokens == 1234
assert summarizer_llm_config.temperature == 0.1 assert summarizer_llm_config.temperature == 0.1
@pytest.mark.asyncio
async def test_create_agent_sets_default_compaction_model_anthropic(server: SyncServer, default_user):
"""When no compaction_settings provided for Anthropic agent, default haiku model should be set."""
from letta.schemas.agent import CreateAgent
await server.init_async(init_with_default_org_and_user=True)
# Upsert base tools
await server.tool_manager.upsert_base_tools_async(actor=default_user)
# Create agent without compaction_settings using Anthropic LLM
agent = await server.create_agent_async(
CreateAgent(
name="test-default-compaction-anthropic",
model="anthropic/claude-sonnet-4-5-20250929",
# No compaction_settings
),
actor=default_user,
)
# Should have default haiku model set
assert agent.compaction_settings is not None
assert agent.compaction_settings.model == "anthropic/claude-haiku-4-5"
@pytest.mark.asyncio
async def test_create_agent_sets_default_compaction_model_openai(server: SyncServer, default_user):
"""When no compaction_settings provided for OpenAI agent, default gpt-5-mini model should be set."""
from letta.schemas.agent import CreateAgent
await server.init_async(init_with_default_org_and_user=True)
# Upsert base tools
await server.tool_manager.upsert_base_tools_async(actor=default_user)
# Create agent without compaction_settings using OpenAI LLM
agent = await server.create_agent_async(
CreateAgent(
name="test-default-compaction-openai",
model="openai/gpt-4o-mini",
# No compaction_settings
),
actor=default_user,
)
# Should have default gpt-5-mini model set
assert agent.compaction_settings is not None
assert agent.compaction_settings.model == "openai/gpt-5-mini"
@pytest.mark.asyncio
async def test_create_agent_preserves_compaction_settings_when_model_set(server: SyncServer, default_user):
"""When compaction_settings.model is already set, it should not be overwritten."""
from letta.schemas.agent import CreateAgent
from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
from letta.services.summarizer.summarizer_config import CompactionSettings
await server.init_async(init_with_default_org_and_user=True)
# Upsert base tools
await server.tool_manager.upsert_base_tools_async(actor=default_user)
summarizer_handle = "gpt-4o-mini"
summarizer_config = CompactionSettings(
model=summarizer_handle,
model_settings=OpenAIModelSettings(max_output_tokens=1234, temperature=0.1, reasoning=OpenAIReasoning(reasoning_effort="high")),
prompt="You are a summarizer.",
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
)
# Create agent with explicit compaction_settings model
agent = await server.create_agent_async(
CreateAgent(
name="test-preserve-compaction",
model="openai/gpt-5.2-codex",
compaction_settings=summarizer_config,
),
actor=default_user,
)
# Should preserve the custom model, not override with gpt-5-mini default
assert agent.compaction_settings is not None
assert agent.compaction_settings.model == summarizer_handle
assert agent.compaction_settings.mode == "all"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_calculate_multi_agent_tools(set_letta_environment): async def test_calculate_multi_agent_tools(set_letta_environment):
"""Test that calculate_multi_agent_tools excludes local-only tools in production.""" """Test that calculate_multi_agent_tools excludes local-only tools in production."""
@@ -687,9 +776,6 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
"""Test that an agent's compaction_settings can be updated""" """Test that an agent's compaction_settings can be updated"""
agent, _ = comprehensive_test_agent_fixture agent, _ = comprehensive_test_agent_fixture
# Verify initial state (should be None or default)
assert agent.compaction_settings is None
# Create new compaction settings # Create new compaction settings
llm_config = LLMConfig.default_config("gpt-4o-mini") llm_config = LLMConfig.default_config("gpt-4o-mini")
model_settings = llm_config._to_model_settings() model_settings = llm_config._to_model_settings()