From 33969d71906e6c9a946fadb6606b9b3a2ffee26d Mon Sep 17 00:00:00 2001 From: amysguan <64990783+amysguan@users.noreply.github.com> Date: Thu, 19 Feb 2026 15:31:13 -0800 Subject: [PATCH] Default to lightweight compaction model instead of agent's model (#9488) --------- Co-authored-by: Amy Guan --- fern/openapi.json | 28 ++++-- letta/server/rest_api/routers/v1/agents.py | 16 ++- letta/services/agent_manager.py | 30 +++++- letta/services/summarizer/compact.py | 31 +++--- .../services/summarizer/summarizer_config.py | 39 +++++--- tests/managers/test_agent_manager.py | 98 +++++++++++++++++-- 6 files changed, 202 insertions(+), 40 deletions(-) diff --git a/fern/openapi.json b/fern/openapi.json index da3d576d..f9638a4c 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -30600,9 +30600,16 @@ "CompactionSettings-Input": { "properties": { "model": { - "type": "string", + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], "title": "Model", - "description": "Model handle to use for summarization (format: provider/model-name)." + "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults." }, "model_settings": { "anyOf": [ @@ -30707,7 +30714,7 @@ }, "mode": { "type": "string", - "enum": ["all", "sliding_window"], + "enum": ["all", "sliding_window", "self"], "title": "Mode", "description": "The type of summarization technique use.", "default": "sliding_window" @@ -30719,16 +30726,22 @@ } }, "type": "object", - "required": ["model"], "title": "CompactionSettings", "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." 
}, "CompactionSettings-Output": { "properties": { "model": { - "type": "string", + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], "title": "Model", - "description": "Model handle to use for summarization (format: provider/model-name)." + "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults." }, "model_settings": { "anyOf": [ @@ -30833,7 +30846,7 @@ }, "mode": { "type": "string", - "enum": ["all", "sliding_window"], + "enum": ["all", "sliding_window", "self"], "title": "Mode", "description": "The type of summarization technique use.", "default": "sliding_window" @@ -30845,7 +30858,6 @@ } }, "type": "object", - "required": ["model"], "title": "CompactionSettings", "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." 
}, diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index 4cc28540..f4b49865 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -2389,7 +2389,21 @@ async def summarize_messages( agent_loop = LettaAgentV3(agent_state=agent, actor=actor) in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor) - compaction_settings = request.compaction_settings if request else None + # Merge request compaction_settings with agent's settings (request overrides agent) + if agent.compaction_settings and request and request.compaction_settings: + # Only fields explicitly set in the request override the agent's stored settings + # (model_fields_set excludes the defaults that pydantic fills in automatically) + changed_fields = request.compaction_settings.model_fields_set + overrides = {field: getattr(request.compaction_settings, field) for field in changed_fields} + # Compare against the agent's ORIGINAL mode, i.e. before any override is applied + mode_changed = "mode" in changed_fields and agent.compaction_settings.mode != request.compaction_settings.mode + if mode_changed and "prompt" not in changed_fields: + # Drop the prompt stored for the old mode so set_mode_specific_prompt() picks the new mode's default + overrides["prompt"] = None + # Merge into a copy so the loaded agent state is not mutated in place + compaction_settings = agent.compaction_settings.model_copy(update=overrides).set_mode_specific_prompt() + else: + compaction_settings = (request and request.compaction_settings) or agent.compaction_settings num_messages_before = len(in_context_messages) summary_message, messages, summary = await agent_loop.compact( messages=in_context_messages, diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py index 6bbc2b59..b167d1db 100644 --- a/letta/services/agent_manager.py +++ b/letta/services/agent_manager.py @@ -489,6 +489,34 @@ class 
AgentManager: if tool_rules: check_supports_structured_output(model=agent_create.llm_config.model, tool_rules=tool_rules) + # Update agent's compaction settings with defaults if needed + from letta.schemas.enums import ProviderType + from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model + + effective_compaction_settings = agent_create.compaction_settings + # Use provider_name if set, otherwise fall back to model_endpoint_type + provider_name = agent_create.llm_config.provider_name or agent_create.llm_config.model_endpoint_type + + # Convert to ProviderType enum to get default summarizer model + try: + default_model = get_default_summarizer_model(provider_type=ProviderType(provider_name)) + except (ValueError, TypeError): # unknown provider + default_model = None + + # Use agent's model as fallback + if not default_model: + default_model = agent_create.llm_config.model + + if effective_compaction_settings is None: + # If no settings provided, INITIALIZE with default model + effective_compaction_settings = CompactionSettings(model=default_model) + elif effective_compaction_settings is not None and effective_compaction_settings.model is None: + # If settings provided but no model, UPDATE with default model + effective_compaction_settings = effective_compaction_settings.model_copy(update={"model": default_model}) + + # Will set mode-specific default prompt if no prompt is provided + effective_compaction_settings = effective_compaction_settings.set_mode_specific_prompt() + new_agent = AgentModel( name=agent_create.name, system=derive_system_message( @@ -499,7 +527,7 @@ class AgentManager: agent_type=agent_create.agent_type, llm_config=agent_create.llm_config, embedding_config=agent_create.embedding_config, - compaction_settings=agent_create.compaction_settings, + compaction_settings=effective_compaction_settings, organization_id=actor.organization_id, description=agent_create.description, 
metadata_=agent_create.metadata, diff --git a/letta/services/summarizer/compact.py b/letta/services/summarizer/compact.py index dc340a6e..f3431fca 100644 --- a/letta/services/summarizer/compact.py +++ b/letta/services/summarizer/compact.py @@ -13,7 +13,7 @@ from letta.schemas.message import Message, MessageCreate from letta.schemas.tool import Tool from letta.schemas.user import User from letta.services.summarizer.summarizer_all import summarize_all -from letta.services.summarizer.summarizer_config import CompactionSettings +from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model from letta.services.summarizer.summarizer_sliding_window import ( count_tokens, count_tokens_with_tools, @@ -54,7 +54,21 @@ async def build_summarizer_llm_config( Returns: LLMConfig configured for summarization. """ - # If no summarizer model handle is provided, fall back to the agent's config + from letta.schemas.enums import ProviderType + + # If no summarizer model specified, use lightweight provider-specific defaults + if not summarizer_config.model: + provider_name = agent_llm_config.provider_name or agent_llm_config.model_endpoint_type + try: + provider_type = ProviderType(provider_name) + default_model = get_default_summarizer_model(provider_type=provider_type) + if default_model: + # Use default model + summarizer_config = summarizer_config.model_copy(update={"model": default_model}) + except (ValueError, TypeError): + pass # Unknown provider - will fall back to agent's model below + + # If still no model after defaults, use agent's model if not summarizer_config.model: return agent_llm_config @@ -71,7 +85,6 @@ async def build_summarizer_llm_config( # Check if the summarizer's provider matches the agent's provider # If they match, we can safely use the agent's config as a base # If they don't match, we need to load the default config for the new provider - from letta.schemas.enums import ProviderType provider_matches = False try: @@ 
-158,19 +171,11 @@ async def compact_messages( CompactResult containing the summary message, compacted messages, summary text, and updated context token estimate. """ - # Determine compaction settings - if compaction_settings is not None: - summarizer_config = compaction_settings - elif agent_model_handle is not None: - summarizer_config = CompactionSettings(model=agent_model_handle) - else: - # Fall back to deriving from llm_config - handle = agent_llm_config.handle or f"{agent_llm_config.model_endpoint_type}/{agent_llm_config.model}" - summarizer_config = CompactionSettings(model=handle) + summarizer_config = compaction_settings if compaction_settings else CompactionSettings() # Build the LLMConfig used for summarization summarizer_llm_config = await build_summarizer_llm_config( - agent_llm_config=agent_llm_config, + agent_llm_config=agent_llm_config, # used to set default compaction model summarizer_config=summarizer_config, actor=actor, ) diff --git a/letta/services/summarizer/summarizer_config.py b/letta/services/summarizer/summarizer_config.py index a2de7372..18e7203b 100644 --- a/letta/services/summarizer/summarizer_config.py +++ b/letta/services/summarizer/summarizer_config.py @@ -1,12 +1,32 @@ from typing import Literal -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field from letta.prompts.summarizer_prompt import ALL_PROMPT, SLIDING_PROMPT +from letta.schemas.enums import ProviderType from letta.schemas.model import ModelSettingsUnion from letta.settings import summarizer_settings +def get_default_summarizer_model(provider_type: ProviderType) -> str | None: + """Get default model for summarization for given provider type.""" + summarizer_defaults = { + ProviderType.anthropic: "anthropic/claude-haiku-4-5", + ProviderType.openai: "openai/gpt-5-mini", + ProviderType.google_ai: "google_ai/gemini-2.0-flash", + } + return summarizer_defaults.get(provider_type) + + +def get_default_prompt_for_mode(mode: Literal["all", 
"sliding_window"]) -> str: + """Get the default prompt for a given compaction mode. + Also used in /summarize endpoint if mode is changed and prompt is not explicitly set.""" + if mode == "all": + return ALL_PROMPT + else: # sliding_window + return SLIDING_PROMPT + + class CompactionSettings(BaseModel): """Configuration for conversation compaction / summarization. @@ -16,10 +36,10 @@ class CompactionSettings(BaseModel): """ # Summarizer model handle (provider/model-name). - # This is required whenever compaction_settings is provided. - model: str = Field( - ..., - description="Model handle to use for summarization (format: provider/model-name).", + # If None, uses lightweight provider-specific defaults (e.g., haiku for Anthropic, gpt-5-mini for OpenAI). + model: str | None = Field( + default=None, + description="Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults.", ) # Optional provider-specific model settings for the summarizer model @@ -36,18 +56,15 @@ class CompactionSettings(BaseModel): default=50000, description="The maximum length of the summary in characters. If none, no clipping is performed." 
) - mode: Literal["all", "sliding_window"] = Field(default="sliding_window", description="The type of summarization technique use.") + mode: Literal["all", "sliding_window", "self"] = Field(default="sliding_window", description="The type of summarization technique use.") sliding_window_percentage: float = Field( default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage, description="The percentage of the context window to keep post-summarization (only used in sliding window mode).", ) - @model_validator(mode="after") + # Called upon agent creation and if mode is changed in summarize endpoint request def set_mode_specific_prompt(self): """Set mode-specific default prompt if none provided.""" if self.prompt is None: - if self.mode == "all": - self.prompt = ALL_PROMPT - else: # sliding_window - self.prompt = SLIDING_PROMPT + self.prompt = get_default_prompt_for_mode(self.mode) return self diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index 07d24289..073a5d34 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -261,8 +261,8 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat base_llm_config = LLMConfig.default_config("gpt-4o-mini") assert base_llm_config.model == "gpt-4o-mini" - # Configure compaction to use a different summarizer model - summarizer_handle = "openai/gpt-5-mini" + # Configure compaction to use a different summarizer model (!= default openai summarizer model) + summarizer_handle = "openai/gpt-5-nano" summarizer_model_settings = OpenAIModelSettings( max_output_tokens=1234, temperature=0.1, @@ -354,12 +354,101 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat # Summarizer config should use the handle/model from compaction_settings assert summarizer_llm_config.handle == summarizer_handle - assert summarizer_llm_config.model == "gpt-5-mini" + assert summarizer_llm_config.model == 
"gpt-5-nano" # And should reflect overrides from model_settings assert summarizer_llm_config.max_tokens == 1234 assert summarizer_llm_config.temperature == 0.1 +@pytest.mark.asyncio +async def test_create_agent_sets_default_compaction_model_anthropic(server: SyncServer, default_user): + """When no compaction_settings provided for Anthropic agent, default haiku model should be set.""" + from letta.schemas.agent import CreateAgent + + await server.init_async(init_with_default_org_and_user=True) + + # Upsert base tools + await server.tool_manager.upsert_base_tools_async(actor=default_user) + + # Create agent without compaction_settings using Anthropic LLM + agent = await server.create_agent_async( + CreateAgent( + name="test-default-compaction-anthropic", + model="anthropic/claude-sonnet-4-5-20250929", + # No compaction_settings + ), + actor=default_user, + ) + + # Should have default haiku model set + assert agent.compaction_settings is not None + assert agent.compaction_settings.model == "anthropic/claude-haiku-4-5" + + +@pytest.mark.asyncio +async def test_create_agent_sets_default_compaction_model_openai(server: SyncServer, default_user): + """When no compaction_settings provided for OpenAI agent, default gpt-5-mini model should be set.""" + from letta.schemas.agent import CreateAgent + + await server.init_async(init_with_default_org_and_user=True) + + # Upsert base tools + await server.tool_manager.upsert_base_tools_async(actor=default_user) + + # Create agent without compaction_settings using OpenAI LLM + agent = await server.create_agent_async( + CreateAgent( + name="test-default-compaction-openai", + model="openai/gpt-4o-mini", + # No compaction_settings + ), + actor=default_user, + ) + + # Should have default gpt-5-mini model set + assert agent.compaction_settings is not None + assert agent.compaction_settings.model == "openai/gpt-5-mini" + + +@pytest.mark.asyncio +async def test_create_agent_preserves_compaction_settings_when_model_set(server: SyncServer, 
default_user): + """When compaction_settings.model is already set, it should not be overwritten.""" + from letta.schemas.agent import CreateAgent + from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning + from letta.services.summarizer.summarizer_config import CompactionSettings + + await server.init_async(init_with_default_org_and_user=True) + + # Upsert base tools + await server.tool_manager.upsert_base_tools_async(actor=default_user) + + summarizer_handle = "gpt-4o-mini" + + summarizer_config = CompactionSettings( + model=summarizer_handle, + model_settings=OpenAIModelSettings(max_output_tokens=1234, temperature=0.1, reasoning=OpenAIReasoning(reasoning_effort="high")), + prompt="You are a summarizer.", + clip_chars=2000, + mode="all", + sliding_window_percentage=0.3, + ) + + # Create agent with explicit compaction_settings model + agent = await server.create_agent_async( + CreateAgent( + name="test-preserve-compaction", + model="openai/gpt-5.2-codex", + compaction_settings=summarizer_config, + ), + actor=default_user, + ) + + # Should preserve the custom model, not override with gpt-5-mini default + assert agent.compaction_settings is not None + assert agent.compaction_settings.model == summarizer_handle + assert agent.compaction_settings.mode == "all" + + @pytest.mark.asyncio async def test_calculate_multi_agent_tools(set_letta_environment): """Test that calculate_multi_agent_tools excludes local-only tools in production.""" @@ -687,9 +776,6 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv """Test that an agent's compaction_settings can be updated""" agent, _ = comprehensive_test_agent_fixture - # Verify initial state (should be None or default) - assert agent.compaction_settings is None - # Create new compaction settings llm_config = LLMConfig.default_config("gpt-4o-mini") model_settings = llm_config._to_model_settings()