Default to lightweight compaction model instead of agent's model (#9488)

---------

Co-authored-by: Amy Guan <amy@letta.com>
This commit is contained in:
amysguan
2026-02-19 15:31:13 -08:00
committed by Caren Thomas
parent eb4a0daabd
commit 33969d7190
6 changed files with 202 additions and 40 deletions

View File

@@ -30600,9 +30600,16 @@
"CompactionSettings-Input": { "CompactionSettings-Input": {
"properties": { "properties": {
"model": { "model": {
"type": "string", "anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model", "title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name)." "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
}, },
"model_settings": { "model_settings": {
"anyOf": [ "anyOf": [
@@ -30707,7 +30714,7 @@
}, },
"mode": { "mode": {
"type": "string", "type": "string",
"enum": ["all", "sliding_window"], "enum": ["all", "sliding_window", "self"],
"title": "Mode", "title": "Mode",
"description": "The type of summarization technique use.", "description": "The type of summarization technique use.",
"default": "sliding_window" "default": "sliding_window"
@@ -30719,16 +30726,22 @@
} }
}, },
"type": "object", "type": "object",
"required": ["model"],
"title": "CompactionSettings", "title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
}, },
"CompactionSettings-Output": { "CompactionSettings-Output": {
"properties": { "properties": {
"model": { "model": {
"type": "string", "anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model", "title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name)." "description": "Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults."
}, },
"model_settings": { "model_settings": {
"anyOf": [ "anyOf": [
@@ -30833,7 +30846,7 @@
}, },
"mode": { "mode": {
"type": "string", "type": "string",
"enum": ["all", "sliding_window"], "enum": ["all", "sliding_window", "self"],
"title": "Mode", "title": "Mode",
"description": "The type of summarization technique use.", "description": "The type of summarization technique use.",
"default": "sliding_window" "default": "sliding_window"
@@ -30845,7 +30858,6 @@
} }
}, },
"type": "object", "type": "object",
"required": ["model"],
"title": "CompactionSettings", "title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field — it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
}, },

View File

@@ -2389,7 +2389,21 @@ async def summarize_messages(
agent_loop = LettaAgentV3(agent_state=agent, actor=actor) agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor) in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
compaction_settings = request.compaction_settings if request else None # Merge request compaction_settings with agent's settings (request overrides agent)
if agent.compaction_settings and request and request.compaction_settings:
# Start with agent's settings, override with new values from request
# Use model_fields_set to get the fields that were changed in the request (want to ignore the defaults that get set automatically)
compaction_settings = agent.compaction_settings
changed_fields = request.compaction_settings.model_fields_set
for field in changed_fields:
setattr(compaction_settings, field, getattr(request.compaction_settings, field))
# If mode changed from agent's original settings and prompt not explicitly set in request, then use the default prompt for the new mode
# Ex: previously was sliding_window, now is all, so we need to use the default prompt for all mode
if "mode" in changed_fields and compaction_settings.mode != request.compaction_settings.mode:
compaction_settings = compaction_settings.set_mode_specific_prompt()
else:
compaction_settings = (request and request.compaction_settings) or agent.compaction_settings
num_messages_before = len(in_context_messages) num_messages_before = len(in_context_messages)
summary_message, messages, summary = await agent_loop.compact( summary_message, messages, summary = await agent_loop.compact(
messages=in_context_messages, messages=in_context_messages,

View File

@@ -489,6 +489,34 @@ class AgentManager:
if tool_rules: if tool_rules:
check_supports_structured_output(model=agent_create.llm_config.model, tool_rules=tool_rules) check_supports_structured_output(model=agent_create.llm_config.model, tool_rules=tool_rules)
# Update agent's compaction settings with defaults if needed
from letta.schemas.enums import ProviderType
from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model
effective_compaction_settings = agent_create.compaction_settings
# Use provider_name if set, otherwise fall back to model_endpoint_type
provider_name = agent_create.llm_config.provider_name or agent_create.llm_config.model_endpoint_type
# Convert to ProviderType enum to get default summarizer model
try:
default_model = get_default_summarizer_model(provider_type=ProviderType(provider_name))
except (ValueError, TypeError): # unknown provider
default_model = None
# Use agent's model as fallback
if not default_model:
default_model = agent_create.llm_config.model
if effective_compaction_settings is None:
# If no settings provided, INITIALIZE with default model
effective_compaction_settings = CompactionSettings(model=default_model)
elif effective_compaction_settings is not None and effective_compaction_settings.model is None:
# If settings provided but no model, UPDATE with default model
effective_compaction_settings = effective_compaction_settings.model_copy(update={"model": default_model})
# Will set mode-specific default prompt if no prompt is provided
effective_compaction_settings = effective_compaction_settings.set_mode_specific_prompt()
new_agent = AgentModel( new_agent = AgentModel(
name=agent_create.name, name=agent_create.name,
system=derive_system_message( system=derive_system_message(
@@ -499,7 +527,7 @@ class AgentManager:
agent_type=agent_create.agent_type, agent_type=agent_create.agent_type,
llm_config=agent_create.llm_config, llm_config=agent_create.llm_config,
embedding_config=agent_create.embedding_config, embedding_config=agent_create.embedding_config,
compaction_settings=agent_create.compaction_settings, compaction_settings=effective_compaction_settings,
organization_id=actor.organization_id, organization_id=actor.organization_id,
description=agent_create.description, description=agent_create.description,
metadata_=agent_create.metadata, metadata_=agent_create.metadata,

View File

@@ -13,7 +13,7 @@ from letta.schemas.message import Message, MessageCreate
from letta.schemas.tool import Tool from letta.schemas.tool import Tool
from letta.schemas.user import User from letta.schemas.user import User
from letta.services.summarizer.summarizer_all import summarize_all from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import CompactionSettings from letta.services.summarizer.summarizer_config import CompactionSettings, get_default_summarizer_model
from letta.services.summarizer.summarizer_sliding_window import ( from letta.services.summarizer.summarizer_sliding_window import (
count_tokens, count_tokens,
count_tokens_with_tools, count_tokens_with_tools,
@@ -54,7 +54,21 @@ async def build_summarizer_llm_config(
Returns: Returns:
LLMConfig configured for summarization. LLMConfig configured for summarization.
""" """
# If no summarizer model handle is provided, fall back to the agent's config from letta.schemas.enums import ProviderType
# If no summarizer model specified, use lightweight provider-specific defaults
if not summarizer_config.model:
provider_name = agent_llm_config.provider_name or agent_llm_config.model_endpoint_type
try:
provider_type = ProviderType(provider_name)
default_model = get_default_summarizer_model(provider_type=provider_type)
if default_model:
# Use default model
summarizer_config = summarizer_config.model_copy(update={"model": default_model})
except (ValueError, TypeError):
pass # Unknown provider - will fall back to agent's model below
# If still no model after defaults, use agent's model
if not summarizer_config.model: if not summarizer_config.model:
return agent_llm_config return agent_llm_config
@@ -71,7 +85,6 @@ async def build_summarizer_llm_config(
# Check if the summarizer's provider matches the agent's provider # Check if the summarizer's provider matches the agent's provider
# If they match, we can safely use the agent's config as a base # If they match, we can safely use the agent's config as a base
# If they don't match, we need to load the default config for the new provider # If they don't match, we need to load the default config for the new provider
from letta.schemas.enums import ProviderType
provider_matches = False provider_matches = False
try: try:
@@ -158,19 +171,11 @@ async def compact_messages(
CompactResult containing the summary message, compacted messages, summary text, CompactResult containing the summary message, compacted messages, summary text,
and updated context token estimate. and updated context token estimate.
""" """
# Determine compaction settings summarizer_config = compaction_settings if compaction_settings else CompactionSettings()
if compaction_settings is not None:
summarizer_config = compaction_settings
elif agent_model_handle is not None:
summarizer_config = CompactionSettings(model=agent_model_handle)
else:
# Fall back to deriving from llm_config
handle = agent_llm_config.handle or f"{agent_llm_config.model_endpoint_type}/{agent_llm_config.model}"
summarizer_config = CompactionSettings(model=handle)
# Build the LLMConfig used for summarization # Build the LLMConfig used for summarization
summarizer_llm_config = await build_summarizer_llm_config( summarizer_llm_config = await build_summarizer_llm_config(
agent_llm_config=agent_llm_config, agent_llm_config=agent_llm_config, # used to set default compaction model
summarizer_config=summarizer_config, summarizer_config=summarizer_config,
actor=actor, actor=actor,
) )

View File

@@ -1,12 +1,32 @@
from typing import Literal from typing import Literal
from pydantic import BaseModel, Field, model_validator from pydantic import BaseModel, Field
from letta.prompts.summarizer_prompt import ALL_PROMPT, SLIDING_PROMPT from letta.prompts.summarizer_prompt import ALL_PROMPT, SLIDING_PROMPT
from letta.schemas.enums import ProviderType
from letta.schemas.model import ModelSettingsUnion from letta.schemas.model import ModelSettingsUnion
from letta.settings import summarizer_settings from letta.settings import summarizer_settings
def get_default_summarizer_model(provider_type: ProviderType) -> str | None:
"""Get default model for summarization for given provider type."""
summarizer_defaults = {
ProviderType.anthropic: "anthropic/claude-haiku-4-5",
ProviderType.openai: "openai/gpt-5-mini",
ProviderType.google_ai: "google_ai/gemini-2.0-flash",
}
return summarizer_defaults.get(provider_type)
def get_default_prompt_for_mode(mode: Literal["all", "sliding_window"]) -> str:
"""Get the default prompt for a given compaction mode.
Also used in /summarize endpoint if mode is changed and prompt is not explicitly set."""
if mode == "all":
return ALL_PROMPT
else: # sliding_window
return SLIDING_PROMPT
class CompactionSettings(BaseModel): class CompactionSettings(BaseModel):
"""Configuration for conversation compaction / summarization. """Configuration for conversation compaction / summarization.
@@ -16,10 +36,10 @@ class CompactionSettings(BaseModel):
""" """
# Summarizer model handle (provider/model-name). # Summarizer model handle (provider/model-name).
# This is required whenever compaction_settings is provided. # If None, uses lightweight provider-specific defaults (e.g., haiku for Anthropic, gpt-5-mini for OpenAI).
model: str = Field( model: str | None = Field(
..., default=None,
description="Model handle to use for summarization (format: provider/model-name).", description="Model handle to use for summarization (format: provider/model-name). If None, uses lightweight provider-specific defaults.",
) )
# Optional provider-specific model settings for the summarizer model # Optional provider-specific model settings for the summarizer model
@@ -36,18 +56,15 @@ class CompactionSettings(BaseModel):
default=50000, description="The maximum length of the summary in characters. If none, no clipping is performed." default=50000, description="The maximum length of the summary in characters. If none, no clipping is performed."
) )
mode: Literal["all", "sliding_window"] = Field(default="sliding_window", description="The type of summarization technique use.") mode: Literal["all", "sliding_window", "self"] = Field(default="sliding_window", description="The type of summarization technique use.")
sliding_window_percentage: float = Field( sliding_window_percentage: float = Field(
default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage, default_factory=lambda: summarizer_settings.partial_evict_summarizer_percentage,
description="The percentage of the context window to keep post-summarization (only used in sliding window mode).", description="The percentage of the context window to keep post-summarization (only used in sliding window mode).",
) )
@model_validator(mode="after") # Called upon agent creation and if mode is changed in summarize endpoint request
def set_mode_specific_prompt(self): def set_mode_specific_prompt(self):
"""Set mode-specific default prompt if none provided.""" """Set mode-specific default prompt if none provided."""
if self.prompt is None: if self.prompt is None:
if self.mode == "all": self.prompt = get_default_prompt_for_mode(self.mode)
self.prompt = ALL_PROMPT
else: # sliding_window
self.prompt = SLIDING_PROMPT
return self return self

View File

@@ -261,8 +261,8 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
base_llm_config = LLMConfig.default_config("gpt-4o-mini") base_llm_config = LLMConfig.default_config("gpt-4o-mini")
assert base_llm_config.model == "gpt-4o-mini" assert base_llm_config.model == "gpt-4o-mini"
# Configure compaction to use a different summarizer model # Configure compaction to use a different summarizer model (!= default openai summarizer model)
summarizer_handle = "openai/gpt-5-mini" summarizer_handle = "openai/gpt-5-nano"
summarizer_model_settings = OpenAIModelSettings( summarizer_model_settings = OpenAIModelSettings(
max_output_tokens=1234, max_output_tokens=1234,
temperature=0.1, temperature=0.1,
@@ -354,12 +354,101 @@ async def test_compaction_settings_model_uses_separate_llm_config_for_summarizat
# Summarizer config should use the handle/model from compaction_settings # Summarizer config should use the handle/model from compaction_settings
assert summarizer_llm_config.handle == summarizer_handle assert summarizer_llm_config.handle == summarizer_handle
assert summarizer_llm_config.model == "gpt-5-mini" assert summarizer_llm_config.model == "gpt-5-nano"
# And should reflect overrides from model_settings # And should reflect overrides from model_settings
assert summarizer_llm_config.max_tokens == 1234 assert summarizer_llm_config.max_tokens == 1234
assert summarizer_llm_config.temperature == 0.1 assert summarizer_llm_config.temperature == 0.1
@pytest.mark.asyncio
async def test_create_agent_sets_default_compaction_model_anthropic(server: SyncServer, default_user):
"""When no compaction_settings provided for Anthropic agent, default haiku model should be set."""
from letta.schemas.agent import CreateAgent
await server.init_async(init_with_default_org_and_user=True)
# Upsert base tools
await server.tool_manager.upsert_base_tools_async(actor=default_user)
# Create agent without compaction_settings using Anthropic LLM
agent = await server.create_agent_async(
CreateAgent(
name="test-default-compaction-anthropic",
model="anthropic/claude-sonnet-4-5-20250929",
# No compaction_settings
),
actor=default_user,
)
# Should have default haiku model set
assert agent.compaction_settings is not None
assert agent.compaction_settings.model == "anthropic/claude-haiku-4-5"
@pytest.mark.asyncio
async def test_create_agent_sets_default_compaction_model_openai(server: SyncServer, default_user):
"""When no compaction_settings provided for OpenAI agent, default gpt-5-mini model should be set."""
from letta.schemas.agent import CreateAgent
await server.init_async(init_with_default_org_and_user=True)
# Upsert base tools
await server.tool_manager.upsert_base_tools_async(actor=default_user)
# Create agent without compaction_settings using OpenAI LLM
agent = await server.create_agent_async(
CreateAgent(
name="test-default-compaction-openai",
model="openai/gpt-4o-mini",
# No compaction_settings
),
actor=default_user,
)
# Should have default gpt-5-mini model set
assert agent.compaction_settings is not None
assert agent.compaction_settings.model == "openai/gpt-5-mini"
@pytest.mark.asyncio
async def test_create_agent_preserves_compaction_settings_when_model_set(server: SyncServer, default_user):
"""When compaction_settings.model is already set, it should not be overwritten."""
from letta.schemas.agent import CreateAgent
from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
from letta.services.summarizer.summarizer_config import CompactionSettings
await server.init_async(init_with_default_org_and_user=True)
# Upsert base tools
await server.tool_manager.upsert_base_tools_async(actor=default_user)
summarizer_handle = "gpt-4o-mini"
summarizer_config = CompactionSettings(
model=summarizer_handle,
model_settings=OpenAIModelSettings(max_output_tokens=1234, temperature=0.1, reasoning=OpenAIReasoning(reasoning_effort="high")),
prompt="You are a summarizer.",
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
)
# Create agent with explicit compaction_settings model
agent = await server.create_agent_async(
CreateAgent(
name="test-preserve-compaction",
model="openai/gpt-5.2-codex",
compaction_settings=summarizer_config,
),
actor=default_user,
)
# Should preserve the custom model, not override with gpt-5-mini default
assert agent.compaction_settings is not None
assert agent.compaction_settings.model == summarizer_handle
assert agent.compaction_settings.mode == "all"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_calculate_multi_agent_tools(set_letta_environment): async def test_calculate_multi_agent_tools(set_letta_environment):
"""Test that calculate_multi_agent_tools excludes local-only tools in production.""" """Test that calculate_multi_agent_tools excludes local-only tools in production."""
@@ -687,9 +776,6 @@ async def test_update_agent_compaction_settings(server: SyncServer, comprehensiv
"""Test that an agent's compaction_settings can be updated""" """Test that an agent's compaction_settings can be updated"""
agent, _ = comprehensive_test_agent_fixture agent, _ = comprehensive_test_agent_fixture
# Verify initial state (should be None or default)
assert agent.compaction_settings is None
# Create new compaction settings # Create new compaction settings
llm_config = LLMConfig.default_config("gpt-4o-mini") llm_config = LLMConfig.default_config("gpt-4o-mini")
model_settings = llm_config._to_model_settings() model_settings = llm_config._to_model_settings()