feat: allow configurable compaction and return message delta (#7378)

This commit is contained in:
Sarah Wooders
2025-12-17 17:58:41 -08:00
committed by Caren Thomas
parent 82e5d70807
commit f9f1b1e82d
3 changed files with 66 additions and 4 deletions

View File

@@ -8174,6 +8174,23 @@
"description": "The ID of the agent in the format 'agent-<uuid4>'"
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"anyOf": [
{
"$ref": "#/components/schemas/CompactionRequest"
},
{
"type": "null"
}
],
"title": "Request"
}
}
}
},
"responses": {
"204": {
"description": "Successful Response"
@@ -24626,6 +24643,23 @@
"required": ["code"],
"title": "CodeInput"
},
"CompactionRequest": {
"properties": {
"compaction_settings": {
"anyOf": [
{
"$ref": "#/components/schemas/CompactionSettings-Input"
},
{
"type": "null"
}
],
"description": "Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used."
}
},
"type": "object",
"title": "CompactionRequest"
},
"CompactionSettings-Input": {
"properties": {
"model": {

View File

@@ -1332,7 +1332,9 @@ class LettaAgentV3(LettaAgentV2):
return allowed_tools
@trace_method
async def compact(self, messages, trigger_threshold: Optional[int] = None) -> Message:
async def compact(
self, messages, trigger_threshold: Optional[int] = None, compaction_settings: Optional["CompactionSettings"] = None
) -> Message:
"""Compact the current in-context messages for this agent.
Compaction uses a summarizer LLM configuration derived from
@@ -1341,9 +1343,11 @@ class LettaAgentV3(LettaAgentV2):
localized to summarization.
"""
# Use agent's compaction_settings if set, otherwise fall back to
# global defaults based on the agent's model handle.
if self.agent_state.compaction_settings is not None:
# Use the passed-in compaction_settings first, then agent's compaction_settings if set,
# otherwise fall back to global defaults based on the agent's model handle.
if compaction_settings is not None:
summarizer_config = compaction_settings
elif self.agent_state.compaction_settings is not None:
summarizer_config = self.agent_state.compaction_settings
else:
# Prefer the new handle field if set, otherwise derive from llm_config

View File

@@ -66,6 +66,7 @@ from letta.server.server import SyncServer
from letta.services.lettuce import LettuceClient
from letta.services.run_manager import RunManager
from letta.services.streaming_service import StreamingService
from letta.services.summarizer.summarizer_config import CompactionSettings
from letta.settings import settings
from letta.utils import is_1_0_sdk_version, safe_create_shielded_task, safe_create_task, truncate_file_visible_content
from letta.validators import AgentId, BlockId, FileId, MessageId, SourceId, ToolId
@@ -2091,9 +2092,23 @@ async def preview_model_request(
)
class CompactionRequest(BaseModel):
    # Optional JSON body accepted by the summarize endpoint.
    #
    # compaction_settings: per-request settings forwarded to the compaction
    # call; it defaults to None when the caller does not supply any.
    compaction_settings: Optional[CompactionSettings] = Field(
        description="Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used.",
        default=None,
    )
class CompactionResult(BaseModel):
    # Outcome of a manual compaction, as built by the summarize endpoint.

    # Summary produced by the compaction.
    # NOTE(review): the endpoint fills this with the first value returned by
    # `compact()`, which it also passes to `_checkpoint_messages` as a new
    # message -- confirm that value is really a `str` and not a message object.
    summary_message: str
    # Length of the in-context message list before compaction ran.
    num_messages_before: int
    # Length of the message list returned by compaction.
    num_messages_after: int
@router.post("/{agent_id}/summarize", status_code=204, operation_id="summarize_messages")
async def summarize_messages(
agent_id: AgentId,
request: Optional[CompactionRequest] = Body(default=None),
server: SyncServer = Depends(get_letta_server),
headers: HeaderParams = Depends(get_headers),
):
@@ -2121,12 +2136,21 @@ async def summarize_messages(
if agent_eligible and model_compatible:
agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
compaction_settings = request.compaction_settings if request else None
num_messages_before = len(in_context_messages)
summary_message, messages = await agent_loop.compact(
messages=in_context_messages,
compaction_settings=compaction_settings,
)
num_messages_after = len(messages)
# update the agent state
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
return CompactionResult(
summary_message=summary_message,
num_messages_before=num_messages_before,
num_messages_after=num_messages_after,
)
else:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,