diff --git a/fern/openapi.json b/fern/openapi.json index fec347dd..8f15541f 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -8174,6 +8174,23 @@ "description": "The ID of the agent in the format 'agent-'" } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionRequest" + }, + { + "type": "null" + } + ], + "title": "Request" + } + } + } + }, "responses": { "204": { "description": "Successful Response" @@ -24626,6 +24643,23 @@ "required": ["code"], "title": "CodeInput" }, + "CompactionRequest": { + "properties": { + "compaction_settings": { + "anyOf": [ + { + "$ref": "#/components/schemas/CompactionSettings-Input" + }, + { + "type": "null" + } + ], + "description": "Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used." + } + }, + "type": "object", + "title": "CompactionRequest" + }, "CompactionSettings-Input": { "properties": { "model": { diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index cf3e71bb..91fc4d25 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -1332,7 +1332,9 @@ class LettaAgentV3(LettaAgentV2): return allowed_tools @trace_method - async def compact(self, messages, trigger_threshold: Optional[int] = None) -> Message: + async def compact( + self, messages, trigger_threshold: Optional[int] = None, compaction_settings: Optional["CompactionSettings"] = None + ) -> Message: """Compact the current in-context messages for this agent. Compaction uses a summarizer LLM configuration derived from @@ -1341,9 +1343,11 @@ class LettaAgentV3(LettaAgentV2): localized to summarization. """ - # Use agent's compaction_settings if set, otherwise fall back to - # global defaults based on the agent's model handle. - if self.agent_state.compaction_settings is not None: + # Use the passed-in compaction_settings first, then agent's compaction_settings if set, + # otherwise fall back to global defaults based on the agent's model handle. + if compaction_settings is not None: + summarizer_config = compaction_settings + elif self.agent_state.compaction_settings is not None: summarizer_config = self.agent_state.compaction_settings else: # Prefer the new handle field if set, otherwise derive from llm_config diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index eaee8283..059fc2e8 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -66,6 +66,7 @@ from letta.server.server import SyncServer from letta.services.lettuce import LettuceClient from letta.services.run_manager import RunManager from letta.services.streaming_service import StreamingService +from letta.services.summarizer.summarizer_config import CompactionSettings from letta.settings import settings from letta.utils import is_1_0_sdk_version, safe_create_shielded_task, safe_create_task, truncate_file_visible_content from letta.validators import AgentId, BlockId, FileId, MessageId, SourceId, ToolId @@ -2091,9 +2092,23 @@ async def preview_model_request( ) +class CompactionRequest(BaseModel): + compaction_settings: Optional[CompactionSettings] = Field( + default=None, + description="Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used.", + ) + + +class CompactionResult(BaseModel): + summary_message: str + num_messages_before: int + num_messages_after: int + + @router.post("/{agent_id}/summarize", status_code=204, operation_id="summarize_messages") async def summarize_messages( agent_id: AgentId, + request: Optional[CompactionRequest] = Body(default=None), server: SyncServer = Depends(get_letta_server), headers: HeaderParams = Depends(get_headers), ): @@ -2121,12 +2136,21 @@ async def summarize_messages( if agent_eligible and model_compatible: agent_loop = LettaAgentV3(agent_state=agent, actor=actor) in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor) + compaction_settings = request.compaction_settings if request else None + num_messages_before = len(in_context_messages) summary_message, messages = await agent_loop.compact( messages=in_context_messages, + compaction_settings=compaction_settings, ) + num_messages_after = len(messages) # update the agent state await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages) + return CompactionResult( + summary_message=summary_message, + num_messages_before=num_messages_before, + num_messages_after=num_messages_after, + ) else: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN,