feat: allow for configuration compaction and return message delta (#7378)
This commit is contained in:
committed by
Caren Thomas
parent
82e5d70807
commit
f9f1b1e82d
@@ -8174,6 +8174,23 @@
|
||||
"description": "The ID of the agent in the format 'agent-<uuid4>'"
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionRequest"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Request"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
"204": {
|
||||
"description": "Successful Response"
|
||||
@@ -24626,6 +24643,23 @@
|
||||
"required": ["code"],
|
||||
"title": "CodeInput"
|
||||
},
|
||||
"CompactionRequest": {
|
||||
"properties": {
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Optional compaction settings to use for this summarization request. If not provided, the agent's default settings will be used."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"title": "CompactionRequest"
|
||||
},
|
||||
"CompactionSettings-Input": {
|
||||
"properties": {
|
||||
"model": {
|
||||
|
||||
@@ -1332,7 +1332,9 @@ class LettaAgentV3(LettaAgentV2):
|
||||
return allowed_tools
|
||||
|
||||
@trace_method
|
||||
async def compact(self, messages, trigger_threshold: Optional[int] = None) -> Message:
|
||||
async def compact(
|
||||
self, messages, trigger_threshold: Optional[int] = None, compaction_settings: Optional["CompactionSettings"] = None
|
||||
) -> Message:
|
||||
"""Compact the current in-context messages for this agent.
|
||||
|
||||
Compaction uses a summarizer LLM configuration derived from
|
||||
@@ -1341,9 +1343,11 @@ class LettaAgentV3(LettaAgentV2):
|
||||
localized to summarization.
|
||||
"""
|
||||
|
||||
# Use agent's compaction_settings if set, otherwise fall back to
|
||||
# global defaults based on the agent's model handle.
|
||||
if self.agent_state.compaction_settings is not None:
|
||||
# Use the passed-in compaction_settings first, then agent's compaction_settings if set,
|
||||
# otherwise fall back to global defaults based on the agent's model handle.
|
||||
if compaction_settings is not None:
|
||||
summarizer_config = compaction_settings
|
||||
elif self.agent_state.compaction_settings is not None:
|
||||
summarizer_config = self.agent_state.compaction_settings
|
||||
else:
|
||||
# Prefer the new handle field if set, otherwise derive from llm_config
|
||||
|
||||
@@ -66,6 +66,7 @@ from letta.server.server import SyncServer
|
||||
from letta.services.lettuce import LettuceClient
|
||||
from letta.services.run_manager import RunManager
|
||||
from letta.services.streaming_service import StreamingService
|
||||
from letta.services.summarizer.summarizer_config import CompactionSettings
|
||||
from letta.settings import settings
|
||||
from letta.utils import is_1_0_sdk_version, safe_create_shielded_task, safe_create_task, truncate_file_visible_content
|
||||
from letta.validators import AgentId, BlockId, FileId, MessageId, SourceId, ToolId
|
||||
@@ -2091,9 +2092,23 @@ async def preview_model_request(
|
||||
)
|
||||
|
||||
|
||||
class CompactionRequest(BaseModel):
    # Optional request body for the summarize endpoint.
    #
    # NOTE: documented with `#` comments rather than a class docstring on
    # purpose -- Pydantic copies a model docstring into the generated JSON
    # schema as its "description", which would change the OpenAPI output.

    # Per-request override for the summarizer configuration. Left as None
    # when the caller does not supply any settings.
    compaction_settings: Optional[CompactionSettings] = Field(
        description=(
            "Optional compaction settings to use for this summarization request. "
            "If not provided, the agent's default settings will be used."
        ),
        default=None,
    )
|
||||
|
||||
|
||||
class CompactionResult(BaseModel):
    # Payload built by the summarize endpoint after a compaction run.
    #
    # NOTE: documented with `#` comments rather than a class docstring so the
    # generated JSON schema for this model is left untouched.

    # Summary produced by the compaction step.
    # NOTE(review): the summarize endpoint fills this field with the first
    # value unpacked from `LettaAgentV3.compact(...)`, and `compact` is
    # annotated as returning `Message`. Confirm that value really is a `str`;
    # otherwise validation of this field will reject it.
    summary_message: str

    # Length of the in-context message list before compaction ran.
    num_messages_before: int

    # Length of the message list returned by compaction.
    num_messages_after: int
|
||||
|
||||
|
||||
@router.post("/{agent_id}/summarize", status_code=204, operation_id="summarize_messages")
|
||||
async def summarize_messages(
|
||||
agent_id: AgentId,
|
||||
request: Optional[CompactionRequest] = Body(default=None),
|
||||
server: SyncServer = Depends(get_letta_server),
|
||||
headers: HeaderParams = Depends(get_headers),
|
||||
):
|
||||
@@ -2121,12 +2136,21 @@ async def summarize_messages(
|
||||
if agent_eligible and model_compatible:
|
||||
agent_loop = LettaAgentV3(agent_state=agent, actor=actor)
|
||||
in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
|
||||
compaction_settings = request.compaction_settings if request else None
|
||||
num_messages_before = len(in_context_messages)
|
||||
summary_message, messages = await agent_loop.compact(
|
||||
messages=in_context_messages,
|
||||
compaction_settings=compaction_settings,
|
||||
)
|
||||
num_messages_after = len(messages)
|
||||
|
||||
# update the agent state
|
||||
await agent_loop._checkpoint_messages(run_id=None, step_id=None, new_messages=[summary_message], in_context_messages=messages)
|
||||
return CompactionResult(
|
||||
summary_message=summary_message,
|
||||
num_messages_before=num_messages_before,
|
||||
num_messages_after=num_messages_after,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
|
||||
Reference in New Issue
Block a user