feat: Add summarization endpoint (#2524)

2025-05-29 13:14:36 -07:00
parent 3dec99e214
commit c6cef4070a
2 changed files with 56 additions and 0 deletions
--- a/letta/agents/letta_agent.py
+++ b/letta/agents/letta_agent.py
@@ -614,6 +614,18 @@ class LettaAgent(BaseAgent):

        return new_in_context_messages

+    @trace_method
+    async def summarize_conversation_history(self) -> AgentState:
+        agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)
+        message_ids = agent_state.message_ids
+        in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=message_ids, actor=self.actor)
+        new_in_context_messages, updated = self.summarizer.summarize(
+            in_context_messages=in_context_messages, new_letta_messages=[], force=True
+        )
+        return await self.agent_manager.set_in_context_messages_async(
+            agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
+        )
+
    @trace_method
    async def _create_llm_request_data_async(
        self,
--- a/letta/server/rest_api/routers/v1/agents.py
+++ b/letta/server/rest_api/routers/v1/agents.py
@@ -902,3 +902,47 @@ async def list_agent_groups(
    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
    print("in list agents with manager_type", manager_type)
    return server.agent_manager.list_groups(agent_id=agent_id, manager_type=manager_type, actor=actor)
+
+
+@router.post("/{agent_id}/summarize", response_model=AgentState, operation_id="summarize_agent_conversation")
+async def summarize_agent_conversation(
+    agent_id: str,
+    request_obj: Request,  # FastAPI Request
+    max_message_length: int = Query(..., description="Maximum number of messages to retain after summarization."),
+    server: SyncServer = Depends(get_letta_server),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+):
+    """
+    Summarize an agent's conversation history to a target message length.
+
+    This endpoint summarizes the current message history for a given agent,
+    truncating and compressing it down to the specified `max_message_length`.
+    """
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+
+    user_eligible = actor.organization_id not in ["org-4a3af5dd-4c6a-48cb-ac13-3f73ecaaa4bf", "org-4ab3f6e8-9a44-4bee-aeb6-c681cbbc7bf6"]
+    # TODO: This is redundant, remove soon
+    agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
+    agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
+    experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
+    feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
+    model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
+
+    if user_eligible and agent_eligible and feature_enabled and model_compatible:
+        agent = LettaAgent(
+            agent_id=agent_id,
+            message_manager=server.message_manager,
+            agent_manager=server.agent_manager,
+            block_manager=server.block_manager,
+            passage_manager=server.passage_manager,
+            actor=actor,
+            step_manager=server.step_manager,
+            telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
+            message_buffer_min=max_message_length,
+        )
+        return await agent.summarize_conversation_history()
+
+    raise HTTPException(
+        status_code=status.HTTP_403_FORBIDDEN,
+        detail="Summarization is not currently supported for this agent configuration. Please contact Letta support.",
+    )