* auto fixes * auto fix pt2 and transitive deps and undefined var checking locals() * manual fixes (ignored or letta-code fixed) * fix circular import * remove all ignores, add FastAPI rules and Ruff rules * add ty and precommit * ruff stuff * ty check fixes * ty check fixes pt 2 * error on invalid
83 lines
3.4 KiB
Python
83 lines
3.4 KiB
Python
from typing import List, Optional
|
|
|
|
from letta.log import get_logger
|
|
from letta.otel.tracing import trace_method
|
|
from letta.schemas.llm_config import LLMConfig
|
|
from letta.schemas.message import Message, MessageRole
|
|
from letta.schemas.user import User
|
|
from letta.services.summarizer.summarizer import simple_summary
|
|
from letta.services.summarizer.summarizer_config import CompactionSettings
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
@trace_method
|
|
async def summarize_all(
|
|
# Required to tag LLM calls
|
|
actor: User,
|
|
# LLM config for the summarizer model
|
|
llm_config: LLMConfig,
|
|
# Actual summarization configuration
|
|
summarizer_config: CompactionSettings,
|
|
in_context_messages: List[Message],
|
|
# Telemetry context
|
|
agent_id: Optional[str] = None,
|
|
agent_tags: Optional[List[str]] = None,
|
|
run_id: Optional[str] = None,
|
|
step_id: Optional[str] = None,
|
|
) -> str:
|
|
"""
|
|
Summarize the entire conversation history into a single summary.
|
|
|
|
Returns:
|
|
- The summary string
|
|
"""
|
|
logger.info(
|
|
f"Summarizing all messages (index 1 to {len(in_context_messages) - 2}), keeping last message: {in_context_messages[-1].role}"
|
|
)
|
|
if in_context_messages[-1].role == MessageRole.approval:
|
|
# cannot evict a pending approval request (will cause client-side errors)
|
|
# Also protect the assistant message before it if they share the same step_id
|
|
# (both are part of the same LLM response - assistant has thinking/tool_calls, approval has approval-required subset)
|
|
protected_messages = [in_context_messages[-1]]
|
|
|
|
# Check if the message before approval is also from the same step (has reasoning/tool_calls)
|
|
if len(in_context_messages) >= 2:
|
|
potential_assistant = in_context_messages[-2]
|
|
approval_request = in_context_messages[-1]
|
|
if potential_assistant.role == MessageRole.assistant and potential_assistant.step_id == approval_request.step_id:
|
|
# They're part of the same LLM response - protect both
|
|
protected_messages = [potential_assistant, approval_request]
|
|
messages_to_summarize = in_context_messages[1:-2]
|
|
else:
|
|
messages_to_summarize = in_context_messages[1:-1]
|
|
else:
|
|
messages_to_summarize = in_context_messages[1:-1]
|
|
else:
|
|
messages_to_summarize = in_context_messages[1:]
|
|
protected_messages = []
|
|
|
|
# TODO: add fallback in case this has a context window error
|
|
summary_message_str = await simple_summary(
|
|
messages=messages_to_summarize,
|
|
llm_config=llm_config,
|
|
actor=actor,
|
|
include_ack=bool(summarizer_config.prompt_acknowledgement),
|
|
prompt=summarizer_config.prompt,
|
|
agent_id=agent_id,
|
|
agent_tags=agent_tags,
|
|
run_id=run_id,
|
|
step_id=step_id,
|
|
compaction_settings={
|
|
"mode": "summarize_all",
|
|
"clip_chars": summarizer_config.clip_chars,
|
|
},
|
|
)
|
|
logger.info(f"Summarized {len(messages_to_summarize)} messages")
|
|
|
|
if summarizer_config.clip_chars is not None and len(summary_message_str) > summarizer_config.clip_chars:
|
|
logger.warning(f"Summary length {len(summary_message_str)} exceeds clip length {summarizer_config.clip_chars}. Truncating.")
|
|
summary_message_str = summary_message_str[: summarizer_config.clip_chars] + "... [summary truncated to fit]"
|
|
|
|
return summary_message_str, [in_context_messages[0], *protected_messages]
|