Files
letta-server/letta/memory.py
Charles Packer a4041879a4 feat: add new agent loop (squash rebase of OSS PR) (#4815)
* feat: squash rebase of OSS PR

* fix: revert changes that weren't on manual rebase

* fix: caught another one

* fix: disable force

* chore: drop print

* fix: just stage-api && just publish-api

* fix: make agent_type consistently an arg in the client

* fix: patch multi-modal support

* chore: put in todo stub

* fix: disable hardcoding for tests

* fix: patch validate agent sync (#4882)

patch validate agent sync

* fix: strip bad merge diff

* fix: revert unrelated diff

* fix: react_v2 naming -> letta_v1 naming

* fix: strip bad merge

---------

Co-authored-by: Kevin Lin <klin5061@gmail.com>
2025-10-07 17:50:45 -07:00

109 lines
4.2 KiB
Python

from typing import TYPE_CHECKING, Callable, Dict, List
from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
from letta.llm_api.llm_api_tools import create
from letta.llm_api.llm_client import LLMClient
from letta.otel.tracing import trace_method
from letta.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM
from letta.schemas.agent import AgentState
from letta.schemas.enums import MessageRole
from letta.schemas.letta_message_content import TextContent
from letta.schemas.memory import Memory
from letta.schemas.message import Message
from letta.settings import summarizer_settings
from letta.utils import count_tokens, printd
if TYPE_CHECKING:
from letta.orm import User
def get_memory_functions(cls: Memory) -> Dict[str, Callable]:
"""Get memory functions for a memory class"""
functions = {}
# collect base memory functions (should not be included)
base_functions = []
for func_name in dir(Memory):
funct = getattr(Memory, func_name)
if callable(funct):
base_functions.append(func_name)
for func_name in dir(cls):
if func_name.startswith("_") or func_name in ["load", "to_dict"]: # skip base functions
continue
if func_name in base_functions: # dont use BaseMemory functions
continue
func = getattr(cls, func_name)
if not callable(func): # not a function
continue
functions[func_name] = func
return functions
def _format_summary_history(message_history: List[Message]):
# TODO use existing prompt formatters for this (eg ChatML)
def get_message_text(content):
if content and len(content) == 1 and isinstance(content[0], TextContent):
return content[0].text
return ""
return "\n".join([f"{m.role}: {get_message_text(m.content)}" for m in message_history])
@trace_method
def summarize_messages(
agent_state: AgentState,
message_sequence_to_summarize: List[Message],
actor: "User",
):
"""Summarize a message sequence using GPT"""
# we need the context_window
context_window = agent_state.llm_config.context_window
summary_prompt = SUMMARY_PROMPT_SYSTEM
summary_input = _format_summary_history(message_sequence_to_summarize)
summary_input_tkns = count_tokens(summary_input)
if summary_input_tkns > summarizer_settings.memory_warning_threshold * context_window:
trunc_ratio = (summarizer_settings.memory_warning_threshold * context_window / summary_input_tkns) * 0.8 # For good measure...
cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
summary_input = str(
[summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff], actor=actor)]
+ message_sequence_to_summarize[cutoff:]
)
dummy_agent_id = agent_state.id
message_sequence = [
Message(agent_id=dummy_agent_id, role=MessageRole.system, content=[TextContent(text=summary_prompt)]),
Message(agent_id=dummy_agent_id, role=MessageRole.assistant, content=[TextContent(text=MESSAGE_SUMMARY_REQUEST_ACK)]),
Message(agent_id=dummy_agent_id, role=MessageRole.user, content=[TextContent(text=summary_input)]),
]
# TODO: We need to eventually have a separate LLM config for the summarizer LLM
llm_config_no_inner_thoughts = agent_state.llm_config.model_copy(deep=True)
llm_config_no_inner_thoughts.put_inner_thoughts_in_kwargs = False
llm_client = LLMClient.create(
provider_type=agent_state.llm_config.model_endpoint_type,
put_inner_thoughts_first=False,
actor=actor,
)
# try to use new client, otherwise fallback to old flow
# TODO: we can just directly call the LLM here?
if llm_client:
response = llm_client.send_llm_request(
agent_type=agent_state.agent_type,
messages=message_sequence,
llm_config=llm_config_no_inner_thoughts,
)
else:
response = create(
llm_config=llm_config_no_inner_thoughts,
user_id=agent_state.created_by_id,
messages=message_sequence,
stream=False,
)
printd(f"summarize_messages gpt reply: {response.choices[0]}")
reply = response.choices[0].message.content
return reply