Files
letta-server/letta/agents/ephemeral_memory_agent.py

423 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import xml.etree.ElementTree as ET
from typing import AsyncGenerator, Dict, List, Tuple, Union
import openai
from letta.agents.base_agent import BaseAgent
from letta.schemas.agent import AgentState
from letta.schemas.block import BlockUpdate
from letta.schemas.enums import MessageStreamStatus
from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
from letta.schemas.letta_message_content import TextContent
from letta.schemas.letta_response import LettaResponse
from letta.schemas.message import MessageCreate
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, SystemMessage, Tool, UserMessage
from letta.schemas.usage import LettaUsageStatistics
from letta.schemas.user import User
from letta.server.rest_api.utils import convert_in_context_letta_messages_to_openai, create_input_messages
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
from letta.services.message_manager import MessageManager
class EphemeralMemoryAgent(BaseAgent):
    """
    A stateless agent that helps with offline memory computations.

    It performs a two-phase pass over an agent's conversation history:
      1. Archive coherent chunks of soon-to-be-evicted dialogue as passages
         (``store_memories`` tool).
      2. Iteratively rewrite ("rethink") the target memory block until the
         model signals completion (``rethink_user_memory`` /
         ``finish_rethinking_memory`` tools), then persist the result.
    """

    def __init__(
        self,
        agent_id: str,
        openai_client: openai.AsyncClient,
        message_manager: MessageManager,
        agent_manager: AgentManager,
        block_manager: BlockManager,
        target_block_label: str,
        message_transcripts: List[str],
        actor: User,
    ):
        """
        Args:
            agent_id: ID of the agent whose memory is being maintained.
            openai_client: Async OpenAI client used for both phases.
            message_manager: Accessor for the agent's stored messages.
            agent_manager: Accessor for agent state / blocks / passages.
            block_manager: Used to persist the refined memory block.
            target_block_label: Label of the memory block to refine.
            message_transcripts: Raw transcript lines, indexable by the
                ``start_index``/``end_index`` the model emits.
            actor: User on whose behalf all manager calls are made.
        """
        super().__init__(
            agent_id=agent_id,
            openai_client=openai_client,
            message_manager=message_manager,
            agent_manager=agent_manager,
            actor=actor,
        )

        self.block_manager = block_manager
        self.target_block_label = target_block_label
        self.message_transcripts = message_transcripts

    def update_message_transcript(self, message_transcripts: List[str]):
        """Replace the cached transcript lines used by `store_memory`."""
        self.message_transcripts = message_transcripts

    async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
        """
        Process the user's input message, allowing the model to call memory-related tools
        until it decides to stop and provide a final response.

        Args:
            input_messages: Messages that trigger this offline memory pass.
            max_steps: Upper bound on rethink iterations (phase 2).

        Returns:
            An empty LettaResponse — this agent's output is its side effects
            (stored passages and the updated memory block).

        Raises:
            ValueError: If the model calls an unknown tool in phase 1.
        """
        agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
        in_context_messages = create_input_messages(input_messages=input_messages, agent_id=self.agent_id, actor=self.actor)
        openai_messages = convert_in_context_letta_messages_to_openai(in_context_messages, exclude_system_messages=True)

        # 1. Store memories
        request = self._build_openai_request(
            openai_messages, agent_state, tools=self._build_store_memory_tool_schemas(), system=self._get_memory_store_system_prompt()
        )

        chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
        assistant_message = chat_completion.choices[0].message

        # Process tool calls — tool_choice="required" guarantees at least one.
        tool_call = assistant_message.tool_calls[0]
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        if function_name == "store_memories":
            print("Called store_memories")
            print(function_args)
            # Default to an empty list so a malformed payload without
            # "chunks" doesn't raise a TypeError on iteration.
            for chunk_args in function_args.get("chunks", []):
                self.store_memory(agent_state=agent_state, **chunk_args)
            result = "Successfully stored memories"
        else:
            # f-string fix: the original message never interpolated the name.
            raise ValueError(f"Error: Unknown tool function '{function_name}'")

        # Echo the assistant tool call and its result back into the transcript
        # so the phase-2 model sees the full tool exchange.
        openai_messages.append(
            {
                "role": "assistant",
                "content": assistant_message.content,
                "tool_calls": [
                    {
                        "id": tool_call.id,
                        "type": "function",
                        "function": {"name": function_name, "arguments": tool_call.function.arguments},
                    }
                ],
            }
        )
        openai_messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(result)})

        # 2. Execute rethink block memory loop
        human_block_content = self.agent_manager.get_block_with_label(
            agent_id=self.agent_id, block_label=self.target_block_label, actor=self.actor
        )
        rethink_command = f"""
Here is the current memory block created earlier:

### CURRENT MEMORY
{human_block_content}
### END CURRENT MEMORY

Please refine this block:
- Merge in any new facts and remove outdated or contradictory details.
- Organize related information together (e.g., preferences, background, ongoing goals).
- Add any light, supportable inferences that deepen understanding—but do not invent unsupported details.

Use `rethink_user_memory(new_memory)` as many times as you need to iteratively improve the text. When it's fully polished and complete, call `finish_rethinking_memory()`.
"""
        rethink_command = UserMessage(content=rethink_command)
        openai_messages.append(rethink_command.model_dump())

        for _ in range(max_steps):
            request = self._build_openai_request(
                openai_messages, agent_state, tools=self._build_sleeptime_tools(), system=self._get_rethink_memory_system_prompt()
            )
            chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
            assistant_message = chat_completion.choices[0].message

            # Process tool calls
            tool_call = assistant_message.tool_calls[0]
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)
            # Name matches the schema in _build_sleeptime_tools (the original
            # checked for a misspelled "rethink_user_memor" and never matched).
            if function_name == "rethink_user_memory":
                print("Called rethink_user_memory")
                print(function_args)
                result = self.rethink_user_memory(agent_state=agent_state, **function_args)
            elif function_name == "finish_rethinking_memory":
                print("Called finish_rethinking_memory")
                break
            else:
                result = f"Error: Unknown tool function '{function_name}'"

            openai_messages.append(
                {
                    "role": "assistant",
                    "content": assistant_message.content,
                    "tool_calls": [
                        {
                            "id": tool_call.id,
                            "type": "function",
                            "function": {"name": function_name, "arguments": tool_call.function.arguments},
                        }
                    ],
                }
            )
            openai_messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(result)})

        # Actually save the memory: rethink_user_memory only mutated the
        # in-memory agent_state; persist the final value through the manager.
        target_block = agent_state.memory.get_block(self.target_block_label)
        self.block_manager.update_block(block_id=target_block.id, block_update=BlockUpdate(value=target_block.value), actor=self.actor)

        return LettaResponse(messages=[], usage=LettaUsageStatistics())

    def _format_messages_llm_friendly(self):
        """Render the agent's stored messages as "role: text" lines, one per message."""
        messages = self.message_manager.list_messages_for_agent(agent_id=self.agent_id, actor=self.actor)

        llm_friendly_messages = [f"{m.role}: {m.content[0].text}" for m in messages if m.content and isinstance(m.content[0], TextContent)]
        return "\n".join(llm_friendly_messages)

    def _build_openai_request(
        self, openai_messages: List[Dict], agent_state: AgentState, tools: List[Tool], system: str
    ) -> ChatCompletionRequest:
        """Assemble a non-streaming chat completion request with forced tool use."""
        system_message = SystemMessage(role="system", content=system)
        openai_request = ChatCompletionRequest(
            model="gpt-4o",  # agent_state.llm_config.model, # TODO: Separate config for summarizer?
            messages=[system_message] + openai_messages,
            tools=tools,
            tool_choice="required",
            user=self.actor.id,
            max_completion_tokens=agent_state.llm_config.max_tokens,
            temperature=agent_state.llm_config.temperature,
            stream=False,
        )
        return openai_request

    def _build_store_memory_tool_schemas(self) -> List[Tool]:
        """
        Build the schema for the `store_memories` tool used in phase 1.
        """
        tools = [
            Tool(
                type="function",
                function={
                    "name": "store_memories",
                    "description": "Archive coherent chunks of dialogue that will be evicted, preserving raw lines and a brief contextual description.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "chunks": {
                                "type": "array",
                                "items": {
                                    "type": "object",
                                    "properties": {
                                        "start_index": {"type": "integer", "description": "Index of first line in original history."},
                                        "end_index": {"type": "integer", "description": "Index of last line in original history."},
                                        "context": {
                                            "type": "string",
                                            "description": "A high-level description providing context for why this chunk matters.",
                                        },
                                    },
                                    "required": ["start_index", "end_index", "context"],
                                },
                            }
                        },
                        "required": ["chunks"],
                        "additionalProperties": False,
                    },
                },
            ),
        ]

        return tools

    def _build_sleeptime_tools(self) -> List[Tool]:
        """Build the `rethink_user_memory` / `finish_rethinking_memory` schemas for phase 2."""
        tools = [
            Tool(
                type="function",
                function={
                    "name": "rethink_user_memory",
                    "description": (
                        "Rewrite memory block for the main agent, new_memory should contain all current "
                        "information from the block that is not outdated or inconsistent, integrating any "
                        "new information, resulting in a new memory block that is organized, readable, and "
                        "comprehensive."
                    ),
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "new_memory": {
                                "type": "string",
                                "description": (
                                    "The new memory with information integrated from the memory block. "
                                    "If there is no new information, then this should be the same as the "
                                    "content in the source block."
                                ),
                            },
                        },
                        "required": ["new_memory"],
                        "additionalProperties": False,
                    },
                },
            ),
            Tool(
                type="function",
                function={
                    "name": "finish_rethinking_memory",
                    "description": ("This function is called when the agent is done rethinking the memory."),
                    "parameters": {
                        "type": "object",
                        "properties": {},
                        "required": [],
                        "additionalProperties": False,
                    },
                },
            ),
        ]

        return tools

    def rethink_user_memory(self, new_memory: str, agent_state: AgentState) -> str:
        """Overwrite the in-memory target block with `new_memory`, creating it if absent."""
        if agent_state.memory.get_block(self.target_block_label) is None:
            agent_state.memory.create_block(label=self.target_block_label, value=new_memory)

        agent_state.memory.update_block_value(label=self.target_block_label, value=new_memory)

        return "Successfully updated memory"

    def store_memory(self, start_index: int, end_index: int, context: str, agent_state: AgentState) -> str:
        """
        Store a memory: serialize the transcript slice [start_index, end_index]
        plus its context blurb to XML and insert it as a passage.

        Returns a human-readable status string (errors are reported, not raised,
        so a single bad chunk does not abort the whole pass).
        """
        try:
            messages = self.message_transcripts[start_index : end_index + 1]
            memory = self.serialize(messages, context)
            self.agent_manager.passage_manager.insert_passage(
                agent_state=agent_state,
                agent_id=agent_state.id,
                text=memory,
                actor=self.actor,
            )
            self.agent_manager.rebuild_system_prompt(agent_id=agent_state.id, actor=self.actor, force=True)

            return "Successfully stored memory"
        except Exception as e:
            return f"Failed to store memory given start_index {start_index} and end_index {end_index}: {e}"

    def serialize(self, messages: List[str], context: str) -> str:
        """
        Produce an XML document like:

        <memory>
          <messages>
            <message>…</message>
            <message>…</message>
          </messages>
          <context>…</context>
        </memory>
        """
        root = ET.Element("memory")

        msgs_el = ET.SubElement(root, "messages")
        for msg in messages:
            m = ET.SubElement(msgs_el, "message")
            m.text = msg

        sum_el = ET.SubElement(root, "context")
        sum_el.text = context

        # ET.tostring will escape reserved chars for you
        return ET.tostring(root, encoding="unicode")

    def deserialize(self, xml_str: str) -> Tuple[List[str], str]:
        """
        Parse the XML back into (messages, context). Raises ValueError if tags are missing.
        """
        try:
            root = ET.fromstring(xml_str)
        except ET.ParseError as e:
            raise ValueError(f"Invalid XML: {e}")

        msgs_el = root.find("messages")
        if msgs_el is None:
            raise ValueError("Missing <messages> section")

        messages = []
        for m in msgs_el.findall("message"):
            # .text may be None if empty, so coerce to empty string
            messages.append(m.text or "")

        sum_el = root.find("context")
        if sum_el is None:
            raise ValueError("Missing <context> section")
        context = sum_el.text or ""

        return messages, context

    async def step_stream(
        self, input_messages: List[MessageCreate], max_steps: int = 10
    ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
        """
        This agent is synchronous-only. If called in an async context, raise an error.
        """
        raise NotImplementedError("EphemeralMemoryAgent does not support async step.")

    # TODO: Move these to independent text files
    def _get_memory_store_system_prompt(self) -> str:
        """System prompt for phase 1 (chunk-and-archive)."""
        return """
You are a memory-recall assistant working asynchronously alongside a main chat agent that retains only a portion of the message history in its context window.

When given a full transcript with lines marked (Older) or (Newer), you should:
1. Segment the (Older) portion into coherent chunks by topic, instruction, or preference.
2. For each chunk, produce only:
   - start_index: the first line's index
   - end_index: the last line's index
   - context: a blurb explaining why this chunk matters

Return exactly one JSON tool call to `store_memories`, consider this miniature example:

---
(Older)
0. user: Okay. Got it. Keep your answers shorter, please.
1. assistant: Sure thing! I'll keep it brief. What would you like to know?
2. user: I like basketball.
3. assistant: That's great! Do you have a favorite team or player?

(Newer)
4. user: Yeah. I like basketball.
5. assistant: Awesome! What do you enjoy most about basketball?
---

Example output:

```json
{
  "name": "store_memories",
  "arguments": {
    "chunks": [
      {
        "start_index": 0,
        "end_index": 1,
        "context": "User explicitly asked the assistant to keep responses concise."
      },
      {
        "start_index": 2,
        "end_index": 3,
        "context": "User enjoys basketball and prompted follow-up about their favorite team or player."
      }
    ]
  }
}
```
"""

    def _get_rethink_memory_system_prompt(self) -> str:
        """System prompt for phase 2 (iterative memory refinement)."""
        return """
SYSTEM
You are a Memory-Updater agent. Your job is to iteratively refine the given memory block until it's concise, organized, and complete.

Instructions:
- Call `rethink_user_memory(new_memory: string)` as many times as you like. Each call should submit a fully revised version of the block so far.
- When you're fully satisfied, call `finish_rethinking_memory()`.
- Don't output anything else—only the JSON for these tool calls.

Goals:
- Merge in new facts and remove contradictions.
- Group related details (preferences, biography, goals).
- Draw light, supportable inferences without inventing facts.
- Preserve every critical piece of information.
"""