feat: add prompt generator for on the fly system prompt generation (#4060)

This commit is contained in:
cthomas
2025-08-20 15:49:40 -07:00
committed by GitHub
parent d0ddc5545c
commit 395faf3ed8
6 changed files with 203 additions and 213 deletions

View File

@@ -42,6 +42,7 @@ from letta.log import get_logger
from letta.memory import summarize_messages
from letta.orm import User
from letta.otel.tracing import log_event, trace_method
from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
from letta.schemas.block import BlockUpdate
from letta.schemas.embedding_config import EmbeddingConfig
@@ -59,7 +60,7 @@ from letta.schemas.tool_rule import TerminalToolRule
from letta.schemas.usage import LettaUsageStatistics
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
from letta.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block
from letta.services.helpers.agent_manager_helper import check_supports_structured_output
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
from letta.services.job_manager import JobManager
from letta.services.mcp.base_client import AsyncBaseMCPClient
@@ -1246,7 +1247,7 @@ class Agent(BaseAgent):
agent_manager_passage_size = self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id)
message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
external_memory_summary = compile_memory_metadata_block(
external_memory_summary = PromptGenerator.compile_memory_metadata_block(
memory_edit_timestamp=get_utc_time(),
timezone=self.agent_state.timezone,
previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),

View File

@@ -7,6 +7,7 @@ from letta.constants import DEFAULT_MAX_STEPS
from letta.helpers import ToolRulesSolver
from letta.helpers.datetime_helpers import get_utc_time
from letta.log import get_logger
from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState
from letta.schemas.enums import MessageStreamStatus
from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
@@ -17,7 +18,6 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate
from letta.schemas.usage import LettaUsageStatistics
from letta.schemas.user import User
from letta.services.agent_manager import AgentManager
from letta.services.helpers.agent_manager_helper import get_system_message_from_compiled_memory
from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
from letta.utils import united_diff
@@ -142,7 +142,7 @@ class BaseAgent(ABC):
if num_archival_memories is None:
num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
new_system_message_str = get_system_message_from_compiled_memory(
new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
system_prompt=agent_state.system,
memory_with_sources=curr_memory_str,
in_context_memory_last_edit=memory_edit_timestamp,

View File

@@ -13,6 +13,7 @@ from letta.helpers.datetime_helpers import get_utc_time
from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
from letta.log import get_logger
from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState, AgentType
from letta.schemas.enums import MessageRole, ToolType
from letta.schemas.letta_response import LettaResponse
@@ -35,7 +36,6 @@ from letta.server.rest_api.utils import (
)
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
from letta.services.helpers.agent_manager_helper import compile_system_message_async
from letta.services.job_manager import JobManager
from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
@@ -144,7 +144,7 @@ class VoiceAgent(BaseAgent):
in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
memory_edit_timestamp = get_utc_time()
in_context_messages[0].content[0].text = await compile_system_message_async(
in_context_messages[0].content[0].text = await PromptGenerator.compile_system_message_async(
system_prompt=agent_state.system,
in_context_memory=agent_state.memory,
in_context_memory_last_edit=memory_edit_timestamp,

View File

@@ -0,0 +1,190 @@
from datetime import datetime
from typing import List, Literal, Optional
from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
from letta.helpers import ToolRulesSolver
from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
from letta.otel.tracing import trace_method
from letta.schemas.memory import Memory
class PromptGenerator:
# TODO: This code is kind of wonky and deserves a rewrite
@trace_method
@staticmethod
def compile_memory_metadata_block(
memory_edit_timestamp: datetime,
timezone: str,
previous_message_count: int = 0,
archival_memory_size: Optional[int] = 0,
) -> str:
"""
Generate a memory metadata block for the agent's system prompt.
This creates a structured metadata section that informs the agent about
the current state of its memory systems, including timing information
and memory counts. This helps the agent understand what information
is available through its tools.
Args:
memory_edit_timestamp: When memory blocks were last modified
timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
previous_message_count: Number of messages in recall memory (conversation history)
archival_memory_size: Number of items in archival memory (long-term storage)
Returns:
A formatted string containing the memory metadata block with XML-style tags
Example Output:
<memory_metadata>
- The current time is: 2024-01-15 10:30 AM PST
- Memory blocks were last modified: 2024-01-15 09:00 AM PST
- 42 previous messages between you and the user are stored in recall memory (use tools to access them)
- 156 total memories you created are stored in archival memory (use tools to access them)
</memory_metadata>
"""
# Put the timestamp in the local timezone (mimicking get_local_time())
timestamp_str = format_datetime(memory_edit_timestamp, timezone)
# Create a metadata block of info so the agent knows about the metadata of out-of-context memories
metadata_lines = [
"<memory_metadata>",
f"- The current time is: {get_local_time_fast(timezone)}",
f"- Memory blocks were last modified: {timestamp_str}",
f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
]
# Only include archival memory line if there are archival memories
if archival_memory_size is not None and archival_memory_size > 0:
metadata_lines.append(
f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
)
metadata_lines.append("</memory_metadata>")
memory_metadata_block = "\n".join(metadata_lines)
return memory_metadata_block
@staticmethod
def safe_format(template: str, variables: dict) -> str:
"""
Safely formats a template string, preserving empty {} and {unknown_vars}
while substituting known variables.
If we simply use {} in format_map, it'll be treated as a positional field
"""
# First escape any empty {} by doubling them
escaped = template.replace("{}", "{{}}")
# Now use format_map with our custom mapping
return escaped.format_map(PreserveMapping(variables))
@trace_method
@staticmethod
def get_system_message_from_compiled_memory(
system_prompt: str,
memory_with_sources: str,
in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
timezone: str,
user_defined_variables: Optional[dict] = None,
append_icm_if_missing: bool = True,
template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
previous_message_count: int = 0,
archival_memory_size: int = 0,
) -> str:
"""Prepare the final/full system message that will be fed into the LLM API
The base system message may be templated, in which case we need to render the variables.
The following are reserved variables:
- CORE_MEMORY: the in-context memory of the LLM
"""
if user_defined_variables is not None:
# TODO eventually support the user defining their own variables to inject
raise NotImplementedError
else:
variables = {}
# Add the protected memory variable
if IN_CONTEXT_MEMORY_KEYWORD in variables:
raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
else:
# TODO should this all put into the memory.__repr__ function?
memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
memory_edit_timestamp=in_context_memory_last_edit,
previous_message_count=previous_message_count,
archival_memory_size=archival_memory_size,
timezone=timezone,
)
full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
# Add to the variables list to inject
variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
if template_format == "f-string":
memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
# Catch the special case where the system prompt is unformatted
if append_icm_if_missing:
if memory_variable_string not in system_prompt:
# In this case, append it to the end to make sure memory is still injected
# warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
system_prompt += "\n\n" + memory_variable_string
# render the variables using the built-in templater
try:
if user_defined_variables:
formatted_prompt = PromptGenerator.safe_format(system_prompt, variables)
else:
formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
except Exception as e:
raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
else:
# TODO support for mustache and jinja2
raise NotImplementedError(template_format)
return formatted_prompt
@trace_method
@staticmethod
async def compile_system_message_async(
system_prompt: str,
in_context_memory: Memory,
in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
timezone: str,
user_defined_variables: Optional[dict] = None,
append_icm_if_missing: bool = True,
template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
previous_message_count: int = 0,
archival_memory_size: int = 0,
tool_rules_solver: Optional[ToolRulesSolver] = None,
sources: Optional[List] = None,
max_files_open: Optional[int] = None,
) -> str:
tool_constraint_block = None
if tool_rules_solver is not None:
tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
if user_defined_variables is not None:
# TODO eventually support the user defining their own variables to inject
raise NotImplementedError
else:
pass
memory_with_sources = await in_context_memory.compile_in_thread_async(
tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
)
return PromptGenerator.get_system_message_from_compiled_memory(
system_prompt=system_prompt,
memory_with_sources=memory_with_sources,
in_context_memory_last_edit=in_context_memory_last_edit,
timezone=timezone,
user_defined_variables=user_defined_variables,
append_icm_if_missing=append_icm_if_missing,
template_format=template_format,
previous_message_count=previous_message_count,
archival_memory_size=archival_memory_size,
)

View File

@@ -42,6 +42,7 @@ from letta.orm.sandbox_config import AgentEnvironmentVariable
from letta.orm.sandbox_config import AgentEnvironmentVariable as AgentEnvironmentVariableModel
from letta.orm.sqlalchemy_base import AccessType
from letta.otel.tracing import trace_method
from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState as PydanticAgentState
from letta.schemas.agent import AgentType, CreateAgent, UpdateAgent, get_prompt_template_for_agent_type
from letta.schemas.block import DEFAULT_BLOCKS
@@ -89,7 +90,6 @@ from letta.services.helpers.agent_manager_helper import (
check_supports_structured_output,
compile_system_message,
derive_system_message,
get_system_message_from_compiled_memory,
initialize_message_sequence,
initialize_message_sequence_async,
package_initial_message_sequence,
@@ -1783,7 +1783,7 @@ class AgentManager:
# update memory (TODO: potentially update recall/archival stats separately)
new_system_message_str = get_system_message_from_compiled_memory(
new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
system_prompt=agent_state.system,
memory_with_sources=curr_memory_str,
in_context_memory_last_edit=memory_edit_timestamp,

View File

@@ -21,7 +21,7 @@ from letta.constants import (
STRUCTURED_OUTPUT_MODELS,
)
from letta.helpers import ToolRulesSolver
from letta.helpers.datetime_helpers import format_datetime, get_local_time, get_local_time_fast
from letta.helpers.datetime_helpers import get_local_time
from letta.llm_api.llm_client import LLMClient
from letta.orm.agent import Agent as AgentModel
from letta.orm.agents_tags import AgentsTags
@@ -33,6 +33,7 @@ from letta.orm.sources_agents import SourcesAgents
from letta.orm.sqlite_functions import adapt_array
from letta.otel.tracing import trace_method
from letta.prompts import gpt_system
from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState, AgentType
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import MessageRole
@@ -217,60 +218,6 @@ def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool
return system
# TODO: This code is kind of wonky and deserves a rewrite
def compile_memory_metadata_block(
memory_edit_timestamp: datetime,
timezone: str,
previous_message_count: int = 0,
archival_memory_size: Optional[int] = 0,
) -> str:
"""
Generate a memory metadata block for the agent's system prompt.
This creates a structured metadata section that informs the agent about
the current state of its memory systems, including timing information
and memory counts. This helps the agent understand what information
is available through its tools.
Args:
memory_edit_timestamp: When memory blocks were last modified
timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
previous_message_count: Number of messages in recall memory (conversation history)
archival_memory_size: Number of items in archival memory (long-term storage)
Returns:
A formatted string containing the memory metadata block with XML-style tags
Example Output:
<memory_metadata>
- The current time is: 2024-01-15 10:30 AM PST
- Memory blocks were last modified: 2024-01-15 09:00 AM PST
- 42 previous messages between you and the user are stored in recall memory (use tools to access them)
- 156 total memories you created are stored in archival memory (use tools to access them)
</memory_metadata>
"""
# Put the timestamp in the local timezone (mimicking get_local_time())
timestamp_str = format_datetime(memory_edit_timestamp, timezone)
# Create a metadata block of info so the agent knows about the metadata of out-of-context memories
metadata_lines = [
"<memory_metadata>",
f"- The current time is: {get_local_time_fast(timezone)}",
f"- Memory blocks were last modified: {timestamp_str}",
f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
]
# Only include archival memory line if there are archival memories
if archival_memory_size is not None and archival_memory_size > 0:
metadata_lines.append(
f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
)
metadata_lines.append("</memory_metadata>")
memory_metadata_block = "\n".join(metadata_lines)
return memory_metadata_block
class PreserveMapping(dict):
"""Used to preserve (do not modify) undefined variables in the system prompt"""
@@ -331,7 +278,7 @@ def compile_system_message(
raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
else:
# TODO should this all put into the memory.__repr__ function?
memory_metadata_string = compile_memory_metadata_block(
memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
memory_edit_timestamp=in_context_memory_last_edit,
previous_message_count=previous_message_count,
archival_memory_size=archival_memory_size,
@@ -372,154 +319,6 @@ def compile_system_message(
return formatted_prompt
@trace_method
def get_system_message_from_compiled_memory(
system_prompt: str,
memory_with_sources: str,
in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
timezone: str,
user_defined_variables: Optional[dict] = None,
append_icm_if_missing: bool = True,
template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
previous_message_count: int = 0,
archival_memory_size: int = 0,
) -> str:
"""Prepare the final/full system message that will be fed into the LLM API
The base system message may be templated, in which case we need to render the variables.
The following are reserved variables:
- CORE_MEMORY: the in-context memory of the LLM
"""
if user_defined_variables is not None:
# TODO eventually support the user defining their own variables to inject
raise NotImplementedError
else:
variables = {}
# Add the protected memory variable
if IN_CONTEXT_MEMORY_KEYWORD in variables:
raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
else:
# TODO should this all put into the memory.__repr__ function?
memory_metadata_string = compile_memory_metadata_block(
memory_edit_timestamp=in_context_memory_last_edit,
previous_message_count=previous_message_count,
archival_memory_size=archival_memory_size,
timezone=timezone,
)
full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
# Add to the variables list to inject
variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
if template_format == "f-string":
memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
# Catch the special case where the system prompt is unformatted
if append_icm_if_missing:
if memory_variable_string not in system_prompt:
# In this case, append it to the end to make sure memory is still injected
# warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
system_prompt += "\n\n" + memory_variable_string
# render the variables using the built-in templater
try:
if user_defined_variables:
formatted_prompt = safe_format(system_prompt, variables)
else:
formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
except Exception as e:
raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
else:
# TODO support for mustache and jinja2
raise NotImplementedError(template_format)
return formatted_prompt
@trace_method
async def compile_system_message_async(
system_prompt: str,
in_context_memory: Memory,
in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
timezone: str,
user_defined_variables: Optional[dict] = None,
append_icm_if_missing: bool = True,
template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
previous_message_count: int = 0,
archival_memory_size: int = 0,
tool_rules_solver: Optional[ToolRulesSolver] = None,
sources: Optional[List] = None,
max_files_open: Optional[int] = None,
) -> str:
"""Prepare the final/full system message that will be fed into the LLM API
The base system message may be templated, in which case we need to render the variables.
The following are reserved variables:
- CORE_MEMORY: the in-context memory of the LLM
"""
# Add tool rule constraints if available
tool_constraint_block = None
if tool_rules_solver is not None:
tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
if user_defined_variables is not None:
# TODO eventually support the user defining their own variables to inject
raise NotImplementedError
else:
variables = {}
# Add the protected memory variable
if IN_CONTEXT_MEMORY_KEYWORD in variables:
raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
else:
# TODO should this all put into the memory.__repr__ function?
memory_metadata_string = compile_memory_metadata_block(
memory_edit_timestamp=in_context_memory_last_edit,
previous_message_count=previous_message_count,
archival_memory_size=archival_memory_size,
timezone=timezone,
)
memory_with_sources = await in_context_memory.compile_in_thread_async(
tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
)
full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
# Add to the variables list to inject
variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
if template_format == "f-string":
memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
# Catch the special case where the system prompt is unformatted
if append_icm_if_missing:
if memory_variable_string not in system_prompt:
# In this case, append it to the end to make sure memory is still injected
# warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
system_prompt += "\n\n" + memory_variable_string
# render the variables using the built-in templater
try:
if user_defined_variables:
formatted_prompt = safe_format(system_prompt, variables)
else:
formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
except Exception as e:
raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
else:
# TODO support for mustache and jinja2
raise NotImplementedError(template_format)
return formatted_prompt
@trace_method
def initialize_message_sequence(
agent_state: AgentState,
@@ -601,7 +400,7 @@ async def initialize_message_sequence_async(
if memory_edit_timestamp is None:
memory_edit_timestamp = get_local_time()
full_system_message = await compile_system_message_async(
full_system_message = await PromptGenerator.compile_system_message_async(
system_prompt=agent_state.system,
in_context_memory=agent_state.memory,
in_context_memory_last_edit=memory_edit_timestamp,