diff --git a/letta/agent.py b/letta/agent.py
index 7aec2469..4a9127c8 100644
--- a/letta/agent.py
+++ b/letta/agent.py
@@ -42,6 +42,7 @@ from letta.log import get_logger
from letta.memory import summarize_messages
from letta.orm import User
from letta.otel.tracing import log_event, trace_method
+from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
from letta.schemas.block import BlockUpdate
from letta.schemas.embedding_config import EmbeddingConfig
@@ -59,7 +60,7 @@ from letta.schemas.tool_rule import TerminalToolRule
from letta.schemas.usage import LettaUsageStatistics
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
-from letta.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block
+from letta.services.helpers.agent_manager_helper import check_supports_structured_output
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
from letta.services.job_manager import JobManager
from letta.services.mcp.base_client import AsyncBaseMCPClient
@@ -1246,7 +1247,7 @@ class Agent(BaseAgent):
agent_manager_passage_size = self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id)
message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
- external_memory_summary = compile_memory_metadata_block(
+ external_memory_summary = PromptGenerator.compile_memory_metadata_block(
memory_edit_timestamp=get_utc_time(),
timezone=self.agent_state.timezone,
previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
diff --git a/letta/agents/base_agent.py b/letta/agents/base_agent.py
index d351eb10..3355076b 100644
--- a/letta/agents/base_agent.py
+++ b/letta/agents/base_agent.py
@@ -7,6 +7,7 @@ from letta.constants import DEFAULT_MAX_STEPS
from letta.helpers import ToolRulesSolver
from letta.helpers.datetime_helpers import get_utc_time
from letta.log import get_logger
+from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState
from letta.schemas.enums import MessageStreamStatus
from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
@@ -17,7 +18,6 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate
from letta.schemas.usage import LettaUsageStatistics
from letta.schemas.user import User
from letta.services.agent_manager import AgentManager
-from letta.services.helpers.agent_manager_helper import get_system_message_from_compiled_memory
from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
from letta.utils import united_diff
@@ -142,7 +142,7 @@ class BaseAgent(ABC):
if num_archival_memories is None:
num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
- new_system_message_str = get_system_message_from_compiled_memory(
+ new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
system_prompt=agent_state.system,
memory_with_sources=curr_memory_str,
in_context_memory_last_edit=memory_edit_timestamp,
diff --git a/letta/agents/voice_agent.py b/letta/agents/voice_agent.py
index 69ce9150..3b00dfae 100644
--- a/letta/agents/voice_agent.py
+++ b/letta/agents/voice_agent.py
@@ -13,6 +13,7 @@ from letta.helpers.datetime_helpers import get_utc_time
from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
from letta.log import get_logger
+from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState, AgentType
from letta.schemas.enums import MessageRole, ToolType
from letta.schemas.letta_response import LettaResponse
@@ -35,7 +36,6 @@ from letta.server.rest_api.utils import (
)
from letta.services.agent_manager import AgentManager
from letta.services.block_manager import BlockManager
-from letta.services.helpers.agent_manager_helper import compile_system_message_async
from letta.services.job_manager import JobManager
from letta.services.message_manager import MessageManager
from letta.services.passage_manager import PassageManager
@@ -144,7 +144,7 @@ class VoiceAgent(BaseAgent):
in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
memory_edit_timestamp = get_utc_time()
- in_context_messages[0].content[0].text = await compile_system_message_async(
+ in_context_messages[0].content[0].text = await PromptGenerator.compile_system_message_async(
system_prompt=agent_state.system,
in_context_memory=agent_state.memory,
in_context_memory_last_edit=memory_edit_timestamp,
diff --git a/letta/prompts/prompt_generator.py b/letta/prompts/prompt_generator.py
new file mode 100644
index 00000000..ffb36b05
--- /dev/null
+++ b/letta/prompts/prompt_generator.py
@@ -0,0 +1,235 @@
+from datetime import datetime
+from typing import List, Literal, Optional
+
+from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
+from letta.helpers import ToolRulesSolver
+from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
+from letta.otel.tracing import trace_method
+from letta.schemas.memory import Memory
+
+
+class PromptGenerator:
+    """Stateless helpers that assemble the system prompt sent to the LLM.
+
+    Every method is a ``@staticmethod``; call them on the class, e.g.
+    ``PromptGenerator.compile_memory_metadata_block(...)``.
+    """
+
+    class _PreserveMapping(dict):
+        """Mapping for ``str.format_map`` that re-emits unknown ``{name}`` fields verbatim."""
+
+        def __missing__(self, key: str) -> str:
+            return "{" + key + "}"
+
+    # TODO: This code is kind of wonky and deserves a rewrite
+    # ``@staticmethod`` stays outermost so ``trace_method`` decorates the plain function
+    # rather than a ``staticmethod`` descriptor object.
+    @staticmethod
+    @trace_method
+    def compile_memory_metadata_block(
+        memory_edit_timestamp: datetime,
+        timezone: str,
+        previous_message_count: int = 0,
+        archival_memory_size: Optional[int] = 0,
+    ) -> str:
+        """
+        Generate a memory metadata block for the agent's system prompt.
+
+        The block tells the agent the current time, when its memory blocks were
+        last modified, and how many items are held in recall and archival memory,
+        so it knows what is reachable through its tools.
+
+        Args:
+            memory_edit_timestamp: When memory blocks were last modified
+            timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
+            previous_message_count: Number of messages in recall memory (conversation history)
+            archival_memory_size: Number of items in archival memory (long-term storage);
+                the archival line is omitted when this is None or not positive
+
+        Returns:
+            The metadata lines joined with newlines. The first and last entries are
+            empty strings, so the result begins and ends with a newline.
+
+        Example Output (illustrative values):
+
+            - The current time is: 2024-01-15 10:30 AM PST
+            - Memory blocks were last modified: 2024-01-15 09:00 AM PST
+            - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
+            - 156 total memories you created are stored in archival memory (use tools to access them)
+
+        """
+        # Put the timestamp in the local timezone (mimicking get_local_time())
+        timestamp_str = format_datetime(memory_edit_timestamp, timezone)
+
+        # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
+        # NOTE(review): the opening and closing entries are empty strings; if wrapper
+        # tags were intended here, confirm none were lost.
+        metadata_lines = [
+            "",
+            f"- The current time is: {get_local_time_fast(timezone)}",
+            f"- Memory blocks were last modified: {timestamp_str}",
+            f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
+        ]
+
+        # Only include archival memory line if there are archival memories
+        if archival_memory_size is not None and archival_memory_size > 0:
+            metadata_lines.append(
+                f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
+            )
+
+        metadata_lines.append("")
+        return "\n".join(metadata_lines)
+
+    @staticmethod
+    def safe_format(template: str, variables: dict) -> str:
+        """
+        Safely formats a template string, preserving empty {} and {unknown_vars}
+        while substituting known variables.
+
+        If we simply use {} in format_map, it'll be treated as a positional field
+
+        Args:
+            template: Text using ``str.format``-style ``{name}`` replacement fields
+            variables: Values for the field names that should be substituted
+
+        Returns:
+            The template with known fields substituted and every other field left as written
+        """
+        # First escape any empty {} by doubling them
+        escaped = template.replace("{}", "{{}}")
+
+        # Unknown names hit _PreserveMapping.__missing__ and come back as "{name}"
+        return escaped.format_map(PromptGenerator._PreserveMapping(variables))
+
+    @staticmethod
+    @trace_method
+    def get_system_message_from_compiled_memory(
+        system_prompt: str,
+        memory_with_sources: str,
+        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
+        timezone: str,
+        user_defined_variables: Optional[dict] = None,
+        append_icm_if_missing: bool = True,
+        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
+        previous_message_count: int = 0,
+        archival_memory_size: Optional[int] = 0,
+    ) -> str:
+        """Prepare the final/full system message that will be fed into the LLM API
+
+        The base system message may be templated, in which case we need to render the variables.
+
+        The following are reserved variables:
+          - CORE_MEMORY: the in-context memory of the LLM
+
+        Args:
+            system_prompt: Base system prompt, optionally containing the memory placeholder
+            memory_with_sources: Already-compiled in-context memory text
+            in_context_memory_last_edit: When memory blocks were last modified
+            timezone: Timezone used when formatting timestamps in the metadata block
+            user_defined_variables: Not supported yet; must be None
+            append_icm_if_missing: Append the memory placeholder when the prompt lacks it
+            template_format: Template dialect; only "f-string" is implemented
+            previous_message_count: Message count reported in the metadata block
+            archival_memory_size: Archival item count reported in the metadata block
+
+        Returns:
+            The system prompt with the memory placeholder replaced by the compiled memory
+            followed by the metadata block
+
+        Raises:
+            NotImplementedError: If user_defined_variables is not None, or template_format
+                is not "f-string"
+            ValueError: If rendering the prompt fails
+        """
+        if user_defined_variables is not None:
+            # TODO eventually support the user defining their own variables to inject
+            raise NotImplementedError
+        variables = {}
+
+        # Add the protected memory variable
+        if IN_CONTEXT_MEMORY_KEYWORD in variables:
+            raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
+
+        # TODO should this all put into the memory.__repr__ function?
+        memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
+            memory_edit_timestamp=in_context_memory_last_edit,
+            previous_message_count=previous_message_count,
+            archival_memory_size=archival_memory_size,
+            timezone=timezone,
+        )
+        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
+
+        # Add to the variables list to inject
+        variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
+
+        if template_format != "f-string":
+            # TODO support for mustache and jinja2
+            raise NotImplementedError(template_format)
+
+        memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
+
+        # Catch the special case where the system prompt is unformatted
+        if append_icm_if_missing and memory_variable_string not in system_prompt:
+            # In this case, append it to the end to make sure memory is still injected
+            system_prompt += "\n\n" + memory_variable_string
+
+        # render the variables using the built-in templater
+        try:
+            if user_defined_variables:
+                formatted_prompt = PromptGenerator.safe_format(system_prompt, variables)
+            else:
+                formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
+        except Exception as e:
+            # Chain the cause so the original traceback is not lost
+            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}") from e
+
+        return formatted_prompt
+
+    @staticmethod
+    @trace_method
+    async def compile_system_message_async(
+        system_prompt: str,
+        in_context_memory: Memory,
+        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
+        timezone: str,
+        user_defined_variables: Optional[dict] = None,
+        append_icm_if_missing: bool = True,
+        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
+        previous_message_count: int = 0,
+        archival_memory_size: Optional[int] = 0,
+        tool_rules_solver: Optional[ToolRulesSolver] = None,
+        sources: Optional[List] = None,
+        max_files_open: Optional[int] = None,
+    ) -> str:
+        """Prepare the final/full system message that will be fed into the LLM API
+
+        Compiles the in-context memory (with optional tool-rule constraints and sources)
+        and hands the result to get_system_message_from_compiled_memory for rendering.
+
+        Raises:
+            NotImplementedError: If user_defined_variables is not None
+        """
+        # Add tool rule constraints if available
+        tool_constraint_block = None
+        if tool_rules_solver is not None:
+            tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
+
+        if user_defined_variables is not None:
+            # TODO eventually support the user defining their own variables to inject
+            raise NotImplementedError
+
+        memory_with_sources = await in_context_memory.compile_in_thread_async(
+            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
+        )
+
+        return PromptGenerator.get_system_message_from_compiled_memory(
+            system_prompt=system_prompt,
+            memory_with_sources=memory_with_sources,
+            in_context_memory_last_edit=in_context_memory_last_edit,
+            timezone=timezone,
+            user_defined_variables=user_defined_variables,
+            append_icm_if_missing=append_icm_if_missing,
+            template_format=template_format,
+            previous_message_count=previous_message_count,
+            archival_memory_size=archival_memory_size,
+        )
diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py
index 9a6ebc42..4d18f7cd 100644
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -42,6 +42,7 @@ from letta.orm.sandbox_config import AgentEnvironmentVariable
from letta.orm.sandbox_config import AgentEnvironmentVariable as AgentEnvironmentVariableModel
from letta.orm.sqlalchemy_base import AccessType
from letta.otel.tracing import trace_method
+from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState as PydanticAgentState
from letta.schemas.agent import AgentType, CreateAgent, UpdateAgent, get_prompt_template_for_agent_type
from letta.schemas.block import DEFAULT_BLOCKS
@@ -89,7 +90,6 @@ from letta.services.helpers.agent_manager_helper import (
check_supports_structured_output,
compile_system_message,
derive_system_message,
- get_system_message_from_compiled_memory,
initialize_message_sequence,
initialize_message_sequence_async,
package_initial_message_sequence,
@@ -1783,7 +1783,7 @@ class AgentManager:
# update memory (TODO: potentially update recall/archival stats separately)
- new_system_message_str = get_system_message_from_compiled_memory(
+ new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
system_prompt=agent_state.system,
memory_with_sources=curr_memory_str,
in_context_memory_last_edit=memory_edit_timestamp,
diff --git a/letta/services/helpers/agent_manager_helper.py b/letta/services/helpers/agent_manager_helper.py
index ad0caef8..da660e96 100644
--- a/letta/services/helpers/agent_manager_helper.py
+++ b/letta/services/helpers/agent_manager_helper.py
@@ -21,7 +21,7 @@ from letta.constants import (
STRUCTURED_OUTPUT_MODELS,
)
from letta.helpers import ToolRulesSolver
-from letta.helpers.datetime_helpers import format_datetime, get_local_time, get_local_time_fast
+from letta.helpers.datetime_helpers import get_local_time
from letta.llm_api.llm_client import LLMClient
from letta.orm.agent import Agent as AgentModel
from letta.orm.agents_tags import AgentsTags
@@ -33,6 +33,7 @@ from letta.orm.sources_agents import SourcesAgents
from letta.orm.sqlite_functions import adapt_array
from letta.otel.tracing import trace_method
from letta.prompts import gpt_system
+from letta.prompts.prompt_generator import PromptGenerator
from letta.schemas.agent import AgentState, AgentType
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import MessageRole
@@ -217,60 +218,6 @@ def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool
return system
-# TODO: This code is kind of wonky and deserves a rewrite
-def compile_memory_metadata_block(
- memory_edit_timestamp: datetime,
- timezone: str,
- previous_message_count: int = 0,
- archival_memory_size: Optional[int] = 0,
-) -> str:
- """
- Generate a memory metadata block for the agent's system prompt.
-
- This creates a structured metadata section that informs the agent about
- the current state of its memory systems, including timing information
- and memory counts. This helps the agent understand what information
- is available through its tools.
-
- Args:
- memory_edit_timestamp: When memory blocks were last modified
- timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
- previous_message_count: Number of messages in recall memory (conversation history)
- archival_memory_size: Number of items in archival memory (long-term storage)
-
- Returns:
- A formatted string containing the memory metadata block with XML-style tags
-
- Example Output:
-
- - The current time is: 2024-01-15 10:30 AM PST
- - Memory blocks were last modified: 2024-01-15 09:00 AM PST
- - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
- - 156 total memories you created are stored in archival memory (use tools to access them)
-
- """
- # Put the timestamp in the local timezone (mimicking get_local_time())
- timestamp_str = format_datetime(memory_edit_timestamp, timezone)
-
- # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
- metadata_lines = [
- "",
- f"- The current time is: {get_local_time_fast(timezone)}",
- f"- Memory blocks were last modified: {timestamp_str}",
- f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
- ]
-
- # Only include archival memory line if there are archival memories
- if archival_memory_size is not None and archival_memory_size > 0:
- metadata_lines.append(
- f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
- )
-
- metadata_lines.append("")
- memory_metadata_block = "\n".join(metadata_lines)
- return memory_metadata_block
-
-
class PreserveMapping(dict):
"""Used to preserve (do not modify) undefined variables in the system prompt"""
@@ -331,7 +278,7 @@ def compile_system_message(
raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
else:
# TODO should this all put into the memory.__repr__ function?
- memory_metadata_string = compile_memory_metadata_block(
+ memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
memory_edit_timestamp=in_context_memory_last_edit,
previous_message_count=previous_message_count,
archival_memory_size=archival_memory_size,
@@ -372,154 +319,6 @@ def compile_system_message(
return formatted_prompt
-@trace_method
-def get_system_message_from_compiled_memory(
- system_prompt: str,
- memory_with_sources: str,
- in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
- timezone: str,
- user_defined_variables: Optional[dict] = None,
- append_icm_if_missing: bool = True,
- template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
- previous_message_count: int = 0,
- archival_memory_size: int = 0,
-) -> str:
- """Prepare the final/full system message that will be fed into the LLM API
-
- The base system message may be templated, in which case we need to render the variables.
-
- The following are reserved variables:
- - CORE_MEMORY: the in-context memory of the LLM
- """
- if user_defined_variables is not None:
- # TODO eventually support the user defining their own variables to inject
- raise NotImplementedError
- else:
- variables = {}
-
- # Add the protected memory variable
- if IN_CONTEXT_MEMORY_KEYWORD in variables:
- raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
- else:
- # TODO should this all put into the memory.__repr__ function?
- memory_metadata_string = compile_memory_metadata_block(
- memory_edit_timestamp=in_context_memory_last_edit,
- previous_message_count=previous_message_count,
- archival_memory_size=archival_memory_size,
- timezone=timezone,
- )
-
- full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
- # Add to the variables list to inject
- variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
- if template_format == "f-string":
- memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
- # Catch the special case where the system prompt is unformatted
- if append_icm_if_missing:
- if memory_variable_string not in system_prompt:
- # In this case, append it to the end to make sure memory is still injected
- # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
- system_prompt += "\n\n" + memory_variable_string
-
- # render the variables using the built-in templater
- try:
- if user_defined_variables:
- formatted_prompt = safe_format(system_prompt, variables)
- else:
- formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
- except Exception as e:
- raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
- else:
- # TODO support for mustache and jinja2
- raise NotImplementedError(template_format)
-
- return formatted_prompt
-
-
-@trace_method
-async def compile_system_message_async(
- system_prompt: str,
- in_context_memory: Memory,
- in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory?
- timezone: str,
- user_defined_variables: Optional[dict] = None,
- append_icm_if_missing: bool = True,
- template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
- previous_message_count: int = 0,
- archival_memory_size: int = 0,
- tool_rules_solver: Optional[ToolRulesSolver] = None,
- sources: Optional[List] = None,
- max_files_open: Optional[int] = None,
-) -> str:
- """Prepare the final/full system message that will be fed into the LLM API
-
- The base system message may be templated, in which case we need to render the variables.
-
- The following are reserved variables:
- - CORE_MEMORY: the in-context memory of the LLM
- """
-
- # Add tool rule constraints if available
- tool_constraint_block = None
- if tool_rules_solver is not None:
- tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
-
- if user_defined_variables is not None:
- # TODO eventually support the user defining their own variables to inject
- raise NotImplementedError
- else:
- variables = {}
-
- # Add the protected memory variable
- if IN_CONTEXT_MEMORY_KEYWORD in variables:
- raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
- else:
- # TODO should this all put into the memory.__repr__ function?
- memory_metadata_string = compile_memory_metadata_block(
- memory_edit_timestamp=in_context_memory_last_edit,
- previous_message_count=previous_message_count,
- archival_memory_size=archival_memory_size,
- timezone=timezone,
- )
-
- memory_with_sources = await in_context_memory.compile_in_thread_async(
- tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
- )
- full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
- # Add to the variables list to inject
- variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
- if template_format == "f-string":
- memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
- # Catch the special case where the system prompt is unformatted
- if append_icm_if_missing:
- if memory_variable_string not in system_prompt:
- # In this case, append it to the end to make sure memory is still injected
- # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
- system_prompt += "\n\n" + memory_variable_string
-
- # render the variables using the built-in templater
- try:
- if user_defined_variables:
- formatted_prompt = safe_format(system_prompt, variables)
- else:
- formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
- except Exception as e:
- raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
- else:
- # TODO support for mustache and jinja2
- raise NotImplementedError(template_format)
-
- return formatted_prompt
-
-
@trace_method
def initialize_message_sequence(
agent_state: AgentState,
@@ -601,7 +400,7 @@ async def initialize_message_sequence_async(
if memory_edit_timestamp is None:
memory_edit_timestamp = get_local_time()
- full_system_message = await compile_system_message_async(
+ full_system_message = await PromptGenerator.compile_system_message_async(
system_prompt=agent_state.system,
in_context_memory=agent_state.memory,
in_context_memory_last_edit=memory_edit_timestamp,