feat: add prompt generator for on the fly system prompt generation (#4060)

2025-08-20 15:49:40 -07:00
parent d0ddc5545c
commit 395faf3ed8
6 changed files with 203 additions and 213 deletions
--- a/letta/agent.py
+++ b/letta/agent.py
@@ -42,6 +42,7 @@ from letta.log import get_logger
 from letta.memory import summarize_messages
 from letta.orm import User
 from letta.otel.tracing import log_event, trace_method
+from letta.prompts.prompt_generator import PromptGenerator
 from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
 from letta.schemas.block import BlockUpdate
 from letta.schemas.embedding_config import EmbeddingConfig
@@ -59,7 +60,7 @@ from letta.schemas.tool_rule import TerminalToolRule
 from letta.schemas.usage import LettaUsageStatistics
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
-from letta.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block
+from letta.services.helpers.agent_manager_helper import check_supports_structured_output
 from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
 from letta.services.job_manager import JobManager
 from letta.services.mcp.base_client import AsyncBaseMCPClient
@@ -1246,7 +1247,7 @@ class Agent(BaseAgent):

        agent_manager_passage_size = self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id)
        message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
-        external_memory_summary = compile_memory_metadata_block(
+        external_memory_summary = PromptGenerator.compile_memory_metadata_block(
            memory_edit_timestamp=get_utc_time(),
            timezone=self.agent_state.timezone,
            previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
--- a/letta/agents/base_agent.py
+++ b/letta/agents/base_agent.py
@@ -7,6 +7,7 @@ from letta.constants import DEFAULT_MAX_STEPS
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.log import get_logger
+from letta.prompts.prompt_generator import PromptGenerator
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
@@ -17,7 +18,6 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.services.agent_manager import AgentManager
-from letta.services.helpers.agent_manager_helper import get_system_message_from_compiled_memory
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.utils import united_diff
@@ -142,7 +142,7 @@ class BaseAgent(ABC):
            if num_archival_memories is None:
                num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)

-            new_system_message_str = get_system_message_from_compiled_memory(
+            new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
                system_prompt=agent_state.system,
                memory_with_sources=curr_memory_str,
                in_context_memory_last_edit=memory_edit_timestamp,
--- a/letta/agents/voice_agent.py
+++ b/letta/agents/voice_agent.py
@@ -13,6 +13,7 @@ from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
 from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
 from letta.log import get_logger
+from letta.prompts.prompt_generator import PromptGenerator
 from letta.schemas.agent import AgentState, AgentType
 from letta.schemas.enums import MessageRole, ToolType
 from letta.schemas.letta_response import LettaResponse
@@ -35,7 +36,6 @@ from letta.server.rest_api.utils import (
 )
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
-from letta.services.helpers.agent_manager_helper import compile_system_message_async
 from letta.services.job_manager import JobManager
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
@@ -144,7 +144,7 @@ class VoiceAgent(BaseAgent):

        in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
        memory_edit_timestamp = get_utc_time()
-        in_context_messages[0].content[0].text = await compile_system_message_async(
+        in_context_messages[0].content[0].text = await PromptGenerator.compile_system_message_async(
            system_prompt=agent_state.system,
            in_context_memory=agent_state.memory,
            in_context_memory_last_edit=memory_edit_timestamp,
--- a/letta/prompts/prompt_generator.py
+++ b/letta/prompts/prompt_generator.py
@@ -0,0 +1,190 @@
+from datetime import datetime
+from typing import List, Literal, Optional
+
+from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
+from letta.helpers import ToolRulesSolver
+from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
+from letta.otel.tracing import trace_method
+from letta.schemas.memory import Memory
+
+
+class PromptGenerator:
+
+    # TODO: This code is kind of wonky and deserves a rewrite
+    @staticmethod
+    @trace_method
+    def compile_memory_metadata_block(
+        memory_edit_timestamp: datetime,
+        timezone: str,
+        previous_message_count: int = 0,
+        archival_memory_size: Optional[int] = 0,
+    ) -> str:
+        """
+        Generate a memory metadata block for the agent's system prompt.
+
+        This creates a structured metadata section that informs the agent about
+        the current state of its memory systems, including timing information
+        and memory counts. This helps the agent understand what information
+        is available through its tools.
+
+        Decorators are applied bottom-up, so ``@staticmethod`` is kept outermost:
+        ``trace_method`` then wraps the plain function rather than a
+        ``staticmethod`` object.
+
+        Args:
+            memory_edit_timestamp: When memory blocks were last modified
+            timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
+            previous_message_count: Number of messages in recall memory (conversation history)
+            archival_memory_size: Number of items in archival memory (long-term storage).
+                The archival line is omitted when this is None or not greater than 0.
+
+        Returns:
+            A formatted string containing the memory metadata block with XML-style tags
+
+        Example Output:
+            <memory_metadata>
+            - The current time is: 2024-01-15 10:30 AM PST
+            - Memory blocks were last modified: 2024-01-15 09:00 AM PST
+            - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
+            - 156 total memories you created are stored in archival memory (use tools to access them)
+            </memory_metadata>
+        """
+        # Put the timestamp in the local timezone (mimicking get_local_time())
+        timestamp_str = format_datetime(memory_edit_timestamp, timezone)
+
+        # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
+        metadata_lines = [
+            "<memory_metadata>",
+            f"- The current time is: {get_local_time_fast(timezone)}",
+            f"- Memory blocks were last modified: {timestamp_str}",
+            f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
+        ]
+
+        # Only include archival memory line if there are archival memories
+        if archival_memory_size is not None and archival_memory_size > 0:
+            metadata_lines.append(
+                f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
+            )
+
+        metadata_lines.append("</memory_metadata>")
+        return "\n".join(metadata_lines)
+
+    @staticmethod
+    def safe_format(template: str, variables: dict) -> str:
+        """
+        Safely format a template string, preserving empty {} and {unknown_vars}
+        while substituting known variables.
+
+        Args:
+            template: Template text using ``str.format``-style ``{name}`` fields.
+            variables: Mapping of field names to the values to substitute.
+
+        Returns:
+            The template with known fields substituted. Empty ``{}`` and fields
+            whose names are not in ``variables`` are emitted unchanged.
+        """
+
+        # This module neither defines nor imports PreserveMapping, and the helper
+        # module that declares it imports PromptGenerator, so a module-level
+        # import here would be circular. A private equivalent is defined locally.
+        # NOTE(review): __missing__ follows the documented intent ("preserve
+        # undefined variables"); confirm it matches PreserveMapping exactly.
+        class _PreserveMapping(dict):
+            """Render an undefined field name back as its own {name} placeholder."""
+
+            def __missing__(self, key):
+                return "{" + key + "}"
+
+        # If we simply leave {} for format_map, it'll be treated as a positional
+        # field; doubling the braces makes it render as a literal {}.
+        escaped = template.replace("{}", "{{}}")
+
+        # Substitute known variables; unknown names fall through to __missing__
+        return escaped.format_map(_PreserveMapping(variables))
+
+    @staticmethod
+    @trace_method
+    def get_system_message_from_compiled_memory(
+        system_prompt: str,
+        memory_with_sources: str,
+        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
+        timezone: str,
+        user_defined_variables: Optional[dict] = None,
+        append_icm_if_missing: bool = True,
+        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
+        previous_message_count: int = 0,
+        archival_memory_size: int = 0,
+    ) -> str:
+        """Prepare the final/full system message that will be fed into the LLM API
+
+        The base system message may be templated, in which case we need to render the variables.
+
+        The following are reserved variables:
+        - CORE_MEMORY: the in-context memory of the LLM
+          (the placeholder is "{" + IN_CONTEXT_MEMORY_KEYWORD + "}")
+
+        Decorators are applied bottom-up, so ``@staticmethod`` is kept outermost:
+        ``trace_method`` then wraps the plain function rather than a
+        ``staticmethod`` object.
+
+        Args:
+            system_prompt: Base system prompt, optionally containing the memory placeholder.
+            memory_with_sources: Already-compiled memory text to inject.
+            in_context_memory_last_edit: When memory blocks were last modified.
+            timezone: Timezone passed through to the memory metadata block.
+            user_defined_variables: Not supported yet; must be None.
+            append_icm_if_missing: Append the memory placeholder to the end of the
+                prompt when it is not already present.
+            template_format: Only "f-string" is implemented.
+            previous_message_count: Message count reported in the metadata block.
+            archival_memory_size: Archival memory count reported in the metadata block.
+
+        Returns:
+            The system prompt with the memory placeholder replaced by the compiled
+            memory followed by the memory metadata block.
+
+        Raises:
+            NotImplementedError: If ``user_defined_variables`` is not None, or if
+                ``template_format`` is anything other than "f-string".
+            ValueError: If rendering the prompt fails.
+        """
+        if user_defined_variables is not None:
+            # TODO eventually support the user defining their own variables to inject
+            raise NotImplementedError
+        variables = {}
+
+        # Add the protected memory variable
+        # (cannot trigger while `variables` always starts empty; kept for when
+        # user-defined variables are supported)
+        if IN_CONTEXT_MEMORY_KEYWORD in variables:
+            raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
+
+        # TODO should this all put into the memory.__repr__ function?
+        memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
+            memory_edit_timestamp=in_context_memory_last_edit,
+            previous_message_count=previous_message_count,
+            archival_memory_size=archival_memory_size,
+            timezone=timezone,
+        )
+
+        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
+
+        # Add to the variables list to inject
+        variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
+
+        if template_format != "f-string":
+            # TODO support for mustache and jinja2
+            raise NotImplementedError(template_format)
+
+        memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
+
+        # Catch the special case where the system prompt is unformatted
+        if append_icm_if_missing and memory_variable_string not in system_prompt:
+            # In this case, append it to the end to make sure memory is still injected
+            system_prompt += "\n\n" + memory_variable_string
+
+        # render the variables using the built-in templater
+        try:
+            if user_defined_variables:
+                # Currently unreachable: any non-None value is rejected above.
+                formatted_prompt = PromptGenerator.safe_format(system_prompt, variables)
+            else:
+                formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
+        except Exception as e:
+            # Chain the original exception so the root cause stays in the traceback
+            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}") from e
+
+        return formatted_prompt
+
+    @staticmethod
+    @trace_method
+    async def compile_system_message_async(
+        system_prompt: str,
+        in_context_memory: Memory,
+        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
+        timezone: str,
+        user_defined_variables: Optional[dict] = None,
+        append_icm_if_missing: bool = True,
+        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
+        previous_message_count: int = 0,
+        archival_memory_size: int = 0,
+        tool_rules_solver: Optional[ToolRulesSolver] = None,
+        sources: Optional[List] = None,
+        max_files_open: Optional[int] = None,
+    ) -> str:
+        """Prepare the final/full system message that will be fed into the LLM API
+
+        Compiles ``in_context_memory`` (with any tool rule prompts, sources and
+        the open-file limit) and then delegates the rendering to
+        ``get_system_message_from_compiled_memory``.
+
+        Decorators are applied bottom-up, so ``@staticmethod`` is kept outermost:
+        ``trace_method`` then wraps the coroutine function itself rather than a
+        ``staticmethod`` object.
+
+        Args:
+            system_prompt: Base system prompt, optionally templated.
+            in_context_memory: Memory object whose compiled text is injected.
+            in_context_memory_last_edit: When memory blocks were last modified.
+            timezone: Timezone forwarded to the renderer.
+            user_defined_variables: Not supported yet; must be None.
+            append_icm_if_missing: Forwarded to the renderer.
+            template_format: Forwarded to the renderer.
+            previous_message_count: Forwarded to the renderer.
+            archival_memory_size: Forwarded to the renderer.
+            tool_rules_solver: When given, its compiled tool rule prompts are
+                passed to the memory compilation as ``tool_usage_rules``.
+            sources: Passed through to the memory compilation.
+            max_files_open: Passed through to the memory compilation.
+
+        Returns:
+            The rendered system message.
+
+        Raises:
+            NotImplementedError: If ``user_defined_variables`` is not None.
+        """
+        # Add tool rule constraints if available
+        tool_constraint_block = None
+        if tool_rules_solver is not None:
+            tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
+
+        if user_defined_variables is not None:
+            # TODO eventually support the user defining their own variables to inject
+            raise NotImplementedError
+
+        memory_with_sources = await in_context_memory.compile_in_thread_async(
+            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
+        )
+
+        return PromptGenerator.get_system_message_from_compiled_memory(
+            system_prompt=system_prompt,
+            memory_with_sources=memory_with_sources,
+            in_context_memory_last_edit=in_context_memory_last_edit,
+            timezone=timezone,
+            user_defined_variables=user_defined_variables,
+            append_icm_if_missing=append_icm_if_missing,
+            template_format=template_format,
+            previous_message_count=previous_message_count,
+            archival_memory_size=archival_memory_size,
+        )
--- a/letta/services/agent_manager.py
+++ b/letta/services/agent_manager.py
@@ -42,6 +42,7 @@ from letta.orm.sandbox_config import AgentEnvironmentVariable
 from letta.orm.sandbox_config import AgentEnvironmentVariable as AgentEnvironmentVariableModel
 from letta.orm.sqlalchemy_base import AccessType
 from letta.otel.tracing import trace_method
+from letta.prompts.prompt_generator import PromptGenerator
 from letta.schemas.agent import AgentState as PydanticAgentState
 from letta.schemas.agent import AgentType, CreateAgent, UpdateAgent, get_prompt_template_for_agent_type
 from letta.schemas.block import DEFAULT_BLOCKS
@@ -89,7 +90,6 @@ from letta.services.helpers.agent_manager_helper import (
    check_supports_structured_output,
    compile_system_message,
    derive_system_message,
-    get_system_message_from_compiled_memory,
    initialize_message_sequence,
    initialize_message_sequence_async,
    package_initial_message_sequence,
@@ -1783,7 +1783,7 @@ class AgentManager:

        # update memory (TODO: potentially update recall/archival stats separately)

-        new_system_message_str = get_system_message_from_compiled_memory(
+        new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
            system_prompt=agent_state.system,
            memory_with_sources=curr_memory_str,
            in_context_memory_last_edit=memory_edit_timestamp,
--- a/letta/services/helpers/agent_manager_helper.py
+++ b/letta/services/helpers/agent_manager_helper.py
@@ -21,7 +21,7 @@ from letta.constants import (
    STRUCTURED_OUTPUT_MODELS,
 )
 from letta.helpers import ToolRulesSolver
-from letta.helpers.datetime_helpers import format_datetime, get_local_time, get_local_time_fast
+from letta.helpers.datetime_helpers import get_local_time
 from letta.llm_api.llm_client import LLMClient
 from letta.orm.agent import Agent as AgentModel
 from letta.orm.agents_tags import AgentsTags
@@ -33,6 +33,7 @@ from letta.orm.sources_agents import SourcesAgents
 from letta.orm.sqlite_functions import adapt_array
 from letta.otel.tracing import trace_method
 from letta.prompts import gpt_system
+from letta.prompts.prompt_generator import PromptGenerator
 from letta.schemas.agent import AgentState, AgentType
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import MessageRole
@@ -217,60 +218,6 @@ def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool
    return system


-# TODO: This code is kind of wonky and deserves a rewrite
-def compile_memory_metadata_block(
-    memory_edit_timestamp: datetime,
-    timezone: str,
-    previous_message_count: int = 0,
-    archival_memory_size: Optional[int] = 0,
-) -> str:
-    """
-    Generate a memory metadata block for the agent's system prompt.
-
-    This creates a structured metadata section that informs the agent about
-    the current state of its memory systems, including timing information
-    and memory counts. This helps the agent understand what information
-    is available through its tools.
-
-    Args:
-        memory_edit_timestamp: When memory blocks were last modified
-        timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
-        previous_message_count: Number of messages in recall memory (conversation history)
-        archival_memory_size: Number of items in archival memory (long-term storage)
-
-    Returns:
-        A formatted string containing the memory metadata block with XML-style tags
-
-    Example Output:
-        <memory_metadata>
-        - The current time is: 2024-01-15 10:30 AM PST
-        - Memory blocks were last modified: 2024-01-15 09:00 AM PST
-        - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
-        - 156 total memories you created are stored in archival memory (use tools to access them)
-        </memory_metadata>
-    """
-    # Put the timestamp in the local timezone (mimicking get_local_time())
-    timestamp_str = format_datetime(memory_edit_timestamp, timezone)
-
-    # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
-    metadata_lines = [
-        "<memory_metadata>",
-        f"- The current time is: {get_local_time_fast(timezone)}",
-        f"- Memory blocks were last modified: {timestamp_str}",
-        f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
-    ]
-
-    # Only include archival memory line if there are archival memories
-    if archival_memory_size is not None and archival_memory_size > 0:
-        metadata_lines.append(
-            f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
-        )
-
-    metadata_lines.append("</memory_metadata>")
-    memory_metadata_block = "\n".join(metadata_lines)
-    return memory_metadata_block
-
-
 class PreserveMapping(dict):
    """Used to preserve (do not modify) undefined variables in the system prompt"""

@@ -331,7 +278,7 @@ def compile_system_message(
        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
    else:
        # TODO should this all put into the memory.__repr__ function?
-        memory_metadata_string = compile_memory_metadata_block(
+        memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
            memory_edit_timestamp=in_context_memory_last_edit,
            previous_message_count=previous_message_count,
            archival_memory_size=archival_memory_size,
@@ -372,154 +319,6 @@ def compile_system_message(
    return formatted_prompt


-@trace_method
-def get_system_message_from_compiled_memory(
-    system_prompt: str,
-    memory_with_sources: str,
-    in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
-    timezone: str,
-    user_defined_variables: Optional[dict] = None,
-    append_icm_if_missing: bool = True,
-    template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
-    previous_message_count: int = 0,
-    archival_memory_size: int = 0,
-) -> str:
-    """Prepare the final/full system message that will be fed into the LLM API
-
-    The base system message may be templated, in which case we need to render the variables.
-
-    The following are reserved variables:
-      - CORE_MEMORY: the in-context memory of the LLM
-    """
-    if user_defined_variables is not None:
-        # TODO eventually support the user defining their own variables to inject
-        raise NotImplementedError
-    else:
-        variables = {}
-
-    # Add the protected memory variable
-    if IN_CONTEXT_MEMORY_KEYWORD in variables:
-        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
-    else:
-        # TODO should this all put into the memory.__repr__ function?
-        memory_metadata_string = compile_memory_metadata_block(
-            memory_edit_timestamp=in_context_memory_last_edit,
-            previous_message_count=previous_message_count,
-            archival_memory_size=archival_memory_size,
-            timezone=timezone,
-        )
-
-        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
-        # Add to the variables list to inject
-        variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
-    if template_format == "f-string":
-        memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
-        # Catch the special case where the system prompt is unformatted
-        if append_icm_if_missing:
-            if memory_variable_string not in system_prompt:
-                # In this case, append it to the end to make sure memory is still injected
-                # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
-                system_prompt += "\n\n" + memory_variable_string
-
-        # render the variables using the built-in templater
-        try:
-            if user_defined_variables:
-                formatted_prompt = safe_format(system_prompt, variables)
-            else:
-                formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
-        except Exception as e:
-            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
-    else:
-        # TODO support for mustache and jinja2
-        raise NotImplementedError(template_format)
-
-    return formatted_prompt
-
-
-@trace_method
-async def compile_system_message_async(
-    system_prompt: str,
-    in_context_memory: Memory,
-    in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
-    timezone: str,
-    user_defined_variables: Optional[dict] = None,
-    append_icm_if_missing: bool = True,
-    template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
-    previous_message_count: int = 0,
-    archival_memory_size: int = 0,
-    tool_rules_solver: Optional[ToolRulesSolver] = None,
-    sources: Optional[List] = None,
-    max_files_open: Optional[int] = None,
-) -> str:
-    """Prepare the final/full system message that will be fed into the LLM API
-
-    The base system message may be templated, in which case we need to render the variables.
-
-    The following are reserved variables:
-      - CORE_MEMORY: the in-context memory of the LLM
-    """
-
-    # Add tool rule constraints if available
-    tool_constraint_block = None
-    if tool_rules_solver is not None:
-        tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
-
-    if user_defined_variables is not None:
-        # TODO eventually support the user defining their own variables to inject
-        raise NotImplementedError
-    else:
-        variables = {}
-
-    # Add the protected memory variable
-    if IN_CONTEXT_MEMORY_KEYWORD in variables:
-        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
-    else:
-        # TODO should this all put into the memory.__repr__ function?
-        memory_metadata_string = compile_memory_metadata_block(
-            memory_edit_timestamp=in_context_memory_last_edit,
-            previous_message_count=previous_message_count,
-            archival_memory_size=archival_memory_size,
-            timezone=timezone,
-        )
-
-        memory_with_sources = await in_context_memory.compile_in_thread_async(
-            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
-        )
-        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string
-
-        # Add to the variables list to inject
-        variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
-
-    if template_format == "f-string":
-        memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
-
-        # Catch the special case where the system prompt is unformatted
-        if append_icm_if_missing:
-            if memory_variable_string not in system_prompt:
-                # In this case, append it to the end to make sure memory is still injected
-                # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
-                system_prompt += "\n\n" + memory_variable_string
-
-        # render the variables using the built-in templater
-        try:
-            if user_defined_variables:
-                formatted_prompt = safe_format(system_prompt, variables)
-            else:
-                formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
-        except Exception as e:
-            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
-
-    else:
-        # TODO support for mustache and jinja2
-        raise NotImplementedError(template_format)
-
-    return formatted_prompt
-
-
@trace_method
 def initialize_message_sequence(
    agent_state: AgentState,
@@ -601,7 +400,7 @@ async def initialize_message_sequence_async(
    if memory_edit_timestamp is None:
        memory_edit_timestamp = get_local_time()

-    full_system_message = await compile_system_message_async(
+    full_system_message = await PromptGenerator.compile_system_message_async(
        system_prompt=agent_state.system,
        in_context_memory=agent_state.memory,
        in_context_memory_last_edit=memory_edit_timestamp,