diff --git a/letta/agent.py b/letta/agent.py index 7aec2469..4a9127c8 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -42,6 +42,7 @@ from letta.log import get_logger from letta.memory import summarize_messages from letta.orm import User from letta.otel.tracing import log_event, trace_method +from letta.prompts.prompt_generator import PromptGenerator from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type from letta.schemas.block import BlockUpdate from letta.schemas.embedding_config import EmbeddingConfig @@ -59,7 +60,7 @@ from letta.schemas.tool_rule import TerminalToolRule from letta.schemas.usage import LettaUsageStatistics from letta.services.agent_manager import AgentManager from letta.services.block_manager import BlockManager -from letta.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block +from letta.services.helpers.agent_manager_helper import check_supports_structured_output from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema from letta.services.job_manager import JobManager from letta.services.mcp.base_client import AsyncBaseMCPClient @@ -1246,7 +1247,7 @@ class Agent(BaseAgent): agent_manager_passage_size = self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id) message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id) - external_memory_summary = compile_memory_metadata_block( + external_memory_summary = PromptGenerator.compile_memory_metadata_block( memory_edit_timestamp=get_utc_time(), timezone=self.agent_state.timezone, previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id), diff --git a/letta/agents/base_agent.py b/letta/agents/base_agent.py index d351eb10..3355076b 100644 --- a/letta/agents/base_agent.py +++ b/letta/agents/base_agent.py @@ -7,6 +7,7 @@ from letta.constants import DEFAULT_MAX_STEPS 
from letta.helpers import ToolRulesSolver from letta.helpers.datetime_helpers import get_utc_time from letta.log import get_logger +from letta.prompts.prompt_generator import PromptGenerator from letta.schemas.agent import AgentState from letta.schemas.enums import MessageStreamStatus from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage @@ -17,7 +18,6 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate from letta.schemas.usage import LettaUsageStatistics from letta.schemas.user import User from letta.services.agent_manager import AgentManager -from letta.services.helpers.agent_manager_helper import get_system_message_from_compiled_memory from letta.services.message_manager import MessageManager from letta.services.passage_manager import PassageManager from letta.utils import united_diff @@ -142,7 +142,7 @@ class BaseAgent(ABC): if num_archival_memories is None: num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id) - new_system_message_str = get_system_message_from_compiled_memory( + new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory( system_prompt=agent_state.system, memory_with_sources=curr_memory_str, in_context_memory_last_edit=memory_edit_timestamp, diff --git a/letta/agents/voice_agent.py b/letta/agents/voice_agent.py index 69ce9150..3b00dfae 100644 --- a/letta/agents/voice_agent.py +++ b/letta/agents/voice_agent.py @@ -13,6 +13,7 @@ from letta.helpers.datetime_helpers import get_utc_time from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface from letta.log import get_logger +from letta.prompts.prompt_generator import PromptGenerator from letta.schemas.agent import AgentState, AgentType from letta.schemas.enums import MessageRole, ToolType from 
from datetime import datetime
from typing import List, Literal, Optional

from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
from letta.helpers import ToolRulesSolver
from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
from letta.otel.tracing import trace_method
from letta.schemas.memory import Memory


class _PreserveMapping(dict):
    """Mapping for ``str.format_map`` that leaves unknown ``{placeholders}`` untouched.

    Defined locally so ``PromptGenerator.safe_format`` does not depend on a name
    that lives in ``agent_manager_helper`` (which itself imports this module).
    """

    def __missing__(self, key: str) -> str:
        # Re-emit the placeholder verbatim instead of raising KeyError.
        return "{" + key + "}"


class PromptGenerator:
    """Namespace of static helpers that assemble an agent's system prompt."""

    # NOTE: ``@staticmethod`` must be the outermost decorator on every method below so
    # that ``trace_method`` wraps the plain (sync or async) function rather than a
    # ``staticmethod`` object.

    # TODO: This code is kind of wonky and deserves a rewrite
    @staticmethod
    @trace_method
    def compile_memory_metadata_block(
        memory_edit_timestamp: datetime,
        timezone: str,
        previous_message_count: int = 0,
        archival_memory_size: Optional[int] = 0,
    ) -> str:
        """
        Generate a memory metadata block for the agent's system prompt.

        This creates a structured metadata section that informs the agent about
        the current state of its memory systems, including timing information
        and memory counts. This helps the agent understand what information
        is available through its tools.

        Args:
            memory_edit_timestamp: When memory blocks were last modified
            timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
            previous_message_count: Number of messages in recall memory (conversation history)
            archival_memory_size: Number of items in archival memory (long-term storage);
                the archival line is omitted when this is ``None`` or not positive

        Returns:
            A formatted string containing the memory metadata block with XML-style tags

        Example Output:
            <memory_metadata>
            - The current time is: 2024-01-15 10:30 AM PST
            - Memory blocks were last modified: 2024-01-15 09:00 AM PST
            - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
            - 156 total memories you created are stored in archival memory (use tools to access them)
            </memory_metadata>
        """
        # Put the timestamp in the local timezone (mimicking get_local_time())
        timestamp_str = format_datetime(memory_edit_timestamp, timezone)

        # Create a metadata block of info so the agent knows about the metadata of out-of-context memories.
        # NOTE(review): the opening/closing delimiters were empty strings although the
        # documented contract is "XML-style tags"; the tag name is inferred from this
        # function's naming -- confirm against prompt consumers.
        metadata_lines = [
            "<memory_metadata>",
            f"- The current time is: {get_local_time_fast(timezone)}",
            f"- Memory blocks were last modified: {timestamp_str}",
            f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
        ]

        # Only include archival memory line if there are archival memories
        if archival_memory_size is not None and archival_memory_size > 0:
            metadata_lines.append(
                f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
            )

        metadata_lines.append("</memory_metadata>")
        return "\n".join(metadata_lines)

    @staticmethod
    def safe_format(template: str, variables: dict) -> str:
        """
        Safely formats a template string, preserving empty {} and {unknown_vars}
        while substituting known variables.

        If we simply use {} in format_map, it'll be treated as a positional field

        Args:
            template: The template text containing ``{name}`` placeholders
            variables: Mapping of placeholder names to their replacement values

        Returns:
            The template with known placeholders substituted and all others left as-is
        """
        # First escape any empty {} by doubling them
        escaped = template.replace("{}", "{{}}")

        # Unknown names fall through to _PreserveMapping.__missing__ and are re-emitted verbatim
        return escaped.format_map(_PreserveMapping(variables))

    @staticmethod
    @trace_method
    def get_system_message_from_compiled_memory(
        system_prompt: str,
        memory_with_sources: str,
        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
        timezone: str,
        user_defined_variables: Optional[dict] = None,
        append_icm_if_missing: bool = True,
        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
        previous_message_count: int = 0,
        archival_memory_size: int = 0,
    ) -> str:
        """Prepare the final/full system message that will be fed into the LLM API

        The base system message may be templated, in which case we need to render the variables.

        The following are reserved variables:
          - CORE_MEMORY: the in-context memory of the LLM

        Args:
            system_prompt: Base system prompt, optionally containing the memory placeholder
            memory_with_sources: Already-compiled in-context memory text
            in_context_memory_last_edit: When memory blocks were last modified
            timezone: Timezone used when rendering timestamps in the metadata block
            user_defined_variables: Not supported yet; must be ``None``
            append_icm_if_missing: Append the memory placeholder to the prompt when it is absent
            template_format: Only ``"f-string"`` is currently supported
            previous_message_count: Number of messages in recall memory
            archival_memory_size: Number of items in archival memory

        Returns:
            The system prompt with the memory placeholder replaced by the compiled
            memory followed by the memory metadata block

        Raises:
            NotImplementedError: If ``user_defined_variables`` is not ``None`` or
                ``template_format`` is not ``"f-string"``
            ValueError: If substituting the memory into the prompt fails
        """
        if user_defined_variables is not None:
            # TODO eventually support the user defining their own variables to inject
            raise NotImplementedError

        # TODO should this all put into the memory.__repr__ function?
        memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
            memory_edit_timestamp=in_context_memory_last_edit,
            previous_message_count=previous_message_count,
            archival_memory_size=archival_memory_size,
            timezone=timezone,
        )
        full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string

        if template_format != "f-string":
            # TODO support for mustache and jinja2
            raise NotImplementedError(template_format)

        memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"

        # Catch the special case where the system prompt is unformatted:
        # append the placeholder to the end to make sure memory is still injected
        if append_icm_if_missing and memory_variable_string not in system_prompt:
            system_prompt += "\n\n" + memory_variable_string

        # Plain substring replacement (not str.format), so any other braces in the
        # prompt are left exactly as written.
        try:
            return system_prompt.replace(memory_variable_string, full_memory_string)
        except Exception as e:
            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}") from e

    @staticmethod
    @trace_method
    async def compile_system_message_async(
        system_prompt: str,
        in_context_memory: Memory,
        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
        timezone: str,
        user_defined_variables: Optional[dict] = None,
        append_icm_if_missing: bool = True,
        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
        previous_message_count: int = 0,
        archival_memory_size: int = 0,
        tool_rules_solver: Optional[ToolRulesSolver] = None,
        sources: Optional[List] = None,
        max_files_open: Optional[int] = None,
    ) -> str:
        """Compile the in-context memory and render the full system message.

        Compiles ``in_context_memory`` via ``compile_in_thread_async`` (passing the
        tool-rule prompt block, ``sources`` and ``max_files_open``) and then delegates
        to ``get_system_message_from_compiled_memory`` with the remaining arguments.

        Args:
            system_prompt: Base system prompt, optionally containing the memory placeholder
            in_context_memory: Memory object to compile into prompt text
            in_context_memory_last_edit: When memory blocks were last modified
            timezone: Timezone used when rendering timestamps in the metadata block
            user_defined_variables: Not supported yet; must be ``None``
            append_icm_if_missing: Append the memory placeholder to the prompt when it is absent
            template_format: Only ``"f-string"`` is currently supported
            previous_message_count: Number of messages in recall memory
            archival_memory_size: Number of items in archival memory
            tool_rules_solver: When given, its compiled tool-rule prompts are passed to the memory compiler
            sources: Forwarded unchanged to the memory compiler
            max_files_open: Forwarded unchanged to the memory compiler

        Returns:
            The fully rendered system message

        Raises:
            NotImplementedError: If ``user_defined_variables`` is not ``None`` or
                ``template_format`` is not ``"f-string"``
            ValueError: If substituting the memory into the prompt fails
        """
        # Add tool rule constraints if available
        tool_constraint_block = None
        if tool_rules_solver is not None:
            tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()

        if user_defined_variables is not None:
            # TODO eventually support the user defining their own variables to inject
            raise NotImplementedError

        memory_with_sources = await in_context_memory.compile_in_thread_async(
            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
        )

        return PromptGenerator.get_system_message_from_compiled_memory(
            system_prompt=system_prompt,
            memory_with_sources=memory_with_sources,
            in_context_memory_last_edit=in_context_memory_last_edit,
            timezone=timezone,
            user_defined_variables=user_defined_variables,
            append_icm_if_missing=append_icm_if_missing,
            template_format=template_format,
            previous_message_count=previous_message_count,
            archival_memory_size=archival_memory_size,
        )
letta.schemas.block import DEFAULT_BLOCKS @@ -89,7 +90,6 @@ from letta.services.helpers.agent_manager_helper import ( check_supports_structured_output, compile_system_message, derive_system_message, - get_system_message_from_compiled_memory, initialize_message_sequence, initialize_message_sequence_async, package_initial_message_sequence, @@ -1783,7 +1783,7 @@ class AgentManager: # update memory (TODO: potentially update recall/archival stats separately) - new_system_message_str = get_system_message_from_compiled_memory( + new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory( system_prompt=agent_state.system, memory_with_sources=curr_memory_str, in_context_memory_last_edit=memory_edit_timestamp, diff --git a/letta/services/helpers/agent_manager_helper.py b/letta/services/helpers/agent_manager_helper.py index ad0caef8..da660e96 100644 --- a/letta/services/helpers/agent_manager_helper.py +++ b/letta/services/helpers/agent_manager_helper.py @@ -21,7 +21,7 @@ from letta.constants import ( STRUCTURED_OUTPUT_MODELS, ) from letta.helpers import ToolRulesSolver -from letta.helpers.datetime_helpers import format_datetime, get_local_time, get_local_time_fast +from letta.helpers.datetime_helpers import get_local_time from letta.llm_api.llm_client import LLMClient from letta.orm.agent import Agent as AgentModel from letta.orm.agents_tags import AgentsTags @@ -33,6 +33,7 @@ from letta.orm.sources_agents import SourcesAgents from letta.orm.sqlite_functions import adapt_array from letta.otel.tracing import trace_method from letta.prompts import gpt_system +from letta.prompts.prompt_generator import PromptGenerator from letta.schemas.agent import AgentState, AgentType from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import MessageRole @@ -217,60 +218,6 @@ def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool return system -# TODO: This code is kind of wonky and deserves a rewrite -def 
compile_memory_metadata_block( - memory_edit_timestamp: datetime, - timezone: str, - previous_message_count: int = 0, - archival_memory_size: Optional[int] = 0, -) -> str: - """ - Generate a memory metadata block for the agent's system prompt. - - This creates a structured metadata section that informs the agent about - the current state of its memory systems, including timing information - and memory counts. This helps the agent understand what information - is available through its tools. - - Args: - memory_edit_timestamp: When memory blocks were last modified - timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles') - previous_message_count: Number of messages in recall memory (conversation history) - archival_memory_size: Number of items in archival memory (long-term storage) - - Returns: - A formatted string containing the memory metadata block with XML-style tags - - Example Output: - - - The current time is: 2024-01-15 10:30 AM PST - - Memory blocks were last modified: 2024-01-15 09:00 AM PST - - 42 previous messages between you and the user are stored in recall memory (use tools to access them) - - 156 total memories you created are stored in archival memory (use tools to access them) - - """ - # Put the timestamp in the local timezone (mimicking get_local_time()) - timestamp_str = format_datetime(memory_edit_timestamp, timezone) - - # Create a metadata block of info so the agent knows about the metadata of out-of-context memories - metadata_lines = [ - "", - f"- The current time is: {get_local_time_fast(timezone)}", - f"- Memory blocks were last modified: {timestamp_str}", - f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)", - ] - - # Only include archival memory line if there are archival memories - if archival_memory_size is not None and archival_memory_size > 0: - metadata_lines.append( - f"- {archival_memory_size} total memories you created are 
stored in archival memory (use tools to access them)" - ) - - metadata_lines.append("") - memory_metadata_block = "\n".join(metadata_lines) - return memory_metadata_block - - class PreserveMapping(dict): """Used to preserve (do not modify) undefined variables in the system prompt""" @@ -331,7 +278,7 @@ def compile_system_message( raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}") else: # TODO should this all put into the memory.__repr__ function? - memory_metadata_string = compile_memory_metadata_block( + memory_metadata_string = PromptGenerator.compile_memory_metadata_block( memory_edit_timestamp=in_context_memory_last_edit, previous_message_count=previous_message_count, archival_memory_size=archival_memory_size, @@ -372,154 +319,6 @@ def compile_system_message( return formatted_prompt -@trace_method -def get_system_message_from_compiled_memory( - system_prompt: str, - memory_with_sources: str, - in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory? - timezone: str, - user_defined_variables: Optional[dict] = None, - append_icm_if_missing: bool = True, - template_format: Literal["f-string", "mustache", "jinja2"] = "f-string", - previous_message_count: int = 0, - archival_memory_size: int = 0, -) -> str: - """Prepare the final/full system message that will be fed into the LLM API - - The base system message may be templated, in which case we need to render the variables. 
- - The following are reserved variables: - - CORE_MEMORY: the in-context memory of the LLM - """ - if user_defined_variables is not None: - # TODO eventually support the user defining their own variables to inject - raise NotImplementedError - else: - variables = {} - - # Add the protected memory variable - if IN_CONTEXT_MEMORY_KEYWORD in variables: - raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}") - else: - # TODO should this all put into the memory.__repr__ function? - memory_metadata_string = compile_memory_metadata_block( - memory_edit_timestamp=in_context_memory_last_edit, - previous_message_count=previous_message_count, - archival_memory_size=archival_memory_size, - timezone=timezone, - ) - - full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string - - # Add to the variables list to inject - variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string - - if template_format == "f-string": - memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}" - - # Catch the special case where the system prompt is unformatted - if append_icm_if_missing: - if memory_variable_string not in system_prompt: - # In this case, append it to the end to make sure memory is still injected - # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead") - system_prompt += "\n\n" + memory_variable_string - - # render the variables using the built-in templater - try: - if user_defined_variables: - formatted_prompt = safe_format(system_prompt, variables) - else: - formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string) - except Exception as e: - raise ValueError(f"Failed to format system prompt - {str(e)}. 
System prompt value:\n{system_prompt}") - - else: - # TODO support for mustache and jinja2 - raise NotImplementedError(template_format) - - return formatted_prompt - - -@trace_method -async def compile_system_message_async( - system_prompt: str, - in_context_memory: Memory, - in_context_memory_last_edit: datetime, # TODO move this inside of BaseMemory? - timezone: str, - user_defined_variables: Optional[dict] = None, - append_icm_if_missing: bool = True, - template_format: Literal["f-string", "mustache", "jinja2"] = "f-string", - previous_message_count: int = 0, - archival_memory_size: int = 0, - tool_rules_solver: Optional[ToolRulesSolver] = None, - sources: Optional[List] = None, - max_files_open: Optional[int] = None, -) -> str: - """Prepare the final/full system message that will be fed into the LLM API - - The base system message may be templated, in which case we need to render the variables. - - The following are reserved variables: - - CORE_MEMORY: the in-context memory of the LLM - """ - - # Add tool rule constraints if available - tool_constraint_block = None - if tool_rules_solver is not None: - tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts() - - if user_defined_variables is not None: - # TODO eventually support the user defining their own variables to inject - raise NotImplementedError - else: - variables = {} - - # Add the protected memory variable - if IN_CONTEXT_MEMORY_KEYWORD in variables: - raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}") - else: - # TODO should this all put into the memory.__repr__ function? 
- memory_metadata_string = compile_memory_metadata_block( - memory_edit_timestamp=in_context_memory_last_edit, - previous_message_count=previous_message_count, - archival_memory_size=archival_memory_size, - timezone=timezone, - ) - - memory_with_sources = await in_context_memory.compile_in_thread_async( - tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open - ) - full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string - - # Add to the variables list to inject - variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string - - if template_format == "f-string": - memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}" - - # Catch the special case where the system prompt is unformatted - if append_icm_if_missing: - if memory_variable_string not in system_prompt: - # In this case, append it to the end to make sure memory is still injected - # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead") - system_prompt += "\n\n" + memory_variable_string - - # render the variables using the built-in templater - try: - if user_defined_variables: - formatted_prompt = safe_format(system_prompt, variables) - else: - formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string) - except Exception as e: - raise ValueError(f"Failed to format system prompt - {str(e)}. 
System prompt value:\n{system_prompt}") - - else: - # TODO support for mustache and jinja2 - raise NotImplementedError(template_format) - - return formatted_prompt - - @trace_method def initialize_message_sequence( agent_state: AgentState, @@ -601,7 +400,7 @@ async def initialize_message_sequence_async( if memory_edit_timestamp is None: memory_edit_timestamp = get_local_time() - full_system_message = await compile_system_message_async( + full_system_message = await PromptGenerator.compile_system_message_async( system_prompt=agent_state.system, in_context_memory=agent_state.memory, in_context_memory_last_edit=memory_edit_timestamp,