feat: add prompt generator for on the fly system prompt generation (#4060)
This commit is contained in:
@@ -42,6 +42,7 @@ from letta.log import get_logger
|
||||
from letta.memory import summarize_messages
|
||||
from letta.orm import User
|
||||
from letta.otel.tracing import log_event, trace_method
|
||||
from letta.prompts.prompt_generator import PromptGenerator
|
||||
from letta.schemas.agent import AgentState, AgentStepResponse, UpdateAgent, get_prompt_template_for_agent_type
|
||||
from letta.schemas.block import BlockUpdate
|
||||
from letta.schemas.embedding_config import EmbeddingConfig
|
||||
@@ -59,7 +60,7 @@ from letta.schemas.tool_rule import TerminalToolRule
|
||||
from letta.schemas.usage import LettaUsageStatistics
|
||||
from letta.services.agent_manager import AgentManager
|
||||
from letta.services.block_manager import BlockManager
|
||||
from letta.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block
|
||||
from letta.services.helpers.agent_manager_helper import check_supports_structured_output
|
||||
from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
|
||||
from letta.services.job_manager import JobManager
|
||||
from letta.services.mcp.base_client import AsyncBaseMCPClient
|
||||
@@ -1246,7 +1247,7 @@ class Agent(BaseAgent):
|
||||
|
||||
agent_manager_passage_size = self.agent_manager.passage_size(actor=self.user, agent_id=self.agent_state.id)
|
||||
message_manager_size = self.message_manager.size(actor=self.user, agent_id=self.agent_state.id)
|
||||
external_memory_summary = compile_memory_metadata_block(
|
||||
external_memory_summary = PromptGenerator.compile_memory_metadata_block(
|
||||
memory_edit_timestamp=get_utc_time(),
|
||||
timezone=self.agent_state.timezone,
|
||||
previous_message_count=self.message_manager.size(actor=self.user, agent_id=self.agent_state.id),
|
||||
|
||||
@@ -7,6 +7,7 @@ from letta.constants import DEFAULT_MAX_STEPS
|
||||
from letta.helpers import ToolRulesSolver
|
||||
from letta.helpers.datetime_helpers import get_utc_time
|
||||
from letta.log import get_logger
|
||||
from letta.prompts.prompt_generator import PromptGenerator
|
||||
from letta.schemas.agent import AgentState
|
||||
from letta.schemas.enums import MessageStreamStatus
|
||||
from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
|
||||
@@ -17,7 +18,6 @@ from letta.schemas.message import Message, MessageCreate, MessageUpdate
|
||||
from letta.schemas.usage import LettaUsageStatistics
|
||||
from letta.schemas.user import User
|
||||
from letta.services.agent_manager import AgentManager
|
||||
from letta.services.helpers.agent_manager_helper import get_system_message_from_compiled_memory
|
||||
from letta.services.message_manager import MessageManager
|
||||
from letta.services.passage_manager import PassageManager
|
||||
from letta.utils import united_diff
|
||||
@@ -142,7 +142,7 @@ class BaseAgent(ABC):
|
||||
if num_archival_memories is None:
|
||||
num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
|
||||
|
||||
new_system_message_str = get_system_message_from_compiled_memory(
|
||||
new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
|
||||
system_prompt=agent_state.system,
|
||||
memory_with_sources=curr_memory_str,
|
||||
in_context_memory_last_edit=memory_edit_timestamp,
|
||||
|
||||
@@ -13,6 +13,7 @@ from letta.helpers.datetime_helpers import get_utc_time
|
||||
from letta.helpers.tool_execution_helper import add_pre_execution_message, enable_strict_mode, remove_request_heartbeat
|
||||
from letta.interfaces.openai_chat_completions_streaming_interface import OpenAIChatCompletionsStreamingInterface
|
||||
from letta.log import get_logger
|
||||
from letta.prompts.prompt_generator import PromptGenerator
|
||||
from letta.schemas.agent import AgentState, AgentType
|
||||
from letta.schemas.enums import MessageRole, ToolType
|
||||
from letta.schemas.letta_response import LettaResponse
|
||||
@@ -35,7 +36,6 @@ from letta.server.rest_api.utils import (
|
||||
)
|
||||
from letta.services.agent_manager import AgentManager
|
||||
from letta.services.block_manager import BlockManager
|
||||
from letta.services.helpers.agent_manager_helper import compile_system_message_async
|
||||
from letta.services.job_manager import JobManager
|
||||
from letta.services.message_manager import MessageManager
|
||||
from letta.services.passage_manager import PassageManager
|
||||
@@ -144,7 +144,7 @@ class VoiceAgent(BaseAgent):
|
||||
|
||||
in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=agent_state.message_ids, actor=self.actor)
|
||||
memory_edit_timestamp = get_utc_time()
|
||||
in_context_messages[0].content[0].text = await compile_system_message_async(
|
||||
in_context_messages[0].content[0].text = await PromptGenerator.compile_system_message_async(
|
||||
system_prompt=agent_state.system,
|
||||
in_context_memory=agent_state.memory,
|
||||
in_context_memory_last_edit=memory_edit_timestamp,
|
||||
|
||||
190
letta/prompts/prompt_generator.py
Normal file
190
letta/prompts/prompt_generator.py
Normal file
@@ -0,0 +1,190 @@
|
||||
from datetime import datetime
|
||||
from typing import List, Literal, Optional
|
||||
|
||||
from letta.constants import IN_CONTEXT_MEMORY_KEYWORD
|
||||
from letta.helpers import ToolRulesSolver
|
||||
from letta.helpers.datetime_helpers import format_datetime, get_local_time_fast
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.schemas.memory import Memory
|
||||
|
||||
|
||||
class _PreserveMapping(dict):
    """Mapping for ``str.format_map`` that leaves unknown placeholders in place.

    NOTE(review): this mirrors the intent documented on ``PreserveMapping`` in
    ``letta.services.helpers.agent_manager_helper`` ("preserve undefined
    variables in the system prompt"). It is defined locally because that module
    imports ``PromptGenerator``, so importing back from it would be circular.
    """

    def __missing__(self, key):
        # Re-emit the placeholder so "{unknown}" survives formatting unchanged.
        return "{" + key + "}"


class PromptGenerator:
    """Static helpers that assemble an agent's system prompt from its memory."""

    # NOTE: ``@staticmethod`` is applied outermost on the traced methods below so
    # that ``trace_method`` decorates the plain function rather than a
    # ``staticmethod`` object (which is not callable before Python 3.10 and hides
    # coroutine functions from introspection).

    # TODO: This code is kind of wonky and deserves a rewrite
    @staticmethod
    @trace_method
    def compile_memory_metadata_block(
        memory_edit_timestamp: datetime,
        timezone: str,
        previous_message_count: int = 0,
        archival_memory_size: Optional[int] = 0,
    ) -> str:
        """
        Generate a memory metadata block for the agent's system prompt.

        This creates a structured metadata section that informs the agent about
        the current state of its memory systems, including timing information
        and memory counts. This helps the agent understand what information
        is available through its tools.

        Args:
            memory_edit_timestamp: When memory blocks were last modified
            timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
            previous_message_count: Number of messages in recall memory (conversation history)
            archival_memory_size: Number of items in archival memory (long-term storage).
                The archival line is omitted when this is ``None`` or not positive.

        Returns:
            A formatted string containing the memory metadata block with XML-style tags

        Example Output:
            <memory_metadata>
            - The current time is: 2024-01-15 10:30 AM PST
            - Memory blocks were last modified: 2024-01-15 09:00 AM PST
            - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
            - 156 total memories you created are stored in archival memory (use tools to access them)
            </memory_metadata>
        """
        # Put the timestamp in the local timezone (mimicking get_local_time())
        timestamp_str = format_datetime(memory_edit_timestamp, timezone)

        # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
        metadata_lines = [
            "<memory_metadata>",
            f"- The current time is: {get_local_time_fast(timezone)}",
            f"- Memory blocks were last modified: {timestamp_str}",
            f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)",
        ]

        # Only include archival memory line if there are archival memories
        if archival_memory_size is not None and archival_memory_size > 0:
            metadata_lines.append(
                f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)"
            )

        metadata_lines.append("</memory_metadata>")
        return "\n".join(metadata_lines)

    @staticmethod
    def safe_format(template: str, variables: dict) -> str:
        """
        Safely formats a template string, preserving empty {} and {unknown_vars}
        while substituting known variables.

        If we simply use {} in format_map, it'll be treated as a positional field

        Args:
            template: The template text containing ``{name}`` placeholders.
            variables: Mapping of placeholder names to their replacement values.

        Returns:
            The template with known placeholders substituted.
        """
        # First escape any empty {} by doubling them
        escaped = template.replace("{}", "{{}}")

        # Now use format_map with a mapping that echoes back unknown keys
        return escaped.format_map(_PreserveMapping(variables))

    @staticmethod
    @trace_method
    def get_system_message_from_compiled_memory(
        system_prompt: str,
        memory_with_sources: str,
        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
        timezone: str,
        user_defined_variables: Optional[dict] = None,
        append_icm_if_missing: bool = True,
        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
        previous_message_count: int = 0,
        archival_memory_size: int = 0,
    ) -> str:
        """Prepare the final/full system message that will be fed into the LLM API

        The base system message may be templated, in which case we need to render the variables.

        The following are reserved variables:
          - CORE_MEMORY: the in-context memory of the LLM

        Args:
            system_prompt: Base system prompt, optionally containing the memory placeholder.
            memory_with_sources: Already-compiled memory text to inject.
            in_context_memory_last_edit: When memory blocks were last modified.
            timezone: Timezone used when formatting timestamps in the metadata block.
            user_defined_variables: Not supported yet; must be ``None``.
            append_icm_if_missing: Append the memory placeholder to the prompt when it is absent.
            template_format: Only ``"f-string"`` is implemented.
            previous_message_count: Passed through to the memory metadata block.
            archival_memory_size: Passed through to the memory metadata block.

        Returns:
            The system prompt with the memory placeholder replaced by the compiled
            memory followed by the memory metadata block.

        Raises:
            NotImplementedError: If ``user_defined_variables`` is not ``None`` or
                ``template_format`` is not ``"f-string"``.
            ValueError: If rendering the prompt fails.
        """
        if user_defined_variables is not None:
            # TODO eventually support the user defining their own variables to inject
            raise NotImplementedError
        else:
            variables = {}

        # Add the protected memory variable
        # (this check only becomes meaningful once user-defined variables are supported)
        if IN_CONTEXT_MEMORY_KEYWORD in variables:
            raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
        else:
            # TODO should this all put into the memory.__repr__ function?
            memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
                memory_edit_timestamp=in_context_memory_last_edit,
                previous_message_count=previous_message_count,
                archival_memory_size=archival_memory_size,
                timezone=timezone,
            )

            full_memory_string = memory_with_sources + "\n\n" + memory_metadata_string

            # Add to the variables list to inject
            variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string

        if template_format == "f-string":
            memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"

            # Catch the special case where the system prompt is unformatted
            if append_icm_if_missing:
                if memory_variable_string not in system_prompt:
                    # In this case, append it to the end to make sure memory is still injected
                    # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
                    system_prompt += "\n\n" + memory_variable_string

            # render the variables using the built-in templater
            try:
                if user_defined_variables:
                    formatted_prompt = PromptGenerator.safe_format(system_prompt, variables)
                else:
                    # Plain replacement avoids interpreting other braces in the prompt as format fields
                    formatted_prompt = system_prompt.replace(memory_variable_string, full_memory_string)
            except Exception as e:
                # Chain the cause so the original traceback is not lost
                raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}") from e

        else:
            # TODO support for mustache and jinja2
            raise NotImplementedError(template_format)

        return formatted_prompt

    @staticmethod
    @trace_method
    async def compile_system_message_async(
        system_prompt: str,
        in_context_memory: Memory,
        in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
        timezone: str,
        user_defined_variables: Optional[dict] = None,
        append_icm_if_missing: bool = True,
        template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
        previous_message_count: int = 0,
        archival_memory_size: int = 0,
        tool_rules_solver: Optional[ToolRulesSolver] = None,
        sources: Optional[List] = None,
        max_files_open: Optional[int] = None,
    ) -> str:
        """Compile the in-context memory and render the full system message.

        The memory is compiled via ``in_context_memory.compile_in_thread_async`` and the
        result is handed to ``get_system_message_from_compiled_memory`` together with the
        remaining arguments.

        Args:
            system_prompt: Base system prompt, optionally containing the memory placeholder.
            in_context_memory: Memory object whose compiled text is injected into the prompt.
            in_context_memory_last_edit: When memory blocks were last modified.
            timezone: Timezone used when formatting timestamps in the metadata block.
            user_defined_variables: Not supported yet; must be ``None``.
            append_icm_if_missing: Append the memory placeholder to the prompt when it is absent.
            template_format: Only ``"f-string"`` is implemented.
            previous_message_count: Passed through to the memory metadata block.
            archival_memory_size: Passed through to the memory metadata block.
            tool_rules_solver: When given, its compiled tool-rule prompts are passed to the
                memory compiler as ``tool_usage_rules``.
            sources: Passed through to the memory compiler.
            max_files_open: Passed through to the memory compiler.

        Returns:
            The fully rendered system message.

        Raises:
            NotImplementedError: If ``user_defined_variables`` is not ``None`` or
                ``template_format`` is not ``"f-string"``.
            ValueError: If rendering the prompt fails.
        """
        # Add tool rule constraints if available
        tool_constraint_block = None
        if tool_rules_solver is not None:
            tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()

        if user_defined_variables is not None:
            # TODO eventually support the user defining their own variables to inject
            raise NotImplementedError

        memory_with_sources = await in_context_memory.compile_in_thread_async(
            tool_usage_rules=tool_constraint_block, sources=sources, max_files_open=max_files_open
        )

        return PromptGenerator.get_system_message_from_compiled_memory(
            system_prompt=system_prompt,
            memory_with_sources=memory_with_sources,
            in_context_memory_last_edit=in_context_memory_last_edit,
            timezone=timezone,
            user_defined_variables=user_defined_variables,
            append_icm_if_missing=append_icm_if_missing,
            template_format=template_format,
            previous_message_count=previous_message_count,
            archival_memory_size=archival_memory_size,
        )
|
||||
@@ -42,6 +42,7 @@ from letta.orm.sandbox_config import AgentEnvironmentVariable
|
||||
from letta.orm.sandbox_config import AgentEnvironmentVariable as AgentEnvironmentVariableModel
|
||||
from letta.orm.sqlalchemy_base import AccessType
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.prompts.prompt_generator import PromptGenerator
|
||||
from letta.schemas.agent import AgentState as PydanticAgentState
|
||||
from letta.schemas.agent import AgentType, CreateAgent, UpdateAgent, get_prompt_template_for_agent_type
|
||||
from letta.schemas.block import DEFAULT_BLOCKS
|
||||
@@ -89,7 +90,6 @@ from letta.services.helpers.agent_manager_helper import (
|
||||
check_supports_structured_output,
|
||||
compile_system_message,
|
||||
derive_system_message,
|
||||
get_system_message_from_compiled_memory,
|
||||
initialize_message_sequence,
|
||||
initialize_message_sequence_async,
|
||||
package_initial_message_sequence,
|
||||
@@ -1783,7 +1783,7 @@ class AgentManager:
|
||||
|
||||
# update memory (TODO: potentially update recall/archival stats separately)
|
||||
|
||||
new_system_message_str = get_system_message_from_compiled_memory(
|
||||
new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
|
||||
system_prompt=agent_state.system,
|
||||
memory_with_sources=curr_memory_str,
|
||||
in_context_memory_last_edit=memory_edit_timestamp,
|
||||
|
||||
@@ -21,7 +21,7 @@ from letta.constants import (
|
||||
STRUCTURED_OUTPUT_MODELS,
|
||||
)
|
||||
from letta.helpers import ToolRulesSolver
|
||||
from letta.helpers.datetime_helpers import format_datetime, get_local_time, get_local_time_fast
|
||||
from letta.helpers.datetime_helpers import get_local_time
|
||||
from letta.llm_api.llm_client import LLMClient
|
||||
from letta.orm.agent import Agent as AgentModel
|
||||
from letta.orm.agents_tags import AgentsTags
|
||||
@@ -33,6 +33,7 @@ from letta.orm.sources_agents import SourcesAgents
|
||||
from letta.orm.sqlite_functions import adapt_array
|
||||
from letta.otel.tracing import trace_method
|
||||
from letta.prompts import gpt_system
|
||||
from letta.prompts.prompt_generator import PromptGenerator
|
||||
from letta.schemas.agent import AgentState, AgentType
|
||||
from letta.schemas.embedding_config import EmbeddingConfig
|
||||
from letta.schemas.enums import MessageRole
|
||||
@@ -217,60 +218,6 @@ def derive_system_message(agent_type: AgentType, enable_sleeptime: Optional[bool
|
||||
return system
|
||||
|
||||
|
||||
# TODO: This code is kind of wonky and deserves a rewrite
|
||||
def compile_memory_metadata_block(
    memory_edit_timestamp: datetime,
    timezone: str,
    previous_message_count: int = 0,
    archival_memory_size: Optional[int] = 0,
) -> str:
    """
    Build the ``<memory_metadata>`` section placed in an agent's system prompt.

    The section tells the agent the current time, when its memory blocks were
    last edited, and how many items sit in recall and archival memory, so it
    knows what it can look up with its tools.

    Args:
        memory_edit_timestamp: When memory blocks were last modified
        timezone: The timezone to use for formatting timestamps (e.g., 'America/Los_Angeles')
        previous_message_count: Number of messages in recall memory (conversation history)
        archival_memory_size: Number of items in archival memory (long-term storage)

    Returns:
        The metadata block as a newline-joined string wrapped in XML-style tags

    Example Output:
        <memory_metadata>
        - The current time is: 2024-01-15 10:30 AM PST
        - Memory blocks were last modified: 2024-01-15 09:00 AM PST
        - 42 previous messages between you and the user are stored in recall memory (use tools to access them)
        - 156 total memories you created are stored in archival memory (use tools to access them)
        </memory_metadata>
    """
    # Render the edit timestamp in the requested timezone (mimicking get_local_time())
    last_modified = format_datetime(memory_edit_timestamp, timezone)
    current_time = get_local_time_fast(timezone)

    # Lines describing memory that lives outside the context window
    block = ["<memory_metadata>"]
    block.append(f"- The current time is: {current_time}")
    block.append(f"- Memory blocks were last modified: {last_modified}")
    block.append(
        f"- {previous_message_count} previous messages between you and the user are stored in recall memory (use tools to access them)"
    )

    # The archival line is only shown when there is something in archival memory
    has_archival = archival_memory_size is not None and archival_memory_size > 0
    if has_archival:
        block.append(f"- {archival_memory_size} total memories you created are stored in archival memory (use tools to access them)")

    block.append("</memory_metadata>")
    return "\n".join(block)
|
||||
|
||||
|
||||
class PreserveMapping(dict):
|
||||
"""Used to preserve (do not modify) undefined variables in the system prompt"""
|
||||
|
||||
@@ -331,7 +278,7 @@ def compile_system_message(
|
||||
raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
|
||||
else:
|
||||
# TODO should this all put into the memory.__repr__ function?
|
||||
memory_metadata_string = compile_memory_metadata_block(
|
||||
memory_metadata_string = PromptGenerator.compile_memory_metadata_block(
|
||||
memory_edit_timestamp=in_context_memory_last_edit,
|
||||
previous_message_count=previous_message_count,
|
||||
archival_memory_size=archival_memory_size,
|
||||
@@ -372,154 +319,6 @@ def compile_system_message(
|
||||
return formatted_prompt
|
||||
|
||||
|
||||
@trace_method
def get_system_message_from_compiled_memory(
    system_prompt: str,
    memory_with_sources: str,
    in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
    timezone: str,
    user_defined_variables: Optional[dict] = None,
    append_icm_if_missing: bool = True,
    template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
    previous_message_count: int = 0,
    archival_memory_size: int = 0,
) -> str:
    """Render the complete system message that is sent to the LLM API.

    The base system prompt may be a template; the memory placeholder in it is
    replaced by the compiled memory followed by the memory metadata block.

    Reserved template variables:
      - CORE_MEMORY: the in-context memory of the LLM
    """
    # TODO eventually support the user defining their own variables to inject
    if user_defined_variables is not None:
        raise NotImplementedError
    template_vars = {}

    # The memory keyword is protected and may not be supplied by the caller
    if IN_CONTEXT_MEMORY_KEYWORD in template_vars:
        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")

    # TODO should this all put into the memory.__repr__ function?
    metadata_block = compile_memory_metadata_block(
        memory_edit_timestamp=in_context_memory_last_edit,
        previous_message_count=previous_message_count,
        archival_memory_size=archival_memory_size,
        timezone=timezone,
    )
    rendered_memory = memory_with_sources + "\n\n" + metadata_block

    # Register the value to inject
    template_vars[IN_CONTEXT_MEMORY_KEYWORD] = rendered_memory

    if template_format != "f-string":
        # TODO support for mustache and jinja2
        raise NotImplementedError(template_format)

    placeholder = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"

    # An unformatted prompt gets the placeholder appended so memory is still injected
    # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
    if append_icm_if_missing and placeholder not in system_prompt:
        system_prompt += "\n\n" + placeholder

    # Render the variables using the built-in templater
    try:
        if user_defined_variables:
            return safe_format(system_prompt, template_vars)
        return system_prompt.replace(placeholder, rendered_memory)
    except Exception as e:
        raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
|
||||
|
||||
|
||||
@trace_method
async def compile_system_message_async(
    system_prompt: str,
    in_context_memory: Memory,
    in_context_memory_last_edit: datetime,  # TODO move this inside of BaseMemory?
    timezone: str,
    user_defined_variables: Optional[dict] = None,
    append_icm_if_missing: bool = True,
    template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
    previous_message_count: int = 0,
    archival_memory_size: int = 0,
    tool_rules_solver: Optional[ToolRulesSolver] = None,
    sources: Optional[List] = None,
    max_files_open: Optional[int] = None,
) -> str:
    """Compile the in-context memory and render the complete system message.

    The base system prompt may be a template; the memory placeholder in it is
    replaced by the compiled memory followed by the memory metadata block.

    Reserved template variables:
      - CORE_MEMORY: the in-context memory of the LLM
    """
    # Tool rule constraints, when a solver is supplied, are handed to the memory compiler
    tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts() if tool_rules_solver is not None else None

    # TODO eventually support the user defining their own variables to inject
    if user_defined_variables is not None:
        raise NotImplementedError
    template_vars = {}

    # The memory keyword is protected and may not be supplied by the caller
    if IN_CONTEXT_MEMORY_KEYWORD in template_vars:
        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")

    # TODO should this all put into the memory.__repr__ function?
    metadata_block = compile_memory_metadata_block(
        memory_edit_timestamp=in_context_memory_last_edit,
        previous_message_count=previous_message_count,
        archival_memory_size=archival_memory_size,
        timezone=timezone,
    )

    compiled_memory = await in_context_memory.compile_in_thread_async(
        tool_usage_rules=tool_constraint_block,
        sources=sources,
        max_files_open=max_files_open,
    )
    rendered_memory = compiled_memory + "\n\n" + metadata_block

    # Register the value to inject
    template_vars[IN_CONTEXT_MEMORY_KEYWORD] = rendered_memory

    if template_format != "f-string":
        # TODO support for mustache and jinja2
        raise NotImplementedError(template_format)

    placeholder = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"

    # An unformatted prompt gets the placeholder appended so memory is still injected
    # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
    if append_icm_if_missing and placeholder not in system_prompt:
        system_prompt += "\n\n" + placeholder

    # Render the variables using the built-in templater
    try:
        if user_defined_variables:
            return safe_format(system_prompt, template_vars)
        return system_prompt.replace(placeholder, rendered_memory)
    except Exception as e:
        raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
|
||||
|
||||
|
||||
@trace_method
|
||||
def initialize_message_sequence(
|
||||
agent_state: AgentState,
|
||||
@@ -601,7 +400,7 @@ async def initialize_message_sequence_async(
|
||||
if memory_edit_timestamp is None:
|
||||
memory_edit_timestamp = get_local_time()
|
||||
|
||||
full_system_message = await compile_system_message_async(
|
||||
full_system_message = await PromptGenerator.compile_system_message_async(
|
||||
system_prompt=agent_state.system,
|
||||
in_context_memory=agent_state.memory,
|
||||
in_context_memory_last_edit=memory_edit_timestamp,
|
||||
|
||||
Reference in New Issue
Block a user