perf: optimize prefix caching by skipping system prompt rebuild on every step (#9080)
This commit is contained in:
committed by
Caren Thomas
parent
825019c2ce
commit
9dbe28e8f1
@@ -6717,6 +6717,151 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/v1/agents/{agent_id}/recompile": {
|
||||||
|
"post": {
|
||||||
|
"tags": ["agents"],
|
||||||
|
"summary": "Recompile Agent",
|
||||||
|
"description": "Manually trigger system prompt recompilation for an agent.",
|
||||||
|
"operationId": "recompile_agent",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "agent_id",
|
||||||
|
"in": "path",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 42,
|
||||||
|
"maxLength": 42,
|
||||||
|
"pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
||||||
|
"description": "The ID of the agent in the format 'agent-<uuid4>'",
|
||||||
|
"examples": ["agent-123e4567-e89b-42d3-8456-426614174000"],
|
||||||
|
"title": "Agent Id"
|
||||||
|
},
|
||||||
|
"description": "The ID of the agent in the format 'agent-<uuid4>'"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "update_timestamp",
|
||||||
|
"in": "query",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "If True, update the in-context memory last edit timestamp embedded in the system prompt.",
|
||||||
|
"default": false,
|
||||||
|
"title": "Update Timestamp"
|
||||||
|
},
|
||||||
|
"description": "If True, update the in-context memory last edit timestamp embedded in the system prompt."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dry_run",
|
||||||
|
"in": "query",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "If True, do not persist changes; still returns the compiled system prompt.",
|
||||||
|
"default": false,
|
||||||
|
"title": "Dry Run"
|
||||||
|
},
|
||||||
|
"description": "If True, do not persist changes; still returns the compiled system prompt."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Successful Response",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"type": "string",
|
||||||
|
"title": "Response Recompile Agent"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"422": {
|
||||||
|
"description": "Validation Error",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/HTTPValidationError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/v1/agents/{agent_id}/system-prompt/recompile": {
|
||||||
|
"post": {
|
||||||
|
"tags": ["agents"],
|
||||||
|
"summary": "Recompile Agent System Prompt",
|
||||||
|
"description": "Deprecated alias for POST /v1/agents/{agent_id}/recompile.",
|
||||||
|
"operationId": "recompile_agent_system_prompt",
|
||||||
|
"deprecated": true,
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "agent_id",
|
||||||
|
"in": "path",
|
||||||
|
"required": true,
|
||||||
|
"schema": {
|
||||||
|
"type": "string",
|
||||||
|
"minLength": 42,
|
||||||
|
"maxLength": 42,
|
||||||
|
"pattern": "^agent-[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
|
||||||
|
"description": "The ID of the agent in the format 'agent-<uuid4>'",
|
||||||
|
"examples": ["agent-123e4567-e89b-42d3-8456-426614174000"],
|
||||||
|
"title": "Agent Id"
|
||||||
|
},
|
||||||
|
"description": "The ID of the agent in the format 'agent-<uuid4>'"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "update_timestamp",
|
||||||
|
"in": "query",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "If True, update the in-context memory last edit timestamp embedded in the system prompt.",
|
||||||
|
"default": false,
|
||||||
|
"title": "Update Timestamp"
|
||||||
|
},
|
||||||
|
"description": "If True, update the in-context memory last edit timestamp embedded in the system prompt."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dry_run",
|
||||||
|
"in": "query",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "boolean",
|
||||||
|
"description": "If True, do not persist changes; still returns the compiled system prompt.",
|
||||||
|
"default": false,
|
||||||
|
"title": "Dry Run"
|
||||||
|
},
|
||||||
|
"description": "If True, do not persist changes; still returns the compiled system prompt."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Successful Response",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"type": "string",
|
||||||
|
"title": "Response Recompile Agent System Prompt"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"422": {
|
||||||
|
"description": "Validation Error",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/HTTPValidationError"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/v1/agents/{agent_id}/core-memory/blocks/attach/{block_id}": {
|
"/v1/agents/{agent_id}/core-memory/blocks/attach/{block_id}": {
|
||||||
"patch": {
|
"patch": {
|
||||||
"tags": ["agents"],
|
"tags": ["agents"],
|
||||||
|
|||||||
@@ -687,20 +687,38 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
@trace_method
|
@trace_method
|
||||||
async def _refresh_messages(self, in_context_messages: list[Message]):
|
async def _refresh_messages(self, in_context_messages: list[Message], force_system_prompt_refresh: bool = False):
|
||||||
num_messages = await self.message_manager.size_async(
|
"""Refresh in-context messages.
|
||||||
agent_id=self.agent_state.id,
|
|
||||||
actor=self.actor,
|
This performs two tasks:
|
||||||
)
|
1) Rebuild the *system prompt* only if the memory/tool-rules/directories section or the configured system prompt has changed.
|
||||||
num_archival_memories = await self.passage_manager.agent_passage_size_async(
|
This avoids rebuilding the system prompt on every step due to dynamic metadata (e.g. message counts),
|
||||||
agent_id=self.agent_state.id,
|
which can bust prefix caching.
|
||||||
actor=self.actor,
|
2) Scrub inner thoughts from messages.
|
||||||
)
|
|
||||||
in_context_messages = await self._rebuild_memory(
|
Args:
|
||||||
in_context_messages,
|
in_context_messages: Current in-context messages
|
||||||
num_messages=num_messages,
|
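force_system_prompt_refresh: If True, any error raised while refreshing the system prompt/memory is re-raised to the caller; if False, the error is logged as a warning and the messages are returned without a system prompt refresh.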
|
||||||
num_archival_memories=num_archival_memories,
|
(This flag does not force a rebuild: the rebuild is still skipped when the memory/tool-rules/directories section and the configured system prompt are unchanged.)
|
||||||
)
|
|
||||||
|
Returns:
|
||||||
|
Refreshed in-context messages.
|
||||||
|
"""
|
||||||
|
# Always attempt to rebuild the system prompt if the memory section changed.
|
||||||
|
# This method is careful to skip rebuilds when the memory section is unchanged.
|
||||||
|
try:
|
||||||
|
in_context_messages = await self._rebuild_memory(
|
||||||
|
in_context_messages,
|
||||||
|
num_messages=None,
|
||||||
|
num_archival_memories=None,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
# If callers requested a forced refresh, surface the error.
|
||||||
|
if force_system_prompt_refresh:
|
||||||
|
raise
|
||||||
|
self.logger.warning(f"Failed to refresh system prompt/memory: {e}")
|
||||||
|
|
||||||
|
# Always scrub inner thoughts regardless of system prompt refresh
|
||||||
in_context_messages = scrub_inner_thoughts_from_messages(in_context_messages, self.agent_state.llm_config)
|
in_context_messages = scrub_inner_thoughts_from_messages(in_context_messages, self.agent_state.llm_config)
|
||||||
return in_context_messages
|
return in_context_messages
|
||||||
|
|
||||||
@@ -708,8 +726,8 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
async def _rebuild_memory(
|
async def _rebuild_memory(
|
||||||
self,
|
self,
|
||||||
in_context_messages: list[Message],
|
in_context_messages: list[Message],
|
||||||
num_messages: int,
|
num_messages: int | None,
|
||||||
num_archival_memories: int,
|
num_archival_memories: int | None,
|
||||||
):
|
):
|
||||||
agent_state = await self.agent_manager.refresh_memory_async(agent_state=self.agent_state, actor=self.actor)
|
agent_state = await self.agent_manager.refresh_memory_async(agent_state=self.agent_state, actor=self.actor)
|
||||||
|
|
||||||
@@ -769,10 +787,14 @@ class LettaAgentV2(BaseAgentV2):
|
|||||||
)
|
)
|
||||||
new_memory_section = extract_memory_section(curr_memory_str)
|
new_memory_section = extract_memory_section(curr_memory_str)
|
||||||
|
|
||||||
# compare just the memory sections (memory blocks, tool rules, directories)
|
# Compare just the memory sections (memory blocks, tool rules, directories).
|
||||||
if curr_memory_section.strip() == new_memory_section.strip():
|
# Also ensure the configured system prompt is still present; if the system prompt
|
||||||
|
# changed (e.g. via UpdateAgent(system=...)), we must rebuild.
|
||||||
|
system_prompt_changed = agent_state.system not in curr_system_message_text
|
||||||
|
|
||||||
|
if (not system_prompt_changed) and (curr_memory_section.strip() == new_memory_section.strip()):
|
||||||
self.logger.debug(
|
self.logger.debug(
|
||||||
f"Memory and sources haven't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
|
f"Memory, sources, and system prompt haven't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
|
||||||
)
|
)
|
||||||
return in_context_messages
|
return in_context_messages
|
||||||
|
|
||||||
|
|||||||
@@ -733,13 +733,11 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
self.logger.info("switching to unconstrained mode (allowing non-tool responses)")
|
self.logger.info("switching to unconstrained mode (allowing non-tool responses)")
|
||||||
self._require_tool_call = require_tool_call
|
self._require_tool_call = require_tool_call
|
||||||
|
|
||||||
# Always refresh messages at the start of each step to pick up external inputs
|
# Refresh messages at the start of each step to scrub inner thoughts.
|
||||||
# (e.g., approval responses submitted by the client while this stream is running)
|
# NOTE: We skip system prompt refresh during normal steps to preserve prefix caching.
|
||||||
|
# The system prompt is only rebuilt after compaction or message reset.
|
||||||
try:
|
try:
|
||||||
# TODO: cleanup and de-dup
|
messages = await self._refresh_messages(messages, force_system_prompt_refresh=False)
|
||||||
# updates the system prompt with the latest blocks / message histories
|
|
||||||
messages = await self._refresh_messages(messages)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"Failed to refresh messages at step start: {e}")
|
self.logger.warning(f"Failed to refresh messages at step start: {e}")
|
||||||
|
|
||||||
@@ -924,6 +922,8 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
context_tokens_before=context_tokens_before,
|
context_tokens_before=context_tokens_before,
|
||||||
messages_count_before=messages_count_before,
|
messages_count_before=messages_count_before,
|
||||||
)
|
)
|
||||||
|
# Force system prompt rebuild after compaction to update memory blocks and timestamps
|
||||||
|
messages = await self._refresh_messages(messages, force_system_prompt_refresh=True)
|
||||||
self.logger.info("Summarization succeeded, continuing to retry LLM request")
|
self.logger.info("Summarization succeeded, continuing to retry LLM request")
|
||||||
|
|
||||||
# Persist the summary message
|
# Persist the summary message
|
||||||
@@ -1081,6 +1081,10 @@ class LettaAgentV3(LettaAgentV2):
|
|||||||
context_tokens_before=context_tokens_before,
|
context_tokens_before=context_tokens_before,
|
||||||
messages_count_before=messages_count_before,
|
messages_count_before=messages_count_before,
|
||||||
)
|
)
|
||||||
|
# Force system prompt rebuild after compaction to update memory blocks and timestamps
|
||||||
|
messages = await self._refresh_messages(messages, force_system_prompt_refresh=True)
|
||||||
|
# TODO: persist + return the summary message
|
||||||
|
# TODO: convert this to a SummaryMessage
|
||||||
self.response_messages.append(summary_message)
|
self.response_messages.append(summary_message)
|
||||||
|
|
||||||
# Yield summary result message to client
|
# Yield summary result message to client
|
||||||
|
|||||||
@@ -242,7 +242,7 @@ async def archival_memory_search(
|
|||||||
raise NotImplementedError("This should never be invoked directly. Contact Letta if you see this error message.")
|
raise NotImplementedError("This should never be invoked directly. Contact Letta if you see this error message.")
|
||||||
|
|
||||||
|
|
||||||
def core_memory_append(agent_state: "AgentState", label: str, content: str) -> Optional[str]: # type: ignore
|
def core_memory_append(agent_state: "AgentState", label: str, content: str) -> str: # type: ignore
|
||||||
"""
|
"""
|
||||||
Append to the contents of core memory.
|
Append to the contents of core memory.
|
||||||
|
|
||||||
@@ -251,15 +251,15 @@ def core_memory_append(agent_state: "AgentState", label: str, content: str) -> O
|
|||||||
content (str): Content to write to the memory. All unicode (including emojis) are supported.
|
content (str): Content to write to the memory. All unicode (including emojis) are supported.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Optional[str]: None is always returned as this function does not produce a response.
|
str: The updated value of the memory block.
|
||||||
"""
|
"""
|
||||||
current_value = str(agent_state.memory.get_block(label).value)
|
current_value = str(agent_state.memory.get_block(label).value)
|
||||||
new_value = current_value + "\n" + str(content)
|
new_value = current_value + "\n" + str(content)
|
||||||
agent_state.memory.update_block_value(label=label, value=new_value)
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
return None
|
return new_value
|
||||||
|
|
||||||
|
|
||||||
def core_memory_replace(agent_state: "AgentState", label: str, old_content: str, new_content: str) -> Optional[str]: # type: ignore
|
def core_memory_replace(agent_state: "AgentState", label: str, old_content: str, new_content: str) -> str: # type: ignore
|
||||||
"""
|
"""
|
||||||
Replace the contents of core memory. To delete memories, use an empty string for new_content.
|
Replace the contents of core memory. To delete memories, use an empty string for new_content.
|
||||||
|
|
||||||
@@ -269,14 +269,14 @@ def core_memory_replace(agent_state: "AgentState", label: str, old_content: str,
|
|||||||
new_content (str): Content to write to the memory. All unicode (including emojis) are supported.
|
new_content (str): Content to write to the memory. All unicode (including emojis) are supported.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Optional[str]: None is always returned as this function does not produce a response.
|
str: The updated value of the memory block.
|
||||||
"""
|
"""
|
||||||
current_value = str(agent_state.memory.get_block(label).value)
|
current_value = str(agent_state.memory.get_block(label).value)
|
||||||
if old_content not in current_value:
|
if old_content not in current_value:
|
||||||
raise ValueError(f"Old content '{old_content}' not found in memory block '{label}'")
|
raise ValueError(f"Old content '{old_content}' not found in memory block '{label}'")
|
||||||
new_value = current_value.replace(str(old_content), str(new_content))
|
new_value = current_value.replace(str(old_content), str(new_content))
|
||||||
agent_state.memory.update_block_value(label=label, value=new_value)
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
return None
|
return new_value
|
||||||
|
|
||||||
|
|
||||||
def rethink_memory(agent_state: "AgentState", new_memory: str, target_block_label: str) -> None:
|
def rethink_memory(agent_state: "AgentState", new_memory: str, target_block_label: str) -> None:
|
||||||
@@ -337,7 +337,7 @@ def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str:
|
|||||||
memory_replace(label="human", old_str="Their name is Alice", new_str="Their name is Bob")
|
memory_replace(label="human", old_str="Their name is Alice", new_str="Their name is Bob")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: The success message
|
str: The updated value of the memory block.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -382,19 +382,10 @@ def memory_replace(agent_state: "AgentState", label: str, old_str: str, new_str:
|
|||||||
# end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
|
# end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
|
||||||
# snippet = "\n".join(new_value.split("\n")[start_line : end_line + 1])
|
# snippet = "\n".join(new_value.split("\n")[start_line : end_line + 1])
|
||||||
|
|
||||||
# Prepare the success message
|
return new_value
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
# return None
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
|
|
||||||
def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_line: int = -1) -> Optional[str]: # type: ignore
|
def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_line: int = -1) -> str: # type: ignore
|
||||||
"""
|
"""
|
||||||
The memory_insert command allows you to insert text at a specific location in a memory block.
|
The memory_insert command allows you to insert text at a specific location in a memory block.
|
||||||
|
|
||||||
@@ -453,15 +444,7 @@ def memory_insert(agent_state: "AgentState", label: str, new_str: str, insert_li
|
|||||||
# Write into the block
|
# Write into the block
|
||||||
agent_state.memory.update_block_value(label=label, value=new_value)
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
|
|
||||||
# Prepare the success message
|
return new_value
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
|
|
||||||
def memory_apply_patch(agent_state: "AgentState", label: str, patch: str) -> str: # type: ignore
|
def memory_apply_patch(agent_state: "AgentState", label: str, patch: str) -> str: # type: ignore
|
||||||
@@ -499,7 +482,7 @@ def memory_apply_patch(agent_state: "AgentState", label: str, patch: str) -> str
|
|||||||
raise NotImplementedError("This should never be invoked directly. Contact Letta if you see this error message.")
|
raise NotImplementedError("This should never be invoked directly. Contact Letta if you see this error message.")
|
||||||
|
|
||||||
|
|
||||||
def memory_rethink(agent_state: "AgentState", label: str, new_memory: str) -> None:
|
def memory_rethink(agent_state: "AgentState", label: str, new_memory: str) -> str:
|
||||||
"""
|
"""
|
||||||
The memory_rethink command allows you to completely rewrite the contents of a memory block. Use this tool to make large sweeping changes (e.g. when you want to condense or reorganize the memory blocks), do NOT use this tool to make small precise edits (e.g. add or remove a line, replace a specific string, etc).
|
The memory_rethink command allows you to completely rewrite the contents of a memory block. Use this tool to make large sweeping changes (e.g. when you want to condense or reorganize the memory blocks), do NOT use this tool to make small precise edits (e.g. add or remove a line, replace a specific string, etc).
|
||||||
|
|
||||||
@@ -528,17 +511,7 @@ def memory_rethink(agent_state: "AgentState", label: str, new_memory: str) -> No
|
|||||||
agent_state.memory.set_block(new_block)
|
agent_state.memory.set_block(new_block)
|
||||||
|
|
||||||
agent_state.memory.update_block_value(label=label, value=new_memory)
|
agent_state.memory.update_block_value(label=label, value=new_memory)
|
||||||
|
return new_memory
|
||||||
# Prepare the success message
|
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
# return None
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
|
|
||||||
def memory_finish_edits(agent_state: "AgentState") -> None: # type: ignore
|
def memory_finish_edits(agent_state: "AgentState") -> None: # type: ignore
|
||||||
|
|||||||
@@ -1263,6 +1263,70 @@ async def modify_block_for_agent(
|
|||||||
return block
|
return block
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/{agent_id}/recompile",
|
||||||
|
response_model=str,
|
||||||
|
operation_id="recompile_agent",
|
||||||
|
)
|
||||||
|
async def recompile_agent(
|
||||||
|
agent_id: AgentId,
|
||||||
|
server: "SyncServer" = Depends(get_letta_server),
|
||||||
|
headers: HeaderParams = Depends(get_headers),
|
||||||
|
update_timestamp: bool = Query(
|
||||||
|
False,
|
||||||
|
description="If True, update the in-context memory last edit timestamp embedded in the system prompt.",
|
||||||
|
),
|
||||||
|
dry_run: bool = Query(
|
||||||
|
False,
|
||||||
|
description="If True, do not persist changes; still returns the compiled system prompt.",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
"""Manually trigger system prompt recompilation for an agent."""
|
||||||
|
actor = await server.user_manager.get_actor_or_default_async(actor_id=headers.actor_id)
|
||||||
|
|
||||||
|
_, system_message, _, _ = await server.agent_manager.rebuild_system_prompt_async(
|
||||||
|
agent_id=agent_id,
|
||||||
|
actor=actor,
|
||||||
|
force=True,
|
||||||
|
update_timestamp=update_timestamp,
|
||||||
|
dry_run=dry_run,
|
||||||
|
)
|
||||||
|
|
||||||
|
if system_message is None:
|
||||||
|
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"No system message found for agent '{agent_id}'")
|
||||||
|
|
||||||
|
return system_message.to_openai_dict().get("content", "")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/{agent_id}/system-prompt/recompile",
|
||||||
|
response_model=str,
|
||||||
|
operation_id="recompile_agent_system_prompt",
|
||||||
|
deprecated=True,
|
||||||
|
)
|
||||||
|
async def recompile_agent_system_prompt(
|
||||||
|
agent_id: AgentId,
|
||||||
|
server: "SyncServer" = Depends(get_letta_server),
|
||||||
|
headers: HeaderParams = Depends(get_headers),
|
||||||
|
update_timestamp: bool = Query(
|
||||||
|
False,
|
||||||
|
description="If True, update the in-context memory last edit timestamp embedded in the system prompt.",
|
||||||
|
),
|
||||||
|
dry_run: bool = Query(
|
||||||
|
False,
|
||||||
|
description="If True, do not persist changes; still returns the compiled system prompt.",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
"""Deprecated alias for POST /v1/agents/{agent_id}/recompile."""
|
||||||
|
return await recompile_agent(
|
||||||
|
agent_id=agent_id,
|
||||||
|
server=server,
|
||||||
|
headers=headers,
|
||||||
|
update_timestamp=update_timestamp,
|
||||||
|
dry_run=dry_run,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.patch("/{agent_id}/core-memory/blocks/attach/{block_id}", response_model=AgentState, operation_id="attach_core_memory_block")
|
@router.patch("/{agent_id}/core-memory/blocks/attach/{block_id}", response_model=AgentState, operation_id="attach_core_memory_block")
|
||||||
async def attach_block_to_agent(
|
async def attach_block_to_agent(
|
||||||
block_id: BlockId,
|
block_id: BlockId,
|
||||||
|
|||||||
@@ -1567,21 +1567,30 @@ class AgentManager:
|
|||||||
@enforce_types
|
@enforce_types
|
||||||
@trace_method
|
@trace_method
|
||||||
async def reset_messages_async(
|
async def reset_messages_async(
|
||||||
self, agent_id: str, actor: PydanticUser, add_default_initial_messages: bool = False, needs_agent_state: bool = True
|
self,
|
||||||
|
agent_id: str,
|
||||||
|
actor: PydanticUser,
|
||||||
|
add_default_initial_messages: bool = False,
|
||||||
|
needs_agent_state: bool = True,
|
||||||
|
rebuild_system_prompt: bool = False,
|
||||||
) -> Optional[PydanticAgentState]:
|
) -> Optional[PydanticAgentState]:
|
||||||
"""
|
"""
|
||||||
Clears all in-context messages for the specified agent except the original system message by:
|
Clears all in-context messages for the specified agent except the original system message by:
|
||||||
1) Preserving the first message ID (original system message).
|
1) Preserving the first message ID (original system message).
|
||||||
2) Updating the agent's message_ids to only contain the system message.
|
2) Updating the agent's message_ids to only contain the system message.
|
||||||
3) Optionally adding default initial messages after the system message.
|
3) Optionally rebuilding the system prompt with current memory blocks (for prefix caching optimization).
|
||||||
|
4) Optionally adding default initial messages after the system message.
|
||||||
|
|
||||||
Note: This only clears messages from the agent's context, it does not delete them from the database.
|
Note: This only clears messages from the agent's context, it does not delete them from the database.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
add_default_initial_messages: If true, adds the default initial messages after resetting.
|
|
||||||
agent_id (str): The ID of the agent whose messages will be reset.
|
agent_id (str): The ID of the agent whose messages will be reset.
|
||||||
actor (PydanticUser): The user performing this action.
|
actor (PydanticUser): The user performing this action.
|
||||||
|
add_default_initial_messages: If true, adds the default initial messages after resetting.
|
||||||
needs_agent_state: If True, returns the updated agent state. If False, returns None (for performance optimization)
|
needs_agent_state: If True, returns the updated agent state. If False, returns None (for performance optimization)
|
||||||
|
rebuild_system_prompt: If True, rebuilds the system prompt with current memory blocks.
|
||||||
|
This ensures the system prompt reflects the latest memory state after reset.
|
||||||
|
Defaults to False to preserve the original system message content.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Optional[PydanticAgentState]: The updated agent state with only the original system message preserved, or None if needs_agent_state=False.
|
Optional[PydanticAgentState]: The updated agent state with only the original system message preserved, or None if needs_agent_state=False.
|
||||||
@@ -1601,12 +1610,17 @@ class AgentManager:
|
|||||||
agent.message_ids = [system_message_id]
|
agent.message_ids = [system_message_id]
|
||||||
await agent.update_async(db_session=session, actor=actor)
|
await agent.update_async(db_session=session, actor=actor)
|
||||||
|
|
||||||
# Only convert to pydantic if we need to return it or add initial messages
|
# Only convert to pydantic if we need to return it or add initial messages or rebuild system prompt
|
||||||
if add_default_initial_messages or needs_agent_state:
|
if add_default_initial_messages or needs_agent_state or rebuild_system_prompt:
|
||||||
agent_state = await agent.to_pydantic_async(include_relationships=["sources"] if add_default_initial_messages else None)
|
include_rels = ["sources", "memory"] if (add_default_initial_messages or rebuild_system_prompt) else None
|
||||||
|
agent_state = await agent.to_pydantic_async(include_relationships=include_rels)
|
||||||
else:
|
else:
|
||||||
agent_state = None
|
agent_state = None
|
||||||
|
|
||||||
|
# Optionally rebuild the system prompt with current memory blocks
|
||||||
|
if rebuild_system_prompt and agent_state:
|
||||||
|
agent_state, _, _, _ = await self.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True)
|
||||||
|
|
||||||
# Optionally add default initial messages after the system message
|
# Optionally add default initial messages after the system message
|
||||||
if add_default_initial_messages:
|
if add_default_initial_messages:
|
||||||
init_messages = await initialize_message_sequence_async(
|
init_messages = await initialize_message_sequence_async(
|
||||||
|
|||||||
@@ -318,14 +318,14 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True)
|
await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def core_memory_append(self, agent_state: AgentState, actor: User, label: str, content: str) -> Optional[str]:
|
async def core_memory_append(self, agent_state: AgentState, actor: User, label: str, content: str) -> str:
|
||||||
if agent_state.memory.get_block(label).read_only:
|
if agent_state.memory.get_block(label).read_only:
|
||||||
raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
|
raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
|
||||||
current_value = str(agent_state.memory.get_block(label).value)
|
current_value = str(agent_state.memory.get_block(label).value)
|
||||||
new_value = current_value + "\n" + str(content)
|
new_value = current_value + "\n" + str(content)
|
||||||
agent_state.memory.update_block_value(label=label, value=new_value)
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
||||||
return None
|
return new_value
|
||||||
|
|
||||||
async def core_memory_replace(
|
async def core_memory_replace(
|
||||||
self,
|
self,
|
||||||
@@ -334,7 +334,7 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
label: str,
|
label: str,
|
||||||
old_content: str,
|
old_content: str,
|
||||||
new_content: str,
|
new_content: str,
|
||||||
) -> Optional[str]:
|
) -> str:
|
||||||
if agent_state.memory.get_block(label).read_only:
|
if agent_state.memory.get_block(label).read_only:
|
||||||
raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
|
raise ValueError(f"{READ_ONLY_BLOCK_EDIT_ERROR}")
|
||||||
current_value = str(agent_state.memory.get_block(label).value)
|
current_value = str(agent_state.memory.get_block(label).value)
|
||||||
@@ -343,7 +343,7 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
new_value = current_value.replace(str(old_content), str(new_content))
|
new_value = current_value.replace(str(old_content), str(new_content))
|
||||||
agent_state.memory.update_block_value(label=label, value=new_value)
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
||||||
return None
|
return new_value
|
||||||
|
|
||||||
async def memory_replace(self, agent_state: AgentState, actor: User, label: str, old_str: str, new_str: str) -> str:
|
async def memory_replace(self, agent_state: AgentState, actor: User, label: str, old_str: str, new_str: str) -> str:
|
||||||
if agent_state.memory.get_block(label).read_only:
|
if agent_state.memory.get_block(label).read_only:
|
||||||
@@ -393,23 +393,7 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
|
|
||||||
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
||||||
|
|
||||||
# Create a snippet of the edited section
|
return new_value
|
||||||
SNIPPET_LINES = 3
|
|
||||||
replacement_line = current_value.split(old_str)[0].count("\n")
|
|
||||||
start_line = max(0, replacement_line - SNIPPET_LINES)
|
|
||||||
end_line = replacement_line + SNIPPET_LINES + new_str.count("\n")
|
|
||||||
snippet = "\n".join(new_value.split("\n")[start_line : end_line + 1])
|
|
||||||
|
|
||||||
# Prepare the success message
|
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
# return None
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
async def memory_apply_patch(self, agent_state: AgentState, actor: User, label: str, patch: str) -> str:
|
async def memory_apply_patch(self, agent_state: AgentState, actor: User, label: str, patch: str) -> str:
|
||||||
"""Apply a simplified unified-diff style patch to one or more memory blocks.
|
"""Apply a simplified unified-diff style patch to one or more memory blocks.
|
||||||
@@ -545,11 +529,7 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
agent_state.memory.update_block_value(label=label, value=new_value)
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
||||||
|
|
||||||
return (
|
return new_value
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extended mode: parse codex-like patch operations for memory blocks
|
# Extended mode: parse codex-like patch operations for memory blocks
|
||||||
lines = patch.splitlines()
|
lines = patch.splitlines()
|
||||||
@@ -753,15 +733,7 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
|
|
||||||
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
||||||
|
|
||||||
# Prepare the success message
|
return new_value
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
async def memory_rethink(self, agent_state: AgentState, actor: User, label: str, new_memory: str) -> str:
|
async def memory_rethink(self, agent_state: AgentState, actor: User, label: str, new_memory: str) -> str:
|
||||||
if agent_state.memory.get_block(label).read_only:
|
if agent_state.memory.get_block(label).read_only:
|
||||||
@@ -793,16 +765,7 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
|
|
||||||
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
await self.agent_manager.update_memory_if_changed_async(agent_id=agent_state.id, new_memory=agent_state.memory, actor=actor)
|
||||||
|
|
||||||
# Prepare the success message
|
return new_memory
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
# return None
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
async def memory_finish_edits(self, agent_state: AgentState, actor: User) -> None:
|
async def memory_finish_edits(self, agent_state: AgentState, actor: User) -> None:
|
||||||
return None
|
return None
|
||||||
@@ -965,17 +928,13 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
|
|
||||||
# Write the new content to the block
|
# Write the new content to the block
|
||||||
await self.block_manager.update_block_async(block_id=memory_block.id, block_update=BlockUpdate(value=new_value), actor=actor)
|
await self.block_manager.update_block_async(block_id=memory_block.id, block_update=BlockUpdate(value=new_value), actor=actor)
|
||||||
|
|
||||||
|
# Keep in-memory AgentState consistent with DB
|
||||||
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
|
|
||||||
await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True)
|
await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True)
|
||||||
|
|
||||||
# Prepare the success message
|
return new_value
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
async def memory_str_insert(self, agent_state: AgentState, actor: User, path: str, insert_text: str, insert_line: int = -1) -> str:
|
async def memory_str_insert(self, agent_state: AgentState, actor: User, path: str, insert_text: str, insert_line: int = -1) -> str:
|
||||||
"""Insert text into a memory block at a specific line."""
|
"""Insert text into a memory block at a specific line."""
|
||||||
@@ -1032,17 +991,13 @@ class LettaCoreToolExecutor(ToolExecutor):
|
|||||||
|
|
||||||
# Write into the block
|
# Write into the block
|
||||||
await self.block_manager.update_block_async(block_id=memory_block.id, block_update=BlockUpdate(value=new_value), actor=actor)
|
await self.block_manager.update_block_async(block_id=memory_block.id, block_update=BlockUpdate(value=new_value), actor=actor)
|
||||||
|
|
||||||
|
# Keep in-memory AgentState consistent with DB
|
||||||
|
agent_state.memory.update_block_value(label=label, value=new_value)
|
||||||
|
|
||||||
await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True)
|
await self.agent_manager.rebuild_system_prompt_async(agent_id=agent_state.id, actor=actor, force=True)
|
||||||
|
|
||||||
# Prepare the success message
|
return new_value
|
||||||
success_msg = (
|
|
||||||
f"The core memory block with label `{label}` has been successfully edited. "
|
|
||||||
f"Your system prompt has been recompiled with the updated memory contents and is now active in your context. "
|
|
||||||
f"Review the changes and make sure they are as expected (correct indentation, "
|
|
||||||
f"no duplicate lines, etc)."
|
|
||||||
)
|
|
||||||
|
|
||||||
return success_msg
|
|
||||||
|
|
||||||
async def memory(
|
async def memory(
|
||||||
self,
|
self,
|
||||||
|
|||||||
178
tests/integration_test_system_prompt_prefix_caching.py
Normal file
178
tests/integration_test_system_prompt_prefix_caching.py
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
"""
|
||||||
|
Integration tests for system prompt prefix caching optimization.
|
||||||
|
|
||||||
|
These tests verify that the system prompt is NOT rebuilt on every step,
|
||||||
|
only after compaction or message reset. This helps preserve prefix caching
|
||||||
|
for LLM providers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from letta_client import Letta
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
def client(server_url: str) -> Letta:
    """Build the synchronous Letta REST client used by the tests in this module.

    Args:
        server_url: Base URL of the server under test. pytest resolves this
            as the ``server_url`` fixture, which is not defined in this file.

    Returns:
        A ``Letta`` client whose ``base_url`` is ``server_url``.
    """
    rest_client = Letta(base_url=server_url)
    return rest_client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="function")
def agent(client: Letta):
    """Yield a newly created agent for a single test and delete it afterwards.

    Args:
        client: The module-scoped Letta REST client.

    Yields:
        The agent state returned by ``client.agents.create``.
    """
    creation_kwargs = {
        "name": "test-prefix-cache-agent",
        "include_base_tools": True,
        "model": "openai/gpt-4o-mini",
        "embedding": "openai/text-embedding-ada-002",
    }
    created_agent = client.agents.create(**creation_kwargs)

    yield created_agent

    # Teardown is best-effort on purpose: a failed delete must not turn a
    # passing test into an error.
    try:
        client.agents.delete(created_agent.id)
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
class TestSystemPromptPrefixCaching:
    """Test that system prompt stays stable during normal agent execution."""

    @staticmethod
    def _find_human_block(agent):
        """Return the first block labelled ``human`` in the agent snapshot, or None."""
        return next((block for block in agent.memory.blocks if block.label == "human"), None)

    def test_system_prompt_stable_after_memory_tool_and_messages(self, client: Letta, agent):
        """
        Test workflow:
        1. Get initial system prompt and human block value
        2. Tell agent to update its memory block using the memory tool
        3. Verify block was modified but system prompt hasn't changed
        4. Send another message to the agent
        5. Verify system prompt still hasn't changed
        6. Manually update a block via API
        7. Send another message and verify system prompt still hasn't changed
           (memory block changes are deferred to compaction)
        """
        # Step 1: Get initial context window, system prompt, and human block value
        initial_context = client.agents.context.retrieve(agent.id)
        initial_system_prompt = initial_context.system_prompt
        assert initial_system_prompt, "Initial system prompt should not be empty"

        # `agent` is the state captured when the fixture created the agent, so
        # this value is the block content from before any message is sent.
        human_block = self._find_human_block(agent)
        assert human_block, "Agent should have a 'human' memory block"
        initial_block_value = human_block.value

        # Step 2: Tell the agent to update its memory using the memory tool
        response = client.agents.messages.create(
            agent_id=agent.id,
            messages=[
                {
                    "role": "user",
                    "content": "Please use the core_memory_append tool to add the following to your 'human' block: 'User likes pizza.'",
                }
            ],
        )
        assert response.messages, "Agent should respond with messages"

        # Step 3: Verify block was modified but system prompt hasn't changed
        # Check that the block was actually modified
        updated_block = client.blocks.retrieve(human_block.id)
        assert updated_block.value != initial_block_value, "Memory block should have been modified by the agent"
        assert "pizza" in updated_block.value.lower(), "Memory block should contain the new content about pizza"

        # Verify system prompt hasn't changed
        context_after_memory_update = client.agents.context.retrieve(agent.id)
        system_prompt_after_memory = context_after_memory_update.system_prompt
        assert system_prompt_after_memory == initial_system_prompt, (
            "System prompt should NOT change after agent uses memory tool (deferred to compaction)"
        )

        # Step 4: Send another message to the agent
        response2 = client.agents.messages.create(
            agent_id=agent.id,
            messages=[
                {
                    "role": "user",
                    "content": "What is my favorite food?",
                }
            ],
        )
        assert response2.messages, "Agent should respond with messages"

        # Step 5: Verify system prompt still hasn't changed
        context_after_second_message = client.agents.context.retrieve(agent.id)
        system_prompt_after_second = context_after_second_message.system_prompt
        assert system_prompt_after_second == initial_system_prompt, "System prompt should remain stable after multiple messages"

        # Step 6: Manually update a block via the API
        # Re-read the block from the server first. The fixture snapshot still
        # holds the pre-test value, so appending to it would silently discard
        # the text the agent wrote in Step 2.
        current_block = client.blocks.retrieve(human_block.id)
        client.blocks.modify(
            block_id=human_block.id,
            value=current_block.value + "\nUser also likes sushi.",
        )

        # Step 7: Send another message and verify system prompt still hasn't changed
        response3 = client.agents.messages.create(
            agent_id=agent.id,
            messages=[
                {
                    "role": "user",
                    "content": "What foods do I like?",
                }
            ],
        )
        assert response3.messages, "Agent should respond with messages"

        # Verify system prompt STILL hasn't changed (deferred to compaction/reset)
        context_after_manual_update = client.agents.context.retrieve(agent.id)
        system_prompt_after_manual = context_after_manual_update.system_prompt
        assert system_prompt_after_manual == initial_system_prompt, (
            "System prompt should NOT change after manual block update (deferred to compaction)"
        )

    def test_system_prompt_updates_after_reset(self, client: Letta, agent):
        """
        Test that system prompt IS updated after message reset.
        1. Get initial system prompt
        2. Manually update a memory block
        3. Reset messages
        4. Verify system prompt HAS changed to include the new memory
        """
        # Step 1: Get initial system prompt
        initial_context = client.agents.context.retrieve(agent.id)
        initial_system_prompt = initial_context.system_prompt

        # Step 2: Manually update a block via the API
        # No message has been sent to the agent in this test, so the fixture
        # snapshot is used as the base value for the update.
        human_block = self._find_human_block(agent)
        assert human_block, "Agent should have a 'human' memory block"

        # Add distinctive text that we can verify in the system prompt
        new_memory_content = "UNIQUE_TEST_MARKER_12345: User loves ice cream."
        client.blocks.modify(
            block_id=human_block.id,
            value=human_block.value + f"\n{new_memory_content}",
        )

        # Step 3: Reset messages (this should trigger system prompt rebuild)
        client.agents.messages.reset(agent.id)

        # Step 4: Verify system prompt HAS changed and includes the new memory
        context_after_reset = client.agents.context.retrieve(agent.id)
        system_prompt_after_reset = context_after_reset.system_prompt

        assert system_prompt_after_reset != initial_system_prompt, "System prompt SHOULD change after message reset"
        assert "UNIQUE_TEST_MARKER_12345" in system_prompt_after_reset, (
            "System prompt should include the updated memory block content after reset"
        )
|
||||||
Reference in New Issue
Block a user