feat: filter out policy system messages for proxy (#7205)

This commit is contained in:
cthomas
2025-12-16 13:42:51 -08:00
committed by Caren Thomas
parent 8729a037b9
commit 6de4f0f437
3 changed files with 40 additions and 6 deletions

View File

@@ -16,6 +16,33 @@ from letta.settings import model_settings
logger = get_logger(__name__)
def strip_policy_specs(text: str) -> str:
"""
Remove Claude policy injection blocks from message text.
Claude injects policy instructions in two forms:
1. Appended with prefix: 'user: <policy_spec>...'
2. As entire message: '<policy_spec>...'
We truncate everything from the policy start marker onwards since it's all injected policy content.
"""
# Check if entire message is a policy spec (starts with tag)
if text.startswith("<policy_spec>"):
logger.info("[Proxy Helpers] Stripped policy injection (entire message)")
return ""
# Check if policy spec is appended (with prefix)
policy_start = text.find("user: <policy_spec>")
if policy_start != -1:
logger.info(f"[Proxy Helpers] Stripped policy injection from position {policy_start}")
# Truncate everything from this point onwards
cleaned = text[:policy_start].strip()
return cleaned
# No policy injection found, return original text
return text
def extract_user_messages(body: bytes) -> list[str]:
"""Extract user messages from request body."""
messages = []
@@ -28,12 +55,19 @@ def extract_user_messages(body: bytes) -> list[str]:
if msg.get("role") == "user":
content = msg.get("content", "")
if isinstance(content, str):
user_messages.append(content)
# Strip policy specs before adding
cleaned = strip_policy_specs(content)
if cleaned: # Only add if not empty after stripping
user_messages.append(cleaned)
elif isinstance(content, list):
for block in content:
if isinstance(block, dict):
if block.get("type") == "text":
user_messages.append(block.get("text", ""))
text = block.get("text", "")
# Strip policy specs from text blocks
cleaned = strip_policy_specs(text)
if cleaned: # Only add if not empty after stripping
user_messages.append(cleaned)
elif block.get("type") == "image":
user_messages.append("[IMAGE]")

View File

@@ -62,8 +62,8 @@ async def anthropic_messages_proxy(
# Claude Code sends full conversation history, but we only want to persist the new message
user_messages = [all_user_messages[-1]] if all_user_messages else []
# Filter out system/metadata requests
user_messages = [s for s in user_messages if not s.startswith("<system-reminder>")]
# Filter out system/metadata requests and policy specs
user_messages = [s for s in user_messages if not s.startswith("<system-reminder>") and not s.startswith("<policy_spec>")]
if not user_messages:
logger.debug(f"[{PROXY_NAME}] Skipping capture/memory for this turn")

View File

@@ -62,8 +62,8 @@ async def zai_messages_proxy(
# Claude Code sends full conversation history, but we only want to persist the new message
user_messages = [all_user_messages[-1]] if all_user_messages else []
# Filter out system/metadata requests
user_messages = [s for s in user_messages if not s.startswith("<system-reminder>")]
# Filter out system/metadata requests and policy specs
user_messages = [s for s in user_messages if not s.startswith("<system-reminder>") and not s.startswith("<policy_spec>")]
if not user_messages:
logger.debug(f"[{PROXY_NAME}] Skipping capture/memory for this turn")