feat: filter out policy system messages for proxy (#7205)

2025-12-16 13:42:51 -08:00
parent 8729a037b9
commit 6de4f0f437
3 changed files with 40 additions and 6 deletions
--- a/letta/server/rest_api/proxy_helpers.py
+++ b/letta/server/rest_api/proxy_helpers.py
@@ -16,6 +16,33 @@ from letta.settings import model_settings
 logger = get_logger(__name__)


+def strip_policy_specs(text: str) -> str:
+    """
+    Remove Claude policy injection blocks from message text.
+
+    Claude injects policy instructions in two forms:
+    1. Appended with prefix: 'user: <policy_spec>...'
+    2. As entire message: '<policy_spec>...'
+
+    We truncate everything from the policy start marker onwards since it's all injected policy content.
+    """
+    # Check if entire message is a policy spec (starts with tag)
+    if text.startswith("<policy_spec>"):
+        logger.info("[Proxy Helpers] Stripped policy injection (entire message)")
+        return ""
+
+    # Check if policy spec is appended (with prefix)
+    policy_start = text.find("user: <policy_spec>")
+    if policy_start != -1:
+        logger.info(f"[Proxy Helpers] Stripped policy injection from position {policy_start}")
+        # Truncate everything from this point onwards
+        cleaned = text[:policy_start].strip()
+        return cleaned
+
+    # No policy injection found, return original text
+    return text
+
+
 def extract_user_messages(body: bytes) -> list[str]:
    """Extract user messages from request body."""
    messages = []
@@ -28,12 +55,19 @@ def extract_user_messages(body: bytes) -> list[str]:
            if msg.get("role") == "user":
                content = msg.get("content", "")
                if isinstance(content, str):
-                    user_messages.append(content)
+                    # Strip policy specs before adding
+                    cleaned = strip_policy_specs(content)
+                    if cleaned:  # Only add if not empty after stripping
+                        user_messages.append(cleaned)
                elif isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict):
                            if block.get("type") == "text":
-                                user_messages.append(block.get("text", ""))
+                                text = block.get("text", "")
+                                # Strip policy specs from text blocks
+                                cleaned = strip_policy_specs(text)
+                                if cleaned:  # Only add if not empty after stripping
+                                    user_messages.append(cleaned)
                            elif block.get("type") == "image":
                                user_messages.append("[IMAGE]")

--- a/letta/server/rest_api/routers/v1/anthropic.py
+++ b/letta/server/rest_api/routers/v1/anthropic.py
@@ -62,8 +62,8 @@ async def anthropic_messages_proxy(
    # Claude Code sends full conversation history, but we only want to persist the new message
    user_messages = [all_user_messages[-1]] if all_user_messages else []

-    # Filter out system/metadata requests
-    user_messages = [s for s in user_messages if not s.startswith("<system-reminder>")]
+    # Filter out system/metadata requests and policy specs
+    user_messages = [s for s in user_messages if not s.startswith("<system-reminder>") and not s.startswith("<policy_spec>")]
    if not user_messages:
        logger.debug(f"[{PROXY_NAME}] Skipping capture/memory for this turn")

--- a/letta/server/rest_api/routers/v1/zai.py
+++ b/letta/server/rest_api/routers/v1/zai.py
@@ -62,8 +62,8 @@ async def zai_messages_proxy(
    # Claude Code sends full conversation history, but we only want to persist the new message
    user_messages = [all_user_messages[-1]] if all_user_messages else []

-    # Filter out system/metadata requests
-    user_messages = [s for s in user_messages if not s.startswith("<system-reminder>")]
+    # Filter out system/metadata requests and policy specs
+    user_messages = [s for s in user_messages if not s.startswith("<system-reminder>") and not s.startswith("<policy_spec>")]
    if not user_messages:
        logger.debug(f"[{PROXY_NAME}] Skipping capture/memory for this turn")