fix: double escape leads to exponential growth in backslash character [LET-6016] (#6087)

2025-11-17 13:11:23 -08:00
parent 19210c01b0
commit ec953d27c0
5 changed files with 73 additions and 31 deletions
--- a/letta/services/tool_executor/core_tool_executor.py
+++ b/letta/services/tool_executor/core_tool_executor.py
@@ -9,6 +9,7 @@ from letta.constants import (
    RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE,
 )
 from letta.helpers.json_helpers import json_dumps
+from letta.helpers.tpuf_client import should_use_tpuf_for_messages
 from letta.log import get_logger
 from letta.schemas.agent import AgentState
 from letta.schemas.block import BlockUpdate
@@ -87,7 +88,7 @@ class LettaCoreToolExecutor(ToolExecutor):
        limit: Optional[int] = None,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
-    ) -> Optional[str]:
+    ) -> Optional[dict]:
        try:
            # Parse datetime parameters if provided
            start_datetime = None
@@ -148,10 +149,32 @@ class LettaCoreToolExecutor(ToolExecutor):
                end_date=end_datetime,
            )

-            if len(message_results) == 0:
-                results_str = "No results found."
+            # Filtering of tool messages is only necessary if we aren't using turbopuffer.
+            if should_use_tpuf_for_messages():
+                filtered_results = message_results
+            else:
+                # Filter out tool messages to prevent recursive results and exponential escaping
+                from letta.constants import CONVERSATION_SEARCH_TOOL_NAME
+                from letta.schemas.enums import MessageRole
+
+                filtered_results = []
+                for message, metadata in message_results:
+                    # Skip ALL tool messages - they contain tool execution results
+                    # which can cause recursive nesting and exponential escaping
+                    if message.role == MessageRole.tool:
+                        continue
+
+                    # Also skip assistant messages that call conversation_search
+                    # These can contain the search query which may lead to confusing results
+                    if message.role == MessageRole.assistant and message.tool_calls:
+                        if CONVERSATION_SEARCH_TOOL_NAME in [tool_call.function.name for tool_call in message.tool_calls]:
+                            continue
+
+                    filtered_results.append((message, metadata))
+
+            if len(filtered_results) == 0:
+                return {"message": "No results found.", "results": []}
            else:
-                results_pref = f"Showing {len(message_results)} results:"
                results_formatted = []
                # get current time in UTC, then convert to agent timezone for consistent comparison
                from datetime import timezone
@@ -166,7 +189,7 @@ class LettaCoreToolExecutor(ToolExecutor):
                else:
                    now = now_utc

-                for message, metadata in message_results:
+                for message, metadata in filtered_results:
                    # Format timestamp in agent's timezone if available
                    timestamp = message.created_at
                    time_delta_str = ""
@@ -249,10 +272,11 @@ class LettaCoreToolExecutor(ToolExecutor):

                    results_formatted.append(result_dict)

-                # Don't double-encode - results_formatted already has the parsed content
-                results_str = f"{results_pref} {json_dumps(results_formatted)}"
-
-            return results_str
+                # Return a structured dict (not a JSON string) to avoid double-encoding; the count reflects the post-filter results actually returned
+                return {
+                    "message": f"Showing {len(results_formatted)} results:",
+                    "results": results_formatted,
+                }

        except Exception as e:
            raise e
--- a/letta/services/tool_executor/tool_execution_manager.py
+++ b/letta/services/tool_executor/tool_execution_manager.py
@@ -1,4 +1,5 @@
 import asyncio
+import json
 import traceback
 from typing import Any, Dict, Optional, Type

@@ -122,9 +123,9 @@ class ToolExecutionManager:
            status = result.status

            # trim result
-            return_str = str(result.func_return)
+            # Stringify for the length check (dicts as JSON); default=str so values such as datetimes do not raise TypeError where str() never did
+            return_str = json.dumps(result.func_return, default=str) if isinstance(result.func_return, dict) else str(result.func_return)
            if len(return_str) > tool.return_char_limit:
-                # TODO: okay that this become a string?
                result.func_return = FUNCTION_RETURN_VALUE_TRUNCATED(return_str, len(return_str), tool.return_char_limit)
            return result

--- a/letta/system.py
+++ b/letta/system.py
@@ -1,5 +1,5 @@
 import json
-from typing import Optional
+from typing import Any, Optional

 from letta.log import get_logger

@@ -147,11 +147,21 @@ def package_user_message(
    return json_dumps(packaged_message)


-def package_function_response(was_success: bool, response_string: str, timezone: str | None) -> str:
+def package_function_response(was_success: bool, response_string: Any, timezone: str | None) -> str:
+    """Package a function response with status and timestamp.
+
+    Args:
+        was_success: Whether the function execution succeeded
+        response_string: The function response - can be a string or dict. Dicts are NOT pre-encoded to avoid double JSON encoding.
+        timezone: The timezone to use for the timestamp
+
+    Returns:
+        JSON string with status, message, and time
+    """
    formatted_time = get_local_time(timezone=timezone)
    packaged_message = {
        "status": "OK" if was_success else "Failed",
-        "message": response_string,
+        "message": response_string,  # Can be str or dict - json_dumps handles both
        "time": formatted_time,
    }

--- a/letta/utils.py
+++ b/letta/utils.py
@@ -854,11 +854,14 @@ def parse_json(string) -> dict:
        raise e


-def validate_function_response(function_response: Any, return_char_limit: int, strict: bool = False, truncate: bool = True) -> str:
+def validate_function_response(function_response: Any, return_char_limit: int, strict: bool = False, truncate: bool = True) -> Any:
    """Check to make sure that a function used by Letta returned a valid response. Truncates to return_char_limit if necessary.

-    This makes sure that we can coerce the function_response into a string that meets our criteria. We handle some soft coercion.
+    This makes sure that we can coerce the function_response into a string or dict that meets our criteria. We handle some soft coercion.
    If strict is True, we raise a ValueError if function_response is not a string or None.
+
+    Returns:
+        str or dict: Validated response. Dicts are returned as-is (to avoid double JSON encoding by package_function_response) unless truncation applies, in which case a truncated JSON string with a note is returned.
    """
    if isinstance(function_response, str):
        function_response_string = function_response
@@ -870,9 +873,17 @@ def validate_function_response(function_response: Any, return_char_limit: int, s
        raise ValueError(f"Strict mode violation. Function returned type: {type(function_response).__name__}")

    elif isinstance(function_response, dict):
-        # As functions can return arbitrary data, if there's already nesting somewhere in the response, it's difficult
-        # for us to not result in double escapes.
-        function_response_string = json_dumps(function_response)
+        # For dicts, check if truncation is needed
+        if truncate and return_char_limit:
+            # Convert to JSON string to check size
+            json_str = json_dumps(function_response)
+            if len(json_str) > return_char_limit:
+                # If truncation is needed, return truncated string
+                logger.warning(f"function return was over limit ({len(json_str)} > {return_char_limit}) and was truncated")
+                return f"{json_str[:return_char_limit]}... [NOTE: function output was truncated since it exceeded the character limit ({len(json_str)} > {return_char_limit})]"
+        # Otherwise return dict as-is to avoid double JSON encoding
+        # package_function_response will handle the final JSON serialization
+        return function_response
    else:
        logger.debug(f"Function returned type {type(function_response).__name__}. Coercing to string.")
        function_response_string = str(function_response)
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -569,14 +569,12 @@ def test_validate_function_response_none_input():


 def test_validate_function_response_dict_input():
-    """Test that dict inputs are JSON serialized"""
+    """Test that dict inputs are returned as-is (not pre-serialized) to avoid double JSON encoding"""
    test_dict = {"key": "value", "number": 42}
    response = validate_function_response(test_dict, return_char_limit=100)
-    # Response should be valid JSON string
-    import json
-
-    parsed = json.loads(response)
-    assert parsed == test_dict
+    # Response should be the dict itself, not a JSON string
+    assert isinstance(response, dict)
+    assert response == test_dict


 def test_validate_function_response_other_types():
@@ -641,14 +639,12 @@ def test_validate_function_response_exact_limit():


 def test_validate_function_response_complex_dict():
-    """Test with complex nested dictionary"""
+    """Test with complex nested dictionary - should be returned as-is"""
    complex_dict = {"nested": {"key": "value"}, "list": [1, 2, {"inner": "dict"}], "null": None, "bool": True}
    response = validate_function_response(complex_dict, return_char_limit=1000)
-    # Should be valid JSON
-    import json
-
-    parsed = json.loads(response)
-    assert parsed == complex_dict
+    # Should be the dict itself, not a JSON string
+    assert isinstance(response, dict)
+    assert response == complex_dict


 def test_validate_function_response_dict_truncation():