diff --git a/letta/services/tool_executor/core_tool_executor.py b/letta/services/tool_executor/core_tool_executor.py index 6918daaa..e9e1ccf6 100644 --- a/letta/services/tool_executor/core_tool_executor.py +++ b/letta/services/tool_executor/core_tool_executor.py @@ -9,6 +9,7 @@ from letta.constants import ( RETRIEVAL_QUERY_DEFAULT_PAGE_SIZE, ) from letta.helpers.json_helpers import json_dumps +from letta.helpers.tpuf_client import should_use_tpuf_for_messages from letta.log import get_logger from letta.schemas.agent import AgentState from letta.schemas.block import BlockUpdate @@ -87,7 +88,7 @@ class LettaCoreToolExecutor(ToolExecutor): limit: Optional[int] = None, start_date: Optional[str] = None, end_date: Optional[str] = None, - ) -> Optional[str]: + ) -> Optional[dict]: try: # Parse datetime parameters if provided start_datetime = None @@ -148,10 +149,32 @@ class LettaCoreToolExecutor(ToolExecutor): end_date=end_datetime, ) - if len(message_results) == 0: - results_str = "No results found." + # Filtering of tool messages is only necessary if we aren't using turbopuffer. 
+ if should_use_tpuf_for_messages(): + filtered_results = message_results + else: + # Filter out tool messages to prevent recursive results and exponential escaping + from letta.constants import CONVERSATION_SEARCH_TOOL_NAME + from letta.schemas.enums import MessageRole + + filtered_results = [] + for message, metadata in message_results: + # Skip ALL tool messages - they contain tool execution results + # which can cause recursive nesting and exponential escaping + if message.role == MessageRole.tool: + continue + + # Also skip assistant messages that call conversation_search + # These can contain the search query which may lead to confusing results + if message.role == MessageRole.assistant and message.tool_calls: + if CONVERSATION_SEARCH_TOOL_NAME in [tool_call.function.name for tool_call in message.tool_calls]: + continue + + filtered_results.append((message, metadata)) + + if len(filtered_results) == 0: + return {"message": "No results found.", "results": []} else: - results_pref = f"Showing {len(message_results)} results:" results_formatted = [] # get current time in UTC, then convert to agent timezone for consistent comparison from datetime import timezone @@ -166,7 +189,7 @@ class LettaCoreToolExecutor(ToolExecutor): else: now = now_utc - for message, metadata in message_results: + for message, metadata in filtered_results: # Format timestamp in agent's timezone if available timestamp = message.created_at time_delta_str = "" @@ -249,10 +272,11 @@ class LettaCoreToolExecutor(ToolExecutor): results_formatted.append(result_dict) - # Don't double-encode - results_formatted already has the parsed content - results_str = f"{results_pref} {json_dumps(results_formatted)}" - - return results_str + # Return structured dict instead of JSON string to avoid double-encoding; the count reflects the results actually returned after filtering, not the pre-filter hits + return { + "message": f"Showing {len(results_formatted)} results:", + "results": results_formatted, + } except Exception as e: raise e diff --git 
a/letta/services/tool_executor/tool_execution_manager.py b/letta/services/tool_executor/tool_execution_manager.py index 9a190d5d..bffce487 100644 --- a/letta/services/tool_executor/tool_execution_manager.py +++ b/letta/services/tool_executor/tool_execution_manager.py @@ -1,4 +1,5 @@ import asyncio +import json import traceback from typing import Any, Dict, Optional, Type @@ -122,9 +123,9 @@ class ToolExecutionManager: status = result.status # trim result - return_str = str(result.func_return) + # Convert to string representation, preserving dict structure when within limit; default=str keeps non-JSON-serializable values from raising here (the previous str() call could never fail) + return_str = json.dumps(result.func_return, default=str) if isinstance(result.func_return, dict) else str(result.func_return) if len(return_str) > tool.return_char_limit: - # TODO: okay that this become a string? result.func_return = FUNCTION_RETURN_VALUE_TRUNCATED(return_str, len(return_str), tool.return_char_limit) return result diff --git a/letta/system.py b/letta/system.py index dfbf5b28..c545fbbb 100644 --- a/letta/system.py +++ b/letta/system.py @@ -1,5 +1,5 @@ import json -from typing import Optional +from typing import Any, Optional from letta.log import get_logger @@ -147,11 +147,21 @@ def package_user_message( return json_dumps(packaged_message) -def package_function_response(was_success: bool, response_string: str, timezone: str | None) -> str: +def package_function_response(was_success: bool, response_string: Any, timezone: str | None) -> str: + """Package a function response with status and timestamp. + + Args: + was_success: Whether the function execution succeeded + response_string: The function response - can be a string or dict. Dicts are NOT pre-encoded to avoid double JSON encoding. 
+ timezone: The timezone to use for the timestamp + + Returns: + JSON string with status, message, and time + """ formatted_time = get_local_time(timezone=timezone) packaged_message = { "status": "OK" if was_success else "Failed", - "message": response_string, + "message": response_string, # Can be str or dict - json_dumps handles both "time": formatted_time, } diff --git a/letta/utils.py b/letta/utils.py index 5f02d429..ae2f6d2c 100644 --- a/letta/utils.py +++ b/letta/utils.py @@ -854,11 +854,14 @@ def parse_json(string) -> dict: raise e -def validate_function_response(function_response: Any, return_char_limit: int, strict: bool = False, truncate: bool = True) -> str: +def validate_function_response(function_response: Any, return_char_limit: int, strict: bool = False, truncate: bool = True) -> Any: """Check to make sure that a function used by Letta returned a valid response. Truncates to return_char_limit if necessary. - This makes sure that we can coerce the function_response into a string that meets our criteria. We handle some soft coercion. + This makes sure that we can coerce the function_response into a string or dict that meets our criteria. We handle some soft coercion. If strict is True, we raise a ValueError if function_response is not a string or None. + + Returns: + str or dict: Validated response. Dicts are returned as-is to avoid double JSON encoding by package_function_response. """ if isinstance(function_response, str): function_response_string = function_response @@ -870,9 +873,17 @@ def validate_function_response(function_response: Any, return_char_limit: int, s raise ValueError(f"Strict mode violation. Function returned type: {type(function_response).__name__}") elif isinstance(function_response, dict): - # As functions can return arbitrary data, if there's already nesting somewhere in the response, it's difficult - # for us to not result in double escapes. 
- function_response_string = json_dumps(function_response) + # For dicts, check if truncation is needed + if truncate and return_char_limit: + # Convert to JSON string to check size + json_str = json_dumps(function_response) + if len(json_str) > return_char_limit: + # If truncation is needed, return truncated string + logger.warning(f"function return was over limit ({len(json_str)} > {return_char_limit}) and was truncated") + return f"{json_str[:return_char_limit]}... [NOTE: function output was truncated since it exceeded the character limit ({len(json_str)} > {return_char_limit})]" + # Otherwise return dict as-is to avoid double JSON encoding + # package_function_response will handle the final JSON serialization + return function_response else: logger.debug(f"Function returned type {type(function_response).__name__}. Coercing to string.") function_response_string = str(function_response) diff --git a/tests/test_utils.py b/tests/test_utils.py index bf89ed0b..3d16039e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -569,14 +569,12 @@ def test_validate_function_response_none_input(): def test_validate_function_response_dict_input(): - """Test that dict inputs are JSON serialized""" + """Test that dict inputs are returned as-is (not pre-serialized) to avoid double JSON encoding""" test_dict = {"key": "value", "number": 42} response = validate_function_response(test_dict, return_char_limit=100) - # Response should be valid JSON string - import json - - parsed = json.loads(response) - assert parsed == test_dict + # Response should be the dict itself, not a JSON string + assert isinstance(response, dict) + assert response == test_dict def test_validate_function_response_other_types(): @@ -641,14 +639,12 @@ def test_validate_function_response_exact_limit(): def test_validate_function_response_complex_dict(): - """Test with complex nested dictionary""" + """Test with complex nested dictionary - should be returned as-is""" complex_dict = {"nested": {"key": 
"value"}, "list": [1, 2, {"inner": "dict"}], "null": None, "bool": True} response = validate_function_response(complex_dict, return_char_limit=1000) - # Should be valid JSON - import json - - parsed = json.loads(response) - assert parsed == complex_dict + # Should be the dict itself, not a JSON string + assert isinstance(response, dict) + assert response == complex_dict def test_validate_function_response_dict_truncation():