fix(core): big context overflow handling patch (#5901)

2025-11-01 14:30:33 -07:00
parent 7427c0998e
commit a44c05040a
2 changed files with 49 additions and 1 deletion
--- a/letta/agents/letta_agent_v3.py
+++ b/letta/agents/letta_agent_v3.py
@@ -71,6 +71,18 @@ class LettaAgentV3(LettaAgentV2):
        self.last_step_usage = None
        self.response_messages_for_metadata = []  # Separate accumulator for streaming job metadata

+    def _compute_tool_return_truncation_chars(self) -> int:
+        """Compute a dynamic cap for tool returns in requests.
+
+        Heuristic: ~20% of context window × 4 chars/token, minimum 5k chars.
+        This prevents any single tool return from consuming too much context.
+        """
+        try:
+            cap = int(self.agent_state.llm_config.context_window * 0.2 * 4)  # 20% of tokens → chars
+        except Exception:
+            cap = 5000
+        return max(5000, cap)
+
    def _update_global_usage_stats(self, step_usage_stats: LettaUsageStatistics):
        """Override to track per-step usage for context limit checks"""
        self.last_step_usage = step_usage_stats
@@ -424,6 +436,7 @@ class LettaAgentV3(LettaAgentV2):
                            tools=valid_tools,
                            force_tool_call=force_tool_call,
                            requires_subsequent_tool_call=self._require_tool_call,
+                            tool_return_truncation_chars=self._compute_tool_return_truncation_chars(),
                        )
                        # TODO: Extend to more providers, and also approval tool rules
                        # Enable parallel tool use when no tool rules are attached
@@ -801,6 +814,39 @@ class LettaAgentV3(LettaAgentV2):

        # 3. Handle client side tool execution
        if tool_returns:
+            # Clamp client-side tool returns before persisting (JSON-aware: truncate only the 'message' field)
+            try:
+                cap = self._compute_tool_return_truncation_chars()
+            except Exception:
+                cap = 5000
+
+            for tr in tool_returns:
+                try:
+                    if tr.func_response and isinstance(tr.func_response, str):
+                        parsed = json.loads(tr.func_response)
+                        if isinstance(parsed, dict) and "message" in parsed and isinstance(parsed["message"], str):
+                            msg = parsed["message"]
+                            if len(msg) > cap:
+                                original_len = len(msg)
+                                parsed["message"] = msg[:cap] + f"... [truncated {original_len - cap} chars]"
+                                tr.func_response = json.dumps(parsed)
+                                self.logger.warning(f"Truncated client-side tool return message from {original_len} to {cap} chars")
+                        else:
+                            # Fallback to raw string truncation if not a dict with 'message'
+                            if len(tr.func_response) > cap:
+                                original_len = len(tr.func_response)
+                                tr.func_response = tr.func_response[:cap] + f"... [truncated {original_len - cap} chars]"
+                                self.logger.warning(f"Truncated client-side tool return (raw) from {original_len} to {cap} chars")
+                except json.JSONDecodeError:
+                    # Non-JSON or unexpected shape; truncate as raw string
+                    if tr.func_response and len(tr.func_response) > cap:
+                        original_len = len(tr.func_response)
+                        tr.func_response = tr.func_response[:cap] + f"... [truncated {original_len - cap} chars]"
+                        self.logger.warning(f"Truncated client-side tool return (non-JSON) from {original_len} to {cap} chars")
+                except Exception as e:
+                    # Unexpected error; log and skip truncation for this return
+                    self.logger.warning(f"Failed to truncate client-side tool return: {e}")
+
            continue_stepping = True
            stop_reason = None
            result_tool_returns = tool_returns
--- a/letta/schemas/message.py
+++ b/letta/schemas/message.py
@@ -1255,9 +1255,11 @@ class Message(BaseMessage):
                for tr in m.tool_returns:
                    if not tr.tool_call_id:
                        raise TypeError("ToolReturn came back without a tool_call_id.")
+                    # Ensure explicit tool_returns are truncated for Chat Completions
+                    func_response = truncate_tool_return(tr.func_response, tool_return_truncation_chars)
                    result.append(
                        {
-                            "content": tr.func_response,
+                            "content": func_response,
                            "role": "tool",
                            "tool_call_id": tr.tool_call_id[:max_tool_id_length] if max_tool_id_length else tr.tool_call_id,
                        }