feat: include tools as part of token estimate in compact (#9242)
* base * fix
This commit is contained in:
@@ -63,6 +63,7 @@ from letta.services.summarizer.summarizer_all import summarize_all
|
||||
from letta.services.summarizer.summarizer_config import CompactionSettings
|
||||
from letta.services.summarizer.summarizer_sliding_window import (
|
||||
count_tokens,
|
||||
count_tokens_with_tools,
|
||||
summarize_via_sliding_window,
|
||||
)
|
||||
from letta.settings import settings, summarizer_settings
|
||||
@@ -1740,9 +1741,12 @@ class LettaAgentV3(LettaAgentV2):
|
||||
else:
|
||||
raise ValueError(f"Invalid summarizer mode: {summarizer_config.mode}")
|
||||
|
||||
# update the token count
|
||||
self.context_token_estimate = await count_tokens(
|
||||
actor=self.actor, llm_config=self.agent_state.llm_config, messages=compacted_messages
|
||||
# update the token count (including tools for accurate comparison with LLM's prompt_tokens)
|
||||
self.context_token_estimate = await count_tokens_with_tools(
|
||||
actor=self.actor,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
messages=compacted_messages,
|
||||
tools=self.agent_state.tools,
|
||||
)
|
||||
self.logger.info(f"Context token estimate after summarization: {self.context_token_estimate}")
|
||||
|
||||
@@ -1775,8 +1779,11 @@ class LettaAgentV3(LettaAgentV2):
|
||||
)
|
||||
summarization_mode_used = "all"
|
||||
|
||||
self.context_token_estimate = await count_tokens(
|
||||
actor=self.actor, llm_config=self.agent_state.llm_config, messages=compacted_messages
|
||||
self.context_token_estimate = await count_tokens_with_tools(
|
||||
actor=self.actor,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
messages=compacted_messages,
|
||||
tools=self.agent_state.tools,
|
||||
)
|
||||
|
||||
# final edge case: the system prompt is the cause of the context overflow (raise error)
|
||||
|
||||
@@ -42,6 +42,53 @@ async def count_tokens(actor: User, llm_config: LLMConfig, messages: List[Messag
|
||||
return tokens
|
||||
|
||||
|
||||
async def count_tokens_with_tools(
    actor: User,
    llm_config: LLMConfig,
    messages: List[Message],
    tools: Optional[List["Tool"]] = None,
) -> int:
    """Count tokens in messages AND tool definitions.

    This provides a more accurate context token count by including tool definitions,
    which are sent to the LLM but not included in the messages list, so the estimate
    lines up better with the provider-reported ``prompt_tokens``.

    Args:
        actor: The user making the request.
        llm_config: The LLM configuration for selecting the appropriate tokenizer.
        messages: The in-context messages (including system message).
        tools: Optional list of Tool objects. If provided, their schemas are counted.

    Returns:
        Total token count for messages + tools.
    """
    # Delegate message counting to the existing helper so message tokenization
    # stays in one place (it also applies its own safety margin — see below).
    message_tokens = await count_tokens(actor, llm_config, messages)

    # No tools attached: the plain message count is already the full answer.
    if not tools:
        return message_tokens

    # Count tools. Imports are deferred to keep the module import graph light
    # on the common (tool-less) path.
    from openai.types.beta.function_tool import FunctionTool as OpenAITool

    from letta.services.context_window_calculator.token_counter import ApproxTokenCounter

    # NOTE(review): create_token_counter and APPROX_TOKEN_SAFETY_MARGIN are
    # assumed to be module-level names in this file — not visible in this diff.
    token_counter = create_token_counter(
        model_endpoint_type=llm_config.model_endpoint_type,
        model=llm_config.model,
        actor=actor,
    )

    # Tools without a json_schema contribute nothing to the prompt, so they
    # are filtered out before wrapping in the OpenAI function-tool shape.
    tool_definitions = [OpenAITool(type="function", function=t.json_schema) for t in tools if t.json_schema]
    tool_tokens = await token_counter.count_tool_tokens(tool_definitions) if tool_definitions else 0

    # Apply safety margin for approximate counting (message_tokens already has margin applied)
    if isinstance(token_counter, ApproxTokenCounter):
        tool_tokens = int(tool_tokens * APPROX_TOKEN_SAFETY_MARGIN)

    return message_tokens + tool_tokens
|
||||
|
||||
|
||||
@trace_method
|
||||
async def summarize_via_sliding_window(
|
||||
# Required to tag LLM calls
|
||||
|
||||
@@ -1046,10 +1046,10 @@ async def test_v3_summarize_hard_eviction_when_still_over_threshold(
|
||||
# summarize_conversation_history to run and then hit the branch where the
|
||||
# *post*-summarization token count is still above the proactive
|
||||
# summarization threshold. We simulate that by patching the
|
||||
# letta_agent_v3-level count_tokens helper to report an extremely large
|
||||
# letta_agent_v3-level count_tokens_with_tools helper to report an extremely large
|
||||
# token count for the first call (post-summary) and a small count for the
|
||||
# second call (after hard eviction).
|
||||
with patch("letta.agents.letta_agent_v3.count_tokens") as mock_count_tokens:
|
||||
with patch("letta.agents.letta_agent_v3.count_tokens_with_tools") as mock_count_tokens:
|
||||
# First call: pretend the summarized context is still huge relative to
|
||||
# this model's context window so that we always trigger the
|
||||
# hard-eviction path. Second call: minimal context (system only) is
|
||||
|
||||
Reference in New Issue
Block a user