feat: include tools as part of token estimate in compact (#9242)

* base

* fix
This commit is contained in:
jnjpng
2026-02-02 15:58:49 -08:00
committed by Caren Thomas
parent 0bbb9c9bc0
commit 24ea7dbaed
3 changed files with 61 additions and 7 deletions

View File

@@ -63,6 +63,7 @@ from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import CompactionSettings
from letta.services.summarizer.summarizer_sliding_window import (
count_tokens,
count_tokens_with_tools,
summarize_via_sliding_window,
)
from letta.settings import settings, summarizer_settings
@@ -1740,9 +1741,12 @@ class LettaAgentV3(LettaAgentV2):
else:
raise ValueError(f"Invalid summarizer mode: {summarizer_config.mode}")
# update the token count
self.context_token_estimate = await count_tokens(
actor=self.actor, llm_config=self.agent_state.llm_config, messages=compacted_messages
# update the token count (including tools for accurate comparison with LLM's prompt_tokens)
self.context_token_estimate = await count_tokens_with_tools(
actor=self.actor,
llm_config=self.agent_state.llm_config,
messages=compacted_messages,
tools=self.agent_state.tools,
)
self.logger.info(f"Context token estimate after summarization: {self.context_token_estimate}")
@@ -1775,8 +1779,11 @@ class LettaAgentV3(LettaAgentV2):
)
summarization_mode_used = "all"
self.context_token_estimate = await count_tokens(
actor=self.actor, llm_config=self.agent_state.llm_config, messages=compacted_messages
self.context_token_estimate = await count_tokens_with_tools(
actor=self.actor,
llm_config=self.agent_state.llm_config,
messages=compacted_messages,
tools=self.agent_state.tools,
)
# final edge case: the system prompt is the cause of the context overflow (raise error)

View File

@@ -42,6 +42,53 @@ async def count_tokens(actor: User, llm_config: LLMConfig, messages: List[Messag
return tokens
async def count_tokens_with_tools(
    actor: User,
    llm_config: LLMConfig,
    messages: List[Message],
    tools: Optional[List["Tool"]] = None,
) -> int:
    """Return the combined token count of *messages* and any tool definitions.

    Tool schemas are sent to the LLM alongside the conversation but are not
    part of the message list, so including them yields a closer match to the
    provider-reported prompt token count.

    Args:
        actor: The user making the request.
        llm_config: The LLM configuration for selecting the appropriate tokenizer.
        messages: The in-context messages (including system message).
        tools: Optional list of Tool objects. If provided, their schemas are counted.

    Returns:
        Total token count for messages + tools.
    """
    # Message counting is delegated to the existing helper, which already
    # applies the approximate-counting safety margin to its own result.
    total = await count_tokens(actor, llm_config, messages)
    if not tools:
        return total

    # Imported locally, mirroring the original, to keep module import cost
    # low and avoid import cycles at load time.
    from openai.types.beta.function_tool import FunctionTool as OpenAITool

    from letta.services.context_window_calculator.token_counter import ApproxTokenCounter

    counter = create_token_counter(
        model_endpoint_type=llm_config.model_endpoint_type,
        model=llm_config.model,
        actor=actor,
    )
    schemas = [OpenAITool(type="function", function=t.json_schema) for t in tools if t.json_schema]
    schema_tokens = await counter.count_tool_tokens(schemas) if schemas else 0

    # Approximate counters undercount, so scale the tool tokens by the same
    # safety margin already baked into the message-token figure.
    if isinstance(counter, ApproxTokenCounter):
        schema_tokens = int(schema_tokens * APPROX_TOKEN_SAFETY_MARGIN)
    return total + schema_tokens
@trace_method
async def summarize_via_sliding_window(
# Required to tag LLM calls

View File

@@ -1046,10 +1046,10 @@ async def test_v3_summarize_hard_eviction_when_still_over_threshold(
# summarize_conversation_history to run and then hit the branch where the
# *post*-summarization token count is still above the proactive
# summarization threshold. We simulate that by patching the
# letta_agent_v3-level count_tokens helper to report an extremely large
# letta_agent_v3-level count_tokens_with_tools helper to report an extremely large
# token count for the first call (post-summary) and a small count for the
# second call (after hard eviction).
with patch("letta.agents.letta_agent_v3.count_tokens") as mock_count_tokens:
with patch("letta.agents.letta_agent_v3.count_tokens_with_tools") as mock_count_tokens:
# First call: pretend the summarized context is still huge relative to
# this model's context window so that we always trigger the
# hard-eviction path. Second call: minimal context (system only) is