feat: include tools as part of token estimate in compact (#9242)
* base * fix
This commit is contained in:
@@ -63,6 +63,7 @@ from letta.services.summarizer.summarizer_all import summarize_all
|
||||
from letta.services.summarizer.summarizer_config import CompactionSettings
|
||||
from letta.services.summarizer.summarizer_sliding_window import (
|
||||
count_tokens,
|
||||
count_tokens_with_tools,
|
||||
summarize_via_sliding_window,
|
||||
)
|
||||
from letta.settings import settings, summarizer_settings
|
||||
@@ -1740,9 +1741,12 @@ class LettaAgentV3(LettaAgentV2):
|
||||
else:
|
||||
raise ValueError(f"Invalid summarizer mode: {summarizer_config.mode}")
|
||||
|
||||
# update the token count
|
||||
self.context_token_estimate = await count_tokens(
|
||||
actor=self.actor, llm_config=self.agent_state.llm_config, messages=compacted_messages
|
||||
# update the token count (including tools for accurate comparison with LLM's prompt_tokens)
|
||||
self.context_token_estimate = await count_tokens_with_tools(
|
||||
actor=self.actor,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
messages=compacted_messages,
|
||||
tools=self.agent_state.tools,
|
||||
)
|
||||
self.logger.info(f"Context token estimate after summarization: {self.context_token_estimate}")
|
||||
|
||||
@@ -1775,8 +1779,11 @@ class LettaAgentV3(LettaAgentV2):
|
||||
)
|
||||
summarization_mode_used = "all"
|
||||
|
||||
self.context_token_estimate = await count_tokens(
|
||||
actor=self.actor, llm_config=self.agent_state.llm_config, messages=compacted_messages
|
||||
self.context_token_estimate = await count_tokens_with_tools(
|
||||
actor=self.actor,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
messages=compacted_messages,
|
||||
tools=self.agent_state.tools,
|
||||
)
|
||||
|
||||
# final edge case: the system prompt is the cause of the context overflow (raise error)
|
||||
|
||||
@@ -42,6 +42,53 @@ async def count_tokens(actor: User, llm_config: LLMConfig, messages: List[Messag
|
||||
return tokens
|
||||
|
||||
|
||||
async def count_tokens_with_tools(
    actor: User,
    llm_config: LLMConfig,
    messages: List[Message],
    tools: Optional[List["Tool"]] = None,
) -> int:
    """Count tokens in messages AND tool definitions.

    This provides a more accurate context token count by including tool definitions,
    which are sent to the LLM but not included in the messages list, so the estimate
    lines up better with the provider-reported ``prompt_tokens``.

    Args:
        actor: The user making the request.
        llm_config: The LLM configuration for selecting the appropriate tokenizer.
        messages: The in-context messages (including system message).
        tools: Optional list of Tool objects. If provided, their schemas are counted.

    Returns:
        Total token count for messages + tools.
    """
    # Delegate message counting to the existing helper so message tokenization
    # stays in one place (it also applies its own safety margin — see below).
    message_tokens = await count_tokens(actor, llm_config, messages)

    # No tools attached: the plain message count is already the full answer.
    if not tools:
        return message_tokens

    # Count tools. Imports are deferred to keep the module import graph light
    # on the common (tool-less) path.
    from openai.types.beta.function_tool import FunctionTool as OpenAITool

    from letta.services.context_window_calculator.token_counter import ApproxTokenCounter

    # NOTE(review): create_token_counter and APPROX_TOKEN_SAFETY_MARGIN are
    # assumed to be module-level names in this file — not visible in this diff.
    token_counter = create_token_counter(
        model_endpoint_type=llm_config.model_endpoint_type,
        model=llm_config.model,
        actor=actor,
    )

    # Tools without a json_schema contribute nothing to the prompt, so they
    # are filtered out before wrapping in the OpenAI function-tool shape.
    tool_definitions = [OpenAITool(type="function", function=t.json_schema) for t in tools if t.json_schema]
    tool_tokens = await token_counter.count_tool_tokens(tool_definitions) if tool_definitions else 0

    # Apply safety margin for approximate counting (message_tokens already has margin applied)
    if isinstance(token_counter, ApproxTokenCounter):
        tool_tokens = int(tool_tokens * APPROX_TOKEN_SAFETY_MARGIN)

    return message_tokens + tool_tokens
|
||||
|
||||
|
||||
@trace_method
|
||||
async def summarize_via_sliding_window(
|
||||
# Required to tag LLM calls
|
||||
|
||||
@@ -1046,10 +1046,10 @@ async def test_v3_summarize_hard_eviction_when_still_over_threshold(
|
||||
# summarize_conversation_history to run and then hit the branch where the
|
||||
# *post*-summarization token count is still above the proactive
|
||||
# summarization threshold. We simulate that by patching the
|
||||
# letta_agent_v3-level count_tokens helper to report an extremely large
|
||||
# letta_agent_v3-level count_tokens_with_tools helper to report an extremely large
|
||||
# token count for the first call (post-summary) and a small count for the
|
||||
# second call (after hard eviction).
|
||||
with patch("letta.agents.letta_agent_v3.count_tokens") as mock_count_tokens:
|
||||
with patch("letta.agents.letta_agent_v3.count_tokens_with_tools") as mock_count_tokens:
|
||||
# First call: pretend the summarized context is still huge relative to
|
||||
# this model's context window so that we always trigger the
|
||||
# hard-eviction path. Second call: minimal context (system only) is
|
||||
|
||||
Reference in New Issue
Block a user