feat: add tool return truncation to summarization as a fallback [LET-5970] (#5859)
This commit is contained in:
committed by
Caren Thomas
parent
cdde791b11
commit
57bb051ea4
@@ -39,13 +39,15 @@ def get_llm_config(filename: str, llm_config_dir: str = "tests/configs/llm_model
|
||||
# Test configurations - using a subset of models for summarization tests.
# Each entry is an LLM config file name; entries that are commented out are
# not part of the list and are kept so they can be re-enabled quickly.
all_configs = [
    "openai-gpt-5-mini.json",
    "claude-4-5-haiku.json",
    "gemini-2.5-flash.json",
    # "gemini-2.5-flash-vertex.json",  # Requires Vertex AI credentials
    # "openai-gpt-4.1.json",
    # "openai-o1.json",
    # "openai-o3.json",
    # "openai-o4-mini.json",
    # "claude-4-sonnet.json",
    # "claude-3-7-sonnet.json",
    # "gemini-2.5-pro-vertex.json",
]
|
||||
|
||||
@@ -517,3 +519,86 @@ async def test_summarize_multiple_large_tool_calls(server: SyncServer, actor, ll
|
||||
assert hasattr(msg, "content")
|
||||
|
||||
print(f"Summarized {len(in_context_messages)} messages with {total_content_size} chars to {len(result)} messages")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "llm_config",
    TESTED_LLM_CONFIGS,
    ids=[c.model for c in TESTED_LLM_CONFIGS],
)
async def test_summarize_truncates_large_tool_return(server: SyncServer, actor, llm_config: LLMConfig):
    """
    Check that summarization shrinks an oversized tool return.

    A four-message conversation (user request, assistant tool call, tool
    return, assistant wrap-up) is summarized. Every tool return still present
    in the result must be strictly shorter than the original payload. A result
    that contains no tool returns at all is accepted as well.
    """
    # Oversized payload, requested at 100k chars
    large_return = create_large_tool_return(100000)
    original_size = len(large_return)

    # Conversation pieces, built in chronological order
    user_request = PydanticMessage(
        role=MessageRole.user,
        content=[TextContent(type="text", text="Please run the database query.")],
    )
    assistant_call = PydanticMessage(
        role=MessageRole.assistant,
        content=[
            TextContent(type="text", text="Running query..."),
            ToolCallContent(
                type="tool_call",
                id="call_1",
                name="run_query",
                input={"query": "SELECT * FROM large_table"},
            ),
        ],
    )
    tool_reply = PydanticMessage(
        role=MessageRole.tool,
        tool_call_id="call_1",
        content=[
            ToolReturnContent(
                type="tool_return",
                tool_call_id="call_1",
                content=large_return,
                is_error=False,
            )
        ],
    )
    assistant_wrap_up = PydanticMessage(
        role=MessageRole.assistant,
        content=[TextContent(type="text", text="Query completed successfully with many results.")],
    )
    messages = [user_request, assistant_call, tool_reply, assistant_wrap_up]

    agent_state, in_context_messages = await create_agent_with_messages(server, actor, llm_config, messages)

    # Sanity check on the fixture: the payload really is large
    assert original_size > 90000, f"Expected tool return >90k chars, got {original_size}"

    result = await run_summarization(server, agent_state, in_context_messages, actor)

    # Basic shape of the summarization output
    assert isinstance(result, list)
    assert len(result) >= 1

    def surviving_tool_returns():
        # Lazily walk the result so parts are inspected in message order,
        # one at a time, exactly as they are consumed below.
        for message in result:
            if message.role != MessageRole.tool:
                continue
            for part in message.content:
                if isinstance(part, ToolReturnContent):
                    yield part

    checked = 0
    for tool_return in surviving_tool_returns():
        checked += 1
        result_size = len(tool_return.content)
        # Anything that survived must be smaller than what went in
        assert result_size < original_size, (
            f"Expected tool return to be truncated from {original_size} chars, but got {result_size} chars"
        )
        print(f"Tool return successfully truncated from {original_size} to {result_size} chars")

    # No surviving tool returns is acceptable too: aggressive summarization
    # may have dropped them entirely.
    if checked == 0:
        print("Tool returns were completely removed during summarization")
|
||||
|
||||
Reference in New Issue
Block a user