fix: update ContextWindowCalculator to parse new system message sections (#9398)
* fix: update ContextWindowCalculator to parse new system message sections

  The context window calculator was using outdated position-based parsing that only handled 3 sections (base_instructions, memory_blocks, memory_metadata). The actual system message now includes additional sections that were not being tracked:
  - <memory_filesystem> (git-enabled agents)
  - <tool_usage_rules> (when tool rules are configured)
  - <directories> (when sources are attached)

  Changes:
  - Add _extract_tag_content() helper for proper XML tag extraction
  - Rewrite extract_system_components() to return a Dict with all 6 sections
  - Update calculate_context_window() to count tokens for new sections
  - Add new fields to ContextWindowOverview schema with backward-compatible defaults
  - Add unit tests for the extraction logic

* update

* generate

* fix: check attached file in directories section instead of core_memory

  Files are rendered inside <directories> tags, not <memory_blocks>. Update validate_context_window_overview assertions accordingly.

* fix: address review feedback for context window parser

  - Fix git-enabled agents regression: capture bare file blocks (e.g. <system/human.md>) rendered after </memory_filesystem> as core_memory via new _extract_git_core_memory() method
  - Make _extract_top_level_tag robust: scan all occurrences to find tag outside container, handling nested-first + top-level-later case
  - Document system_prompt tag inconsistency in docstring
  - Add TODO to base_agent.py extract_dynamic_section linking to ContextWindowCalculator to flag parallel parser tech debt
  - Add tests: git-enabled agent parsing, dual-occurrence tag extraction, pure text system prompt, git-enabled integration test
This commit is contained in:
@@ -186,6 +186,9 @@ def validate_context_window_overview(
|
||||
# 2. All token counts should be non-negative
|
||||
assert overview.num_tokens_system >= 0, "System token count cannot be negative"
|
||||
assert overview.num_tokens_core_memory >= 0, "Core memory token count cannot be negative"
|
||||
assert overview.num_tokens_memory_filesystem >= 0, "Memory filesystem token count cannot be negative"
|
||||
assert overview.num_tokens_tool_usage_rules >= 0, "Tool usage rules token count cannot be negative"
|
||||
assert overview.num_tokens_directories >= 0, "Directories token count cannot be negative"
|
||||
assert overview.num_tokens_external_memory_summary >= 0, "External memory summary token count cannot be negative"
|
||||
assert overview.num_tokens_summary_memory >= 0, "Summary memory token count cannot be negative"
|
||||
assert overview.num_tokens_messages >= 0, "Messages token count cannot be negative"
|
||||
@@ -195,6 +198,9 @@ def validate_context_window_overview(
|
||||
expected_total = (
|
||||
overview.num_tokens_system
|
||||
+ overview.num_tokens_core_memory
|
||||
+ overview.num_tokens_memory_filesystem
|
||||
+ overview.num_tokens_tool_usage_rules
|
||||
+ overview.num_tokens_directories
|
||||
+ overview.num_tokens_external_memory_summary
|
||||
+ overview.num_tokens_summary_memory
|
||||
+ overview.num_tokens_messages
|
||||
@@ -244,13 +250,14 @@ def validate_context_window_overview(
|
||||
avg_tokens_per_message = overview.num_tokens_messages / overview.num_messages
|
||||
assert avg_tokens_per_message >= 0, "Average tokens per message should be non-negative"
|
||||
|
||||
# 16. Check attached file is visible
|
||||
# 16. Check attached file is visible in the directories section
|
||||
if attached_file:
|
||||
assert attached_file.visible_content in overview.core_memory, "File must be attached in core memory"
|
||||
assert '<file status="open"' in overview.core_memory
|
||||
assert "</file>" in overview.core_memory
|
||||
assert "max_files_open" in overview.core_memory, "Max files should be set in core memory"
|
||||
assert "current_files_open" in overview.core_memory, "Current files should be set in core memory"
|
||||
assert overview.directories is not None, "Directories section must exist when files are attached"
|
||||
assert attached_file.visible_content in overview.directories, "File must be attached in directories"
|
||||
assert '<file status="open"' in overview.directories
|
||||
assert "</file>" in overview.directories
|
||||
assert "max_files_open" in overview.directories, "Max files should be set in directories"
|
||||
assert "current_files_open" in overview.directories, "Current files should be set in directories"
|
||||
|
||||
# Check for tools
|
||||
assert overview.num_tokens_functions_definitions > 0
|
||||
|
||||
Reference in New Issue
Block a user