From 4c8e9af4bd0202c48befb9c6303e34c33a02df00 Mon Sep 17 00:00:00 2001 From: Matthew Zhou Date: Mon, 7 Jul 2025 16:40:25 -0700 Subject: [PATCH] feat: Add error on out of range for open_files (#3214) --- .../file_processor/chunker/line_chunker.py | 38 +++++-- .../tool_executor/files_tool_executor.py | 2 +- tests/test_utils.py | 102 ++++++++++++++++++ 3 files changed, 135 insertions(+), 7 deletions(-) diff --git a/letta/services/file_processor/chunker/line_chunker.py b/letta/services/file_processor/chunker/line_chunker.py index 3b49a43f..c06f024b 100644 --- a/letta/services/file_processor/chunker/line_chunker.py +++ b/letta/services/file_processor/chunker/line_chunker.py @@ -99,7 +99,12 @@ class LineChunker: return [line for line in lines if line.strip()] def chunk_text( - self, file_metadata: FileMetadata, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True + self, + file_metadata: FileMetadata, + start: Optional[int] = None, + end: Optional[int] = None, + add_metadata: bool = True, + validate_range: bool = False, ) -> List[str]: """Content-aware text chunking based on file type""" strategy = self._determine_chunking_strategy(file_metadata) @@ -116,11 +121,31 @@ class LineChunker: content_lines = self._chunk_by_lines(text, preserve_indentation=False) total_chunks = len(content_lines) + chunk_type = ( + "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines" + ) + + # Validate range if requested + if validate_range and (start is not None or end is not None): + if start is not None and start >= total_chunks: + # Convert to 1-indexed for user-friendly error message + start_display = start + 1 + raise ValueError( + f"File {file_metadata.file_name} has only {total_chunks} lines, but requested offset {start_display} is out of range" + ) + + if start is not None and end is not None and end > total_chunks: + # Convert to 1-indexed for user-friendly error message + start_display = start + 1 + end_display = end + raise ValueError( + f"File {file_metadata.file_name} has only {total_chunks} lines, but requested range {start_display} to {end_display} extends beyond file bounds" + ) # Handle start/end slicing - if start is not None and end is not None: + if start is not None or end is not None: content_lines = content_lines[start:end] - line_offset = start + line_offset = start if start is not None else 0 else: line_offset = 0 @@ -129,14 +154,15 @@ class LineChunker: # Add metadata about total chunks if add_metadata: - chunk_type = ( - "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines" - ) if start is not None and end is not None: # Display 1-indexed ranges for users start_display = start + 1 end_display = end content_lines.insert(0, f"[Viewing {chunk_type} {start_display} to {end_display} (out of {total_chunks} {chunk_type})]") + elif start is not None: + # Only start specified - viewing from start to end + start_display = start + 1 + content_lines.insert(0, f"[Viewing {chunk_type} {start_display} to end (out of {total_chunks} {chunk_type})]") else: content_lines.insert(0, f"[Viewing file start (out of {total_chunks} {chunk_type})]") diff --git a/letta/services/tool_executor/files_tool_executor.py b/letta/services/tool_executor/files_tool_executor.py index 1de53724..4815243a 100644 --- a/letta/services/tool_executor/files_tool_executor.py +++ b/letta/services/tool_executor/files_tool_executor.py @@ -175,7 +175,7 @@ class LettaFileToolExecutor(ToolExecutor): file = await self.file_manager.get_file_by_id(file_id=file_id, actor=self.actor, include_content=True) # Process file content - content_lines = LineChunker().chunk_text(file_metadata=file, start=start, end=end) + content_lines = LineChunker().chunk_text(file_metadata=file, start=start, end=end, validate_range=True) visible_content = "\n".join(content_lines) # Handle LRU eviction and file opening diff --git a/tests/test_utils.py b/tests/test_utils.py index 1af23e62..5b0e724c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,6 +2,8 @@ import pytest from letta.constants import MAX_FILENAME_LENGTH from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_function_annotations_from_source +from letta.schemas.file import FileMetadata +from letta.services.file_processor.chunker.line_chunker import LineChunker from letta.services.helpers.agent_manager_helper import safe_format from letta.utils import sanitize_filename @@ -407,3 +409,103 @@ def test_formatter(): """ assert UNUSED_AND_EMPRY_VAR_SOL == safe_format(UNUSED_AND_EMPRY_VAR, VARS_DICT) + + +# ---------------------- LineChunker TESTS ---------------------- # + + +def test_line_chunker_valid_range(): + """Test that LineChunker works correctly with valid ranges""" + file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3\nline4") + chunker = LineChunker() + + # Test valid range with validation + result = chunker.chunk_text(file, start=1, end=3, validate_range=True) + # Should return lines 2 and 3 (0-indexed 1:3) + assert "[Viewing lines 2 to 3 (out of 4 lines)]" in result[0] + assert "2: line2" in result[1] + assert "3: line3" in result[2] + + +def test_line_chunker_valid_range_no_validation(): + """Test that LineChunker works the same without validation for valid ranges""" + file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3\nline4") + chunker = LineChunker() + + # Test same range without validation + result = chunker.chunk_text(file, start=1, end=3, validate_range=False) + assert "[Viewing lines 2 to 3 (out of 4 lines)]" in result[0] + assert "2: line2" in result[1] + assert "3: line3" in result[2] + + +def test_line_chunker_out_of_range_start(): + """Test that LineChunker throws error when start is out of range""" + file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3") + chunker = LineChunker() + + # Test with start beyond file length (3 lines, requesting start=5 which is 0-indexed 4) + with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 6 is out of range"): + chunker.chunk_text(file, start=5, end=6, validate_range=True) + + +def test_line_chunker_out_of_range_end(): + """Test that LineChunker throws error when end extends beyond file bounds""" + file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3") + chunker = LineChunker() + + # Test with end beyond file length (3 lines, requesting 1 to 10) + with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested range 1 to 10 extends beyond file bounds"): + chunker.chunk_text(file, start=0, end=10, validate_range=True) + + +def test_line_chunker_edge_case_empty_file(): + """Test that LineChunker handles empty files correctly""" + file = FileMetadata(file_name="empty.py", source_id="test_source", content="") + chunker = LineChunker() + + # Test requesting lines from empty file + with pytest.raises(ValueError, match="File empty.py has only 0 lines, but requested offset 1 is out of range"): + chunker.chunk_text(file, start=0, end=1, validate_range=True) + + +def test_line_chunker_edge_case_single_line(): + """Test that LineChunker handles single line files correctly""" + file = FileMetadata(file_name="single.py", source_id="test_source", content="only line") + chunker = LineChunker() + + # Test valid single line access + result = chunker.chunk_text(file, start=0, end=1, validate_range=True) + assert "1: only line" in result[1] + + # Test out of range for single line file + with pytest.raises(ValueError, match="File single.py has only 1 lines, but requested offset 2 is out of range"): + chunker.chunk_text(file, start=1, end=2, validate_range=True) + + +def test_line_chunker_validation_disabled_allows_out_of_range(): + """Test that when validation is disabled, out of range silently returns partial results""" + file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3") + chunker = LineChunker() + + # Test with validation disabled - should not raise error + result = chunker.chunk_text(file, start=5, end=10, validate_range=False) + # Should return empty content (except metadata header) since slice is out of bounds + assert len(result) == 1 # Only metadata header + assert "[Viewing lines 6 to 10 (out of 3 lines)]" in result[0] + + +def test_line_chunker_only_start_parameter(): + """Test validation with only start parameter specified""" + file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3") + chunker = LineChunker() + + # Test valid start only + result = chunker.chunk_text(file, start=1, validate_range=True) + assert "[Viewing lines 2 to end (out of 3 lines)]" in result[0] + assert "2: line2" in result[1] + assert "3: line3" in result[2] + + # Test invalid start only + with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 4 is out of range"): + chunker.chunk_text(file, start=3, validate_range=True)