feat: Add content aware line chunking (#2707)

2025-06-09 13:03:25 -07:00
parent 71fcbbc863
commit 951773d0ec
7 changed files with 333 additions and 172 deletions
--- a/letta/services/file_processor/chunker/line_chunker.py
+++ b/letta/services/file_processor/chunker/line_chunker.py
@@ -1,34 +1,139 @@
 import re
 from typing import List, Optional
 from letta.log import get_logger
 from letta.schemas.file import FileMetadata
 from letta.services.file_processor.file_types import ChunkingStrategy, file_type_registry
 logger = get_logger(__name__)
 class LineChunker:
-    """Newline chunker"""
+    """Content-aware line chunker that adapts chunking strategy based on file type"""
    def __init__(self):
-        pass
+        self.file_type_registry = file_type_registry
-    def chunk_text(self, text: str, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True) -> List[str]:
+    def _determine_chunking_strategy(self, file_metadata: FileMetadata) -> ChunkingStrategy:
-        """Split lines"""
+        """Determine the best chunking strategy based on file metadata"""
-        content_lines = [line.strip() for line in text.splitlines() if line.strip()]
+        # Try to get strategy from MIME type first
-        total_lines = len(content_lines)
+        if file_metadata.file_type:
            try:
                return self.file_type_registry.get_chunking_strategy_by_mime_type(file_metadata.file_type)
            except Exception:
                pass
-        if start and end:
+        # Fallback to filename extension
        if file_metadata.file_name:
            try:
                # Extract extension from filename
                import os
                _, ext = os.path.splitext(file_metadata.file_name)
                if ext:
                    return self.file_type_registry.get_chunking_strategy_by_extension(ext)
            except Exception:
                pass
        # Default fallback
        return ChunkingStrategy.LINE_BASED
    def _chunk_by_lines(self, text: str, preserve_indentation: bool = False) -> List[str]:
        """Traditional line-based chunking for code and structured data"""
        lines = []
        for line in text.splitlines():
            if preserve_indentation:
                # For code: preserve leading whitespace (indentation), remove trailing whitespace
                line = line.rstrip()
                # Only skip completely empty lines
                if line:
                    lines.append(line)
            else:
                # For structured data: strip all whitespace
                line = line.strip()
                if line:
                    lines.append(line)
        return lines
    def _chunk_by_sentences(self, text: str) -> List[str]:
        """Sentence-based chunking for documentation and markup"""
        # Simple sentence splitting on periods, exclamation marks, and question marks
        # followed by whitespace or end of string
        sentence_pattern = r"(?<=[.!?])\s+(?=[A-Z])"
        # Split text into sentences
        sentences = re.split(sentence_pattern, text.strip())
        # Clean up sentences - remove extra whitespace and empty sentences
        cleaned_sentences = []
        for sentence in sentences:
            sentence = re.sub(r"\s+", " ", sentence.strip())  # Normalize whitespace
            if sentence:
                cleaned_sentences.append(sentence)
        return cleaned_sentences
    def _chunk_by_characters(self, text: str, target_line_length: int = 100) -> List[str]:
        """Character-based wrapping for prose text"""
        words = text.split()
        lines = []
        current_line = []
        current_length = 0
        for word in words:
            # Check if adding this word would exceed the target length
            word_length = len(word)
            if current_length + word_length + len(current_line) > target_line_length and current_line:
                # Start a new line
                lines.append(" ".join(current_line))
                current_line = [word]
                current_length = word_length
            else:
                current_line.append(word)
                current_length += word_length
        # Add the last line if there's content
        if current_line:
            lines.append(" ".join(current_line))
        return [line for line in lines if line.strip()]
    def chunk_text(
        self, text: str, file_metadata: FileMetadata, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True
    ) -> List[str]:
        """Content-aware text chunking based on file type"""
        strategy = self._determine_chunking_strategy(file_metadata)
        # Apply the appropriate chunking strategy
        if strategy == ChunkingStrategy.DOCUMENTATION:
            content_lines = self._chunk_by_sentences(text)
        elif strategy == ChunkingStrategy.PROSE:
            content_lines = self._chunk_by_characters(text)
        elif strategy == ChunkingStrategy.CODE:
            content_lines = self._chunk_by_lines(text, preserve_indentation=True)
        else:  # STRUCTURED_DATA or LINE_BASED
            content_lines = self._chunk_by_lines(text, preserve_indentation=False)
        total_chunks = len(content_lines)
        # Handle start/end slicing
        if start is not None and end is not None:
            content_lines = content_lines[start:end]
            line_offset = start
        else:
            line_offset = 0
        # Add line numbers for all strategies
        content_lines = [f"{i + line_offset}: {line}" for i, line in enumerate(content_lines)]
-        # Add metadata about total lines
+        # Add metadata about total chunks
        if add_metadata:
-            if start and end:
+            chunk_type = (
-                content_lines.insert(0, f"[Viewing lines {start} to {end-1} (out of {total_lines} lines)]")
+                "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
            )
            if start is not None and end is not None:
                content_lines.insert(0, f"[Viewing {chunk_type} {start} to {end-1} (out of {total_chunks} {chunk_type})]")
            else:
-                content_lines.insert(0, f"[Viewing file start (out of {total_lines} lines)]")
+                content_lines.insert(0, f"[Viewing file start (out of {total_chunks} {chunk_type})]")
        return content_lines
--- a/letta/services/file_processor/file_processor.py
+++ b/letta/services/file_processor/file_processor.py
@@ -82,7 +82,7 @@ class FileProcessor:
            # Insert to agent context window
            # TODO: Rethink this line chunking mechanism
-            content_lines = self.line_chunker.chunk_text(text=raw_markdown_text)
+            content_lines = self.line_chunker.chunk_text(text=raw_markdown_text, file_metadata=file_metadata)
            visible_content = "\n".join(content_lines)
            await server.insert_file_into_context_windows(
--- a/letta/services/file_processor/file_types.py
+++ b/letta/services/file_processor/file_types.py
@@ -7,9 +7,20 @@ mime types, and file processing capabilities across the Letta codebase.
 import mimetypes
 from dataclasses import dataclass
 from enum import Enum
 from typing import Dict, Set
 class ChunkingStrategy(str, Enum):
    """Strategies used to split a file's text into numbered chunks.

    Each registered file type carries one of these values; the line chunker
    dispatches on it to pick how the text is split.
    """
    CODE = "code"  # Line-based chunking that keeps leading indentation (source code, scripts, HTML)
    STRUCTURED_DATA = "structured_data"  # Line-based chunking with fully stripped lines (JSON, XML, YAML, CSS, ...)
    DOCUMENTATION = "documentation"  # Sentence-based chunking (Markdown)
    PROSE = "prose"  # Character-based word wrapping for plain text
    LINE_BASED = "line_based"  # Default: line-based chunking with fully stripped lines
@dataclass
 class FileTypeInfo:
    """Information about a supported file type."""
@@ -18,6 +29,7 @@ class FileTypeInfo:
    mime_type: str
    is_simple_text: bool
    description: str
    chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED
 class FileTypeRegistry:
@@ -31,63 +43,70 @@ class FileTypeRegistry:
    def _register_default_types(self) -> None:
        """Register all default supported file types."""
        # Document formats
-        self.register(".pdf", "application/pdf", False, "PDF document")
+        self.register(".pdf", "application/pdf", False, "PDF document", ChunkingStrategy.LINE_BASED)
-        self.register(".txt", "text/plain", True, "Plain text file")
+        self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.PROSE)
-        self.register(".md", "text/markdown", True, "Markdown document")
+        self.register(".md", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
-        self.register(".markdown", "text/markdown", True, "Markdown document")
+        self.register(".markdown", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
-        self.register(".json", "application/json", True, "JSON data file")
+        self.register(".json", "application/json", True, "JSON data file", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".jsonl", "application/jsonl", True, "JSON Lines file")
+        self.register(".jsonl", "application/jsonl", True, "JSON Lines file", ChunkingStrategy.STRUCTURED_DATA)
        # Programming languages
-        self.register(".py", "text/x-python", True, "Python source code")
+        self.register(".py", "text/x-python", True, "Python source code", ChunkingStrategy.CODE)
-        self.register(".js", "text/javascript", True, "JavaScript source code")
+        self.register(".js", "text/javascript", True, "JavaScript source code", ChunkingStrategy.CODE)
-        self.register(".ts", "text/x-typescript", True, "TypeScript source code")
+        self.register(".ts", "text/x-typescript", True, "TypeScript source code", ChunkingStrategy.CODE)
-        self.register(".java", "text/x-java-source", True, "Java source code")
+        self.register(".java", "text/x-java-source", True, "Java source code", ChunkingStrategy.CODE)
-        self.register(".cpp", "text/x-c++", True, "C++ source code")
+        self.register(".cpp", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
-        self.register(".cxx", "text/x-c++", True, "C++ source code")
+        self.register(".cxx", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
-        self.register(".c", "text/x-c", True, "C source code")
+        self.register(".c", "text/x-c", True, "C source code", ChunkingStrategy.CODE)
-        self.register(".h", "text/x-c", True, "C/C++ header file")
+        self.register(".h", "text/x-c", True, "C/C++ header file", ChunkingStrategy.CODE)
-        self.register(".cs", "text/x-csharp", True, "C# source code")
+        self.register(".cs", "text/x-csharp", True, "C# source code", ChunkingStrategy.CODE)
-        self.register(".php", "text/x-php", True, "PHP source code")
+        self.register(".php", "text/x-php", True, "PHP source code", ChunkingStrategy.CODE)
-        self.register(".rb", "text/x-ruby", True, "Ruby source code")
+        self.register(".rb", "text/x-ruby", True, "Ruby source code", ChunkingStrategy.CODE)
-        self.register(".go", "text/x-go", True, "Go source code")
+        self.register(".go", "text/x-go", True, "Go source code", ChunkingStrategy.CODE)
-        self.register(".rs", "text/x-rust", True, "Rust source code")
+        self.register(".rs", "text/x-rust", True, "Rust source code", ChunkingStrategy.CODE)
-        self.register(".swift", "text/x-swift", True, "Swift source code")
+        self.register(".swift", "text/x-swift", True, "Swift source code", ChunkingStrategy.CODE)
-        self.register(".kt", "text/x-kotlin", True, "Kotlin source code")
+        self.register(".kt", "text/x-kotlin", True, "Kotlin source code", ChunkingStrategy.CODE)
-        self.register(".scala", "text/x-scala", True, "Scala source code")
+        self.register(".scala", "text/x-scala", True, "Scala source code", ChunkingStrategy.CODE)
-        self.register(".r", "text/x-r", True, "R source code")
+        self.register(".r", "text/x-r", True, "R source code", ChunkingStrategy.CODE)
-        self.register(".m", "text/x-objective-c", True, "Objective-C source code")
+        self.register(".m", "text/x-objective-c", True, "Objective-C source code", ChunkingStrategy.CODE)
        # Web technologies
-        self.register(".html", "text/html", True, "HTML document")
+        self.register(".html", "text/html", True, "HTML document", ChunkingStrategy.CODE)
-        self.register(".htm", "text/html", True, "HTML document")
+        self.register(".htm", "text/html", True, "HTML document", ChunkingStrategy.CODE)
-        self.register(".css", "text/css", True, "CSS stylesheet")
+        self.register(".css", "text/css", True, "CSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".scss", "text/x-scss", True, "SCSS stylesheet")
+        self.register(".scss", "text/x-scss", True, "SCSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".sass", "text/x-sass", True, "Sass stylesheet")
+        self.register(".sass", "text/x-sass", True, "Sass stylesheet", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".less", "text/x-less", True, "Less stylesheet")
+        self.register(".less", "text/x-less", True, "Less stylesheet", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".vue", "text/x-vue", True, "Vue.js component")
+        self.register(".vue", "text/x-vue", True, "Vue.js component", ChunkingStrategy.CODE)
-        self.register(".jsx", "text/x-jsx", True, "JSX source code")
+        self.register(".jsx", "text/x-jsx", True, "JSX source code", ChunkingStrategy.CODE)
-        self.register(".tsx", "text/x-tsx", True, "TSX source code")
+        self.register(".tsx", "text/x-tsx", True, "TSX source code", ChunkingStrategy.CODE)
        # Configuration and data formats
-        self.register(".xml", "application/xml", True, "XML document")
+        self.register(".xml", "application/xml", True, "XML document", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".yaml", "text/x-yaml", True, "YAML configuration")
+        self.register(".yaml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".yml", "text/x-yaml", True, "YAML configuration")
+        self.register(".yml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".toml", "application/toml", True, "TOML configuration")
+        self.register(".toml", "application/toml", True, "TOML configuration", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".ini", "text/x-ini", True, "INI configuration")
+        self.register(".ini", "text/x-ini", True, "INI configuration", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".cfg", "text/x-conf", True, "Configuration file")
+        self.register(".cfg", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)
-        self.register(".conf", "text/x-conf", True, "Configuration file")
+        self.register(".conf", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)
        # Scripts and SQL
-        self.register(".sh", "text/x-shellscript", True, "Shell script")
+        self.register(".sh", "text/x-shellscript", True, "Shell script", ChunkingStrategy.CODE)
-        self.register(".bash", "text/x-shellscript", True, "Bash script")
+        self.register(".bash", "text/x-shellscript", True, "Bash script", ChunkingStrategy.CODE)
-        self.register(".ps1", "text/x-powershell", True, "PowerShell script")
+        self.register(".ps1", "text/x-powershell", True, "PowerShell script", ChunkingStrategy.CODE)
-        self.register(".bat", "text/x-batch", True, "Batch script")
+        self.register(".bat", "text/x-batch", True, "Batch script", ChunkingStrategy.CODE)
-        self.register(".cmd", "text/x-batch", True, "Command script")
+        self.register(".cmd", "text/x-batch", True, "Command script", ChunkingStrategy.CODE)
-        self.register(".dockerfile", "text/x-dockerfile", True, "Dockerfile")
+        self.register(".dockerfile", "text/x-dockerfile", True, "Dockerfile", ChunkingStrategy.CODE)
-        self.register(".sql", "text/x-sql", True, "SQL script")
+        self.register(".sql", "text/x-sql", True, "SQL script", ChunkingStrategy.CODE)
-    def register(self, extension: str, mime_type: str, is_simple_text: bool, description: str) -> None:
+    def register(
        self,
        extension: str,
        mime_type: str,
        is_simple_text: bool,
        description: str,
        chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED,
    ) -> None:
        """
        Register a new file type.
@@ -96,12 +115,17 @@ class FileTypeRegistry:
            mime_type: MIME type for the file
            is_simple_text: Whether this is a simple text file that can be read directly
            description: Human-readable description of the file type
            chunking_strategy: Strategy for chunking this file type
        """
        if not extension.startswith("."):
            extension = f".{extension}"
        self._file_types[extension] = FileTypeInfo(
-            extension=extension, mime_type=mime_type, is_simple_text=is_simple_text, description=description
+            extension=extension,
            mime_type=mime_type,
            is_simple_text=is_simple_text,
            description=description,
            chunking_strategy=chunking_strategy,
        )
    def register_mime_types(self) -> None:
@@ -217,6 +241,37 @@ class FileTypeRegistry:
            extension = f".{extension}"
        return self._file_types[extension]
    def get_chunking_strategy_by_extension(self, extension: str) -> ChunkingStrategy:
        """
        Look up the chunking strategy registered for a file extension.

        Delegates to ``get_file_type_info`` and returns the ``chunking_strategy``
        of the entry it finds.

        Args:
            extension: File extension (with or without leading dot)

        Returns:
            ChunkingStrategy enum value for the file type

        Raises:
            KeyError: If the extension is not supported
        """
        return self.get_file_type_info(extension).chunking_strategy
    def get_chunking_strategy_by_mime_type(self, mime_type: str) -> ChunkingStrategy:
        """
        Look up the chunking strategy for a MIME type.

        Registered file types are scanned in registration order and the first
        entry whose ``mime_type`` equals the argument wins.

        Note: an unknown MIME type does not raise; it yields LINE_BASED, so a
        caller cannot tell "not registered" apart from a type that is
        registered with the LINE_BASED strategy.

        Args:
            mime_type: MIME type of the file

        Returns:
            ChunkingStrategy enum value for the file type, or LINE_BASED if not found
        """
        matching = (
            info.chunking_strategy
            for info in self._file_types.values()
            if info.mime_type == mime_type
        )
        # next() with a default stops at the first match and falls back when there is none.
        return next(matching, ChunkingStrategy.LINE_BASED)
 # Global registry instance
 file_type_registry = FileTypeRegistry()
--- a/letta/services/tool_executor/files_tool_executor.py
+++ b/letta/services/tool_executor/files_tool_executor.py
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple
 from letta.log import get_logger
 from letta.schemas.agent import AgentState
 from letta.schemas.file import FileMetadata
 from letta.schemas.sandbox_config import SandboxConfig
 from letta.schemas.tool import Tool
 from letta.schemas.tool_execution_result import ToolExecutionResult
@@ -119,7 +120,7 @@ class LettaFileToolExecutor(ToolExecutor):
        # TODO: Inefficient, maybe we can pre-compute this
        # TODO: This is also not the best way to split things - would be cool to have "content aware" splitting
        # TODO: Split code differently from large text blurbs
-        content_lines = LineChunker().chunk_text(text=file.content, start=start, end=end)
+        content_lines = LineChunker().chunk_text(text=file.content, file_metadata=file, start=start, end=end)
        visible_content = "\n".join(content_lines)
        await self.files_agents_manager.update_file_agent_by_id(
@@ -146,14 +147,14 @@ class LettaFileToolExecutor(ToolExecutor):
        except re.error as e:
            raise ValueError(f"Invalid regex pattern: {e}")
-    def _get_context_lines(self, text: str, match_line_idx: int, total_lines: int) -> List[str]:
+    def _get_context_lines(self, text: str, file_metadata: FileMetadata, match_line_idx: int, total_lines: int) -> List[str]:
        """Get context lines around a match using LineChunker."""
        start_idx = max(0, match_line_idx - self.MAX_CONTEXT_LINES)
        end_idx = min(total_lines, match_line_idx + self.MAX_CONTEXT_LINES + 1)
        # Use LineChunker to get formatted lines with numbers
        chunker = LineChunker()
-        context_lines = chunker.chunk_text(text, start=start_idx, end=end_idx, add_metadata=False)
+        context_lines = chunker.chunk_text(text, file_metadata=file_metadata, start=start_idx, end=end_idx, add_metadata=False)
        # Add match indicator
        formatted_lines = []
@@ -268,7 +269,7 @@ class LettaFileToolExecutor(ToolExecutor):
                # Use LineChunker to get all lines with proper formatting
                chunker = LineChunker()
-                formatted_lines = chunker.chunk_text(file.content)
+                formatted_lines = chunker.chunk_text(file.content, file_metadata=file)
                # Remove metadata header
                if formatted_lines and formatted_lines[0].startswith("[Viewing"):
@@ -295,7 +296,7 @@ class LettaFileToolExecutor(ToolExecutor):
                        if pattern_regex.search(line_content):
                            # Get context around the match (convert back to 0-based indexing)
-                            context_lines = self._get_context_lines(file.content, line_num - 1, len(file.content.splitlines()))
+                            context_lines = self._get_context_lines(file.content, file, line_num - 1, len(file.content.splitlines()))
                            # Format the match result
                            match_header = f"\n=== {file.file_name}:{line_num} ==="
--- a/tests/data/0_to_99.py
+++ b/tests/data/0_to_99.py
@@ -0,0 +1,100 @@
 x0 = 0
 x1 = 1
 x2 = 2
 x3 = 3
 x4 = 4
 x5 = 5
 x6 = 6
 x7 = 7
 x8 = 8
 x9 = 9
 x10 = 10
 x11 = 11
 x12 = 12
 x13 = 13
 x14 = 14
 x15 = 15
 x16 = 16
 x17 = 17
 x18 = 18
 x19 = 19
 x20 = 20
 x21 = 21
 x22 = 22
 x23 = 23
 x24 = 24
 x25 = 25
 x26 = 26
 x27 = 27
 x28 = 28
 x29 = 29
 x30 = 30
 x31 = 31
 x32 = 32
 x33 = 33
 x34 = 34
 x35 = 35
 x36 = 36
 x37 = 37
 x38 = 38
 x39 = 39
 x40 = 40
 x41 = 41
 x42 = 42
 x43 = 43
 x44 = 44
 x45 = 45
 x46 = 46
 x47 = 47
 x48 = 48
 x49 = 49
 x50 = 50
 x51 = 51
 x52 = 52
 x53 = 53
 x54 = 54
 x55 = 55
 x56 = 56
 x57 = 57
 x58 = 58
 x59 = 59
 x60 = 60
 x61 = 61
 x62 = 62
 x63 = 63
 x64 = 64
 x65 = 65
 x66 = 66
 x67 = 67
 x68 = 68
 x69 = 69
 x70 = 70
 x71 = 71
 x72 = 72
 x73 = 73
 x74 = 74
 x75 = 75
 x76 = 76
 x77 = 77
 x78 = 78
 x79 = 79
 x80 = 80
 x81 = 81
 x82 = 82
 x83 = 83
 x84 = 84
 x85 = 85
 x86 = 86
 x87 = 87
 x88 = 88
 x89 = 89
 x90 = 90
 x91 = 91
 x92 = 92
 x93 = 93
 x94 = 94
 x95 = 95
 x96 = 96
 x97 = 97
 x98 = 98
 x99 = 99
--- a/tests/data/lines_1_to_100.txt
+++ b/tests/data/lines_1_to_100.txt
@@ -1,100 +0,0 @@
 Line 1
 Line 2
 Line 3
 Line 4
 Line 5
 Line 6
 Line 7
 Line 8
 Line 9
 Line 10
 Line 11
 Line 12
 Line 13
 Line 14
 Line 15
 Line 16
 Line 17
 Line 18
 Line 19
 Line 20
 Line 21
 Line 22
 Line 23
 Line 24
 Line 25
 Line 26
 Line 27
 Line 28
 Line 29
 Line 30
 Line 31
 Line 32
 Line 33
 Line 34
 Line 35
 Line 36
 Line 37
 Line 38
 Line 39
 Line 40
 Line 41
 Line 42
 Line 43
 Line 44
 Line 45
 Line 46
 Line 47
 Line 48
 Line 49
 Line 50
 Line 51
 Line 52
 Line 53
 Line 54
 Line 55
 Line 56
 Line 57
 Line 58
 Line 59
 Line 60
 Line 61
 Line 62
 Line 63
 Line 64
 Line 65
 Line 66
 Line 67
 Line 68
 Line 69
 Line 70
 Line 71
 Line 72
 Line 73
 Line 74
 Line 75
 Line 76
 Line 77
 Line 78
 Line 79
 Line 80
 Line 81
 Line 82
 Line 83
 Line 84
 Line 85
 Line 86
 Line 87
 Line 88
 Line 89
 Line 90
 Line 91
 Line 92
 Line 93
 Line 94
 Line 95
 Line 96
 Line 97
 Line 98
 Line 99
 Line 100
--- a/tests/test_sources.py
+++ b/tests/test_sources.py
@@ -499,7 +499,7 @@ def test_view_ranges_have_metadata(client: LettaSDKClient, agent_state: AgentSta
    client.agents.sources.attach(source_id=source.id, agent_id=agent_state.id)
    # Load files into the source
-    file_path = "tests/data/lines_1_to_100.txt"
+    file_path = "tests/data/0_to_99.py"
    # Upload the files
    with open(file_path, "rb") as f:
@@ -548,10 +548,10 @@ def test_view_ranges_have_metadata(client: LettaSDKClient, agent_state: AgentSta
        block.value
        == """
    [Viewing lines 50 to 54 (out of 100 lines)]
-50: Line 51
+50: x50 = 50
-51: Line 52
+51: x51 = 51
-52: Line 53
+52: x52 = 52
-53: Line 54
+53: x53 = 53
-54: Line 55
+54: x54 = 54
    """.strip()
    )