feat: Polish open-file tools (#3575)

Co-authored-by: Charles Packer <packercharles@gmail.com>
Co-authored-by: Shubham Naik <shub@letta.com>
Co-authored-by: Shubham Naik <shub@memgpt.ai>
Co-authored-by: cthomas <caren@letta.com>
Co-authored-by: jnjpng <jin@letta.com>
Co-authored-by: Jin Peng <jinjpeng@Jins-MacBook-Pro.local>
Co-authored-by: Cameron Pfiffer <cameron@pfiffer.org>
Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Co-authored-by: Kian Jones <kian@Kians-MacBook-Pro.local>
This commit is contained in:
Matthew Zhou
2025-07-29 15:46:51 -07:00
committed by GitHub
parent 6f3afbb2e1
commit b16f5ffc99
14 changed files with 281 additions and 107 deletions

View File

@@ -0,0 +1,33 @@
"""Add start end for agent file
Revision ID: 4537f0996495
Revises: 06fbbf65d4f1
Create Date: 2025-07-25 17:44:26.748765
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "4537f0996495"
down_revision: Union[str, None] = "06fbbf65d4f1"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add nullable line-range tracking columns to files_agents."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Both columns are nullable so existing rows need no backfill.
    for column_name in ("start_line", "end_line"):
        op.add_column("files_agents", sa.Column(column_name, sa.Integer(), nullable=True))
    # ### end Alembic commands ###
def downgrade() -> None:
    """Remove the line-range tracking columns from files_agents."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Drop in reverse order of creation, mirroring upgrade().
    for column_name in ("end_line", "start_line"):
        op.drop_column("files_agents", column_name)
    # ### end Alembic commands ###

View File

@@ -103,6 +103,5 @@ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin, AsyncAttrs):
chunks_embedded=self.chunks_embedded,
created_at=self.created_at,
updated_at=self.updated_at,
is_deleted=self.is_deleted,
content=content_text,
)

View File

@@ -2,14 +2,14 @@ import uuid
from datetime import datetime
from typing import TYPE_CHECKING, Optional
from sqlalchemy import Boolean, DateTime, ForeignKey, Index, String, Text, UniqueConstraint, func
from sqlalchemy import Boolean, DateTime, ForeignKey, Index, Integer, String, Text, UniqueConstraint, func
from sqlalchemy.orm import Mapped, mapped_column, relationship
from letta.constants import FILE_IS_TRUNCATED_WARNING
from letta.orm.mixins import OrganizationMixin
from letta.orm.sqlalchemy_base import SqlalchemyBase
from letta.schemas.block import FileBlock as PydanticFileBlock
from letta.schemas.file import FileAgent as PydanticFileAgent
from letta.utils import truncate_file_visible_content
if TYPE_CHECKING:
pass
@@ -77,6 +77,12 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
nullable=False,
doc="UTC timestamp when this agent last accessed the file.",
)
start_line: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, doc="Starting line number (1-indexed) when file was opened with line range."
)
end_line: Mapped[Optional[int]] = mapped_column(
Integer, nullable=True, doc="Ending line number (exclusive) when file was opened with line range."
)
# relationships
agent: Mapped["Agent"] = relationship(
@@ -87,13 +93,7 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
# TODO: This is temporary as we figure out if we want FileBlock as a first class citizen
def to_pydantic_block(self, per_file_view_window_char_limit: int) -> PydanticFileBlock:
visible_content = self.visible_content if self.visible_content and self.is_open else ""
# Truncate content and add warnings here when converting from FileAgent to Block
if len(visible_content) > per_file_view_window_char_limit:
truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
visible_content = visible_content[: per_file_view_window_char_limit - len(truncated_warning)]
visible_content += truncated_warning
visible_content = truncate_file_visible_content(self.visible_content, self.is_open, per_file_view_window_char_limit)
return PydanticFileBlock(
value=visible_content,

View File

@@ -56,7 +56,6 @@ class FileMetadata(FileMetadataBase):
# orm metadata, optional fields
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow, description="The creation date of the file.")
updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow, description="The update date of the file.")
is_deleted: bool = Field(False, description="Whether this file is deleted or not.")
class FileAgentBase(LettaBase):
@@ -76,8 +75,10 @@ class FileAgentBase(LettaBase):
)
last_accessed_at: Optional[datetime] = Field(
default_factory=datetime.utcnow,
description="UTC timestamp of the agents most recent access to this file.",
description="UTC timestamp of the agent's most recent access to this file.",
)
start_line: Optional[int] = Field(None, description="Starting line number (1-indexed) when file was opened with line range.")
end_line: Optional[int] = Field(None, description="Ending line number (exclusive) when file was opened with line range.")
class FileAgent(FileAgentBase):
@@ -107,4 +108,3 @@ class FileAgent(FileAgentBase):
default_factory=datetime.utcnow,
description="Row last-update timestamp (UTC).",
)
is_deleted: bool = Field(False, description="Soft-delete flag.")

View File

@@ -41,7 +41,7 @@ from letta.server.server import SyncServer
from letta.services.summarizer.enums import SummarizationMode
from letta.services.telemetry_manager import NoopTelemetryManager
from letta.settings import settings
from letta.utils import safe_create_task
from letta.utils import safe_create_task, truncate_file_visible_content
# These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
@@ -478,14 +478,23 @@ async def open_file(
if not file_metadata:
raise HTTPException(status_code=404, detail=f"File with id={file_id} not found")
# Process file content with line numbers using LineChunker
from letta.services.file_processor.chunker.line_chunker import LineChunker
content_lines = LineChunker().chunk_text(file_metadata=file_metadata, validate_range=False)
visible_content = "\n".join(content_lines)
# Truncate if needed
visible_content = truncate_file_visible_content(visible_content, True, per_file_view_window_char_limit)
# Use enforce_max_open_files_and_open for efficient LRU handling
closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open(
closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open(
agent_id=agent_id,
file_id=file_id,
file_name=file_metadata.file_name,
source_id=file_metadata.source_id,
actor=actor,
visible_content=file_metadata.content[:per_file_view_window_char_limit] if file_metadata.content else "",
visible_content=visible_content,
max_files_open=max_files_open,
)

View File

@@ -130,37 +130,38 @@ class LineChunker:
# Apply the appropriate chunking strategy
if strategy == ChunkingStrategy.DOCUMENTATION:
content_lines = self._chunk_by_sentences(text)
elif strategy == ChunkingStrategy.PROSE:
content_lines = self._chunk_by_characters(text)
elif strategy == ChunkingStrategy.CODE:
content_lines = self._chunk_by_lines(text, preserve_indentation=True)
else: # STRUCTURED_DATA or LINE_BASED
content_lines = self._chunk_by_lines(text, preserve_indentation=False)
total_chunks = len(content_lines)
chunk_type = (
"sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
)
chunk_type = "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "lines"
# Validate range if requested
if validate_range and (start is not None or end is not None):
if start is not None and start >= total_chunks:
# Convert to 1-indexed for user-friendly error message
start_display = start + 1
raise ValueError(
f"File {file_metadata.file_name} has only {total_chunks} lines, but requested offset {start_display} is out of range"
)
if start is not None and end is not None and end > total_chunks:
# Convert to 1-indexed for user-friendly error message
start_display = start + 1
end_display = end
raise ValueError(
f"File {file_metadata.file_name} has only {total_chunks} lines, but requested range {start_display} to {end_display} extends beyond file bounds"
)
# Handle start/end slicing
# Handle range validation and clamping
if start is not None or end is not None:
# Always validate that start < end if both are specified
if start is not None and end is not None and start >= end:
if validate_range:
raise ValueError(f"Invalid range: start ({start}) must be less than end ({end})")
# If validation is off, we still need to handle this case sensibly
# but we'll allow it to proceed with an empty result
# Always check that start is within bounds - this should error regardless of validation flag
if start is not None and start >= total_chunks:
raise ValueError(
f"File {file_metadata.file_name} has only {total_chunks} {chunk_type}, but requested offset {start + 1} is out of range"
)
# Apply bounds checking
if start is not None:
start = max(0, start) # Ensure non-negative
# Only clamp end if it exceeds the file length
if end is not None:
end = min(end, total_chunks)
# Apply slicing
content_lines = content_lines[start:end]
line_offset = start if start is not None else 0
else:

View File

@@ -12,7 +12,6 @@ from letta.schemas.passage import Passage
from letta.schemas.user import User
from letta.services.agent_manager import AgentManager
from letta.services.file_manager import FileManager
from letta.services.file_processor.chunker.line_chunker import LineChunker
from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
from letta.services.file_processor.parser.base_parser import FileParser
@@ -35,7 +34,6 @@ class FileProcessor:
max_file_size: int = 50 * 1024 * 1024, # 50MB default
):
self.file_parser = file_parser
self.line_chunker = LineChunker()
self.embedder = embedder
self.max_file_size = max_file_size
self.file_manager = FileManager()

View File

@@ -17,7 +17,6 @@ class ChunkingStrategy(str, Enum):
CODE = "code" # Line-based chunking for code files
STRUCTURED_DATA = "structured_data" # Line-based chunking for JSON, XML, etc.
DOCUMENTATION = "documentation" # Paragraph-aware chunking for Markdown, HTML
PROSE = "prose" # Character-based wrapping for plain text
LINE_BASED = "line_based" # Default line-based chunking
@@ -44,7 +43,7 @@ class FileTypeRegistry:
"""Register all default supported file types."""
# Document formats
self.register(".pdf", "application/pdf", False, "PDF document", ChunkingStrategy.LINE_BASED)
self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.PROSE)
self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.LINE_BASED)
self.register(".md", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
self.register(".markdown", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
self.register(".json", "application/json", True, "JSON data file", ChunkingStrategy.STRUCTURED_DATA)

View File

@@ -1,5 +1,5 @@
from datetime import datetime, timezone
from typing import List, Optional, Union
from typing import Dict, List, Optional, Union
from sqlalchemy import and_, delete, func, or_, select, update
@@ -34,6 +34,8 @@ class FileAgentManager:
max_files_open: int,
is_open: bool = True,
visible_content: Optional[str] = None,
start_line: Optional[int] = None,
end_line: Optional[int] = None,
) -> tuple[PydanticFileAgent, List[str]]:
"""
Idempotently attach *file_id* to *agent_id* with LRU enforcement.
@@ -48,7 +50,7 @@ class FileAgentManager:
"""
if is_open:
# Use the efficient LRU + open method
closed_files, was_already_open = await self.enforce_max_open_files_and_open(
closed_files, was_already_open, _ = await self.enforce_max_open_files_and_open(
agent_id=agent_id,
file_id=file_id,
file_name=file_name,
@@ -56,6 +58,8 @@ class FileAgentManager:
actor=actor,
visible_content=visible_content or "",
max_files_open=max_files_open,
start_line=start_line,
end_line=end_line,
)
# Get the updated file agent to return
@@ -85,6 +89,8 @@ class FileAgentManager:
existing.visible_content = visible_content
existing.last_accessed_at = now_ts
existing.start_line = start_line
existing.end_line = end_line
await existing.update_async(session, actor=actor)
return existing.to_pydantic(), []
@@ -98,6 +104,8 @@ class FileAgentManager:
is_open=is_open,
visible_content=visible_content,
last_accessed_at=now_ts,
start_line=start_line,
end_line=end_line,
)
await assoc.create_async(session, actor=actor)
return assoc.to_pydantic(), []
@@ -112,6 +120,8 @@ class FileAgentManager:
actor: PydanticUser,
is_open: Optional[bool] = None,
visible_content: Optional[str] = None,
start_line: Optional[int] = None,
end_line: Optional[int] = None,
) -> PydanticFileAgent:
"""Patch an existing association row."""
async with db_registry.async_session() as session:
@@ -121,6 +131,10 @@ class FileAgentManager:
assoc.is_open = is_open
if visible_content is not None:
assoc.visible_content = visible_content
if start_line is not None:
assoc.start_line = start_line
if end_line is not None:
assoc.end_line = end_line
# touch timestamp
assoc.last_accessed_at = datetime.now(timezone.utc)
@@ -373,8 +387,18 @@ class FileAgentManager:
@enforce_types
@trace_method
async def enforce_max_open_files_and_open(
self, *, agent_id: str, file_id: str, file_name: str, source_id: str, actor: PydanticUser, visible_content: str, max_files_open: int
) -> tuple[List[str], bool]:
self,
*,
agent_id: str,
file_id: str,
file_name: str,
source_id: str,
actor: PydanticUser,
visible_content: str,
max_files_open: int,
start_line: Optional[int] = None,
end_line: Optional[int] = None,
) -> tuple[List[str], bool, Dict[str, tuple[Optional[int], Optional[int]]]]:
"""
Efficiently handle LRU eviction and file opening in a single transaction.
@@ -387,7 +411,8 @@ class FileAgentManager:
visible_content: Content to set for the opened file
Returns:
Tuple of (closed_file_names, file_was_already_open)
Tuple of (closed_file_names, file_was_already_open, previous_ranges)
where previous_ranges maps file names to their old (start_line, end_line) ranges
"""
async with db_registry.async_session() as session:
# Single query to get ALL open files for this agent, ordered by last_accessed_at (oldest first)
@@ -423,6 +448,17 @@ class FileAgentManager:
file_was_already_open = file_to_open is not None and file_to_open.is_open
# Capture previous line range if file was already open and we're changing the range
previous_ranges = {}
if file_was_already_open and file_to_open:
old_start = file_to_open.start_line
old_end = file_to_open.end_line
# Only record if there was a previous range or if we're setting a new range
if old_start is not None or old_end is not None or start_line is not None or end_line is not None:
# Only record if the range is actually changing
if old_start != start_line or old_end != end_line:
previous_ranges[file_name] = (old_start, old_end)
# Calculate how many files need to be closed
current_other_count = len(other_open_files)
target_other_count = max_files_open - 1 # Reserve 1 slot for file we're opening
@@ -458,6 +494,8 @@ class FileAgentManager:
file_to_open.is_open = True
file_to_open.visible_content = visible_content
file_to_open.last_accessed_at = now_ts
file_to_open.start_line = start_line
file_to_open.end_line = end_line
await file_to_open.update_async(session, actor=actor)
else:
# Create new file association
@@ -470,10 +508,12 @@ class FileAgentManager:
is_open=True,
visible_content=visible_content,
last_accessed_at=now_ts,
start_line=start_line,
end_line=end_line,
)
await new_file_agent.create_async(session, actor=actor)
return closed_file_names, file_was_already_open
return closed_file_names, file_was_already_open, previous_ranges
@enforce_types
@trace_method

View File

@@ -142,6 +142,7 @@ class LettaFileToolExecutor(ToolExecutor):
# Process each file
opened_files = []
all_closed_files = []
all_previous_ranges = {} # Collect all previous ranges from all files
for file_request in file_requests:
file_name = file_request.file_name
@@ -181,7 +182,7 @@ class LettaFileToolExecutor(ToolExecutor):
visible_content = "\n".join(content_lines)
# Handle LRU eviction and file opening
closed_files, was_already_open = await self.files_agents_manager.enforce_max_open_files_and_open(
closed_files, was_already_open, previous_ranges = await self.files_agents_manager.enforce_max_open_files_and_open(
agent_id=agent_state.id,
file_id=file_id,
file_name=file_name,
@@ -189,42 +190,45 @@ class LettaFileToolExecutor(ToolExecutor):
actor=self.actor,
visible_content=visible_content,
max_files_open=agent_state.max_files_open,
start_line=start + 1 if start is not None else None, # convert to 1-indexed for user display
end_line=end if end is not None else None, # end is already exclusive in slicing, so this is correct
)
opened_files.append(file_name)
all_closed_files.extend(closed_files)
all_previous_ranges.update(previous_ranges) # Merge previous ranges from this file
# Update access timestamps for all opened files efficiently
await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=file_names, actor=self.actor)
# Build success message
if len(file_requests) == 1:
# Single file - maintain existing format
file_request = file_requests[0]
file_name = file_request.file_name
offset = file_request.offset
length = file_request.length
if offset is not None and length is not None:
end_line = offset + length - 1
success_msg = (
f"Successfully opened file {file_name}, lines {offset} to {end_line} are now visible in memory block <{file_name}>"
)
elif offset is not None:
success_msg = f"Successfully opened file {file_name}, lines {offset} to end are now visible in memory block <{file_name}>"
else:
success_msg = f"Successfully opened file {file_name}, entire file is now visible in memory block <{file_name}>"
else:
# Multiple files - show individual ranges if specified
file_summaries = []
for req in file_requests:
if req.offset is not None and req.length is not None:
end_line = req.offset + req.length - 1
file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line})")
elif req.offset is not None:
file_summaries.append(f"{req.file_name} (lines {req.offset}-end)")
# Helper function to format previous range info
def format_previous_range(file_name: str) -> str:
if file_name in all_previous_ranges:
old_start, old_end = all_previous_ranges[file_name]
if old_start is not None and old_end is not None:
return f" (previously lines {old_start}-{old_end})"
elif old_start is not None:
return f" (previously lines {old_start}-end)"
else:
file_summaries.append(req.file_name)
success_msg = f"Successfully opened {len(file_requests)} files: {', '.join(file_summaries)}"
return " (previously full file)"
return ""
# Build unified success message - treat single and multiple files consistently
file_summaries = []
for req in file_requests:
previous_info = format_previous_range(req.file_name)
if req.offset is not None and req.length is not None:
end_line = req.offset + req.length - 1
file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line}){previous_info}")
elif req.offset is not None:
file_summaries.append(f"{req.file_name} (lines {req.offset}-end){previous_info}")
else:
file_summaries.append(f"{req.file_name}{previous_info}")
if len(file_requests) == 1:
success_msg = f"* Opened {file_summaries[0]}"
else:
success_msg = f"* Opened {len(file_requests)} files: {', '.join(file_summaries)}"
# Add information about closed files
if closed_by_close_all_others:

View File

@@ -32,6 +32,7 @@ from letta.constants import (
DEFAULT_CORE_MEMORY_SOURCE_CHAR_LIMIT,
DEFAULT_MAX_FILES_OPEN,
ERROR_MESSAGE_PREFIX,
FILE_IS_TRUNCATED_WARNING,
LETTA_DIR,
MAX_FILENAME_LENGTH,
TOOL_CALL_ID_MAX_LEN,
@@ -1223,3 +1224,15 @@ def calculate_file_defaults_based_on_context_window(context_window: Optional[int
return 10, 40_000 # ~100k tokens
else: # Extremely large models (200K+)
return 15, 40_000 # ~1505k tokens
def truncate_file_visible_content(visible_content: str, is_open: bool, per_file_view_window_char_limit: int) -> str:
    """Return a file's visible content, truncated to the per-file view window.

    Args:
        visible_content: Raw content currently associated with the file agent.
        is_open: Whether the file is currently open for the agent; closed files
            render as an empty string.
        per_file_view_window_char_limit: Maximum number of characters allowed
            in the rendered view window.

    Returns:
        The (possibly truncated) content. When truncation occurs, a
        ``[TRUNCATED]`` warning is appended so the total stays within the limit.
    """
    # Closed files, or empty/None content, contribute nothing to the view.
    if not (visible_content and is_open):
        return ""
    # Content already fits within the window: return it unchanged.
    if len(visible_content) <= per_file_view_window_char_limit:
        return visible_content
    # Truncate and append a warning so the agent knows content was cut off.
    # NOTE(review): assumes per_file_view_window_char_limit exceeds the warning
    # length — otherwise the slice bound goes negative; confirm limits upstream.
    truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
    return visible_content[: per_file_view_window_char_limit - len(truncated_warning)] + truncated_warning

View File

@@ -8733,6 +8733,71 @@ async def test_update_file_agent_by_file_name(server, file_attachment, default_u
)
assert updated.is_open is False
assert updated.visible_content == "updated"
assert updated.start_line is None # start_line should default to None
assert updated.end_line is None # end_line should default to None
@pytest.mark.asyncio
async def test_file_agent_line_tracking(server, default_user, sarah_agent, default_source):
    """Test that line information is captured when opening files with line ranges.

    Verifies that enforce_max_open_files_and_open persists start_line/end_line
    on the file-agent row, clears them when a file is reopened without a range,
    and reports the prior range via the returned previous_ranges mapping.
    """
    from letta.schemas.file import FileMetadata as PydanticFileMetadata

    # Create a test file with multiple lines
    test_content = "line 1\nline 2\nline 3\nline 4\nline 5"
    file_metadata = PydanticFileMetadata(
        file_name="test_lines.txt",
        organization_id=default_user.organization_id,
        source_id=default_source.id,
    )
    file = await server.file_manager.create_file(file_metadata=file_metadata, actor=default_user, text=test_content)

    # Test opening with line range using enforce_max_open_files_and_open
    closed_files, was_already_open, previous_ranges = await server.file_agent_manager.enforce_max_open_files_and_open(
        agent_id=sarah_agent.id,
        file_id=file.id,
        file_name=file.file_name,
        source_id=file.source_id,
        actor=default_user,
        visible_content="2: line 2\n3: line 3",
        max_files_open=sarah_agent.max_files_open,
        start_line=2,  # 1-indexed
        end_line=4,  # exclusive
    )

    # Retrieve and verify line tracking
    retrieved = await server.file_agent_manager.get_file_agent_by_id(
        agent_id=sarah_agent.id,
        file_id=file.id,
        actor=default_user,
    )
    assert retrieved.start_line == 2
    assert retrieved.end_line == 4
    assert previous_ranges == {}  # No previous range since it wasn't open before

    # Test opening without line range - should clear line info and capture previous range
    closed_files, was_already_open, previous_ranges = await server.file_agent_manager.enforce_max_open_files_and_open(
        agent_id=sarah_agent.id,
        file_id=file.id,
        file_name=file.file_name,
        source_id=file.source_id,
        actor=default_user,
        visible_content="full file content",
        max_files_open=sarah_agent.max_files_open,
        start_line=None,
        end_line=None,
    )

    # Retrieve and verify line info is cleared
    retrieved = await server.file_agent_manager.get_file_agent_by_id(
        agent_id=sarah_agent.id,
        file_id=file.id,
        actor=default_user,
    )
    assert retrieved.start_line is None
    assert retrieved.end_line is None
    assert previous_ranges == {file.file_name: (2, 4)}  # Should capture the previous range
@pytest.mark.asyncio
@@ -9131,7 +9196,7 @@ async def test_lru_eviction_on_open_file(server, default_user, sarah_agent, defa
time.sleep(0.1)
# Now "open" the last file using the efficient method
closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open(
closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open(
agent_id=sarah_agent.id,
file_id=files[-1].id,
file_name=files[-1].file_name,
@@ -9205,7 +9270,7 @@ async def test_lru_no_eviction_when_reopening_same_file(server, default_user, sa
time.sleep(0.1)
# "Reopen" the last file (which is already open)
closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open(
closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open(
agent_id=sarah_agent.id,
file_id=files[-1].id,
file_name=files[-1].file_name,

View File

@@ -287,11 +287,11 @@ def test_attach_existing_files_creates_source_blocks_correctly(disable_pinecone,
<file status="open" name="test_source/test.txt">
<metadata>
- read_only=true
- chars_current=46
- chars_current=45
- chars_limit=15000
</metadata>
<value>
[Viewing file start (out of 1 chunks)]
[Viewing file start (out of 1 lines)]
1: test
</value>
</file>
@@ -348,11 +348,11 @@ def test_delete_source_removes_source_blocks_correctly(disable_pinecone, client:
<file status="open" name="test_source/test.txt">
<metadata>
- read_only=true
- chars_current=46
- chars_current=45
- chars_limit=15000
</metadata>
<value>
[Viewing file start (out of 1 chunks)]
[Viewing file start (out of 1 lines)]
1: test
</value>
</file>
@@ -437,9 +437,9 @@ def test_agent_uses_open_close_file_correctly(disable_pinecone, client: LettaSDK
assert old_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {old_content_length}"
# Assert specific content expectations for first range (lines 1-5)
assert "[Viewing chunks 1 to 5 (out of 554 chunks)]" in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..."
assert "[Viewing lines 1 to 5 (out of " in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..."
assert "1: Enrico Letta" in old_value, f"Expected line 1 to start with '1: Enrico Letta', got: {old_value[:200]}..."
assert "5: appointed to the Cabinet" in old_value, f"Expected line 5 to contain '5: appointed to the Cabinet', got: {old_value}"
assert "5: " in old_value, f"Expected line 5 to be present, got: {old_value}"
# Ask agent to open the file for a different range
offset, length = 6, 5 # Different offset, same length
@@ -466,13 +466,9 @@ def test_agent_uses_open_close_file_correctly(disable_pinecone, client: LettaSDK
assert new_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {new_content_length}"
# Assert specific content expectations for second range (lines 6-10)
assert "[Viewing chunks 6 to 10 (out of 554 chunks)]" in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..."
assert (
"6: was promoted to become Minister" in new_value
), f"Expected line 6 to start with '6: was promoted to become Minister', got: {new_value[:200]}..."
assert (
"10: produced an inconclusive result" in new_value
), f"Expected line 10 to contain '10: produced an inconclusive result', got: {new_value}"
assert "[Viewing lines 6 to 10 (out of " in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..."
assert "6: " in new_value, f"Expected line 6 to be present, got: {new_value[:200]}..."
assert "10: " in new_value, f"Expected line 10 to be present, got: {new_value}"
print(f"Comparing content ranges:")
print(f" First range (offset=1, length=5): '{old_value}'")
@@ -663,7 +659,7 @@ def test_create_agent_with_source_ids_creates_source_blocks_correctly(disable_pi
# Check that source blocks were created correctly
blocks = temp_agent_state.memory.file_blocks
assert len(blocks) == 1
assert any(b.value.startswith("[Viewing file start (out of 554 chunks)]") for b in blocks)
assert any(b.value.startswith("[Viewing file start (out of ") for b in blocks)
# Verify file tools were automatically attached
file_tools = {tool.name for tool in temp_agent_state.tools if tool.tool_type == ToolType.LETTA_FILES_CORE}
@@ -1000,6 +996,10 @@ def test_agent_open_file(disable_pinecone, client: LettaSDKClient, agent_state:
closed_files = client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata.id)
assert len(closed_files) == 0
system = get_raw_system_message(client, agent_state.id)
assert '<file status="open" name="test_source/test.txt">' in system
assert "[Viewing file start (out of 1 lines)]" in system
def test_agent_close_file(disable_pinecone, client: LettaSDKClient, agent_state: AgentState):
"""Test client.agents.close_file() function"""
@@ -1019,9 +1019,8 @@ def test_agent_close_file(disable_pinecone, client: LettaSDKClient, agent_state:
# Test close_file function
client.agents.files.close(agent_id=agent_state.id, file_id=file_metadata.id)
# Result can be None or any type based on the signature
# Just verify the function executes without error
assert True, "close_file should execute without error"
system = get_raw_system_message(client, agent_state.id)
assert '<file status="closed" name="test_source/test.txt">' in system
def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, agent_state: AgentState):
@@ -1041,6 +1040,9 @@ def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, ag
# Open each file
client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata.id)
system = get_raw_system_message(client, agent_state.id)
assert '<file status="open"' in system
# Test close_all_open_files function
result = client.agents.files.close_all(agent_id=agent_state.id)
@@ -1048,6 +1050,9 @@ def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, ag
assert isinstance(result, list), f"Expected list, got {type(result)}"
assert all(isinstance(item, str) for item in result), "All items in result should be strings"
system = get_raw_system_message(client, agent_state.id)
assert '<file status="open"' not in system
def test_file_processing_timeout(disable_pinecone, client: LettaSDKClient):
"""Test that files in non-terminal states are moved to error after timeout"""

View File

@@ -445,19 +445,24 @@ def test_line_chunker_out_of_range_start():
file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3")
chunker = LineChunker()
# Test with start beyond file length (3 lines, requesting start=5 which is 0-indexed 4)
# Test with start beyond file length - should raise ValueError
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 6 is out of range"):
chunker.chunk_text(file, start=5, end=6, validate_range=True)
def test_line_chunker_out_of_range_end():
"""Test that LineChunker throws error when end extends beyond file bounds"""
"""Test that LineChunker clamps end when it extends beyond file bounds"""
file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3")
chunker = LineChunker()
# Test with end beyond file length (3 lines, requesting 1 to 10)
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested range 1 to 10 extends beyond file bounds"):
chunker.chunk_text(file, start=0, end=10, validate_range=True)
# Should clamp end to file length and return lines 1-3
result = chunker.chunk_text(file, start=0, end=10, validate_range=True)
assert len(result) == 4 # metadata header + 3 lines
assert "[Viewing lines 1 to 3 (out of 3 lines)]" in result[0]
assert "1: line1" in result[1]
assert "2: line2" in result[2]
assert "3: line3" in result[3]
def test_line_chunker_edge_case_empty_file():
@@ -478,21 +483,24 @@ def test_line_chunker_edge_case_single_line():
result = chunker.chunk_text(file, start=0, end=1, validate_range=True)
assert "1: only line" in result[1]
# Test out of range for single line file
# Test out of range for single line file - should raise error
with pytest.raises(ValueError, match="File single.py has only 1 lines, but requested offset 2 is out of range"):
chunker.chunk_text(file, start=1, end=2, validate_range=True)
def test_line_chunker_validation_disabled_allows_out_of_range():
"""Test that when validation is disabled, out of range silently returns partial results"""
"""Test that out-of-bounds start always raises error, but invalid ranges (start>=end) are allowed when validation is off"""
file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3")
chunker = LineChunker()
# Test with validation disabled - should not raise error
result = chunker.chunk_text(file, start=5, end=10, validate_range=False)
# Should return empty content (except metadata header) since slice is out of bounds
# Test 1: Out of bounds start should always raise error, even with validation disabled
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 6 is out of range"):
chunker.chunk_text(file, start=5, end=10, validate_range=False)
# Test 2: With validation disabled, start >= end should be allowed (but gives empty result)
result = chunker.chunk_text(file, start=2, end=2, validate_range=False)
assert len(result) == 1 # Only metadata header
assert "[Viewing lines 6 to 10 (out of 3 lines)]" in result[0]
assert "[Viewing lines 3 to 2 (out of 3 lines)]" in result[0]
def test_line_chunker_only_start_parameter():
@@ -506,7 +514,7 @@ def test_line_chunker_only_start_parameter():
assert "2: line2" in result[1]
assert "3: line3" in result[2]
# Test invalid start only
# Test start at end of file - should raise error
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 4 is out of range"):
chunker.chunk_text(file, start=3, validate_range=True)