feat: Polishing open files tools (#3575)
Co-authored-by: Charles Packer <packercharles@gmail.com> Co-authored-by: Shubham Naik <shub@letta.com> Co-authored-by: Shubham Naik <shub@memgpt.ai> Co-authored-by: cthomas <caren@letta.com> Co-authored-by: jnjpng <jin@letta.com> Co-authored-by: Jin Peng <jinjpeng@Jins-MacBook-Pro.local> Co-authored-by: Cameron Pfiffer <cameron@pfiffer.org> Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com> Co-authored-by: Kian Jones <kian@Kians-MacBook-Pro.local>
This commit is contained in:
@@ -0,0 +1,33 @@
|
||||
"""Add start end for agent file
|
||||
|
||||
Revision ID: 4537f0996495
|
||||
Revises: 06fbbf65d4f1
|
||||
Create Date: 2025-07-25 17:44:26.748765
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "4537f0996495"
|
||||
down_revision: Union[str, None] = "06fbbf65d4f1"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add the nullable integer ``start_line`` and ``end_line`` columns to ``files_agents``."""
    # Both columns share one definition, so they are added in order from a single loop.
    for column_name in ("start_line", "end_line"):
        op.add_column("files_agents", sa.Column(column_name, sa.Integer(), nullable=True))
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``end_line`` and ``start_line`` columns from ``files_agents``."""
    # Columns are dropped in the reverse of the order in which upgrade() adds them.
    for column_name in ("end_line", "start_line"):
        op.drop_column("files_agents", column_name)
|
||||
@@ -103,6 +103,5 @@ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin, AsyncAttrs):
|
||||
chunks_embedded=self.chunks_embedded,
|
||||
created_at=self.created_at,
|
||||
updated_at=self.updated_at,
|
||||
is_deleted=self.is_deleted,
|
||||
content=content_text,
|
||||
)
|
||||
|
||||
@@ -2,14 +2,14 @@ import uuid
|
||||
from datetime import datetime
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, Index, String, Text, UniqueConstraint, func
|
||||
from sqlalchemy import Boolean, DateTime, ForeignKey, Index, Integer, String, Text, UniqueConstraint, func
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from letta.constants import FILE_IS_TRUNCATED_WARNING
|
||||
from letta.orm.mixins import OrganizationMixin
|
||||
from letta.orm.sqlalchemy_base import SqlalchemyBase
|
||||
from letta.schemas.block import FileBlock as PydanticFileBlock
|
||||
from letta.schemas.file import FileAgent as PydanticFileAgent
|
||||
from letta.utils import truncate_file_visible_content
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
@@ -77,6 +77,12 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
|
||||
nullable=False,
|
||||
doc="UTC timestamp when this agent last accessed the file.",
|
||||
)
|
||||
start_line: Mapped[Optional[int]] = mapped_column(
|
||||
Integer, nullable=True, doc="Starting line number (1-indexed) when file was opened with line range."
|
||||
)
|
||||
end_line: Mapped[Optional[int]] = mapped_column(
|
||||
Integer, nullable=True, doc="Ending line number (exclusive) when file was opened with line range."
|
||||
)
|
||||
|
||||
# relationships
|
||||
agent: Mapped["Agent"] = relationship(
|
||||
@@ -87,13 +93,7 @@ class FileAgent(SqlalchemyBase, OrganizationMixin):
|
||||
|
||||
# TODO: This is temporary as we figure out if we want FileBlock as a first class citizen
|
||||
def to_pydantic_block(self, per_file_view_window_char_limit: int) -> PydanticFileBlock:
|
||||
visible_content = self.visible_content if self.visible_content and self.is_open else ""
|
||||
|
||||
# Truncate content and add warnings here when converting from FileAgent to Block
|
||||
if len(visible_content) > per_file_view_window_char_limit:
|
||||
truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
|
||||
visible_content = visible_content[: per_file_view_window_char_limit - len(truncated_warning)]
|
||||
visible_content += truncated_warning
|
||||
visible_content = truncate_file_visible_content(self.visible_content, self.is_open, per_file_view_window_char_limit)
|
||||
|
||||
return PydanticFileBlock(
|
||||
value=visible_content,
|
||||
|
||||
@@ -56,7 +56,6 @@ class FileMetadata(FileMetadataBase):
|
||||
# orm metadata, optional fields
|
||||
created_at: Optional[datetime] = Field(default_factory=datetime.utcnow, description="The creation date of the file.")
|
||||
updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow, description="The update date of the file.")
|
||||
is_deleted: bool = Field(False, description="Whether this file is deleted or not.")
|
||||
|
||||
|
||||
class FileAgentBase(LettaBase):
|
||||
@@ -76,8 +75,10 @@ class FileAgentBase(LettaBase):
|
||||
)
|
||||
last_accessed_at: Optional[datetime] = Field(
|
||||
default_factory=datetime.utcnow,
|
||||
description="UTC timestamp of the agent’s most recent access to this file.",
|
||||
description="UTC timestamp of the agent's most recent access to this file.",
|
||||
)
|
||||
start_line: Optional[int] = Field(None, description="Starting line number (1-indexed) when file was opened with line range.")
|
||||
end_line: Optional[int] = Field(None, description="Ending line number (exclusive) when file was opened with line range.")
|
||||
|
||||
|
||||
class FileAgent(FileAgentBase):
|
||||
@@ -107,4 +108,3 @@ class FileAgent(FileAgentBase):
|
||||
default_factory=datetime.utcnow,
|
||||
description="Row last-update timestamp (UTC).",
|
||||
)
|
||||
is_deleted: bool = Field(False, description="Soft-delete flag.")
|
||||
|
||||
@@ -41,7 +41,7 @@ from letta.server.server import SyncServer
|
||||
from letta.services.summarizer.enums import SummarizationMode
|
||||
from letta.services.telemetry_manager import NoopTelemetryManager
|
||||
from letta.settings import settings
|
||||
from letta.utils import safe_create_task
|
||||
from letta.utils import safe_create_task, truncate_file_visible_content
|
||||
|
||||
# These can be forward refs, but because FastAPI needs them at runtime they must be imported normally
|
||||
|
||||
@@ -478,14 +478,23 @@ async def open_file(
|
||||
if not file_metadata:
|
||||
raise HTTPException(status_code=404, detail=f"File with id={file_id} not found")
|
||||
|
||||
# Process file content with line numbers using LineChunker
|
||||
from letta.services.file_processor.chunker.line_chunker import LineChunker
|
||||
|
||||
content_lines = LineChunker().chunk_text(file_metadata=file_metadata, validate_range=False)
|
||||
visible_content = "\n".join(content_lines)
|
||||
|
||||
# Truncate if needed
|
||||
visible_content = truncate_file_visible_content(visible_content, True, per_file_view_window_char_limit)
|
||||
|
||||
# Use enforce_max_open_files_and_open for efficient LRU handling
|
||||
closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open(
|
||||
closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open(
|
||||
agent_id=agent_id,
|
||||
file_id=file_id,
|
||||
file_name=file_metadata.file_name,
|
||||
source_id=file_metadata.source_id,
|
||||
actor=actor,
|
||||
visible_content=file_metadata.content[:per_file_view_window_char_limit] if file_metadata.content else "",
|
||||
visible_content=visible_content,
|
||||
max_files_open=max_files_open,
|
||||
)
|
||||
|
||||
|
||||
@@ -130,37 +130,38 @@ class LineChunker:
|
||||
# Apply the appropriate chunking strategy
|
||||
if strategy == ChunkingStrategy.DOCUMENTATION:
|
||||
content_lines = self._chunk_by_sentences(text)
|
||||
elif strategy == ChunkingStrategy.PROSE:
|
||||
content_lines = self._chunk_by_characters(text)
|
||||
elif strategy == ChunkingStrategy.CODE:
|
||||
content_lines = self._chunk_by_lines(text, preserve_indentation=True)
|
||||
else: # STRUCTURED_DATA or LINE_BASED
|
||||
content_lines = self._chunk_by_lines(text, preserve_indentation=False)
|
||||
|
||||
total_chunks = len(content_lines)
|
||||
chunk_type = (
|
||||
"sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
|
||||
)
|
||||
chunk_type = "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "lines"
|
||||
|
||||
# Validate range if requested
|
||||
if validate_range and (start is not None or end is not None):
|
||||
if start is not None and start >= total_chunks:
|
||||
# Convert to 1-indexed for user-friendly error message
|
||||
start_display = start + 1
|
||||
raise ValueError(
|
||||
f"File {file_metadata.file_name} has only {total_chunks} lines, but requested offset {start_display} is out of range"
|
||||
)
|
||||
|
||||
if start is not None and end is not None and end > total_chunks:
|
||||
# Convert to 1-indexed for user-friendly error message
|
||||
start_display = start + 1
|
||||
end_display = end
|
||||
raise ValueError(
|
||||
f"File {file_metadata.file_name} has only {total_chunks} lines, but requested range {start_display} to {end_display} extends beyond file bounds"
|
||||
)
|
||||
|
||||
# Handle start/end slicing
|
||||
# Handle range validation and clamping
|
||||
if start is not None or end is not None:
|
||||
# Always validate that start < end if both are specified
|
||||
if start is not None and end is not None and start >= end:
|
||||
if validate_range:
|
||||
raise ValueError(f"Invalid range: start ({start}) must be less than end ({end})")
|
||||
# If validation is off, we still need to handle this case sensibly
|
||||
# but we'll allow it to proceed with an empty result
|
||||
|
||||
# Always check that start is within bounds - this should error regardless of validation flag
|
||||
if start is not None and start >= total_chunks:
|
||||
raise ValueError(
|
||||
f"File {file_metadata.file_name} has only {total_chunks} {chunk_type}, but requested offset {start + 1} is out of range"
|
||||
)
|
||||
|
||||
# Apply bounds checking
|
||||
if start is not None:
|
||||
start = max(0, start) # Ensure non-negative
|
||||
|
||||
# Only clamp end if it exceeds the file length
|
||||
if end is not None:
|
||||
end = min(end, total_chunks)
|
||||
|
||||
# Apply slicing
|
||||
content_lines = content_lines[start:end]
|
||||
line_offset = start if start is not None else 0
|
||||
else:
|
||||
|
||||
@@ -12,7 +12,6 @@ from letta.schemas.passage import Passage
|
||||
from letta.schemas.user import User
|
||||
from letta.services.agent_manager import AgentManager
|
||||
from letta.services.file_manager import FileManager
|
||||
from letta.services.file_processor.chunker.line_chunker import LineChunker
|
||||
from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
|
||||
from letta.services.file_processor.embedder.base_embedder import BaseEmbedder
|
||||
from letta.services.file_processor.parser.base_parser import FileParser
|
||||
@@ -35,7 +34,6 @@ class FileProcessor:
|
||||
max_file_size: int = 50 * 1024 * 1024, # 50MB default
|
||||
):
|
||||
self.file_parser = file_parser
|
||||
self.line_chunker = LineChunker()
|
||||
self.embedder = embedder
|
||||
self.max_file_size = max_file_size
|
||||
self.file_manager = FileManager()
|
||||
|
||||
@@ -17,7 +17,6 @@ class ChunkingStrategy(str, Enum):
|
||||
CODE = "code" # Line-based chunking for code files
|
||||
STRUCTURED_DATA = "structured_data" # Line-based chunking for JSON, XML, etc.
|
||||
DOCUMENTATION = "documentation" # Paragraph-aware chunking for Markdown, HTML
|
||||
PROSE = "prose" # Character-based wrapping for plain text
|
||||
LINE_BASED = "line_based" # Default line-based chunking
|
||||
|
||||
|
||||
@@ -44,7 +43,7 @@ class FileTypeRegistry:
|
||||
"""Register all default supported file types."""
|
||||
# Document formats
|
||||
self.register(".pdf", "application/pdf", False, "PDF document", ChunkingStrategy.LINE_BASED)
|
||||
self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.PROSE)
|
||||
self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.LINE_BASED)
|
||||
self.register(".md", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
|
||||
self.register(".markdown", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
|
||||
self.register(".json", "application/json", True, "JSON data file", ChunkingStrategy.STRUCTURED_DATA)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from datetime import datetime, timezone
|
||||
from typing import List, Optional, Union
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from sqlalchemy import and_, delete, func, or_, select, update
|
||||
|
||||
@@ -34,6 +34,8 @@ class FileAgentManager:
|
||||
max_files_open: int,
|
||||
is_open: bool = True,
|
||||
visible_content: Optional[str] = None,
|
||||
start_line: Optional[int] = None,
|
||||
end_line: Optional[int] = None,
|
||||
) -> tuple[PydanticFileAgent, List[str]]:
|
||||
"""
|
||||
Idempotently attach *file_id* to *agent_id* with LRU enforcement.
|
||||
@@ -48,7 +50,7 @@ class FileAgentManager:
|
||||
"""
|
||||
if is_open:
|
||||
# Use the efficient LRU + open method
|
||||
closed_files, was_already_open = await self.enforce_max_open_files_and_open(
|
||||
closed_files, was_already_open, _ = await self.enforce_max_open_files_and_open(
|
||||
agent_id=agent_id,
|
||||
file_id=file_id,
|
||||
file_name=file_name,
|
||||
@@ -56,6 +58,8 @@ class FileAgentManager:
|
||||
actor=actor,
|
||||
visible_content=visible_content or "",
|
||||
max_files_open=max_files_open,
|
||||
start_line=start_line,
|
||||
end_line=end_line,
|
||||
)
|
||||
|
||||
# Get the updated file agent to return
|
||||
@@ -85,6 +89,8 @@ class FileAgentManager:
|
||||
existing.visible_content = visible_content
|
||||
|
||||
existing.last_accessed_at = now_ts
|
||||
existing.start_line = start_line
|
||||
existing.end_line = end_line
|
||||
|
||||
await existing.update_async(session, actor=actor)
|
||||
return existing.to_pydantic(), []
|
||||
@@ -98,6 +104,8 @@ class FileAgentManager:
|
||||
is_open=is_open,
|
||||
visible_content=visible_content,
|
||||
last_accessed_at=now_ts,
|
||||
start_line=start_line,
|
||||
end_line=end_line,
|
||||
)
|
||||
await assoc.create_async(session, actor=actor)
|
||||
return assoc.to_pydantic(), []
|
||||
@@ -112,6 +120,8 @@ class FileAgentManager:
|
||||
actor: PydanticUser,
|
||||
is_open: Optional[bool] = None,
|
||||
visible_content: Optional[str] = None,
|
||||
start_line: Optional[int] = None,
|
||||
end_line: Optional[int] = None,
|
||||
) -> PydanticFileAgent:
|
||||
"""Patch an existing association row."""
|
||||
async with db_registry.async_session() as session:
|
||||
@@ -121,6 +131,10 @@ class FileAgentManager:
|
||||
assoc.is_open = is_open
|
||||
if visible_content is not None:
|
||||
assoc.visible_content = visible_content
|
||||
if start_line is not None:
|
||||
assoc.start_line = start_line
|
||||
if end_line is not None:
|
||||
assoc.end_line = end_line
|
||||
|
||||
# touch timestamp
|
||||
assoc.last_accessed_at = datetime.now(timezone.utc)
|
||||
@@ -373,8 +387,18 @@ class FileAgentManager:
|
||||
@enforce_types
|
||||
@trace_method
|
||||
async def enforce_max_open_files_and_open(
|
||||
self, *, agent_id: str, file_id: str, file_name: str, source_id: str, actor: PydanticUser, visible_content: str, max_files_open: int
|
||||
) -> tuple[List[str], bool]:
|
||||
self,
|
||||
*,
|
||||
agent_id: str,
|
||||
file_id: str,
|
||||
file_name: str,
|
||||
source_id: str,
|
||||
actor: PydanticUser,
|
||||
visible_content: str,
|
||||
max_files_open: int,
|
||||
start_line: Optional[int] = None,
|
||||
end_line: Optional[int] = None,
|
||||
) -> tuple[List[str], bool, Dict[str, tuple[Optional[int], Optional[int]]]]:
|
||||
"""
|
||||
Efficiently handle LRU eviction and file opening in a single transaction.
|
||||
|
||||
@@ -387,7 +411,8 @@ class FileAgentManager:
|
||||
visible_content: Content to set for the opened file
|
||||
|
||||
Returns:
|
||||
Tuple of (closed_file_names, file_was_already_open)
|
||||
Tuple of (closed_file_names, file_was_already_open, previous_ranges)
|
||||
where previous_ranges maps file names to their old (start_line, end_line) ranges
|
||||
"""
|
||||
async with db_registry.async_session() as session:
|
||||
# Single query to get ALL open files for this agent, ordered by last_accessed_at (oldest first)
|
||||
@@ -423,6 +448,17 @@ class FileAgentManager:
|
||||
|
||||
file_was_already_open = file_to_open is not None and file_to_open.is_open
|
||||
|
||||
# Capture previous line range if file was already open and we're changing the range
|
||||
previous_ranges = {}
|
||||
if file_was_already_open and file_to_open:
|
||||
old_start = file_to_open.start_line
|
||||
old_end = file_to_open.end_line
|
||||
# Only record if there was a previous range or if we're setting a new range
|
||||
if old_start is not None or old_end is not None or start_line is not None or end_line is not None:
|
||||
# Only record if the range is actually changing
|
||||
if old_start != start_line or old_end != end_line:
|
||||
previous_ranges[file_name] = (old_start, old_end)
|
||||
|
||||
# Calculate how many files need to be closed
|
||||
current_other_count = len(other_open_files)
|
||||
target_other_count = max_files_open - 1 # Reserve 1 slot for file we're opening
|
||||
@@ -458,6 +494,8 @@ class FileAgentManager:
|
||||
file_to_open.is_open = True
|
||||
file_to_open.visible_content = visible_content
|
||||
file_to_open.last_accessed_at = now_ts
|
||||
file_to_open.start_line = start_line
|
||||
file_to_open.end_line = end_line
|
||||
await file_to_open.update_async(session, actor=actor)
|
||||
else:
|
||||
# Create new file association
|
||||
@@ -470,10 +508,12 @@ class FileAgentManager:
|
||||
is_open=True,
|
||||
visible_content=visible_content,
|
||||
last_accessed_at=now_ts,
|
||||
start_line=start_line,
|
||||
end_line=end_line,
|
||||
)
|
||||
await new_file_agent.create_async(session, actor=actor)
|
||||
|
||||
return closed_file_names, file_was_already_open
|
||||
return closed_file_names, file_was_already_open, previous_ranges
|
||||
|
||||
@enforce_types
|
||||
@trace_method
|
||||
|
||||
@@ -142,6 +142,7 @@ class LettaFileToolExecutor(ToolExecutor):
|
||||
# Process each file
|
||||
opened_files = []
|
||||
all_closed_files = []
|
||||
all_previous_ranges = {} # Collect all previous ranges from all files
|
||||
|
||||
for file_request in file_requests:
|
||||
file_name = file_request.file_name
|
||||
@@ -181,7 +182,7 @@ class LettaFileToolExecutor(ToolExecutor):
|
||||
visible_content = "\n".join(content_lines)
|
||||
|
||||
# Handle LRU eviction and file opening
|
||||
closed_files, was_already_open = await self.files_agents_manager.enforce_max_open_files_and_open(
|
||||
closed_files, was_already_open, previous_ranges = await self.files_agents_manager.enforce_max_open_files_and_open(
|
||||
agent_id=agent_state.id,
|
||||
file_id=file_id,
|
||||
file_name=file_name,
|
||||
@@ -189,42 +190,45 @@ class LettaFileToolExecutor(ToolExecutor):
|
||||
actor=self.actor,
|
||||
visible_content=visible_content,
|
||||
max_files_open=agent_state.max_files_open,
|
||||
start_line=start + 1 if start is not None else None, # convert to 1-indexed for user display
|
||||
end_line=end if end is not None else None, # end is already exclusive in slicing, so this is correct
|
||||
)
|
||||
|
||||
opened_files.append(file_name)
|
||||
all_closed_files.extend(closed_files)
|
||||
all_previous_ranges.update(previous_ranges) # Merge previous ranges from this file
|
||||
|
||||
# Update access timestamps for all opened files efficiently
|
||||
await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=file_names, actor=self.actor)
|
||||
|
||||
# Build success message
|
||||
if len(file_requests) == 1:
|
||||
# Single file - maintain existing format
|
||||
file_request = file_requests[0]
|
||||
file_name = file_request.file_name
|
||||
offset = file_request.offset
|
||||
length = file_request.length
|
||||
if offset is not None and length is not None:
|
||||
end_line = offset + length - 1
|
||||
success_msg = (
|
||||
f"Successfully opened file {file_name}, lines {offset} to {end_line} are now visible in memory block <{file_name}>"
|
||||
)
|
||||
elif offset is not None:
|
||||
success_msg = f"Successfully opened file {file_name}, lines {offset} to end are now visible in memory block <{file_name}>"
|
||||
else:
|
||||
success_msg = f"Successfully opened file {file_name}, entire file is now visible in memory block <{file_name}>"
|
||||
else:
|
||||
# Multiple files - show individual ranges if specified
|
||||
file_summaries = []
|
||||
for req in file_requests:
|
||||
if req.offset is not None and req.length is not None:
|
||||
end_line = req.offset + req.length - 1
|
||||
file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line})")
|
||||
elif req.offset is not None:
|
||||
file_summaries.append(f"{req.file_name} (lines {req.offset}-end)")
|
||||
# Helper function to format previous range info
|
||||
def format_previous_range(file_name: str) -> str:
|
||||
if file_name in all_previous_ranges:
|
||||
old_start, old_end = all_previous_ranges[file_name]
|
||||
if old_start is not None and old_end is not None:
|
||||
return f" (previously lines {old_start}-{old_end})"
|
||||
elif old_start is not None:
|
||||
return f" (previously lines {old_start}-end)"
|
||||
else:
|
||||
file_summaries.append(req.file_name)
|
||||
success_msg = f"Successfully opened {len(file_requests)} files: {', '.join(file_summaries)}"
|
||||
return " (previously full file)"
|
||||
return ""
|
||||
|
||||
# Build unified success message - treat single and multiple files consistently
|
||||
file_summaries = []
|
||||
for req in file_requests:
|
||||
previous_info = format_previous_range(req.file_name)
|
||||
if req.offset is not None and req.length is not None:
|
||||
end_line = req.offset + req.length - 1
|
||||
file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line}){previous_info}")
|
||||
elif req.offset is not None:
|
||||
file_summaries.append(f"{req.file_name} (lines {req.offset}-end){previous_info}")
|
||||
else:
|
||||
file_summaries.append(f"{req.file_name}{previous_info}")
|
||||
|
||||
if len(file_requests) == 1:
|
||||
success_msg = f"* Opened {file_summaries[0]}"
|
||||
else:
|
||||
success_msg = f"* Opened {len(file_requests)} files: {', '.join(file_summaries)}"
|
||||
|
||||
# Add information about closed files
|
||||
if closed_by_close_all_others:
|
||||
|
||||
@@ -32,6 +32,7 @@ from letta.constants import (
|
||||
DEFAULT_CORE_MEMORY_SOURCE_CHAR_LIMIT,
|
||||
DEFAULT_MAX_FILES_OPEN,
|
||||
ERROR_MESSAGE_PREFIX,
|
||||
FILE_IS_TRUNCATED_WARNING,
|
||||
LETTA_DIR,
|
||||
MAX_FILENAME_LENGTH,
|
||||
TOOL_CALL_ID_MAX_LEN,
|
||||
@@ -1223,3 +1224,15 @@ def calculate_file_defaults_based_on_context_window(context_window: Optional[int
|
||||
return 10, 40_000 # ~100k tokens
|
||||
else: # Extremely large models (200K+)
|
||||
return 15, 40_000 # ~150k tokens
|
||||
|
||||
|
||||
def truncate_file_visible_content(visible_content: str, is_open: bool, per_file_view_window_char_limit: int) -> str:
    """Return the text to display for a file, cut down to the per-file view window.

    Args:
        visible_content: Text currently stored for the file. Any falsy value
            (for example an empty string) is treated as no content.
        is_open: Whether the file is open. A closed file yields an empty string.
        per_file_view_window_char_limit: Maximum number of characters to display.

    Returns:
        The content unchanged when it fits within the limit. Otherwise a prefix
        of the content followed by a "...[TRUNCATED]" marker and
        FILE_IS_TRUNCATED_WARNING, sized so the result fits the limit whenever
        the limit is at least as long as the warning itself.
    """
    visible_content = visible_content if visible_content and is_open else ""

    # Truncate content and append the warning when it exceeds the view window
    if len(visible_content) > per_file_view_window_char_limit:
        truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}"
        # Clamp at zero: a negative slice bound would trim characters from the END of the
        # content (keeping almost all of it) instead of keeping a short prefix.
        keep_chars = max(0, per_file_view_window_char_limit - len(truncated_warning))
        visible_content = visible_content[:keep_chars] + truncated_warning

    return visible_content
|
||||
|
||||
@@ -8733,6 +8733,71 @@ async def test_update_file_agent_by_file_name(server, file_attachment, default_u
|
||||
)
|
||||
assert updated.is_open is False
|
||||
assert updated.visible_content == "updated"
|
||||
assert updated.start_line is None # start_line should default to None
|
||||
assert updated.end_line is None # end_line should default to None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_file_agent_line_tracking(server, default_user, sarah_agent, default_source):
    """Check that a line range is stored on open and that re-opening reports the prior range."""
    from letta.schemas.file import FileMetadata as PydanticFileMetadata

    manager = server.file_agent_manager

    # Register a five-line file under the default source.
    test_content = "line 1\nline 2\nline 3\nline 4\nline 5"
    file_metadata = PydanticFileMetadata(
        file_name="test_lines.txt",
        organization_id=default_user.organization_id,
        source_id=default_source.id,
    )
    file = await server.file_manager.create_file(file_metadata=file_metadata, actor=default_user, text=test_content)

    # Keyword arguments shared by both open calls and by both lookups.
    open_kwargs = {
        "agent_id": sarah_agent.id,
        "file_id": file.id,
        "file_name": file.file_name,
        "source_id": file.source_id,
        "actor": default_user,
        "max_files_open": sarah_agent.max_files_open,
    }
    lookup_kwargs = {"agent_id": sarah_agent.id, "file_id": file.id, "actor": default_user}

    # First open: explicit range (start is 1-indexed, end is exclusive).
    _, _, previous_ranges = await manager.enforce_max_open_files_and_open(
        visible_content="2: line 2\n3: line 3",
        start_line=2,
        end_line=4,
        **open_kwargs,
    )

    retrieved = await manager.get_file_agent_by_id(**lookup_kwargs)
    assert retrieved.start_line == 2
    assert retrieved.end_line == 4
    assert previous_ranges == {}  # No previous range since it wasn't open before

    # Second open: no range, which should clear the stored lines and report the old range.
    _, _, previous_ranges = await manager.enforce_max_open_files_and_open(
        visible_content="full file content",
        start_line=None,
        end_line=None,
        **open_kwargs,
    )

    retrieved = await manager.get_file_agent_by_id(**lookup_kwargs)
    assert retrieved.start_line is None
    assert retrieved.end_line is None
    assert previous_ranges == {file.file_name: (2, 4)}  # Should capture the previous range
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -9131,7 +9196,7 @@ async def test_lru_eviction_on_open_file(server, default_user, sarah_agent, defa
|
||||
time.sleep(0.1)
|
||||
|
||||
# Now "open" the last file using the efficient method
|
||||
closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open(
|
||||
closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open(
|
||||
agent_id=sarah_agent.id,
|
||||
file_id=files[-1].id,
|
||||
file_name=files[-1].file_name,
|
||||
@@ -9205,7 +9270,7 @@ async def test_lru_no_eviction_when_reopening_same_file(server, default_user, sa
|
||||
time.sleep(0.1)
|
||||
|
||||
# "Reopen" the last file (which is already open)
|
||||
closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open(
|
||||
closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open(
|
||||
agent_id=sarah_agent.id,
|
||||
file_id=files[-1].id,
|
||||
file_name=files[-1].file_name,
|
||||
|
||||
@@ -287,11 +287,11 @@ def test_attach_existing_files_creates_source_blocks_correctly(disable_pinecone,
|
||||
<file status="open" name="test_source/test.txt">
|
||||
<metadata>
|
||||
- read_only=true
|
||||
- chars_current=46
|
||||
- chars_current=45
|
||||
- chars_limit=15000
|
||||
</metadata>
|
||||
<value>
|
||||
[Viewing file start (out of 1 chunks)]
|
||||
[Viewing file start (out of 1 lines)]
|
||||
1: test
|
||||
</value>
|
||||
</file>
|
||||
@@ -348,11 +348,11 @@ def test_delete_source_removes_source_blocks_correctly(disable_pinecone, client:
|
||||
<file status="open" name="test_source/test.txt">
|
||||
<metadata>
|
||||
- read_only=true
|
||||
- chars_current=46
|
||||
- chars_current=45
|
||||
- chars_limit=15000
|
||||
</metadata>
|
||||
<value>
|
||||
[Viewing file start (out of 1 chunks)]
|
||||
[Viewing file start (out of 1 lines)]
|
||||
1: test
|
||||
</value>
|
||||
</file>
|
||||
@@ -437,9 +437,9 @@ def test_agent_uses_open_close_file_correctly(disable_pinecone, client: LettaSDK
|
||||
assert old_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {old_content_length}"
|
||||
|
||||
# Assert specific content expectations for first range (lines 1-5)
|
||||
assert "[Viewing chunks 1 to 5 (out of 554 chunks)]" in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..."
|
||||
assert "[Viewing lines 1 to 5 (out of " in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..."
|
||||
assert "1: Enrico Letta" in old_value, f"Expected line 1 to start with '1: Enrico Letta', got: {old_value[:200]}..."
|
||||
assert "5: appointed to the Cabinet" in old_value, f"Expected line 5 to contain '5: appointed to the Cabinet', got: {old_value}"
|
||||
assert "5: " in old_value, f"Expected line 5 to be present, got: {old_value}"
|
||||
|
||||
# Ask agent to open the file for a different range
|
||||
offset, length = 6, 5 # Different offset, same length
|
||||
@@ -466,13 +466,9 @@ def test_agent_uses_open_close_file_correctly(disable_pinecone, client: LettaSDK
|
||||
assert new_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {new_content_length}"
|
||||
|
||||
# Assert specific content expectations for second range (lines 6-10)
|
||||
assert "[Viewing chunks 6 to 10 (out of 554 chunks)]" in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..."
|
||||
assert (
|
||||
"6: was promoted to become Minister" in new_value
|
||||
), f"Expected line 6 to start with '6: was promoted to become Minister', got: {new_value[:200]}..."
|
||||
assert (
|
||||
"10: produced an inconclusive result" in new_value
|
||||
), f"Expected line 10 to contain '10: produced an inconclusive result', got: {new_value}"
|
||||
assert "[Viewing lines 6 to 10 (out of " in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..."
|
||||
assert "6: " in new_value, f"Expected line 6 to be present, got: {new_value[:200]}..."
|
||||
assert "10: " in new_value, f"Expected line 10 to be present, got: {new_value}"
|
||||
|
||||
print(f"Comparing content ranges:")
|
||||
print(f" First range (offset=1, length=5): '{old_value}'")
|
||||
@@ -663,7 +659,7 @@ def test_create_agent_with_source_ids_creates_source_blocks_correctly(disable_pi
|
||||
# Check that source blocks were created correctly
|
||||
blocks = temp_agent_state.memory.file_blocks
|
||||
assert len(blocks) == 1
|
||||
assert any(b.value.startswith("[Viewing file start (out of 554 chunks)]") for b in blocks)
|
||||
assert any(b.value.startswith("[Viewing file start (out of ") for b in blocks)
|
||||
|
||||
# Verify file tools were automatically attached
|
||||
file_tools = {tool.name for tool in temp_agent_state.tools if tool.tool_type == ToolType.LETTA_FILES_CORE}
|
||||
@@ -1000,6 +996,10 @@ def test_agent_open_file(disable_pinecone, client: LettaSDKClient, agent_state:
|
||||
closed_files = client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata.id)
|
||||
assert len(closed_files) == 0
|
||||
|
||||
system = get_raw_system_message(client, agent_state.id)
|
||||
assert '<file status="open" name="test_source/test.txt">' in system
|
||||
assert "[Viewing file start (out of 1 lines)]" in system
|
||||
|
||||
|
||||
def test_agent_close_file(disable_pinecone, client: LettaSDKClient, agent_state: AgentState):
|
||||
"""Test client.agents.close_file() function"""
|
||||
@@ -1019,9 +1019,8 @@ def test_agent_close_file(disable_pinecone, client: LettaSDKClient, agent_state:
|
||||
# Test close_file function
|
||||
client.agents.files.close(agent_id=agent_state.id, file_id=file_metadata.id)
|
||||
|
||||
# Result can be None or any type based on the signature
|
||||
# Just verify the function executes without error
|
||||
assert True, "close_file should execute without error"
|
||||
system = get_raw_system_message(client, agent_state.id)
|
||||
assert '<file status="closed" name="test_source/test.txt">' in system
|
||||
|
||||
|
||||
def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, agent_state: AgentState):
|
||||
@@ -1041,6 +1040,9 @@ def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, ag
|
||||
# Open each file
|
||||
client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata.id)
|
||||
|
||||
system = get_raw_system_message(client, agent_state.id)
|
||||
assert '<file status="open"' in system
|
||||
|
||||
# Test close_all_open_files function
|
||||
result = client.agents.files.close_all(agent_id=agent_state.id)
|
||||
|
||||
@@ -1048,6 +1050,9 @@ def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, ag
|
||||
assert isinstance(result, list), f"Expected list, got {type(result)}"
|
||||
assert all(isinstance(item, str) for item in result), "All items in result should be strings"
|
||||
|
||||
system = get_raw_system_message(client, agent_state.id)
|
||||
assert '<file status="open"' not in system
|
||||
|
||||
|
||||
def test_file_processing_timeout(disable_pinecone, client: LettaSDKClient):
|
||||
"""Test that files in non-terminal states are moved to error after timeout"""
|
||||
|
||||
@@ -445,19 +445,24 @@ def test_line_chunker_out_of_range_start():
|
||||
file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3")
|
||||
chunker = LineChunker()
|
||||
|
||||
# Test with start beyond file length (3 lines, requesting start=5 which is 0-indexed 4)
|
||||
# Test with start beyond file length - should raise ValueError
|
||||
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 6 is out of range"):
|
||||
chunker.chunk_text(file, start=5, end=6, validate_range=True)
|
||||
|
||||
|
||||
def test_line_chunker_out_of_range_end():
|
||||
"""Test that LineChunker throws error when end extends beyond file bounds"""
|
||||
"""Test that LineChunker clamps end when it extends beyond file bounds"""
|
||||
file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3")
|
||||
chunker = LineChunker()
|
||||
|
||||
# Test with end beyond file length (3 lines, requesting 1 to 10)
|
||||
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested range 1 to 10 extends beyond file bounds"):
|
||||
chunker.chunk_text(file, start=0, end=10, validate_range=True)
|
||||
# Should clamp end to file length and return lines 1-3
|
||||
result = chunker.chunk_text(file, start=0, end=10, validate_range=True)
|
||||
assert len(result) == 4 # metadata header + 3 lines
|
||||
assert "[Viewing lines 1 to 3 (out of 3 lines)]" in result[0]
|
||||
assert "1: line1" in result[1]
|
||||
assert "2: line2" in result[2]
|
||||
assert "3: line3" in result[3]
|
||||
|
||||
|
||||
def test_line_chunker_edge_case_empty_file():
|
||||
@@ -478,21 +483,24 @@ def test_line_chunker_edge_case_single_line():
|
||||
result = chunker.chunk_text(file, start=0, end=1, validate_range=True)
|
||||
assert "1: only line" in result[1]
|
||||
|
||||
# Test out of range for single line file
|
||||
# Test out of range for single line file - should raise error
|
||||
with pytest.raises(ValueError, match="File single.py has only 1 lines, but requested offset 2 is out of range"):
|
||||
chunker.chunk_text(file, start=1, end=2, validate_range=True)
|
||||
|
||||
|
||||
def test_line_chunker_validation_disabled_allows_out_of_range():
|
||||
"""Test that when validation is disabled, out of range silently returns partial results"""
|
||||
"""Test that out-of-bounds start always raises error, but invalid ranges (start>=end) are allowed when validation is off"""
|
||||
file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3")
|
||||
chunker = LineChunker()
|
||||
|
||||
# Test with validation disabled - should not raise error
|
||||
result = chunker.chunk_text(file, start=5, end=10, validate_range=False)
|
||||
# Should return empty content (except metadata header) since slice is out of bounds
|
||||
# Test 1: Out of bounds start should always raise error, even with validation disabled
|
||||
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 6 is out of range"):
|
||||
chunker.chunk_text(file, start=5, end=10, validate_range=False)
|
||||
|
||||
# Test 2: With validation disabled, start >= end should be allowed (but gives empty result)
|
||||
result = chunker.chunk_text(file, start=2, end=2, validate_range=False)
|
||||
assert len(result) == 1 # Only metadata header
|
||||
assert "[Viewing lines 6 to 10 (out of 3 lines)]" in result[0]
|
||||
assert "[Viewing lines 3 to 2 (out of 3 lines)]" in result[0]
|
||||
|
||||
|
||||
def test_line_chunker_only_start_parameter():
|
||||
@@ -506,7 +514,7 @@ def test_line_chunker_only_start_parameter():
|
||||
assert "2: line2" in result[1]
|
||||
assert "3: line3" in result[2]
|
||||
|
||||
# Test invalid start only
|
||||
# Test start at end of file - should raise error
|
||||
with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 4 is out of range"):
|
||||
chunker.chunk_text(file, start=3, validate_range=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user