diff --git a/alembic/versions/4537f0996495_add_start_end_for_agent_file.py b/alembic/versions/4537f0996495_add_start_end_for_agent_file.py new file mode 100644 index 00000000..488bb0dd --- /dev/null +++ b/alembic/versions/4537f0996495_add_start_end_for_agent_file.py @@ -0,0 +1,33 @@ +"""Add start end for agent file + +Revision ID: 4537f0996495 +Revises: 06fbbf65d4f1 +Create Date: 2025-07-25 17:44:26.748765 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "4537f0996495" +down_revision: Union[str, None] = "06fbbf65d4f1" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("files_agents", sa.Column("start_line", sa.Integer(), nullable=True)) + op.add_column("files_agents", sa.Column("end_line", sa.Integer(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("files_agents", "end_line") + op.drop_column("files_agents", "start_line") + # ### end Alembic commands ### diff --git a/letta/orm/file.py b/letta/orm/file.py index 8cae2448..3229675f 100644 --- a/letta/orm/file.py +++ b/letta/orm/file.py @@ -103,6 +103,5 @@ class FileMetadata(SqlalchemyBase, OrganizationMixin, SourceMixin, AsyncAttrs): chunks_embedded=self.chunks_embedded, created_at=self.created_at, updated_at=self.updated_at, - is_deleted=self.is_deleted, content=content_text, ) diff --git a/letta/orm/files_agents.py b/letta/orm/files_agents.py index 2a4cfde2..1c768711 100644 --- a/letta/orm/files_agents.py +++ b/letta/orm/files_agents.py @@ -2,14 +2,14 @@ import uuid from datetime import datetime from typing import TYPE_CHECKING, Optional -from sqlalchemy import Boolean, DateTime, ForeignKey, Index, String, Text, UniqueConstraint, func +from sqlalchemy import Boolean, DateTime, ForeignKey, Index, Integer, String, Text, UniqueConstraint, func from sqlalchemy.orm import Mapped, mapped_column, relationship -from letta.constants import FILE_IS_TRUNCATED_WARNING from letta.orm.mixins import OrganizationMixin from letta.orm.sqlalchemy_base import SqlalchemyBase from letta.schemas.block import FileBlock as PydanticFileBlock from letta.schemas.file import FileAgent as PydanticFileAgent +from letta.utils import truncate_file_visible_content if TYPE_CHECKING: pass @@ -77,6 +77,12 @@ class FileAgent(SqlalchemyBase, OrganizationMixin): nullable=False, doc="UTC timestamp when this agent last accessed the file.", ) + start_line: Mapped[Optional[int]] = mapped_column( + Integer, nullable=True, doc="Starting line number (1-indexed) when file was opened with line range." + ) + end_line: Mapped[Optional[int]] = mapped_column( + Integer, nullable=True, doc="Ending line number (exclusive) when file was opened with line range." + ) # relationships agent: Mapped["Agent"] = relationship( @@ -87,13 +93,7 @@ class FileAgent(SqlalchemyBase, OrganizationMixin): # TODO: This is temporary as we figure out if we want FileBlock as a first class citizen def to_pydantic_block(self, per_file_view_window_char_limit: int) -> PydanticFileBlock: - visible_content = self.visible_content if self.visible_content and self.is_open else "" - - # Truncate content and add warnings here when converting from FileAgent to Block - if len(visible_content) > per_file_view_window_char_limit: - truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}" - visible_content = visible_content[: per_file_view_window_char_limit - len(truncated_warning)] - visible_content += truncated_warning + visible_content = truncate_file_visible_content(self.visible_content, self.is_open, per_file_view_window_char_limit) return PydanticFileBlock( value=visible_content, diff --git a/letta/schemas/file.py b/letta/schemas/file.py index 11a3356d..cfd67f87 100644 --- a/letta/schemas/file.py +++ b/letta/schemas/file.py @@ -56,7 +56,6 @@ class FileMetadata(FileMetadataBase): # orm metadata, optional fields created_at: Optional[datetime] = Field(default_factory=datetime.utcnow, description="The creation date of the file.") updated_at: Optional[datetime] = Field(default_factory=datetime.utcnow, description="The update date of the file.") - is_deleted: bool = Field(False, description="Whether this file is deleted or not.") class FileAgentBase(LettaBase): @@ -76,8 +75,10 @@ class FileAgentBase(LettaBase): ) last_accessed_at: Optional[datetime] = Field( default_factory=datetime.utcnow, - description="UTC timestamp of the agent’s most recent access to this file.", + description="UTC timestamp of the agent's most recent access to this file.", ) + start_line: Optional[int] = Field(None, description="Starting line number (1-indexed) when file was opened with line range.") + end_line: Optional[int] = Field(None, description="Ending line number (exclusive) when file was opened with line range.") class FileAgent(FileAgentBase): @@ -107,4 +108,3 @@ class FileAgent(FileAgentBase): default_factory=datetime.utcnow, description="Row last-update timestamp (UTC).", ) - is_deleted: bool = Field(False, description="Soft-delete flag.") diff --git a/letta/server/rest_api/routers/v1/agents.py b/letta/server/rest_api/routers/v1/agents.py index d41d9cf1..2811dc95 100644 --- a/letta/server/rest_api/routers/v1/agents.py +++ b/letta/server/rest_api/routers/v1/agents.py @@ -41,7 +41,7 @@ from letta.server.server import SyncServer from letta.services.summarizer.enums import SummarizationMode from letta.services.telemetry_manager import NoopTelemetryManager from letta.settings import settings -from letta.utils import safe_create_task +from letta.utils import safe_create_task, truncate_file_visible_content # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally @@ -478,14 +478,23 @@ async def open_file( if not file_metadata: raise HTTPException(status_code=404, detail=f"File with id={file_id} not found") + # Process file content with line numbers using LineChunker + from letta.services.file_processor.chunker.line_chunker import LineChunker + + content_lines = LineChunker().chunk_text(file_metadata=file_metadata, validate_range=False) + visible_content = "\n".join(content_lines) + + # Truncate if needed + visible_content = truncate_file_visible_content(visible_content, True, per_file_view_window_char_limit) + # Use enforce_max_open_files_and_open for efficient LRU handling - closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open( + closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open( agent_id=agent_id, file_id=file_id, file_name=file_metadata.file_name, source_id=file_metadata.source_id, actor=actor, - visible_content=file_metadata.content[:per_file_view_window_char_limit] if file_metadata.content else "", + visible_content=visible_content, max_files_open=max_files_open, ) diff --git a/letta/services/file_processor/chunker/line_chunker.py b/letta/services/file_processor/chunker/line_chunker.py index fe5ed031..c78399f1 100644 --- a/letta/services/file_processor/chunker/line_chunker.py +++ b/letta/services/file_processor/chunker/line_chunker.py @@ -130,37 +130,38 @@ class LineChunker: # Apply the appropriate chunking strategy if strategy == ChunkingStrategy.DOCUMENTATION: content_lines = self._chunk_by_sentences(text) - elif strategy == ChunkingStrategy.PROSE: - content_lines = self._chunk_by_characters(text) elif strategy == ChunkingStrategy.CODE: content_lines = self._chunk_by_lines(text, preserve_indentation=True) else: # STRUCTURED_DATA or LINE_BASED content_lines = self._chunk_by_lines(text, preserve_indentation=False) total_chunks = len(content_lines) - chunk_type = ( - "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines" - ) + chunk_type = "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "lines" - # Validate range if requested - if validate_range and (start is not None or end is not None): - if start is not None and start >= total_chunks: - # Convert to 1-indexed for user-friendly error message - start_display = start + 1 - raise ValueError( - f"File {file_metadata.file_name} has only {total_chunks} lines, but requested offset {start_display} is out of range" - ) - - if start is not None and end is not None and end > total_chunks: - # Convert to 1-indexed for user-friendly error message - start_display = start + 1 - end_display = end - raise ValueError( - f"File {file_metadata.file_name} has only {total_chunks} lines, but requested range {start_display} to {end_display} extends beyond file bounds" - ) - - # Handle start/end slicing + # Handle range validation and clamping if start is not None or end is not None: + # Always validate that start < end if both are specified + if start is not None and end is not None and start >= end: + if validate_range: + raise ValueError(f"Invalid range: start ({start}) must be less than end ({end})") + # If validation is off, we still need to handle this case sensibly + # but we'll allow it to proceed with an empty result + + # Always check that start is within bounds - this should error regardless of validation flag + if start is not None and start >= total_chunks: + raise ValueError( + f"File {file_metadata.file_name} has only {total_chunks} {chunk_type}, but requested offset {start + 1} is out of range" + ) + + # Apply bounds checking + if start is not None: + start = max(0, start) # Ensure non-negative + + # Only clamp end if it exceeds the file length + if end is not None: + end = min(end, total_chunks) + + # Apply slicing content_lines = content_lines[start:end] line_offset = start if start is not None else 0 else: diff --git a/letta/services/file_processor/file_processor.py b/letta/services/file_processor/file_processor.py index d27597a4..76cac3d7 100644 --- a/letta/services/file_processor/file_processor.py +++ b/letta/services/file_processor/file_processor.py @@ -12,7 +12,6 @@ from letta.schemas.passage import Passage from letta.schemas.user import User from letta.services.agent_manager import AgentManager from letta.services.file_manager import FileManager -from letta.services.file_processor.chunker.line_chunker import LineChunker from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker from letta.services.file_processor.embedder.base_embedder import BaseEmbedder from letta.services.file_processor.parser.base_parser import FileParser @@ -35,7 +34,6 @@ class FileProcessor: max_file_size: int = 50 * 1024 * 1024, # 50MB default ): self.file_parser = file_parser - self.line_chunker = LineChunker() self.embedder = embedder self.max_file_size = max_file_size self.file_manager = FileManager() diff --git a/letta/services/file_processor/file_types.py b/letta/services/file_processor/file_types.py index b311a180..2816dd08 100644 --- a/letta/services/file_processor/file_types.py +++ b/letta/services/file_processor/file_types.py @@ -17,7 +17,6 @@ class ChunkingStrategy(str, Enum): CODE = "code" # Line-based chunking for code files STRUCTURED_DATA = "structured_data" # Line-based chunking for JSON, XML, etc. DOCUMENTATION = "documentation" # Paragraph-aware chunking for Markdown, HTML - PROSE = "prose" # Character-based wrapping for plain text LINE_BASED = "line_based" # Default line-based chunking @@ -44,7 +43,7 @@ class FileTypeRegistry: """Register all default supported file types.""" # Document formats self.register(".pdf", "application/pdf", False, "PDF document", ChunkingStrategy.LINE_BASED) - self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.PROSE) + self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.LINE_BASED) self.register(".md", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION) self.register(".markdown", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION) self.register(".json", "application/json", True, "JSON data file", ChunkingStrategy.STRUCTURED_DATA) diff --git a/letta/services/files_agents_manager.py b/letta/services/files_agents_manager.py index e04ccb59..663edffe 100644 --- a/letta/services/files_agents_manager.py +++ b/letta/services/files_agents_manager.py @@ -1,5 +1,5 @@ from datetime import datetime, timezone -from typing import List, Optional, Union +from typing import Dict, List, Optional, Union from sqlalchemy import and_, delete, func, or_, select, update @@ -34,6 +34,8 @@ class FileAgentManager: max_files_open: int, is_open: bool = True, visible_content: Optional[str] = None, + start_line: Optional[int] = None, + end_line: Optional[int] = None, ) -> tuple[PydanticFileAgent, List[str]]: """ Idempotently attach *file_id* to *agent_id* with LRU enforcement. @@ -48,7 +50,7 @@ class FileAgentManager: """ if is_open: # Use the efficient LRU + open method - closed_files, was_already_open = await self.enforce_max_open_files_and_open( + closed_files, was_already_open, _ = await self.enforce_max_open_files_and_open( agent_id=agent_id, file_id=file_id, file_name=file_name, @@ -56,6 +58,8 @@ class FileAgentManager: actor=actor, visible_content=visible_content or "", max_files_open=max_files_open, + start_line=start_line, + end_line=end_line, ) # Get the updated file agent to return @@ -85,6 +89,8 @@ class FileAgentManager: existing.visible_content = visible_content existing.last_accessed_at = now_ts + existing.start_line = start_line + existing.end_line = end_line await existing.update_async(session, actor=actor) return existing.to_pydantic(), [] @@ -98,6 +104,8 @@ class FileAgentManager: is_open=is_open, visible_content=visible_content, last_accessed_at=now_ts, + start_line=start_line, + end_line=end_line, ) await assoc.create_async(session, actor=actor) return assoc.to_pydantic(), [] @@ -112,6 +120,8 @@ class FileAgentManager: actor: PydanticUser, is_open: Optional[bool] = None, visible_content: Optional[str] = None, + start_line: Optional[int] = None, + end_line: Optional[int] = None, ) -> PydanticFileAgent: """Patch an existing association row.""" async with db_registry.async_session() as session: @@ -121,6 +131,10 @@ class FileAgentManager: assoc.is_open = is_open if visible_content is not None: assoc.visible_content = visible_content + if start_line is not None: + assoc.start_line = start_line + if end_line is not None: + assoc.end_line = end_line # touch timestamp assoc.last_accessed_at = datetime.now(timezone.utc) @@ -373,8 +387,18 @@ class FileAgentManager: @enforce_types @trace_method async def enforce_max_open_files_and_open( - self, *, agent_id: str, file_id: str, file_name: str, source_id: str, actor: PydanticUser, visible_content: str, max_files_open: int - ) -> tuple[List[str], bool]: + self, + *, + agent_id: str, + file_id: str, + file_name: str, + source_id: str, + actor: PydanticUser, + visible_content: str, + max_files_open: int, + start_line: Optional[int] = None, + end_line: Optional[int] = None, + ) -> tuple[List[str], bool, Dict[str, tuple[Optional[int], Optional[int]]]]: """ Efficiently handle LRU eviction and file opening in a single transaction. @@ -387,7 +411,8 @@ class FileAgentManager: visible_content: Content to set for the opened file Returns: - Tuple of (closed_file_names, file_was_already_open) + Tuple of (closed_file_names, file_was_already_open, previous_ranges) + where previous_ranges maps file names to their old (start_line, end_line) ranges """ async with db_registry.async_session() as session: # Single query to get ALL open files for this agent, ordered by last_accessed_at (oldest first) @@ -423,6 +448,17 @@ class FileAgentManager: file_was_already_open = file_to_open is not None and file_to_open.is_open + # Capture previous line range if file was already open and we're changing the range + previous_ranges = {} + if file_was_already_open and file_to_open: + old_start = file_to_open.start_line + old_end = file_to_open.end_line + # Only record if there was a previous range or if we're setting a new range + if old_start is not None or old_end is not None or start_line is not None or end_line is not None: + # Only record if the range is actually changing + if old_start != start_line or old_end != end_line: + previous_ranges[file_name] = (old_start, old_end) + # Calculate how many files need to be closed current_other_count = len(other_open_files) target_other_count = max_files_open - 1 # Reserve 1 slot for file we're opening @@ -458,6 +494,8 @@ class FileAgentManager: file_to_open.is_open = True file_to_open.visible_content = visible_content file_to_open.last_accessed_at = now_ts + file_to_open.start_line = start_line + file_to_open.end_line = end_line await file_to_open.update_async(session, actor=actor) else: # Create new file association @@ -470,10 +508,12 @@ class FileAgentManager: is_open=True, visible_content=visible_content, last_accessed_at=now_ts, + start_line=start_line, + end_line=end_line, ) await new_file_agent.create_async(session, actor=actor) - return closed_file_names, file_was_already_open + return closed_file_names, file_was_already_open, previous_ranges @enforce_types @trace_method diff --git a/letta/services/tool_executor/files_tool_executor.py b/letta/services/tool_executor/files_tool_executor.py index 6e5a6304..8e207942 100644 --- a/letta/services/tool_executor/files_tool_executor.py +++ b/letta/services/tool_executor/files_tool_executor.py @@ -142,6 +142,7 @@ class LettaFileToolExecutor(ToolExecutor): # Process each file opened_files = [] all_closed_files = [] + all_previous_ranges = {} # Collect all previous ranges from all files for file_request in file_requests: file_name = file_request.file_name @@ -181,7 +182,7 @@ class LettaFileToolExecutor(ToolExecutor): visible_content = "\n".join(content_lines) # Handle LRU eviction and file opening - closed_files, was_already_open = await self.files_agents_manager.enforce_max_open_files_and_open( + closed_files, was_already_open, previous_ranges = await self.files_agents_manager.enforce_max_open_files_and_open( agent_id=agent_state.id, file_id=file_id, file_name=file_name, @@ -189,42 +190,45 @@ class LettaFileToolExecutor(ToolExecutor): actor=self.actor, visible_content=visible_content, max_files_open=agent_state.max_files_open, + start_line=start + 1 if start is not None else None, # convert to 1-indexed for user display + end_line=end if end is not None else None, # end is already exclusive in slicing, so this is correct ) opened_files.append(file_name) all_closed_files.extend(closed_files) + all_previous_ranges.update(previous_ranges) # Merge previous ranges from this file # Update access timestamps for all opened files efficiently await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=file_names, actor=self.actor) - # Build success message - if len(file_requests) == 1: - # Single file - maintain existing format - file_request = file_requests[0] - file_name = file_request.file_name - offset = file_request.offset - length = file_request.length - if offset is not None and length is not None: - end_line = offset + length - 1 - success_msg = ( - f"Successfully opened file {file_name}, lines {offset} to {end_line} are now visible in memory block <{file_name}>" - ) - elif offset is not None: - success_msg = f"Successfully opened file {file_name}, lines {offset} to end are now visible in memory block <{file_name}>" - else: - success_msg = f"Successfully opened file {file_name}, entire file is now visible in memory block <{file_name}>" - else: - # Multiple files - show individual ranges if specified - file_summaries = [] - for req in file_requests: - if req.offset is not None and req.length is not None: - end_line = req.offset + req.length - 1 - file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line})") - elif req.offset is not None: - file_summaries.append(f"{req.file_name} (lines {req.offset}-end)") + # Helper function to format previous range info + def format_previous_range(file_name: str) -> str: + if file_name in all_previous_ranges: + old_start, old_end = all_previous_ranges[file_name] + if old_start is not None and old_end is not None: + return f" (previously lines {old_start}-{old_end})" + elif old_start is not None: + return f" (previously lines {old_start}-end)" else: - file_summaries.append(req.file_name) - success_msg = f"Successfully opened {len(file_requests)} files: {', '.join(file_summaries)}" + return " (previously full file)" + return "" + + # Build unified success message - treat single and multiple files consistently + file_summaries = [] + for req in file_requests: + previous_info = format_previous_range(req.file_name) + if req.offset is not None and req.length is not None: + end_line = req.offset + req.length - 1 + file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line}){previous_info}") + elif req.offset is not None: + file_summaries.append(f"{req.file_name} (lines {req.offset}-end){previous_info}") + else: + file_summaries.append(f"{req.file_name}{previous_info}") + + if len(file_requests) == 1: + success_msg = f"* Opened {file_summaries[0]}" + else: + success_msg = f"* Opened {len(file_requests)} files: {', '.join(file_summaries)}" # Add information about closed files if closed_by_close_all_others: diff --git a/letta/utils.py b/letta/utils.py index 73abb1a3..2d2775ff 100644 --- a/letta/utils.py +++ b/letta/utils.py @@ -32,6 +32,7 @@ from letta.constants import ( DEFAULT_CORE_MEMORY_SOURCE_CHAR_LIMIT, DEFAULT_MAX_FILES_OPEN, ERROR_MESSAGE_PREFIX, + FILE_IS_TRUNCATED_WARNING, LETTA_DIR, MAX_FILENAME_LENGTH, TOOL_CALL_ID_MAX_LEN, @@ -1223,3 +1224,15 @@ def calculate_file_defaults_based_on_context_window(context_window: Optional[int return 10, 40_000 # ~100k tokens else: # Extremely large models (200K+) return 15, 40_000 # ~1505k tokens + + +def truncate_file_visible_content(visible_content: str, is_open: bool, per_file_view_window_char_limit: int): + visible_content = visible_content if visible_content and is_open else "" + + # Truncate content and add warnings here when converting from FileAgent to Block + if len(visible_content) > per_file_view_window_char_limit: + truncated_warning = f"...[TRUNCATED]\n{FILE_IS_TRUNCATED_WARNING}" + visible_content = visible_content[: per_file_view_window_char_limit - len(truncated_warning)] + visible_content += truncated_warning + + return visible_content diff --git a/tests/test_managers.py b/tests/test_managers.py index 5e6da65c..28b5cdcb 100644 --- a/tests/test_managers.py +++ b/tests/test_managers.py @@ -8733,6 +8733,71 @@ async def test_update_file_agent_by_file_name(server, file_attachment, default_u ) assert updated.is_open is False assert updated.visible_content == "updated" + assert updated.start_line is None # start_line should default to None + assert updated.end_line is None # end_line should default to None + + +@pytest.mark.asyncio +async def test_file_agent_line_tracking(server, default_user, sarah_agent, default_source): + """Test that line information is captured when opening files with line ranges""" + from letta.schemas.file import FileMetadata as PydanticFileMetadata + + # Create a test file with multiple lines + test_content = "line 1\nline 2\nline 3\nline 4\nline 5" + file_metadata = PydanticFileMetadata( + file_name="test_lines.txt", + organization_id=default_user.organization_id, + source_id=default_source.id, + ) + file = await server.file_manager.create_file(file_metadata=file_metadata, actor=default_user, text=test_content) + + # Test opening with line range using enforce_max_open_files_and_open + closed_files, was_already_open, previous_ranges = await server.file_agent_manager.enforce_max_open_files_and_open( + agent_id=sarah_agent.id, + file_id=file.id, + file_name=file.file_name, + source_id=file.source_id, + actor=default_user, + visible_content="2: line 2\n3: line 3", + max_files_open=sarah_agent.max_files_open, + start_line=2, # 1-indexed + end_line=4, # exclusive + ) + + # Retrieve and verify line tracking + retrieved = await server.file_agent_manager.get_file_agent_by_id( + agent_id=sarah_agent.id, + file_id=file.id, + actor=default_user, + ) + + assert retrieved.start_line == 2 + assert retrieved.end_line == 4 + assert previous_ranges == {} # No previous range since it wasn't open before + + # Test opening without line range - should clear line info and capture previous range + closed_files, was_already_open, previous_ranges = await server.file_agent_manager.enforce_max_open_files_and_open( + agent_id=sarah_agent.id, + file_id=file.id, + file_name=file.file_name, + source_id=file.source_id, + actor=default_user, + visible_content="full file content", + max_files_open=sarah_agent.max_files_open, + start_line=None, + end_line=None, + ) + + # Retrieve and verify line info is cleared + retrieved = await server.file_agent_manager.get_file_agent_by_id( + agent_id=sarah_agent.id, + file_id=file.id, + actor=default_user, + ) + + assert retrieved.start_line is None + assert retrieved.end_line is None + assert previous_ranges == {file.file_name: (2, 4)} # Should capture the previous range @pytest.mark.asyncio @@ -9131,7 +9196,7 @@ async def test_lru_eviction_on_open_file(server, default_user, sarah_agent, defa time.sleep(0.1) # Now "open" the last file using the efficient method - closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open( + closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open( agent_id=sarah_agent.id, file_id=files[-1].id, file_name=files[-1].file_name, @@ -9205,7 +9270,7 @@ async def test_lru_no_eviction_when_reopening_same_file(server, default_user, sa time.sleep(0.1) # "Reopen" the last file (which is already open) - closed_files, was_already_open = await server.file_agent_manager.enforce_max_open_files_and_open( + closed_files, was_already_open, _ = await server.file_agent_manager.enforce_max_open_files_and_open( agent_id=sarah_agent.id, file_id=files[-1].id, file_name=files[-1].file_name, diff --git a/tests/test_sources.py b/tests/test_sources.py index e9315c3b..6343957a 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -287,11 +287,11 @@ def test_attach_existing_files_creates_source_blocks_correctly(disable_pinecone, - read_only=true -- chars_current=46 +- chars_current=45 - chars_limit=15000 -[Viewing file start (out of 1 chunks)] +[Viewing file start (out of 1 lines)] 1: test @@ -348,11 +348,11 @@ def test_delete_source_removes_source_blocks_correctly(disable_pinecone, client: - read_only=true -- chars_current=46 +- chars_current=45 - chars_limit=15000 -[Viewing file start (out of 1 chunks)] +[Viewing file start (out of 1 lines)] 1: test @@ -437,9 +437,9 @@ def test_agent_uses_open_close_file_correctly(disable_pinecone, client: LettaSDK assert old_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {old_content_length}" # Assert specific content expectations for first range (lines 1-5) - assert "[Viewing chunks 1 to 5 (out of 554 chunks)]" in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..." + assert "[Viewing lines 1 to 5 (out of " in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..." assert "1: Enrico Letta" in old_value, f"Expected line 1 to start with '1: Enrico Letta', got: {old_value[:200]}..." - assert "5: appointed to the Cabinet" in old_value, f"Expected line 5 to contain '5: appointed to the Cabinet', got: {old_value}" + assert "5: " in old_value, f"Expected line 5 to be present, got: {old_value}" # Ask agent to open the file for a different range offset, length = 6, 5 # Different offset, same length @@ -466,13 +466,9 @@ def test_agent_uses_open_close_file_correctly(disable_pinecone, client: LettaSDK assert new_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {new_content_length}" # Assert specific content expectations for second range (lines 6-10) - assert "[Viewing chunks 6 to 10 (out of 554 chunks)]" in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..." - assert ( - "6: was promoted to become Minister" in new_value - ), f"Expected line 6 to start with '6: was promoted to become Minister', got: {new_value[:200]}..." - assert ( - "10: produced an inconclusive result" in new_value - ), f"Expected line 10 to contain '10: produced an inconclusive result', got: {new_value}" + assert "[Viewing lines 6 to 10 (out of " in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..." + assert "6: " in new_value, f"Expected line 6 to be present, got: {new_value[:200]}..." + assert "10: " in new_value, f"Expected line 10 to be present, got: {new_value}" print(f"Comparing content ranges:") print(f" First range (offset=1, length=5): '{old_value}'") @@ -663,7 +659,7 @@ def test_create_agent_with_source_ids_creates_source_blocks_correctly(disable_pi # Check that source blocks were created correctly blocks = temp_agent_state.memory.file_blocks assert len(blocks) == 1 - assert any(b.value.startswith("[Viewing file start (out of 554 chunks)]") for b in blocks) + assert any(b.value.startswith("[Viewing file start (out of ") for b in blocks) # Verify file tools were automatically attached file_tools = {tool.name for tool in temp_agent_state.tools if tool.tool_type == ToolType.LETTA_FILES_CORE} @@ -1000,6 +996,10 @@ def test_agent_open_file(disable_pinecone, client: LettaSDKClient, agent_state: closed_files = client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata.id) assert len(closed_files) == 0 + system = get_raw_system_message(client, agent_state.id) + assert '' in system + assert "[Viewing file start (out of 1 lines)]" in system + def test_agent_close_file(disable_pinecone, client: LettaSDKClient, agent_state: AgentState): """Test client.agents.close_file() function""" @@ -1019,9 +1019,8 @@ def test_agent_close_file(disable_pinecone, client: LettaSDKClient, agent_state: # Test close_file function client.agents.files.close(agent_id=agent_state.id, file_id=file_metadata.id) - # Result can be None or any type based on the signature - # Just verify the function executes without error - assert True, "close_file should execute without error" + system = get_raw_system_message(client, agent_state.id) + assert '' in system def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, agent_state: AgentState): @@ -1041,6 +1040,9 @@ def test_agent_close_all_open_files(disable_pinecone, client: LettaSDKClient, ag # Open each file client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata.id) + system = get_raw_system_message(client, agent_state.id) + assert '=end) are allowed when validation is off""" file = FileMetadata(file_name="test.py", source_id="test_source", content="line1\nline2\nline3") chunker = LineChunker() - # Test with validation disabled - should not raise error - result = chunker.chunk_text(file, start=5, end=10, validate_range=False) - # Should return empty content (except metadata header) since slice is out of bounds + # Test 1: Out of bounds start should always raise error, even with validation disabled + with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 6 is out of range"): + chunker.chunk_text(file, start=5, end=10, validate_range=False) + + # Test 2: With validation disabled, start >= end should be allowed (but gives empty result) + result = chunker.chunk_text(file, start=2, end=2, validate_range=False) assert len(result) == 1 # Only metadata header - assert "[Viewing lines 6 to 10 (out of 3 lines)]" in result[0] + assert "[Viewing lines 3 to 2 (out of 3 lines)]" in result[0] def test_line_chunker_only_start_parameter(): @@ -506,7 +514,7 @@ def test_line_chunker_only_start_parameter(): assert "2: line2" in result[1] assert "3: line3" in result[2] - # Test invalid start only + # Test start at end of file - should raise error with pytest.raises(ValueError, match="File test.py has only 3 lines, but requested offset 4 is out of range"): chunker.chunk_text(file, start=3, validate_range=True)