feat: Rework and simplify files tooling (#3058)

This commit is contained in:
Matthew Zhou
2025-06-27 13:23:44 -07:00
committed by GitHub
parent 506d51bb0b
commit edb20fa8d9
16 changed files with 565 additions and 134 deletions

View File

@@ -129,7 +129,7 @@ MEMORY_TOOLS_LINE_NUMBER_PREFIX_REGEX = re.compile(
BUILTIN_TOOLS = ["run_code", "web_search"]
# Built in tools
FILES_TOOLS = ["open_file", "close_file", "grep", "search_files"]
FILES_TOOLS = ["open_files", "grep_files", "search_files"]
FILE_MEMORY_EXISTS_MESSAGE = "The following files are currently accessible in memory:"
FILE_MEMORY_EMPTY_MESSAGE = (

View File

@@ -1,17 +1,32 @@
from typing import TYPE_CHECKING, List, Optional, Tuple
from typing import TYPE_CHECKING, List, Optional
from letta.functions.types import FileOpenRequest
if TYPE_CHECKING:
from letta.schemas.agent import AgentState
from letta.schemas.file import FileMetadata
async def open_file(agent_state: "AgentState", file_name: str, view_range: Optional[Tuple[int, int]]) -> str:
"""
Open the file with name `file_name` and load the contents into files section in core memory.
async def open_files(agent_state: "AgentState", file_requests: List[FileOpenRequest], close_all_others: bool = False) -> str:
"""Open one or more files and load their contents into files section in core memory. Maximum of 5 files can be opened simultaneously.
Examples:
Open single file (entire content):
file_requests = [FileOpenRequest(file_name="config.py")]
Open multiple files with different view ranges:
file_requests = [
FileOpenRequest(file_name="config.py", offset=1, length=50), # Lines 1-50
FileOpenRequest(file_name="main.py", offset=100, length=100), # Lines 100-199
FileOpenRequest(file_name="utils.py") # Entire file
]
Close all other files and open new ones:
open_files(agent_state, file_requests, close_all_others=True)
Args:
file_name (str): Name of the file to view. Required.
view_range (Optional[Tuple[int, int]]): Optional tuple indicating range to view.
file_requests (List[FileOpenRequest]): List of file open requests, each specifying file name and optional view range.
close_all_others (bool): If True, closes all other currently open files first. Defaults to False.
Returns:
str: A status message
@@ -19,20 +34,7 @@ async def open_file(agent_state: "AgentState", file_name: str, view_range: Optio
raise NotImplementedError("Tool not implemented. Please contact the Letta team.")
async def close_file(agent_state: "AgentState", file_name: str) -> str:
"""
Close file with name `file_name` in files section in core memory.
Args:
file_name (str): Name of the file to close.
Returns:
str: A status message
"""
raise NotImplementedError("Tool not implemented. Please contact the Letta team.")
async def grep(
async def grep_files(
agent_state: "AgentState",
pattern: str,
include: Optional[str] = None,
@@ -45,7 +47,7 @@ async def grep(
pattern (str): Keyword or regex pattern to search within file contents.
include (Optional[str]): Optional keyword or regex pattern to filter filenames to include in the search.
context_lines (Optional[int]): Number of lines of context to show before and after each match.
Equivalent to `-C` in grep. Defaults to 3.
Equivalent to `-C` in grep_files. Defaults to 3.
Returns:
str: Matching lines with optional surrounding context or a summary output.

View File

@@ -9,6 +9,56 @@ from typing_extensions import Literal
from letta.constants import REQUEST_HEARTBEAT_DESCRIPTION, REQUEST_HEARTBEAT_PARAM
from letta.functions.mcp_client.types import MCPTool
from letta.log import get_logger
logger = get_logger(__name__)
def validate_google_style_docstring(function):
    """Check that *function* carries a Google Python style docstring.

    Args:
        function: The callable whose docstring is inspected.

    Raises:
        ValueError: If the docstring is missing, lacks an ``Args:`` section
            while the function takes parameters, or omits one of the
            parameters from the ``Args:`` section.
    """
    SKIPPED = ("self", "agent_state")

    if not function.__doc__:
        raise ValueError(
            f"Function '{function.__name__}' has no docstring. Expected Google Python style docstring with Args and Returns sections."
        )

    doc = function.__doc__.strip()
    signature = inspect.signature(function)

    # Parameters other than the implicit self/agent_state must be documented.
    documented_params = [p for p in signature.parameters.values() if p.name not in SKIPPED]

    if documented_params and "Args:" not in doc:
        raise ValueError(f"Function '{function.__name__}' with parameters must have 'Args:' section in Google Python style docstring")

    # NOTE: the Returns: section is deliberately NOT enforced — it is
    # irrelevant to the LLM, even though proper Google style requires it.

    if "Args:" not in doc:
        return

    # Slice out the Args section: it ends at "Returns:" when present,
    # otherwise it runs to the end of the docstring.
    begin = doc.find("Args:")
    finish = doc.find("Returns:", begin) if "Returns:" in doc[begin:] else len(doc)
    section = doc[begin:finish].strip()

    # Each parameter must appear either as "name (" (typed form) or "name:".
    for parameter in documented_params:
        if f"{parameter.name} (" in section or f"{parameter.name}:" in section:
            continue
        raise ValueError(
            f"Function '{function.__name__}' parameter '{parameter.name}' not documented in Args section of Google Python style docstring"
        )
def is_optional(annotation):
@@ -277,6 +327,20 @@ def pydantic_model_to_json_schema(model: Type[BaseModel]) -> dict:
"description": prop["description"],
}
# Handle the case where the property uses anyOf (e.g., Optional types)
if "anyOf" in prop:
# For Optional types, extract the non-null type
non_null_types = [t for t in prop["anyOf"] if t.get("type") != "null"]
if len(non_null_types) == 1:
# Simple Optional[T] case - use the non-null type
return {
"type": non_null_types[0]["type"],
"description": prop["description"],
}
else:
# Complex anyOf case - not supported yet
raise ValueError(f"Complex anyOf patterns are not supported: {prop}")
# If it's a regular property with a direct type (e.g., string, number)
return {
"type": "string" if prop["type"] == "string" else prop["type"],
@@ -344,7 +408,18 @@ def pydantic_model_to_json_schema(model: Type[BaseModel]) -> dict:
return clean_schema(schema_part=schema, full_schema=schema)
def generate_schema(function, name: Optional[str] = None, description: Optional[str] = None) -> dict:
def generate_schema(function, name: Optional[str] = None, description: Optional[str] = None, tool_id: Optional[str] = None) -> dict:
# Validate that the function has a Google Python style docstring
try:
validate_google_style_docstring(function)
except ValueError:
logger.warning(
f"Function `{function.__name__}` in module `{function.__module__}` "
f"{'(tool_id=' + tool_id + ') ' if tool_id else ''}"
f"is not in Google style docstring format. "
f"Docstring received:\n{repr(function.__doc__[:200]) if function.__doc__ else 'None'}"
)
# Get the signature of the function
sig = inspect.signature(function)
@@ -353,10 +428,19 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
if not description:
# Support multiline docstrings for complex functions, TODO (cliandy): consider having this as a setting
if docstring.long_description:
# Always prefer combining short + long description when both exist
if docstring.short_description and docstring.long_description:
description = f"{docstring.short_description}\n\n{docstring.long_description}"
elif docstring.short_description:
description = docstring.short_description
elif docstring.long_description:
description = docstring.long_description
else:
description = docstring.short_description
description = "No description available"
examples_section = extract_examples_section(function.__doc__)
if examples_section and "Examples:" not in description:
description = f"{description}\n\n{examples_section}"
# Prepare the schema dictionary
schema = {
@@ -443,6 +527,38 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
return schema
def extract_examples_section(docstring: Optional[str]) -> Optional[str]:
    """Pull the ``Examples:`` section out of a Google-style docstring.

    Args:
        docstring (Optional[str]): The full docstring of a function.

    Returns:
        Optional[str]: The text from the ``Examples:`` header up to (but not
            including) the next top-level ``Something:`` section header, or
            None when no examples section exists.
    """
    if not docstring or "Examples:" not in docstring:
        return None

    collected = []
    capturing = False

    for raw in docstring.strip().splitlines():
        text = raw.strip()
        if not capturing:
            # Skip everything until the Examples: header itself.
            if text.startswith("Examples:"):
                capturing = True
                collected.append(raw)
            continue
        # A non-empty, non-indented line ending in ":" starts the next
        # top-level section (e.g. "Returns:"), which ends the examples.
        if text and not raw.startswith(" ") and text.endswith(":"):
            break
        collected.append(raw)

    return "\n".join(collected).strip() if collected else None
def generate_schema_from_args_schema_v2(
args_schema: Type[BaseModel], name: Optional[str] = None, description: Optional[str] = None, append_heartbeat: bool = True
) -> Dict[str, Any]:

View File

@@ -1,6 +1,18 @@
from typing import Optional
from pydantic import BaseModel, Field
class SearchTask(BaseModel):
    """A single web-search task: the query to run and the question to answer."""

    # Query string handed to the web search engine verbatim.
    query: str = Field(description="Search query for web search")
    # Question the agent should answer from the returned results; phrased to
    # take the full conversation context into account.
    question: str = Field(description="Question to answer from search results, considering full conversation context")
class FileOpenRequest(BaseModel):
    """Request to open one file, optionally restricted to a 1-indexed line range."""

    # Name of the file as attached to the agent (a label, not a filesystem path).
    file_name: str = Field(description="Name of the file to open")
    # 1-indexed first line to display; None means start from the beginning.
    offset: Optional[int] = Field(
        default=None, description="Optional starting line number (1-indexed). If not specified, starts from beginning of file."
    )
    # Number of lines to display from `offset`; None means through end of file.
    length: Optional[int] = Field(
        default=None, description="Optional number of lines to view from offset. If not specified, views to end of file."
    )

View File

@@ -124,8 +124,8 @@ class LineChunker:
else:
line_offset = 0
# Add line numbers for all strategies
content_lines = [f"{i + line_offset}: {line}" for i, line in enumerate(content_lines)]
# Add line numbers for all strategies (1-indexed for user display)
content_lines = [f"{i + line_offset + 1}: {line}" for i, line in enumerate(content_lines)]
# Add metadata about total chunks
if add_metadata:
@@ -133,7 +133,10 @@ class LineChunker:
"sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
)
if start is not None and end is not None:
content_lines.insert(0, f"[Viewing {chunk_type} {start} to {end-1} (out of {total_chunks} {chunk_type})]")
# Display 1-indexed ranges for users
start_display = start + 1
end_display = end
content_lines.insert(0, f"[Viewing {chunk_type} {start_display} to {end_display} (out of {total_chunks} {chunk_type})]")
else:
content_lines.insert(0, f"[Viewing file start (out of {total_chunks} {chunk_type})]")

View File

@@ -283,6 +283,40 @@ class FileAgentManager:
await session.execute(stmt)
await session.commit()
    @enforce_types
    @trace_method
    async def close_all_other_files(self, *, agent_id: str, keep_file_names: List[str], actor: PydanticUser) -> List[str]:
        """Close every open file for this agent except those in keep_file_names.

        Runs as a single UPDATE ... RETURNING statement so the close-and-report
        happens atomically in one round trip, scoped to the actor's organization.

        Args:
            agent_id: ID of the agent
            keep_file_names: List of file names to keep open
            actor: User performing the action

        Returns:
            List of file names that were closed
        """
        async with db_registry.async_session() as session:
            stmt = (
                update(FileAgentModel)
                .where(
                    and_(
                        FileAgentModel.agent_id == agent_id,
                        FileAgentModel.organization_id == actor.organization_id,
                        FileAgentModel.is_open.is_(True),
                        # Only add the NOT IN filter when there are names to keep;
                        # with an empty keep list, every open file is closed.
                        # NOTE(review): plain `True` inside and_() relies on
                        # SQLAlchemy coercing it to true() — confirm this does
                        # not warn on the project's SQLAlchemy version.
                        ~FileAgentModel.file_name.in_(keep_file_names) if keep_file_names else True,
                    )
                )
                # Closing a file also drops its cached visible content.
                .values(is_open=False, visible_content=None)
                .returning(FileAgentModel.file_name)  # Gets the names we closed
                .execution_options(synchronize_session=False)  # No need to sync ORM state
            )
            closed_file_names = [row.file_name for row in (await session.execute(stmt))]
            await session.commit()
            return closed_file_names
@enforce_types
@trace_method
async def enforce_max_open_files_and_open(

View File

@@ -1,7 +1,9 @@
import asyncio
import re
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional
from letta.constants import MAX_FILES_OPEN
from letta.functions.types import FileOpenRequest
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
@@ -31,7 +33,7 @@ class LettaFileToolExecutor(ToolExecutor):
MAX_REGEX_COMPLEXITY = 1000 # Prevent catastrophic backtracking
MAX_MATCHES_PER_FILE = 20 # Limit matches per file
MAX_TOTAL_MATCHES = 50 # Global match limit
GREP_TIMEOUT_SECONDS = 30 # Max time for grep operation
GREP_TIMEOUT_SECONDS = 30 # Max time for grep_files operation
MAX_CONTEXT_LINES = 1 # Lines of context around matches
def __init__(
@@ -72,9 +74,8 @@ class LettaFileToolExecutor(ToolExecutor):
raise ValueError("Agent state is required for file tools")
function_map = {
"open_file": self.open_file,
"close_file": self.close_file,
"grep": self.grep,
"open_files": self.open_files,
"grep_files": self.grep_files,
"search_files": self.search_files,
}
@@ -98,56 +99,135 @@ class LettaFileToolExecutor(ToolExecutor):
)
@trace_method
async def open_file(self, agent_state: AgentState, file_name: str, view_range: Optional[Tuple[int, int]] = None) -> str:
"""Stub for open_file tool."""
start, end = None, None
if view_range:
start, end = view_range
if start >= end:
raise ValueError(f"Provided view range {view_range} is invalid, starting range must be less than ending range.")
async def open_files(self, agent_state: AgentState, file_requests: List[FileOpenRequest], close_all_others: bool = False) -> str:
"""Open one or more files and load their contents into memory blocks."""
# Parse raw dictionaries into FileOpenRequest objects if needed
parsed_requests = []
for req in file_requests:
if isinstance(req, dict):
# LLM returned a dictionary, parse it into FileOpenRequest
parsed_requests.append(FileOpenRequest(**req))
elif isinstance(req, FileOpenRequest):
# Already a FileOpenRequest object
parsed_requests.append(req)
else:
raise ValueError(f"Invalid file request type: {type(req)}. Expected dict or FileOpenRequest.")
# TODO: This is inefficient. We can skip the initial DB lookup by preserving on the block metadata what the file_id is
file_agent = await self.files_agents_manager.get_file_agent_by_file_name(
agent_id=agent_state.id, file_name=file_name, actor=self.actor
)
file_requests = parsed_requests
if not file_agent:
file_blocks = agent_state.memory.file_blocks
file_names = [fb.label for fb in file_blocks]
raise ValueError(
f"{file_name} not attached - did you get the filename correct? Currently you have the following files attached: {file_names}"
# Validate file count first
if len(file_requests) > MAX_FILES_OPEN:
raise ValueError(f"Cannot open {len(file_requests)} files: exceeds maximum limit of {MAX_FILES_OPEN} files")
if not file_requests:
raise ValueError("No file requests provided")
# Extract file names for various operations
file_names = [req.file_name for req in file_requests]
# Get all currently attached files for error reporting
file_blocks = agent_state.memory.file_blocks
attached_file_names = [fb.label for fb in file_blocks]
# Close all other files if requested
closed_by_close_all_others = []
if close_all_others:
closed_by_close_all_others = await self.files_agents_manager.close_all_other_files(
agent_id=agent_state.id, keep_file_names=file_names, actor=self.actor
)
file_id = file_agent.file_id
file = await self.file_manager.get_file_by_id(file_id=file_id, actor=self.actor, include_content=True)
# Process each file
opened_files = []
all_closed_files = []
# TODO: Inefficient, maybe we can pre-compute this
# TODO: This is also not the best way to split things - would be cool to have "content aware" splitting
# TODO: Split code differently from large text blurbs
content_lines = LineChunker().chunk_text(file_metadata=file, start=start, end=end)
visible_content = "\n".join(content_lines)
for file_request in file_requests:
file_name = file_request.file_name
offset = file_request.offset
length = file_request.length
# Efficiently handle LRU eviction and file opening in a single transaction
closed_files, was_already_open = await self.files_agents_manager.enforce_max_open_files_and_open(
agent_id=agent_state.id, file_id=file_id, file_name=file_name, actor=self.actor, visible_content=visible_content
)
# Convert 1-indexed offset/length to 0-indexed start/end for LineChunker
start, end = None, None
if offset is not None or length is not None:
if offset is not None and offset < 1:
raise ValueError(f"Offset for file {file_name} must be >= 1 (1-indexed), got {offset}")
if length is not None and length < 1:
raise ValueError(f"Length for file {file_name} must be >= 1, got {length}")
success_msg = f"Successfully opened file {file_name}, lines {start} to {end} are now visible in memory block <{file_name}>"
if closed_files:
# Convert to 0-indexed for LineChunker
start = (offset - 1) if offset is not None else None
if start is not None and length is not None:
end = start + length
else:
end = None
# Validate file exists and is attached to agent
file_agent = await self.files_agents_manager.get_file_agent_by_file_name(
agent_id=agent_state.id, file_name=file_name, actor=self.actor
)
if not file_agent:
raise ValueError(
f"{file_name} not attached - did you get the filename correct? Currently you have the following files attached: {attached_file_names}"
)
file_id = file_agent.file_id
file = await self.file_manager.get_file_by_id(file_id=file_id, actor=self.actor, include_content=True)
# Process file content
content_lines = LineChunker().chunk_text(file_metadata=file, start=start, end=end)
visible_content = "\n".join(content_lines)
# Handle LRU eviction and file opening
closed_files, was_already_open = await self.files_agents_manager.enforce_max_open_files_and_open(
agent_id=agent_state.id, file_id=file_id, file_name=file_name, actor=self.actor, visible_content=visible_content
)
opened_files.append(file_name)
all_closed_files.extend(closed_files)
# Update access timestamps for all opened files efficiently
await self.files_agents_manager.mark_access_bulk(agent_id=agent_state.id, file_names=file_names, actor=self.actor)
# Build success message
if len(file_requests) == 1:
# Single file - maintain existing format
file_request = file_requests[0]
file_name = file_request.file_name
offset = file_request.offset
length = file_request.length
if offset is not None and length is not None:
end_line = offset + length - 1
success_msg = (
f"Successfully opened file {file_name}, lines {offset} to {end_line} are now visible in memory block <{file_name}>"
)
elif offset is not None:
success_msg = f"Successfully opened file {file_name}, lines {offset} to end are now visible in memory block <{file_name}>"
else:
success_msg = f"Successfully opened file {file_name}, entire file is now visible in memory block <{file_name}>"
else:
# Multiple files - show individual ranges if specified
file_summaries = []
for req in file_requests:
if req.offset is not None and req.length is not None:
end_line = req.offset + req.length - 1
file_summaries.append(f"{req.file_name} (lines {req.offset}-{end_line})")
elif req.offset is not None:
file_summaries.append(f"{req.file_name} (lines {req.offset}-end)")
else:
file_summaries.append(req.file_name)
success_msg = f"Successfully opened {len(file_requests)} files: {', '.join(file_summaries)}"
# Add information about closed files
if closed_by_close_all_others:
success_msg += f"\nNote: Closed {len(closed_by_close_all_others)} file(s) due to close_all_others=True: {', '.join(closed_by_close_all_others)}"
if all_closed_files:
success_msg += (
f"\nNote: Closed {len(closed_files)} least recently used file(s) due to open file limit: {', '.join(closed_files)}"
f"\nNote: Closed {len(all_closed_files)} least recently used file(s) due to open file limit: {', '.join(all_closed_files)}"
)
return success_msg
@trace_method
async def close_file(self, agent_state: AgentState, file_name: str) -> str:
"""Stub for close_file tool."""
await self.files_agents_manager.update_file_agent_by_name(
agent_id=agent_state.id, file_name=file_name, actor=self.actor, is_open=False
)
return f"Successfully closed file {file_name}, use function calls to re-open file"
def _validate_regex_pattern(self, pattern: str) -> None:
"""Validate regex pattern to prevent catastrophic backtracking."""
if len(pattern) > self.MAX_REGEX_COMPLEXITY:
@@ -204,7 +284,9 @@ class LettaFileToolExecutor(ToolExecutor):
return context_lines_with_indicator
@trace_method
async def grep(self, agent_state: AgentState, pattern: str, include: Optional[str] = None, context_lines: Optional[int] = 3) -> str:
async def grep_files(
self, agent_state: AgentState, pattern: str, include: Optional[str] = None, context_lines: Optional[int] = 3
) -> str:
"""
Search for pattern in all attached files and return matches with context.
@@ -213,7 +295,7 @@ class LettaFileToolExecutor(ToolExecutor):
pattern: Regular expression pattern to search for
include: Optional pattern to filter filenames to include in the search
context_lines (Optional[int]): Number of lines of context to show before and after each match.
Equivalent to `-C` in grep. Defaults to 3.
Equivalent to `-C` in grep_files. Defaults to 3.
Returns:
Formatted string with search results, file names, line numbers, and context
@@ -310,20 +392,7 @@ class LettaFileToolExecutor(ToolExecutor):
if formatted_lines and formatted_lines[0].startswith("[Viewing"):
formatted_lines = formatted_lines[1:]
# Convert 0-based line numbers to 1-based for grep compatibility
corrected_lines = []
for line in formatted_lines:
if line and ":" in line:
try:
line_parts = line.split(":", 1)
line_num = int(line_parts[0].strip())
line_content = line_parts[1] if len(line_parts) > 1 else ""
corrected_lines.append(f"{line_num + 1}:{line_content}")
except (ValueError, IndexError):
corrected_lines.append(line)
else:
corrected_lines.append(line)
formatted_lines = corrected_lines
# LineChunker now returns 1-indexed line numbers, so no conversion needed
# Search for matches in formatted lines
for formatted_line in formatted_lines:

View File

@@ -1,4 +1,3 @@
x0 = 0
x1 = 1
x2 = 2
x3 = 3
@@ -98,3 +97,4 @@ x96 = 96
x97 = 97
x98 = 98
x99 = 99
x100 = 100

View File

@@ -71,10 +71,9 @@ def upload_file_and_wait(client: LettaSDKClient, source_id: str, file_path: str,
@pytest.fixture
def agent_state(client: LettaSDKClient):
open_file_tool = client.tools.list(name="open_file")[0]
close_file_tool = client.tools.list(name="close_file")[0]
open_file_tool = client.tools.list(name="open_files")[0]
search_files_tool = client.tools.list(name="search_files")[0]
grep_tool = client.tools.list(name="grep")[0]
grep_tool = client.tools.list(name="grep_files")[0]
agent_state = client.agents.create(
name="test_sources_agent",
@@ -86,7 +85,7 @@ def agent_state(client: LettaSDKClient):
],
model="openai/gpt-4o-mini",
embedding="openai/text-embedding-3-small",
tool_ids=[open_file_tool.id, close_file_tool.id, search_files_tool.id, grep_tool.id],
tool_ids=[open_file_tool.id, search_files_tool.id, grep_tool.id],
)
yield agent_state
@@ -304,30 +303,15 @@ def test_agent_uses_open_close_file_correctly(client: LettaSDKClient, agent_stat
print(f"First 100 chars of content: {blocks[0].value[:100]}...")
assert initial_content_length > 10, f"Expected file content > 10 chars, got {initial_content_length}"
# Ask agent to close the file
print(f"Requesting agent to close file: {file.file_name}")
close_response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[MessageCreate(role="user", content=f"Use ONLY the close_file tool to close the file named {file.file_name}")],
)
print(f"Close file request sent, got {len(close_response.messages)} message(s) in response")
print(close_response.messages)
# Check that file is closed
agent_state = client.agents.retrieve(agent_id=agent_state.id)
blocks = agent_state.memory.file_blocks
closed_content_length = len(blocks[0].value) if blocks else 0
print(f"File content length after close: {closed_content_length} characters")
assert closed_content_length == 0, f"Expected empty content after close, got {closed_content_length} chars"
# Ask agent to open the file for a specific range
start, end = 0, 5
print(f"Requesting agent to open file for range [{start}, {end}]")
# Ask agent to open the file for a specific range using offset/length
offset, length = 1, 5 # 1-indexed offset, 5 lines
print(f"Requesting agent to open file with offset={offset}, length={length}")
open_response1 = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
MessageCreate(
role="user", content=f"Use ONLY the open_file tool to open the file named {file.file_name} for view range [{start}, {end}]"
role="user",
content=f"Use ONLY the open_files tool to open the file named {file.file_name} with offset {offset} and length {length}",
)
],
)
@@ -341,15 +325,21 @@ def test_agent_uses_open_close_file_correctly(client: LettaSDKClient, agent_stat
old_content_length = len(old_value)
print(f"File content length after first open: {old_content_length} characters")
print(f"First range content: '{old_value}'")
assert old_content_length > 10, f"Expected content > 10 chars for range [{start}, {end}], got {old_content_length}"
assert old_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {old_content_length}"
# Assert specific content expectations for first range (lines 1-5)
assert "[Viewing chunks 1 to 5 (out of 554 chunks)]" in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..."
assert "1: Enrico Letta" in old_value, f"Expected line 1 to start with '1: Enrico Letta', got: {old_value[:200]}..."
assert "5: appointed to the Cabinet" in old_value, f"Expected line 5 to contain '5: appointed to the Cabinet', got: {old_value}"
# Ask agent to open the file for a different range
start, end = 5, 10
offset, length = 6, 5 # Different offset, same length
open_response2 = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
MessageCreate(
role="user", content=f"Use ONLY the open_file tool to open the file named {file.file_name} for view range [{start}, {end}]"
role="user",
content=f"Use ONLY the open_files tool to open the file named {file.file_name} with offset {offset} and length {length}",
)
],
)
@@ -364,13 +354,27 @@ def test_agent_uses_open_close_file_correctly(client: LettaSDKClient, agent_stat
new_content_length = len(new_value)
print(f"File content length after second open: {new_content_length} characters")
print(f"Second range content: '{new_value}'")
assert new_content_length > 10, f"Expected content > 10 chars for range [{start}, {end}], got {new_content_length}"
assert new_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {new_content_length}"
# Assert specific content expectations for second range (lines 6-10)
assert "[Viewing chunks 6 to 10 (out of 554 chunks)]" in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..."
assert (
"6: was promoted to become Minister" in new_value
), f"Expected line 6 to start with '6: was promoted to become Minister', got: {new_value[:200]}..."
assert (
"10: produced an inconclusive result" in new_value
), f"Expected line 10 to contain '10: produced an inconclusive result', got: {new_value}"
print(f"Comparing content ranges:")
print(f" First range [0, 5]: '{old_value}'")
print(f" Second range [5, 10]: '{new_value}'")
print(f" First range (offset=1, length=5): '{old_value}'")
print(f" Second range (offset=6, length=5): '{new_value}'")
assert new_value != old_value, f"Different view ranges should have different content. New: '{new_value}', Old: '{old_value}'"
# Assert that ranges don't overlap - first range should not contain line 6, second should not contain line 1
assert "6: was promoted" not in old_value, f"First range (1-5) should not contain line 6, got: {old_value}"
assert "1: Enrico Letta" not in new_value, f"Second range (6-10) should not contain line 1, got: {new_value}"
print("✓ File successfully opened with different range - content differs as expected")
@@ -444,15 +448,15 @@ def test_agent_uses_grep_correctly_basic(client: LettaSDKClient, agent_state: Ag
# Ask agent to use the search_files tool
search_files_response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[MessageCreate(role="user", content=f"Use ONLY the grep tool to search for `Nunzia De Girolamo`.")],
messages=[MessageCreate(role="user", content=f"Use ONLY the grep_files tool to search for `Nunzia De Girolamo`.")],
)
print(f"Grep request sent, got {len(search_files_response.messages)} message(s) in response")
print(search_files_response.messages)
# Check that grep was called
# Check that grep_files was called
tool_calls = [msg for msg in search_files_response.messages if msg.message_type == "tool_call_message"]
assert len(tool_calls) > 0, "No tool calls found"
assert any(tc.tool_call.name == "grep" for tc in tool_calls), "search_files not called"
assert any(tc.tool_call.name == "grep_files" for tc in tool_calls), "search_files not called"
# Check it returned successfully
tool_returns = [msg for msg in search_files_response.messages if msg.message_type == "tool_return_message"]
@@ -486,7 +490,9 @@ def test_agent_uses_grep_correctly_advanced(client: LettaSDKClient, agent_state:
# Ask agent to use the search_files tool
search_files_response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[MessageCreate(role="user", content=f"Use ONLY the grep tool to search for `tool-f5b80b08-5a45-4a0a-b2cd-dd8a0177b7ef`.")],
messages=[
MessageCreate(role="user", content=f"Use ONLY the grep_files tool to search for `tool-f5b80b08-5a45-4a0a-b2cd-dd8a0177b7ef`.")
],
)
print(f"Grep request sent, got {len(search_files_response.messages)} message(s) in response")
print(search_files_response.messages)
@@ -495,7 +501,7 @@ def test_agent_uses_grep_correctly_advanced(client: LettaSDKClient, agent_state:
assert tool_return_message is not None, "No ToolReturnMessage found in messages"
# Basic structural integrity checks
assert tool_return_message.name == "grep"
assert tool_return_message.name == "grep_files"
assert tool_return_message.status == "success"
assert "Found 1 matches" in tool_return_message.tool_return
assert "tool-f5b80b08-5a45-4a0a-b2cd-dd8a0177b7ef" in tool_return_message.tool_return
@@ -565,7 +571,7 @@ def test_view_ranges_have_metadata(client: LettaSDKClient, agent_state: AgentSta
client.agents.sources.attach(source_id=source.id, agent_id=agent_state.id)
# Load files into the source
file_path = "tests/data/0_to_99.py"
file_path = "tests/data/1_to_100.py"
# Upload the files
upload_file_and_wait(client, source.id, file_path)
@@ -583,14 +589,15 @@ def test_view_ranges_have_metadata(client: LettaSDKClient, agent_state: AgentSta
block = blocks[0]
assert block.value.startswith("[Viewing file start (out of 100 lines)]")
# Open a specific range
start = 50
end = 55
# Open a specific range using offset/length
offset = 50 # 1-indexed line 50
length = 5 # 5 lines (50-54)
open_response = client.agents.messages.create(
agent_id=agent_state.id,
messages=[
MessageCreate(
role="user", content=f"Use ONLY the open_file tool to open the file named {file.file_name} for view range [{start}, {end}]"
role="user",
content=f"Use ONLY the open_files tool to open the file named {file.file_name} with offset {offset} and length {length}",
)
],
)
@@ -614,3 +621,85 @@ def test_view_ranges_have_metadata(client: LettaSDKClient, agent_state: AgentSta
54: x54 = 54
""".strip()
)
def test_open_files_schema_descriptions(client: LettaSDKClient):
    """Verify the open_files tool's JSON schema carries the descriptions derived from its docstring."""
    # Fetch the open_files tool definition; there must be exactly one
    matching_tools = client.tools.list(name="open_files")
    assert len(matching_tools) == 1, "Expected exactly one open_files tool"
    schema = matching_tools[0].json_schema

    # The full multiline docstring (summary line plus examples) should be embedded in the description
    description = schema["description"]
    assert (
        "Open one or more files and load their contents into files section in core memory. Maximum of 5 files can be opened simultaneously."
        in description
    )
    for expected_snippet in (
        "Examples:",
        'FileOpenRequest(file_name="config.py")',
        'FileOpenRequest(file_name="config.py", offset=1, length=50)',
        "# Lines 1-50",
        "# Lines 100-199",
        "# Entire file",
        "close_all_others=True",
    ):
        assert expected_snippet in description

    # Top-level parameter structure
    assert "parameters" in schema
    assert "properties" in schema["parameters"]
    properties = schema["parameters"]["properties"]

    # file_requests parameter: present, with the docstring-derived description
    assert "file_requests" in properties
    file_requests_prop = properties["file_requests"]
    expected_file_requests_desc = "List of file open requests, each specifying file name and optional view range."
    assert (
        file_requests_prop["description"] == expected_file_requests_desc
    ), f"Expected file_requests description: '{expected_file_requests_desc}', got: '{file_requests_prop['description']}'"

    # close_all_others parameter: present, with the docstring-derived description
    assert "close_all_others" in properties
    close_all_others_prop = properties["close_all_others"]
    expected_close_all_others_desc = "If True, closes all other currently open files first. Defaults to False."
    assert (
        close_all_others_prop["description"] == expected_close_all_others_desc
    ), f"Expected close_all_others description: '{expected_close_all_others_desc}', got: '{close_all_others_prop['description']}'"

    # file_requests must be an array of FileOpenRequest objects
    assert file_requests_prop["type"] == "array", f"Expected file_requests type to be 'array', got: '{file_requests_prop['type']}'"
    assert "items" in file_requests_prop
    file_request_items = file_requests_prop["items"]
    assert file_request_items["type"] == "object", "Expected FileOpenRequest to be object type"
    assert "properties" in file_request_items
    file_request_properties = file_request_items["properties"]

    # FileOpenRequest.file_name: required string field
    assert "file_name" in file_request_properties
    file_name_prop = file_request_properties["file_name"]
    assert file_name_prop["description"] == "Name of the file to open"
    assert file_name_prop["type"] == "string"

    # FileOpenRequest.offset: optional 1-indexed starting line
    assert "offset" in file_request_properties
    offset_prop = file_request_properties["offset"]
    expected_offset_desc = "Optional starting line number (1-indexed). If not specified, starts from beginning of file."
    assert offset_prop["description"] == expected_offset_desc
    assert offset_prop["type"] == "integer"

    # FileOpenRequest.length: optional number of lines to view
    assert "length" in file_request_properties
    length_prop = file_request_properties["length"]
    expected_length_desc = "Optional number of lines to view from offset. If not specified, views to end of file."
    assert length_prop["description"] == expected_length_desc
    assert length_prop["type"] == "integer"

View File

@@ -10,6 +10,7 @@ import pytest
from pydantic import BaseModel
from letta.functions.functions import derive_openai_json_schema
from letta.functions.schema_generator import validate_google_style_docstring
from letta.llm_api.helpers import convert_to_structured_output, make_post_request
from letta.schemas.tool import Tool, ToolCreate
@@ -477,3 +478,108 @@ def test_valid_schemas_with_pydantic_args_schema(openai_model: str, structured_o
end_time = time.time()
print(f"Total execution time: {end_time - start_time:.2f} seconds")
# Google comment style validation tests
# ---------- helpers ----------
def _check(fn, expected_regex: str | None = None):
    """Run docstring validation on *fn*: expect success when *expected_regex* is None,
    otherwise expect a ValueError whose message matches *expected_regex*."""
    if expected_regex is not None:
        # Failure case: the validator must raise with a matching message
        with pytest.raises(ValueError, match=expected_regex):
            validate_google_style_docstring(fn)
        return
    # Success case: must not raise
    validate_google_style_docstring(fn)
# ---------- passing cases ----------
# Fixture: fully documented function (Args + Returns) — should PASS validation.
# NOTE(review): the docstring below is the data under test; do not edit its text.
def good_function(file_requests: list, close_all_others: bool = False) -> str:
    """Open files.
    Args:
        file_requests (list): Requests.
        close_all_others (bool): Flag.
    Returns:
        str: Status.
    """
    return "ok"
# Fixture: documented Args but no Returns section — should still PASS validation.
# NOTE(review): the docstring below is the data under test; do not edit its text.
def good_function_no_return(file_requests: list, close_all_others: bool = False) -> str:
    """Open files.
    Args:
        file_requests (list): Requests.
        close_all_others (bool): Flag.
    """
    return "ok"
# Fixture: `agent_state` parameter is intentionally undocumented — the validator
# is expected to ignore it, so this should PASS validation.
# NOTE(review): the docstring below is the data under test; do not edit its text.
def agent_state_ok(agent_state, value: int) -> str:
    """Ignores agent_state param.
    Args:
        value (int): Some value.
    Returns:
        str: Status.
    """
    return "ok"
# Fixture class: exercises validation of a bound/unbound method, where the
# explicit `self` parameter (like `agent_state`) should be ignored — PASS case.
class Dummy:
    def method(self, bar: int) -> str:  # keeps an explicit self
        """Bound-method example.
        Args:
            bar (int): Number.
        Returns:
            str: Status.
        """
        return "ok"
# ---------- failing cases ----------
# Fixture: no docstring at all — should FAIL validation with "has no docstring".
# NOTE(review): deliberately undocumented; do not add a docstring.
def no_doc(x: int) -> str:
    return "fail"
# Fixture: docstring present but missing the Args section — should FAIL
# validation with "must have 'Args:' section".
# NOTE(review): the docstring below is the data under test; do not edit its text.
def no_args(x: int) -> str:
    """Missing Args.
    Returns:
        str: Status.
    """
    return "fail"
# Fixture: Args section documents `x` but not `y` — should FAIL validation
# with "parameter 'y' not documented".
# NOTE(review): the docstring below is the data under test; do not edit its text.
def missing_param_doc(x: int, y: int) -> str:
    """Only one param documented.
    Args:
        x (int): X.
    Returns:
        str: Status.
    """
    return "fail"
# ---------- parametrized test ----------
@pytest.mark.parametrize(
    "fn, regex",
    [
        (good_function, None),
        (agent_state_ok, None),
        (Dummy.method, None),  # unbound method keeps `self`
        (good_function_no_return, None),
        (no_doc, "has no docstring"),
        (no_args, "must have 'Args:' section"),
        (missing_param_doc, "parameter 'y' not documented"),
    ],
)
def test_google_style_docstring_validation(fn, regex):
    """Each fixture either validates cleanly (regex is None) or raises a matching ValueError."""
    if regex is None:
        # Passing fixture: validation must complete without raising
        validate_google_style_docstring(fn)
    else:
        # Failing fixture: error message must match the expected pattern
        with pytest.raises(ValueError, match=regex):
            validate_google_style_docstring(fn)

View File

@@ -1,6 +1,6 @@
{
"name": "create_task_plan",
"description": "It takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"description": "Creates a task plan for the current task.\n\nIt takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"parameters": {
"type": "object",
"properties": {

View File

@@ -1,6 +1,6 @@
{
"name": "create_task_plan",
"description": "It takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"description": "Creates a task plan for the current task.\n\nIt takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"strict": true,
"parameters": {
"type": "object",

View File

@@ -1,6 +1,6 @@
{
"name": "create_task_plan",
"description": "It takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"description": "Creates a task plan for the current task.\n\nIt takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"parameters": {
"type": "object",
"properties": {

View File

@@ -1,6 +1,6 @@
{
"name": "create_task_plan",
"description": "It takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"description": "Creates a task plan for the current task.\n\nIt takes in a list of steps, and updates the task with the new steps provided.\nIf there are any current steps, they will be overwritten.\nEach step in the list should have the following format:\n{\n \"name\": <string> -- Name of the step.\n \"key\": <string> -- Unique identifier for the step.\n \"description\": <string> -- An exhaustic description of what this step is trying to achieve and accomplish.\n}",
"strict": true,
"parameters": {
"type": "object",

View File

@@ -1,6 +1,6 @@
{
"name": "roll_d20",
"description": "This function generates a random integer between 1 and 20, inclusive,\nwhich represents the outcome of a single roll of a d20.",
"description": "Simulate the roll of a 20-sided die (d20).\n\nThis function generates a random integer between 1 and 20, inclusive,\nwhich represents the outcome of a single roll of a d20.",
"parameters": {
"type": "object",
"properties": {},

View File

@@ -1,6 +1,6 @@
{
"name": "roll_d20",
"description": "This function generates a random integer between 1 and 20, inclusive,\nwhich represents the outcome of a single roll of a d20.",
"description": "Simulate the roll of a 20-sided die (d20).\n\nThis function generates a random integer between 1 and 20, inclusive,\nwhich represents the outcome of a single roll of a d20.",
"strict": true,
"parameters": {
"type": "object",