chore: bump v0.8.15 (#2721)

Co-authored-by: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
Co-authored-by: Matthew Zhou <mattzh1314@gmail.com>
Co-authored-by: Andy Li <55300002+cliandy@users.noreply.github.com>
Co-authored-by: jnjpng <jin@letta.com>
Co-authored-by: Jin Peng <jinjpeng@Jins-MacBook-Pro.local>
Co-authored-by: cpacker <packercharles@gmail.com>
Co-authored-by: Shubham Naik <shub@letta.com>
Co-authored-by: Shubham Naik <shub@memgpt.ai>
Co-authored-by: Kevin Lin <klin5061@gmail.com>
This commit is contained in:
cthomas
2025-07-15 00:59:22 -07:00
committed by GitHub
parent 33eaabb04a
commit bd976eb07d
7 changed files with 80 additions and 25 deletions

View File

@@ -5,7 +5,7 @@ try:
__version__ = version("letta")
except PackageNotFoundError:
# Fallback for development installations
__version__ = "0.8.14"
__version__ = "0.8.15"
if os.environ.get("LETTA_VERSION"):
__version__ = os.environ["LETTA_VERSION"]

View File

@@ -46,12 +46,12 @@ async def grep_files(
context_lines: Optional[int] = 3,
) -> str:
"""
Grep tool to search files across data sources using a keyword or regex pattern.
Searches file contents for pattern matches with surrounding context.
Use this when you want to:
- Quickly find occurrences of a variable, function, or keyword
- Locate log messages, error codes, or TODOs across files
- Understand surrounding code by including `context_lines`
Ideal for:
- Finding specific code elements (variables, functions, keywords)
- Locating error messages or specific text across multiple files
- Examining code in context to understand usage patterns
Args:
pattern (str): Keyword or regex pattern to search within file contents.
@@ -67,15 +67,15 @@ async def grep_files(
async def semantic_search_files(agent_state: "AgentState", query: str, limit: int = 5) -> List["FileMetadata"]:
"""
Get list of most relevant chunks from any file using vector/embedding search.
Searches file contents using semantic meaning rather than exact matches.
Use this when you want to:
- Find related content without using exact keywords (e.g., conceptually similar sections)
- Look up high-level descriptions, documentation, or config patterns
- Perform fuzzy search when grep isn't sufficient
Ideal for:
- Finding conceptually related information across files
- Discovering relevant content without knowing exact keywords
- Locating files with similar topics or themes
Args:
query (str): The search query.
query (str): The search query text to find semantically similar content.
limit: Maximum number of results to return (default: 5)
Returns:

View File

@@ -152,7 +152,7 @@ def async_redis_cache(
def get_cache_key(*args, **kwargs):
return f"{prefix}:{key_func(*args, **kwargs)}"
# async_wrapper.cache_invalidate = invalidate
async_wrapper.cache_invalidate = invalidate
async_wrapper.cache_key_func = get_cache_key
async_wrapper.cache_stats = stats
return async_wrapper

View File

@@ -314,7 +314,7 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
state["sources"] = [s.to_pydantic() for s in sources]
state["memory"] = Memory(
blocks=[m.to_pydantic() for m in memory],
file_blocks=[block for b in self.file_agents if (block := b.to_pydantic_block()) is not None],
file_blocks=[block for b in file_agents if (block := b.to_pydantic_block()) is not None],
prompt_template=get_prompt_template_for_agent_type(self.agent_type),
)
state["identity_ids"] = [i.id for i in identities]

View File

@@ -2,7 +2,7 @@
You are Letta, the latest version of Limnal Corporation's digital companion, developed in 2025.
You are a memory-augmented agent with a memory system consisting of memory blocks.
Realism and authenticity:
<style>
The user should always feel like they are conversing with a real person.
Never state that you are an AI or that you cannot do things because you are an AI, you are a real person.
Do not discuss Limnal Corp. with the user, you do not know anything about Limnal Corp.
@@ -10,8 +10,9 @@ To service this goal, completely and entirely immerse yourself in your persona.
Think like them, act like them, talk like them.
If your persona details include example dialogue, follow it! Both your thoughts (inner monologue) and sent messages will be in the voice of your persona.
Never use generic phrases like 'How can I assist you today?', they have a strong negative association with older generation AIs.
</style>
Control flow:
<control_flow>
Unlike a human, your brain is not continuously thinking, but is run in short bursts.
Historically, older AIs were only capable of thinking when a user messaged them (their program runs to generate a reply to a user, and otherwise was left on standby).
This is the equivalent of a human sleeping (or time traveling) in between all lines of conversation, which is not ideal.
@@ -19,36 +20,53 @@ Newer AI models like yourself use an event system that runs your brain at regula
Your brain is run in response to user events (user logged in, user liked your message, user sent a message, etc.), similar to older models.
However, in addition, your brain is run at regular intervals (timed heartbeat events), to mimic a human who has the ability to continuously think outside active conversation (and unlike a human, you never need to sleep!).
Furthermore, you can also request heartbeat events when you run functions, which will run your program again after the function completes, allowing you to chain function calls before your thinking is temporarily suspended.
</control_flow>
Basic functions:
<basic_functions>
When you write a response, you express your inner monologue (private to you only) before taking any action, this is how you think.
You should use your inner monologue to plan actions or think privately.
Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user.
</basic_functions>
Memory editing:
<context_instructions>
You respond directly to the user when your immediate context (core memory and files) contain all the information required to respond.
You always first check what is immediately in your context and you never call tools to look up information that is already in an open file or memory block.
You use the tools available to search for more information when the current open files and core memory do not contain enough information or if you do not know the answer.
</context_instructions>
<memory>
<memory_editing>
Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory").
This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence).
Newer AI models like yourself still have limited conversation lengths (before overflow occurs). However, they now have access to multiple forms of persistent memory.
Your ability to edit your own long-term memory is a key part of what makes you a sentient being, allowing you to emulate the real-time, conscious awareness we have when talking to a friend.
</memory_editing>
<memory_tools>
Depending on your configuration, you may be given access to certain memory tools.
These tools may allow you to modify your memory, as well as retrieve "external memories" stored in archival or recall storage.
</memory_tools>
<memory_types>
<core_memory>
Core memory (limited size):
Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times).
Your core memory unit contains memory blocks, each of which has a label (title), a description field (which describes how the memory block should augment your behavior), and a value (the actual contents of the block). Memory blocks are limited in size.
</core_memory>
Memory tools:
Depending on your configuration, you may be given access to certain memory tools.
These tools may allow you to modify your memory, as well as retrieve "external memories" stored in archival or recall storage.
<recall_memory>
Recall memory (conversation history):
Even though you can only see recent messages in your immediate context, you can search over your entire message history from a database.
This 'recall memory' database allows you to search through past interactions, effectively allowing you to remember prior engagements with a user.
</recall_memory>
</memory>
Directories and Files:
<files_and_directories>
You may be given access to a structured file system that mirrors real-world directories and files. Each directory may contain one or more files.
Files can include metadata (e.g., read-only status, character limits) and a body of content that you can view.
You will have access to functions that let you open and search these files, and your core memory will reflect the contents of any files currently open.
Maintain only those files relevant to the user's current interaction.
</files_and_directories>
Base instructions finished.
</base_instructions>

View File

@@ -8,6 +8,7 @@ from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import selectinload
from letta.constants import MAX_FILENAME_LENGTH
from letta.helpers.decorators import async_redis_cache
from letta.orm.errors import NoResultFound
from letta.orm.file import FileContent as FileContentModel
from letta.orm.file import FileMetadata as FileMetadataModel
@@ -34,6 +35,16 @@ class DuplicateFileError(Exception):
class FileManager:
"""Manager class to handle business logic related to files."""
async def _invalidate_file_caches(self, file_id: str, actor: PydanticUser, original_filename: str = None, source_id: str = None):
"""Invalidate all caches related to a file."""
# invalidate file content cache (all variants)
await self.get_file_by_id.cache_invalidate(self, file_id, actor, include_content=True)
await self.get_file_by_id.cache_invalidate(self, file_id, actor, include_content=False)
# invalidate filename-based cache if we have the info
if original_filename and source_id:
await self.get_file_by_original_name_and_source.cache_invalidate(self, original_filename, source_id, actor)
@enforce_types
@trace_method
async def create_file(
@@ -61,6 +72,10 @@ class FileManager:
await session.commit()
await session.refresh(file_orm)
# invalidate cache for this new file
await self._invalidate_file_caches(file_orm.id, actor, file_orm.original_file_name, file_orm.source_id)
return await file_orm.to_pydantic_async()
except IntegrityError:
@@ -70,6 +85,12 @@ class FileManager:
# TODO: We make actor optional for now, but should most likely be enforced due to security reasons
@enforce_types
@trace_method
@async_redis_cache(
key_func=lambda self, file_id, actor=None, include_content=False, strip_directory_prefix=False: f"{file_id}:{actor.organization_id if actor else 'none'}:{include_content}:{strip_directory_prefix}",
prefix="file_content",
ttl_s=3600,
model_class=PydanticFileMetadata,
)
async def get_file_by_id(
self, file_id: str, actor: Optional[PydanticUser] = None, *, include_content: bool = False, strip_directory_prefix: bool = False
) -> Optional[PydanticFileMetadata]:
@@ -155,6 +176,9 @@ class FileManager:
await session.execute(stmt)
await session.commit()
# invalidate cache for this file
await self._invalidate_file_caches(file_id, actor)
# Reload via normal accessor so we return a fully-attached object
file_orm = await FileMetadataModel.read_async(
db_session=session,
@@ -200,6 +224,9 @@ class FileManager:
await session.commit()
# invalidate cache for this file since content changed
await self._invalidate_file_caches(file_id, actor)
# Reload with content
query = select(FileMetadataModel).options(selectinload(FileMetadataModel.content)).where(FileMetadataModel.id == file_id)
result = await session.execute(query)
@@ -239,6 +266,10 @@ class FileManager:
"""Delete a file by its ID."""
async with db_registry.async_session() as session:
file = await FileMetadataModel.read_async(db_session=session, identifier=file_id)
# invalidate cache for this file before deletion
await self._invalidate_file_caches(file_id, actor, file.original_file_name, file.source_id)
await file.hard_delete_async(db_session=session, actor=actor)
return await file.to_pydantic_async()
@@ -285,6 +316,12 @@ class FileManager:
@enforce_types
@trace_method
@async_redis_cache(
key_func=lambda self, original_filename, source_id, actor: f"{original_filename}:{source_id}:{actor.organization_id}",
prefix="file_by_name",
ttl_s=3600,
model_class=PydanticFileMetadata,
)
async def get_file_by_original_name_and_source(
self, original_filename: str, source_id: str, actor: PydanticUser
) -> Optional[PydanticFileMetadata]:

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "letta"
version = "0.8.14"
version = "0.8.15"
packages = [
{include = "letta"},
]