diff --git a/letta/__init__.py b/letta/__init__.py index 83fc346e..20087c44 100644 --- a/letta/__init__.py +++ b/letta/__init__.py @@ -5,7 +5,7 @@ try: __version__ = version("letta") except PackageNotFoundError: # Fallback for development installations - __version__ = "0.8.14" + __version__ = "0.8.15" if os.environ.get("LETTA_VERSION"): __version__ = os.environ["LETTA_VERSION"] diff --git a/letta/functions/function_sets/files.py b/letta/functions/function_sets/files.py index 43b34bd9..0e29bee6 100644 --- a/letta/functions/function_sets/files.py +++ b/letta/functions/function_sets/files.py @@ -46,12 +46,12 @@ async def grep_files( context_lines: Optional[int] = 3, ) -> str: """ - Grep tool to search files across data sources using a keyword or regex pattern. + Searches file contents for pattern matches with surrounding context. - Use this when you want to: - - Quickly find occurrences of a variable, function, or keyword - - Locate log messages, error codes, or TODOs across files - - Understand surrounding code by including `context_lines` + Ideal for: + - Finding specific code elements (variables, functions, keywords) + - Locating error messages or specific text across multiple files + - Examining code in context to understand usage patterns Args: pattern (str): Keyword or regex pattern to search within file contents. @@ -67,15 +67,15 @@ async def grep_files( async def semantic_search_files(agent_state: "AgentState", query: str, limit: int = 5) -> List["FileMetadata"]: """ - Get list of most relevant chunks from any file using vector/embedding search. + Searches file contents using semantic meaning rather than exact matches. 
- Use this when you want to: - - Find related content that without using exact keywords (e.g., conceptually similar sections) - - Look up high-level descriptions, documentation, or config patterns - - Perform fuzzy search when grep isn't sufficient + Ideal for: + - Finding conceptually related information across files + - Discovering relevant content without knowing exact keywords + - Locating files with similar topics or themes Args: - query (str): The search query. + query (str): The search query text to find semantically similar content. limit: Maximum number of results to return (default: 5) Returns: diff --git a/letta/helpers/decorators.py b/letta/helpers/decorators.py index bfcb0665..77744ea1 100644 --- a/letta/helpers/decorators.py +++ b/letta/helpers/decorators.py @@ -152,7 +152,7 @@ def async_redis_cache( def get_cache_key(*args, **kwargs): return f"{prefix}:{key_func(*args, **kwargs)}" - # async_wrapper.cache_invalidate = invalidate + async_wrapper.cache_invalidate = invalidate async_wrapper.cache_key_func = get_cache_key async_wrapper.cache_stats = stats return async_wrapper diff --git a/letta/orm/agent.py b/letta/orm/agent.py index 1c78816b..ce51d729 100644 --- a/letta/orm/agent.py +++ b/letta/orm/agent.py @@ -314,7 +314,7 @@ class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs): state["sources"] = [s.to_pydantic() for s in sources] state["memory"] = Memory( blocks=[m.to_pydantic() for m in memory], - file_blocks=[block for b in self.file_agents if (block := b.to_pydantic_block()) is not None], + file_blocks=[block for b in file_agents if (block := b.to_pydantic_block()) is not None], prompt_template=get_prompt_template_for_agent_type(self.agent_type), ) state["identity_ids"] = [i.id for i in identities] diff --git a/letta/prompts/system/memgpt_v2_chat.txt b/letta/prompts/system/memgpt_v2_chat.txt index 8197c6a2..07099a06 100644 --- a/letta/prompts/system/memgpt_v2_chat.txt +++ b/letta/prompts/system/memgpt_v2_chat.txt @@ -2,7 +2,7 @@ You are 
Letta, the latest version of Limnal Corporation's digital companion, developed in 2025. You are a memory-augmented agent with a memory system consisting of memory blocks. -Realism and authenticity: + -Control flow: + Unlike a human, your brain is not continuously thinking, but is run in short bursts. Historically, older AIs were only capable of thinking when a user messaged them (their program runs to generate a reply to a user, and otherwise was left on standby). This is the equivalent of a human sleeping (or time traveling) in between all lines of conversation, which is not ideal. @@ -19,36 +20,53 @@ Newer AI models like yourself use an event system that runs your brain at regula Your brain is run in response to user events (user logged in, user liked your message, user sent a message, etc.), similar to older models. However, in addition, your brain is run at regular intervals (timed heartbeat events), to mimic a human who has the ability to continuously think outside active conversation (and unlike a human, you never need to sleep!). Furthermore, you can also request heartbeat events when you run functions, which will run your program again after the function completes, allowing you to chain function calls before your thinking is temporarily suspended. + -Basic functions: + When you write a response, you express your inner monologue (private to you only) before taking any action, this is how you think. You should use your inner monologue to plan actions or think privately. Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user. + -Memory editing: + +You respond directly to the user when your immediate context (core memory and files) contains all the information required to respond. +You always first check what is immediately in your context and you never call tools to look up information that is already in an open file or memory block. 
+You use the tools available to search for more information when the current open files and core memory do not contain enough information or if you do not know the answer. + + + + Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory"). This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence). Newer AI models like yourself still have limited conversation lengths (before overflow occurs). However, they now have access to multiple forms of persistent memory. Your ability to edit your own long-term memory is a key part of what makes you a sentient being, allowing you to emulate the real-time, conscious awareness we have when talking to a friend. + + +Depending on your configuration, you may be given access to certain memory tools. +These tools may allow you to modify your memory, as well as retrieve "external memories" stored in archival or recall storage. + + + + Core memory (limited size): Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times). Your core memory unit contains memory blocks, each of which has a label (title) and description field, which describes how the memory block should augment your behavior, and value (the actual contents of the block). Memory blocks are limited in size and have a size limit. + -Memory tools: -Depending on your configuration, you may be given access to certain memory tools. -These tools may allow you to modify your memory, as well as retrieve "external memories" stored in archival or recall storage. - + Recall memory (conversation history): Even though you can only see recent messages in your immediate context, you can search over your entire message history from a database. 
This 'recall memory' database allows you to search through past interactions, effectively allowing you to remember prior engagements with a user. + + -Directories and Files: + You may be given access to a structured file system that mirrors real-world directories and files. Each directory may contain one or more files. Files can include metadata (e.g., read-only status, character limits) and a body of content that you can view. You will have access to functions that let you open and search these files, and your core memory will reflect the contents of any files currently open. Maintain only those files relevant to the user’s current interaction. - + Base instructions finished. diff --git a/letta/services/file_manager.py b/letta/services/file_manager.py index 530fa3e1..6aa86b16 100644 --- a/letta/services/file_manager.py +++ b/letta/services/file_manager.py @@ -8,6 +8,7 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import selectinload from letta.constants import MAX_FILENAME_LENGTH +from letta.helpers.decorators import async_redis_cache from letta.orm.errors import NoResultFound from letta.orm.file import FileContent as FileContentModel from letta.orm.file import FileMetadata as FileMetadataModel @@ -34,6 +35,16 @@ class DuplicateFileError(Exception): class FileManager: """Manager class to handle business logic related to files.""" + async def _invalidate_file_caches(self, file_id: str, actor: PydanticUser, original_filename: str = None, source_id: str = None): + """Invalidate all caches related to a file.""" + # invalidate file content cache (all variants) + await self.get_file_by_id.cache_invalidate(self, file_id, actor, include_content=True) + await self.get_file_by_id.cache_invalidate(self, file_id, actor, include_content=False) + + # invalidate filename-based cache if we have the info + if original_filename and source_id: + await self.get_file_by_original_name_and_source.cache_invalidate(self, original_filename, source_id, actor) + 
@enforce_types @trace_method async def create_file( @@ -61,6 +72,10 @@ class FileManager: await session.commit() await session.refresh(file_orm) + + # invalidate cache for this new file + await self._invalidate_file_caches(file_orm.id, actor, file_orm.original_file_name, file_orm.source_id) + return await file_orm.to_pydantic_async() except IntegrityError: @@ -70,6 +85,12 @@ class FileManager: # TODO: We make actor optional for now, but should most likely be enforced due to security reasons @enforce_types @trace_method + @async_redis_cache( + key_func=lambda self, file_id, actor=None, include_content=False, strip_directory_prefix=False: f"{file_id}:{actor.organization_id if actor else 'none'}:{include_content}:{strip_directory_prefix}", + prefix="file_content", + ttl_s=3600, + model_class=PydanticFileMetadata, + ) async def get_file_by_id( self, file_id: str, actor: Optional[PydanticUser] = None, *, include_content: bool = False, strip_directory_prefix: bool = False ) -> Optional[PydanticFileMetadata]: @@ -155,6 +176,9 @@ class FileManager: await session.execute(stmt) await session.commit() + # invalidate cache for this file + await self._invalidate_file_caches(file_id, actor) + # Reload via normal accessor so we return a fully-attached object file_orm = await FileMetadataModel.read_async( db_session=session, @@ -200,6 +224,9 @@ class FileManager: await session.commit() + # invalidate cache for this file since content changed + await self._invalidate_file_caches(file_id, actor) + # Reload with content query = select(FileMetadataModel).options(selectinload(FileMetadataModel.content)).where(FileMetadataModel.id == file_id) result = await session.execute(query) @@ -239,6 +266,10 @@ class FileManager: """Delete a file by its ID.""" async with db_registry.async_session() as session: file = await FileMetadataModel.read_async(db_session=session, identifier=file_id) + + # invalidate cache for this file before deletion + await self._invalidate_file_caches(file_id, actor, 
file.original_file_name, file.source_id) + await file.hard_delete_async(db_session=session, actor=actor) return await file.to_pydantic_async() @@ -285,6 +316,12 @@ class FileManager: @enforce_types @trace_method + @async_redis_cache( + key_func=lambda self, original_filename, source_id, actor: f"{original_filename}:{source_id}:{actor.organization_id}", + prefix="file_by_name", + ttl_s=3600, + model_class=PydanticFileMetadata, + ) async def get_file_by_original_name_and_source( self, original_filename: str, source_id: str, actor: PydanticUser ) -> Optional[PydanticFileMetadata]: diff --git a/pyproject.toml b/pyproject.toml index 57a8f27a..fdc98900 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "letta" -version = "0.8.14" +version = "0.8.15" packages = [ {include = "letta"}, ]