From be274749fc010d092b9bc51ee4529473cae6a6e5 Mon Sep 17 00:00:00 2001 From: mlong93 <35275280+mlong93@users.noreply.github.com> Date: Thu, 12 Dec 2024 11:36:26 -0800 Subject: [PATCH] feat: archival memory file upload (#2228) Co-authored-by: Mindy Long Co-authored-by: Sarah Wooders --- letta/agent.py | 3 +- letta/data_sources/connectors.py | 8 +- letta/server/server.py | 16 +- tests/test_server.py | 389 ++++++++++++++++++++++++++++++- 4 files changed, 400 insertions(+), 16 deletions(-) diff --git a/letta/agent.py b/letta/agent.py index 98e6f9ff..4060c363 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -1371,10 +1371,9 @@ class Agent(BaseAgent): # TODO: recall memory raise NotImplementedError() - def attach_source(self, user: PydanticUser, source_id: str, source_manager: SourceManager, ms: MetadataStore): + def attach_source(self, user: PydanticUser, source_id: str, source_manager: SourceManager, ms: MetadataStore, page_size: Optional[int] = None): """Attach data with name `source_name` to the agent from source_connector.""" # TODO: eventually, adding a data source should just give access to the retriever the source table, rather than modifying archival memory - page_size = 100 passages = self.passage_manager.list_passages(actor=user, source_id=source_id, limit=page_size) for passage in passages: diff --git a/letta/data_sources/connectors.py b/letta/data_sources/connectors.py index 741e46a9..f9fdd261 100644 --- a/letta/data_sources/connectors.py +++ b/letta/data_sources/connectors.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterator, List, Tuple, Optional +from typing import Dict, Iterator, List, Tuple import typer @@ -13,8 +13,6 @@ from letta.schemas.passage import Passage from letta.schemas.source import Source from letta.services.passage_manager import PassageManager from letta.services.source_manager import SourceManager -from letta.utils import create_uuid_from_string - class DataConnector: """ @@ -42,7 +40,7 @@ class DataConnector: """ -def load_data(connector: DataConnector, source: Source, passage_manager: PassageManager, source_manager: SourceManager, actor: "User", agent_id: Optional[str] = None): +def load_data(connector: DataConnector, source: Source, passage_manager: PassageManager, source_manager: SourceManager, actor: "User"): """Load data from a connector (generates file and passages) into a specified source_id, associated with a user_id.""" embedding_config = source.embedding_config @@ -79,10 +77,8 @@ def load_data(connector: DataConnector, source: Source, passage_manager: Passage continue passage = Passage( - id=create_uuid_from_string(f"{str(source.id)}_{passage_text}"), text=passage_text, file_id=file_metadata.id, - agent_id=agent_id, source_id=source.id, metadata_=passage_metadata, organization_id=source.organization_id, diff --git a/letta/server/server.py b/letta/server/server.py index 6dd8d65b..80a1335a 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -1584,6 +1584,8 @@ class SyncServer(Server): from letta.data_sources.connectors import DirectoryConnector source = self.source_manager.get_source_by_id(source_id=source_id) + if source is None: + raise ValueError(f"Source {source_id} does not exist") connector = DirectoryConnector(input_files=[file_path]) num_passages, num_documents = self.load_data(user_id=source.created_by_id, source_name=source.name, connector=connector) @@ -1593,6 +1595,15 @@ class SyncServer(Server): job.metadata_["num_documents"] = num_documents self.job_manager.update_job_by_id(job_id=job_id, job_update=JobUpdate(**job.model_dump()), actor=actor) + # update all agents who have this source attached + agent_ids = self.ms.list_attached_agents(source_id=source_id) + for agent_id in agent_ids: + agent = self.load_agent(agent_id=agent_id) + curr_passage_size = self.passage_manager.size(actor=actor, agent_id=agent_id, source_id=source_id) + agent.attach_source(user=actor, source_id=source_id, source_manager=self.source_manager, ms=self.ms) + new_passage_size = self.passage_manager.size(actor=actor, agent_id=agent_id, source_id=source_id) + assert new_passage_size >= curr_passage_size # in case empty files are added + return job def load_data( @@ -1600,7 +1611,6 @@ class SyncServer(Server): user_id: str, connector: DataConnector, source_name: str, - agent_id: Optional[str] = None, ) -> Tuple[int, int]: """Load data from a DataConnector into a source for a specified user_id""" # TODO: this should be implemented as a batch job or at least async, since it may take a long time @@ -1612,9 +1622,7 @@ class SyncServer(Server): raise ValueError(f"Data source {source_name} does not exist for user {user_id}") # load data into the document store - passage_count, document_count = load_data( - connector, source, self.passage_manager, self.source_manager, actor=user, agent_id=agent_id - ) + passage_count, document_count = load_data(connector, source, self.passage_manager, self.source_manager, actor=user) return passage_count, document_count def attach_source_to_agent( diff --git a/tests/test_server.py b/tests/test_server.py index 555b7c07..ee745c5c 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -25,13 +25,246 @@ utils.DEBUG = True from letta.config import LettaConfig from letta.schemas.agent import CreateAgent from letta.schemas.embedding_config import EmbeddingConfig +from letta.schemas.job import Job as PydanticJob from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message -from letta.schemas.source import Source +from letta.schemas.source import Source as PydanticSource from letta.server.server import SyncServer from .utils import DummyDataConnector +WAR_AND_PEACE = """BOOK ONE: 1805 + +CHAPTER I + +“Well, Prince, so Genoa and Lucca are now just family estates of the +Buonapartes. But I warn you, if you don't tell me that this means war, +if you still try to defend the infamies and horrors perpetrated by that +Antichrist—I really believe he is Antichrist—I will have nothing +more to do with you and you are no longer my friend, no longer my +'faithful slave,' as you call yourself! But how do you do? I see I +have frightened you—sit down and tell me all the news.” + +It was in July, 1805, and the speaker was the well-known Anna Pávlovna +Schérer, maid of honor and favorite of the Empress Márya Fëdorovna. +With these words she greeted Prince Vasíli Kurágin, a man of high +rank and importance, who was the first to arrive at her reception. Anna +Pávlovna had had a cough for some days. She was, as she said, suffering +from la grippe; grippe being then a new word in St. Petersburg, used +only by the elite. + +All her invitations without exception, written in French, and delivered +by a scarlet-liveried footman that morning, ran as follows: + +“If you have nothing better to do, Count (or Prince), and if the +prospect of spending an evening with a poor invalid is not too terrible, +I shall be very charmed to see you tonight between 7 and 10—Annette +Schérer.” + +“Heavens! what a virulent attack!” replied the prince, not in the +least disconcerted by this reception. He had just entered, wearing an +embroidered court uniform, knee breeches, and shoes, and had stars on +his breast and a serene expression on his flat face. He spoke in that +refined French in which our grandfathers not only spoke but thought, and +with the gentle, patronizing intonation natural to a man of importance +who had grown old in society and at court. He went up to Anna Pávlovna, +kissed her hand, presenting to her his bald, scented, and shining head, +and complacently seated himself on the sofa. + +“First of all, dear friend, tell me how you are. Set your friend's +mind at rest,” said he without altering his tone, beneath the +politeness and affected sympathy of which indifference and even irony +could be discerned. + +“Can one be well while suffering morally? Can one be calm in times +like these if one has any feeling?” said Anna Pávlovna. “You are +staying the whole evening, I hope?” + +“And the fete at the English ambassador's? Today is Wednesday. I +must put in an appearance there,” said the prince. “My daughter is +coming for me to take me there.” + +“I thought today's fete had been canceled. I confess all these +festivities and fireworks are becoming wearisome.” + +“If they had known that you wished it, the entertainment would have +been put off,” said the prince, who, like a wound-up clock, by force +of habit said things he did not even wish to be believed. + +“Don't tease! Well, and what has been decided about Novosíltsev's +dispatch? You know everything.” + +“What can one say about it?” replied the prince in a cold, listless +tone. “What has been decided? They have decided that Buonaparte has +burnt his boats, and I believe that we are ready to burn ours.” + +Prince Vasíli always spoke languidly, like an actor repeating a stale +part. Anna Pávlovna Schérer on the contrary, despite her forty years, +overflowed with animation and impulsiveness. To be an enthusiast had +become her social vocation and, sometimes even when she did not +feel like it, she became enthusiastic in order not to disappoint the +expectations of those who knew her. The subdued smile which, though it +did not suit her faded features, always played round her lips expressed, +as in a spoiled child, a continual consciousness of her charming defect, +which she neither wished, nor could, nor considered it necessary, to +correct. + +In the midst of a conversation on political matters Anna Pávlovna burst +out: + +“Oh, don't speak to me of Austria. Perhaps I don't understand +things, but Austria never has wished, and does not wish, for war. She +is betraying us! Russia alone must save Europe. Our gracious sovereign +recognizes his high vocation and will be true to it. That is the one +thing I have faith in! Our good and wonderful sovereign has to perform +the noblest role on earth, and he is so virtuous and noble that God will +not forsake him. He will fulfill his vocation and crush the hydra of +revolution, which has become more terrible than ever in the person of +this murderer and villain! We alone must avenge the blood of the just +one.... Whom, I ask you, can we rely on?... England with her commercial +spirit will not and cannot understand the Emperor Alexander's +loftiness of soul. She has refused to evacuate Malta. She wanted to +find, and still seeks, some secret motive in our actions. What answer +did Novosíltsev get? None. The English have not understood and cannot +understand the self-abnegation of our Emperor who wants nothing for +himself, but only desires the good of mankind. And what have they +promised? Nothing! And what little they have promised they will not +perform! Prussia has always declared that Buonaparte is invincible, and +that all Europe is powerless before him.... And I don't believe a +word that Hardenburg says, or Haugwitz either. This famous Prussian +neutrality is just a trap. I have faith only in God and the lofty +destiny of our adored monarch. He will save Europe!” + +She suddenly paused, smiling at her own impetuosity. + +“I think,” said the prince with a smile, “that if you had been +sent instead of our dear Wintzingerode you would have captured the King +of Prussia's consent by assault. You are so eloquent. Will you give me +a cup of tea?” + +“In a moment. À propos,” she added, becoming calm again, “I am +expecting two very interesting men tonight, le Vicomte de Mortemart, who +is connected with the Montmorencys through the Rohans, one of the best +French families. He is one of the genuine émigrés, the good ones. And +also the Abbé Morio. Do you know that profound thinker? He has been +received by the Emperor. Had you heard?” + +“I shall be delighted to meet them,” said the prince. “But +tell me,” he added with studied carelessness as if it had only just +occurred to him, though the question he was about to ask was the chief +motive of his visit, “is it true that the Dowager Empress wants +Baron Funke to be appointed first secretary at Vienna? The baron by all +accounts is a poor creature.” + +Prince Vasíli wished to obtain this post for his son, but others were +trying through the Dowager Empress Márya Fëdorovna to secure it for +the baron. + +Anna Pávlovna almost closed her eyes to indicate that neither she nor +anyone else had a right to criticize what the Empress desired or was +pleased with. + +“Baron Funke has been recommended to the Dowager Empress by her +sister,” was all she said, in a dry and mournful tone. + +As she named the Empress, Anna Pávlovna's face suddenly assumed an +expression of profound and sincere devotion and respect mingled with +sadness, and this occurred every time she mentioned her illustrious +patroness. She added that Her Majesty had deigned to show Baron Funke +beaucoup d'estime, and again her face clouded over with sadness. + +The prince was silent and looked indifferent. But, with the womanly and +courtierlike quickness and tact habitual to her, Anna Pávlovna +wished both to rebuke him (for daring to speak as he had done of a man +recommended to the Empress) and at the same time to console him, so she +said: + +“Now about your family. Do you know that since your daughter came +out everyone has been enraptured by her? They say she is amazingly +beautiful.” + +The prince bowed to signify his respect and gratitude. + +“I often think,” she continued after a short pause, drawing nearer +to the prince and smiling amiably at him as if to show that political +and social topics were ended and the time had come for intimate +conversation—“I often think how unfairly sometimes the joys of life +are distributed. Why has fate given you two such splendid children? +I don't speak of Anatole, your youngest. I don't like him,” she +added in a tone admitting of no rejoinder and raising her eyebrows. +“Two such charming children. And really you appreciate them less than +anyone, and so you don't deserve to have them.” + +And she smiled her ecstatic smile. + +“I can't help it,” said the prince. “Lavater would have said I +lack the bump of paternity.” + +“Don't joke; I mean to have a serious talk with you. Do you know +I am dissatisfied with your younger son? Between ourselves” (and her +face assumed its melancholy expression), “he was mentioned at Her +Majesty's and you were pitied....” + +The prince answered nothing, but she looked at him significantly, +awaiting a reply. He frowned. + +“What would you have me do?” he said at last. “You know I did all +a father could for their education, and they have both turned out fools. +Hippolyte is at least a quiet fool, but Anatole is an active one. That +is the only difference between them.” He said this smiling in a way +more natural and animated than usual, so that the wrinkles round +his mouth very clearly revealed something unexpectedly coarse and +unpleasant. + +“And why are children born to such men as you? If you were not a +father there would be nothing I could reproach you with,” said Anna +Pávlovna, looking up pensively. + +“I am your faithful slave and to you alone I can confess that my +children are the bane of my life. It is the cross I have to bear. That +is how I explain it to myself. It can't be helped!” + +He said no more, but expressed his resignation to cruel fate by a +gesture. Anna Pávlovna meditated. + +“Have you never thought of marrying your prodigal son Anatole?” she +asked. “They say old maids have a mania for matchmaking, and though I +don't feel that weakness in myself as yet, I know a little person who +is very unhappy with her father. She is a relation of yours, Princess +Mary Bolkónskaya.” + +Prince Vasíli did not reply, though, with the quickness of memory and +perception befitting a man of the world, he indicated by a movement of +the head that he was considering this information. + +“Do you know,” he said at last, evidently unable to check the sad +current of his thoughts, “that Anatole is costing me forty thousand +rubles a year? And,” he went on after a pause, “what will it be in +five years, if he goes on like this?” Presently he added: “That's +what we fathers have to put up with.... Is this princess of yours +rich?” + +“Her father is very rich and stingy. He lives in the country. He is +the well-known Prince Bolkónski who had to retire from the army under +the late Emperor, and was nicknamed 'the King of Prussia.' He is +very clever but eccentric, and a bore. The poor girl is very unhappy. +She has a brother; I think you know him, he married Lise Meinen lately. +He is an aide-de-camp of Kutúzov's and will be here tonight.” + +“Listen, dear Annette,” said the prince, suddenly taking Anna +Pávlovna's hand and for some reason drawing it downwards. “Arrange +that affair for me and I shall always be your most devoted slave-slafe +with an f, as a village elder of mine writes in his reports. She is rich +and of good family and that's all I want.” + +And with the familiarity and easy grace peculiar to him, he raised the +maid of honor's hand to his lips, kissed it, and swung it to and fro +as he lay back in his armchair, looking in another direction. + +“Attendez,” said Anna Pávlovna, reflecting, “I'll speak to +Lise, young Bolkónski's wife, this very evening, and perhaps the +thing can be arranged. It shall be on your family's behalf that I'll +start my apprenticeship as old maid.""" @pytest.fixture(scope="module") def server(): @@ -87,6 +320,24 @@ def agent_id(server, user_id): # cleanup server.delete_agent(user_id, agent_state.id) +@pytest.fixture(scope="module") +def other_agent_id(server, user_id): + # create agent + agent_state = server.create_agent( + request=CreateAgent( + name="test_agent_other", + tools=BASE_TOOLS, + memory_blocks=[], + llm_config=LLMConfig.default_config("gpt-4"), + embedding_config=EmbeddingConfig.default_config(provider="openai"), + ), + actor=server.get_user_or_default(user_id), + ) + print(f"Created agent\n{agent_state}") + yield agent_state.id + + # cleanup + server.delete_agent(user_id, agent_state.id) def test_error_on_nonexistent_agent(server, user_id, agent_id): try: @@ -121,7 +372,7 @@ def test_load_data(server, user_id, agent_id): assert len(passages_before) == 0 source = server.source_manager.create_source( - Source(name="test_source", embedding_config=DEFAULT_EMBEDDING_CONFIG), actor=server.default_user + PydanticSource(name="test_source", embedding_config=EmbeddingConfig.default_config(provider="openai")), actor=server.default_user ) # load data @@ -133,7 +384,7 @@ def test_load_data(server, user_id, agent_id): "Shishir loves indian food", ] connector = DummyDataConnector(archival_memories) - server.load_data(user_id, connector, source.name, agent_id=agent_id) + server.load_data(user_id, connector, source.name) # @pytest.mark.order(3) # def test_attach_source_to_agent(server, user_id, agent_id): @@ -168,7 +419,6 @@ def test_get_recall_memory(server, org_id, user_id, agent_id): messages_1 = server.get_agent_recall_cursor(user_id=user_id, agent_id=agent_id, limit=2) cursor1 = messages_1[-1].id messages_2 = server.get_agent_recall_cursor(user_id=user_id, agent_id=agent_id, after=cursor1, limit=1000) - messages_2[-1].id messages_3 = server.get_agent_recall_cursor(user_id=user_id, agent_id=agent_id, limit=1000) messages_3[-1].id assert messages_3[-1].created_at >= messages_3[0].created_at @@ -691,3 +941,134 @@ def test_memory_rebuild_count(server, user_id, mock_e2b_api_key_none): finally: # cleanup server.delete_agent(user_id, agent_state.id) + + +def test_load_file_to_source(server: SyncServer, user_id: str, agent_id: str, other_agent_id: str, tmp_path): + user = server.get_user_or_default(user_id) + + # Create a source + source = server.source_manager.create_source( + PydanticSource( + name="timber_source", + embedding_config=EmbeddingConfig.default_config(provider="openai"), + created_by_id=user_id, + ), + actor=user + ) + + # Create a test file with some content + test_file = tmp_path / "test.txt" + test_content = "We have a dog called Timber. He likes to sleep and eat chicken." + test_file.write_text(test_content) + + # Attach source to agent first + agent = server.load_agent(agent_id=agent_id) + agent.attach_source(user=user, source_id=source.id, source_manager=server.source_manager, ms=server.ms) + + # Get initial passage count + initial_passage_count = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id) + assert initial_passage_count == 0 + + # Create a job for loading the first file + job = server.job_manager.create_job( + PydanticJob( + user_id=user_id, + metadata_={"type": "embedding", "filename": test_file.name, "source_id": source.id}, + ), + actor=user + ) + + # Load the first file to source + server.load_file_to_source( + source_id=source.id, + file_path=str(test_file), + job_id=job.id, + actor=user, + ) + + # Verify job completed successfully + job = server.job_manager.get_job_by_id(job_id=job.id, actor=user) + assert job.status == "completed" + assert job.metadata_["num_passages"] == 1 + assert job.metadata_["num_documents"] == 1 + + # Verify passages were added + first_file_passage_count = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id) + assert first_file_passage_count > initial_passage_count + + # Create a second test file with different content + test_file2 = tmp_path / "test2.txt" + test_file2.write_text(WAR_AND_PEACE) + + # Create a job for loading the second file + job2 = server.job_manager.create_job( + PydanticJob( + user_id=user_id, + metadata_={"type": "embedding", "filename": test_file2.name, "source_id": source.id}, + ), + actor=user + ) + + # Load the second file to source + server.load_file_to_source( + source_id=source.id, + file_path=str(test_file2), + job_id=job2.id, + actor=user, + ) + + # Verify second job completed successfully + job2 = server.job_manager.get_job_by_id(job_id=job2.id, actor=user) + assert job2.status == "completed" + assert job2.metadata_["num_passages"] >= 10 + assert job2.metadata_["num_documents"] == 1 + + # Verify passages were appended (not replaced) + final_passage_count = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id) + assert final_passage_count > first_file_passage_count + + # Verify both old and new content is searchable + passages = server.passage_manager.list_passages( + actor=user, + agent_id=agent_id, + source_id=source.id, + query_text="what does Timber like to eat", + embedding_config=EmbeddingConfig.default_config(provider="openai"), + embed_query=True, + ) + assert len(passages) == final_passage_count + assert any("chicken" in passage.text.lower() for passage in passages) + assert any("Anna".lower() in passage.text.lower() for passage in passages) + + # TODO: Add this test back in after separation of `Passage tables` (LET-449) + # # Load second agent + # agent2 = server.load_agent(agent_id=other_agent_id) + + # # Initially should have no passages + # initial_agent2_passages = server.passage_manager.size(actor=user, agent_id=other_agent_id, source_id=source.id) + # assert initial_agent2_passages == 0 + + # # Attach source to second agent + # agent2.attach_source(user=user, source_id=source.id, source_manager=server.source_manager, ms=server.ms) + + # # Verify second agent has same number of passages as first agent + # agent2_passages = server.passage_manager.size(actor=user, agent_id=other_agent_id, source_id=source.id) + # agent1_passages = server.passage_manager.size(actor=user, agent_id=agent_id, source_id=source.id) + # assert agent2_passages == agent1_passages + + # # Verify second agent can query the same content + # passages2 = server.passage_manager.list_passages( + # actor=user, + # agent_id=other_agent_id, + # source_id=source.id, + # query_text="what does Timber like to eat", + # embedding_config=EmbeddingConfig.default_config(provider="openai"), + # embed_query=True, + # limit=10, + # ) + # assert len(passages2) == len(passages) + # assert any("chicken" in passage.text.lower() for passage in passages2) + # assert any("sleep" in passage.text.lower() for passage in passages2) + + # # Cleanup + # server.delete_agent(user_id=user_id, agent_id=agent2_state.id)