import asyncio
import os
import re
import tempfile
import threading
import time
from datetime import datetime, timedelta
from typing import Any

import pytest
from dotenv import load_dotenv
from letta_client import Letta as LettaSDKClient
from letta_client.types import CreateBlockParam
from letta_client.types.agent_state import AgentState

from letta.constants import DEFAULT_ORG_ID, FILES_TOOLS
from letta.helpers.pinecone_utils import should_use_pinecone
from letta.helpers.tpuf_client import TurbopufferClient
from letta.schemas.enums import FileProcessingStatus, ToolType
from letta.schemas.message import MessageCreate
from letta.schemas.user import User
from letta.settings import settings
from tests.helpers.utils import upload_file_and_wait, upload_file_and_wait_list_files
from tests.utils import wait_for_server

# Constants
SERVER_PORT = 8283


def get_raw_system_message(client: LettaSDKClient, agent_id: str) -> str:
    """Helper function to get the raw system message from an agent's preview payload."""
    raw_payload = client.post(
        f"/v1/agents/{agent_id}/messages/preview-raw-payload",
        cast_to=dict[str, Any],
        body={
            "messages": [
                {
                    "role": "user",
                    "content": "Testing",
                }
            ],
        },
    )
    return raw_payload["messages"][0]["content"]


@pytest.fixture(autouse=True)
def clear_sources(client: LettaSDKClient):
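    """Delete all folders before each test so every test starts with a clean slate."""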
    # Clear existing sources
    for source in list(client.folders.list()):
        client.folders.delete(folder_id=source.id)


def run_server():
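    """Load environment variables and start a Letta REST API server (blocking call)."""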
    load_dotenv()

    from letta.server.rest_api.app import start_server

    print("Starting server...")
    start_server(debug=True)


@pytest.fixture(scope="module")
def client() -> LettaSDKClient:
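    """Module-scoped SDK client; starts a local server thread unless LETTA_SERVER_URL is set."""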
    # Get URL from environment or start server
    server_url = os.getenv("LETTA_SERVER_URL", f"http://localhost:{SERVER_PORT}")
    if not os.getenv("LETTA_SERVER_URL"):
        print("Starting server thread")
        thread = threading.Thread(target=run_server, daemon=True)
        thread.start()
        wait_for_server(server_url)
    print("Running client tests with server:", server_url)
    client = LettaSDKClient(base_url=server_url)
    client.tools.upsert_base_tools()
    yield client


@pytest.fixture
def agent_state(disable_pinecone, client: LettaSDKClient):
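    """Create a test agent with the open_files, semantic_search_files, and grep_files tools attached."""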
    open_file_tool = list(client.tools.list(name="open_files"))[0]
    search_files_tool = list(client.tools.list(name="semantic_search_files"))[0]
    grep_tool = list(client.tools.list(name="grep_files"))[0]

    agent_state = client.agents.create(
        name="test_sources_agent",
        memory_blocks=[
            CreateBlockParam(
                label="human",
                value="username: sarah",
            ),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
        tool_ids=[open_file_tool.id, search_files_tool.id, grep_tool.id],
    )
    yield agent_state


# Tests


def test_auto_attach_detach_files_tools(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test automatic attachment and detachment of file tools when managing agent sources."""
    # Create agent with basic configuration
    agent = client.agents.create(
        memory_blocks=[
            CreateBlockParam(label="human", value="username: sarah"),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
    )

    # Helper function to get file tools from agent
    def get_file_tools(agent_state):
        return {tool.name for tool in agent_state.tools if tool.tool_type == ToolType.LETTA_FILES_CORE}

    # Helper function to assert file tools presence
    def assert_file_tools_present(agent_state, expected_tools):
        actual_tools = get_file_tools(agent_state)
        assert actual_tools == expected_tools, f"File tools mismatch.\nExpected: {expected_tools}\nFound: {actual_tools}"

    # Helper function to assert no file tools
    def assert_no_file_tools(agent_state):
        has_file_tools = any(tool.tool_type == ToolType.LETTA_FILES_CORE for tool in agent_state.tools)
        assert not has_file_tools, "File tools should not be present"

    # Initial state: no file tools
    assert_no_file_tools(agent)

    # Create and attach first source
    source_1 = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")
    assert len(list(client.folders.list())) == 1

    client.agents.folders.attach(folder_id=source_1.id, agent_id=agent.id)
    agent = client.agents.retrieve(agent_id=agent.id, include=["agent.sources", "agent.tools"])
    assert len(agent.sources) == 1
    assert_file_tools_present(agent, set(FILES_TOOLS))

    # Create and attach second source
    source_2 = client.folders.create(name="another_test_source", embedding="openai/text-embedding-3-small")
    assert len(list(client.folders.list())) == 2

    client.agents.folders.attach(folder_id=source_2.id, agent_id=agent.id)
    agent = client.agents.retrieve(agent_id=agent.id, include=["agent.sources", "agent.tools"])
    assert len(agent.sources) == 2
    # File tools should remain after attaching second source
    assert_file_tools_present(agent, set(FILES_TOOLS))

    # Detach second source - tools should remain (first source still attached)
    client.agents.folders.detach(folder_id=source_2.id, agent_id=agent.id)
    agent = client.agents.retrieve(agent_id=agent.id, include=["agent.sources", "agent.tools"])
    assert_file_tools_present(agent, set(FILES_TOOLS))

    # Detach first source - all file tools should be removed
    client.agents.folders.detach(folder_id=source_1.id, agent_id=agent.id)
    agent = client.agents.retrieve(agent_id=agent.id, include=["agent.sources", "agent.tools"])
    assert_no_file_tools(agent)


@pytest.mark.parametrize("use_mistral_parser", [True, False])
@pytest.mark.parametrize(
    "file_path, expected_value, expected_label_regex",
    [
        ("tests/data/test.txt", "test", r"test_source/test\.txt"),
        ("tests/data/memgpt_paper.pdf", "MemGPT", r"test_source/memgpt_paper\.pdf"),
        ("tests/data/toy_chat_fine_tuning.jsonl", '{"messages"', r"test_source/toy_chat_fine_tuning\.jsonl"),
        ("tests/data/test.md", "h2 Heading", r"test_source/test\.md"),
        ("tests/data/test.json", "glossary", r"test_source/test\.json"),
        ("tests/data/react_component.jsx", "UserProfile", r"test_source/react_component\.jsx"),
        ("tests/data/task_manager.java", "TaskManager", r"test_source/task_manager\.java"),
        ("tests/data/data_structures.cpp", "BinarySearchTree", r"test_source/data_structures\.cpp"),
        ("tests/data/api_server.go", "UserService", r"test_source/api_server\.go"),
        ("tests/data/data_analysis.py", "StatisticalAnalyzer", r"test_source/data_analysis\.py"),
        ("tests/data/test.csv", "Smart Fridge Plus", r"test_source/test\.csv"),
    ],
)
def test_file_upload_creates_source_blocks_correctly(
    disable_pinecone,
    disable_turbopuffer,
    client: LettaSDKClient,
    agent_state: AgentState,
    file_path: str,
    expected_value: str,
    expected_label_regex: str,
    use_mistral_parser: bool,
):
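    """Upload each sample file and verify file blocks, labels, and raw system message entries."""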
    # Override mistral API key setting to force parser selection for testing
    original_mistral_key = settings.mistral_api_key
    try:
        if not use_mistral_parser:
            # Set to None to force markitdown parser selection
            settings.mistral_api_key = None

        # Create a new source
        source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")
        assert len(list(client.folders.list())) == 1

        # Attach
        client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

        # Upload the file
        upload_file_and_wait(client, source.id, file_path)

        # Get uploaded files
        files = list(client.folders.files.list(folder_id=source.id, limit=1))
        assert len(files) == 1
        assert files[0].source_id == source.id

        # Check that blocks were created
        agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
        blocks = agent_state.memory.file_blocks
        assert len(blocks) == 1
        assert any(expected_value in b.value for b in blocks)
        assert any(b.value.startswith("[Viewing file start") for b in blocks)
        assert any(re.fullmatch(expected_label_regex, b.label) for b in blocks)

        # Verify the raw system message contains source information
        raw_system_message = get_raw_system_message(client, agent_state.id)
        assert "test_source" in raw_system_message
        assert "<directories>" in raw_system_message
        # Verify file-specific details in the raw system message
        file_name = files[0].file_name
        assert f'name="test_source/{file_name}"' in raw_system_message
        assert 'status="open"' in raw_system_message

        # Remove file from source
        client.folders.files.delete(folder_id=source.id, file_id=files[0].id)

        # Confirm blocks were removed
        agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
        blocks = agent_state.memory.file_blocks
        assert len(blocks) == 0
        assert not any(expected_value in b.value for b in blocks)
        assert not any(re.fullmatch(expected_label_regex, b.label) for b in blocks)

        # Verify the raw system message no longer contains file-specific information
        raw_system_message_after_removal = get_raw_system_message(client, agent_state.id)
        # The source itself was not deleted, so its directory entry should still be present
        assert "test_source" in raw_system_message_after_removal
        assert "<directories>" in raw_system_message_after_removal
        # Verify file-specific details are also removed
        assert f'name="test_source/{file_name}"' not in raw_system_message_after_removal

    finally:
        # Restore original mistral API key setting
        settings.mistral_api_key = original_mistral_key


def test_attach_existing_files_creates_source_blocks_correctly(
    disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState
):
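    """Attach a source that already has a file and verify blocks and system message rendering."""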
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")
    assert len(list(client.folders.list())) == 1

    # Upload a file into the source
    file_path = "tests/data/test.txt"
    upload_file_and_wait(client, source.id, file_path)

    # Get the first file with pagination
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].source_id == source.id

    # Attach after uploading the file
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)
    raw_system_message = get_raw_system_message(client, agent_state.id)

    # Assert that the expected chunk is in the raw system message
    expected_chunk = """<directories>
<file_limits>
- current_files_open=1
- max_files_open=5
</file_limits>
<directory name="test_source">
<file status="open" name="test_source/test.txt">
<metadata>
- read_only=true
- chars_current=45
- chars_limit=15000
</metadata>
<value>
[Viewing file start (out of 1 lines)]
1: test
</value>
</file>
</directory>
</directories>"""
    assert expected_chunk in raw_system_message

    # Get the agent state, check blocks exist
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    assert len(blocks) == 1
    assert any("test" in b.value for b in blocks)
    assert any(b.value.startswith("[Viewing file start") for b in blocks)

    # Detach the source
    client.agents.folders.detach(folder_id=source.id, agent_id=agent_state.id)

    # Get the agent state, check blocks do NOT exist
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    assert len(blocks) == 0
    assert not any("test" in b.value for b in blocks)

    # Verify no traces of the source exist in the raw system message after detaching
    raw_system_message_after_detach = get_raw_system_message(client, agent_state.id)
    assert expected_chunk not in raw_system_message_after_detach
    assert "test_source" not in raw_system_message_after_detach
    assert "<directories>" not in raw_system_message_after_detach


def test_delete_source_removes_source_blocks_correctly(
    disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState
):
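    """Delete a source outright and verify its blocks and system message entries disappear."""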
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")
    assert len(list(client.folders.list())) == 1

    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)
    raw_system_message = get_raw_system_message(client, agent_state.id)
    assert "test_source" in raw_system_message
    assert "<directories>" in raw_system_message

    # Upload a file into the source
    file_path = "tests/data/test.txt"
    upload_file_and_wait(client, source.id, file_path)
    raw_system_message = get_raw_system_message(client, agent_state.id)
    # Assert that the expected chunk is in the raw system message
    expected_chunk = """<directories>
<file_limits>
- current_files_open=1
- max_files_open=5
</file_limits>
<directory name="test_source">
<file status="open" name="test_source/test.txt">
<metadata>
- read_only=true
- chars_current=45
- chars_limit=15000
</metadata>
<value>
[Viewing file start (out of 1 lines)]
1: test
</value>
</file>
</directory>
</directories>"""
    assert expected_chunk in raw_system_message

    # Get the agent state, check blocks exist
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    assert len(blocks) == 1
    assert any("test" in b.value for b in blocks)

    # Delete the source entirely
    client.folders.delete(folder_id=source.id)
    raw_system_message_after_delete = get_raw_system_message(client, agent_state.id)
    assert expected_chunk not in raw_system_message_after_delete
    assert "test_source" not in raw_system_message_after_delete
    assert "<directories>" not in raw_system_message_after_delete

    # Get the agent state, check blocks do NOT exist
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    assert len(blocks) == 0
    assert not any("test" in b.value for b in blocks)


def test_agent_uses_open_close_file_correctly(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
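    """Ask the agent to open a file at two different offsets and verify the view range changes."""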
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    sources_list = list(client.folders.list())
    assert len(sources_list) == 1

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload a file into the source
    file_path = "tests/data/long_test.txt"
    upload_file_and_wait(client, source.id, file_path)

    # Get uploaded files
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].source_id == source.id
    file = files[0]

    # Check that file is opened initially
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    print(f"Agent has {len(blocks)} file block(s)")
    if blocks:
        initial_content_length = len(blocks[0].value)
        print(f"Initial file content length: {initial_content_length} characters")
        print(f"First 100 chars of content: {blocks[0].value[:100]}...")
        assert initial_content_length > 10, f"Expected file content > 10 chars, got {initial_content_length}"

    # Ask agent to open the file for a specific range using offset/length
    offset, length = 0, 5  # 0-indexed offset, 5 lines
    print(f"Requesting agent to open file with offset={offset}, length={length}")
    open_response1 = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[
            MessageCreate(
                role="user",
                content=f"Use ONLY the open_files tool to open the file named test_source/{file.file_name} with offset {offset} and length {length}",
            )
        ],
    )
    print(f"First open request sent, got {len(open_response1.messages)} message(s) in response")
    print(open_response1.messages)

    # Check that file is opened
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    assert len(blocks) == 1
    old_value = blocks[0].value
    old_content_length = len(old_value)
    print(f"File content length after first open: {old_content_length} characters")
    print(f"First range content: '{old_value}'")
    assert old_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {old_content_length}"

    # Assert specific content expectations for first range (lines 1-5)
    assert "[Viewing lines 1 to 5 (out of " in old_value, f"Expected viewing header for lines 1-5, got: {old_value[:100]}..."
    assert "1: Enrico Letta" in old_value, f"Expected line 1 to start with '1: Enrico Letta', got: {old_value[:200]}..."
    assert "5: " in old_value, f"Expected line 5 to be present, got: {old_value}"

    # Ask agent to open the file for a different range
    offset, length = 5, 5  # Different offset, same length
    open_response2 = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[
            MessageCreate(
                role="user",
                content=f"Use ONLY the open_files tool to open the file named {file.file_name} with offset {offset} and length {length}",
            )
        ],
    )
    print(f"Second open request sent, got {len(open_response2.messages)} message(s) in response")
    print(open_response2.messages)

    # Check that the file is opened, but for a different range
    print("Verifying file is opened with second range...")
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    new_value = blocks[0].value
    new_content_length = len(new_value)
    print(f"File content length after second open: {new_content_length} characters")
    print(f"Second range content: '{new_value}'")
    assert new_content_length > 10, f"Expected content > 10 chars for offset={offset}, length={length}, got {new_content_length}"

    # Assert specific content expectations for second range (lines 6-10)
    assert "[Viewing lines 6 to 10 (out of " in new_value, f"Expected viewing header for lines 6-10, got: {new_value[:100]}..."
    assert "6: " in new_value, f"Expected line 6 to be present, got: {new_value[:200]}..."
    assert "10: " in new_value, f"Expected line 10 to be present, got: {new_value}"

    print("Comparing content ranges:")
    print(f"  First range (offset=0, length=5): '{old_value}'")
    print(f"  Second range (offset=5, length=5): '{new_value}'")

    assert new_value != old_value, f"Different view ranges should have different content. New: '{new_value}', Old: '{old_value}'"

    # Assert that ranges don't overlap - first range should not contain line 6, second should not contain line 1
    assert "6: was promoted" not in old_value, f"First range (1-5) should not contain line 6, got: {old_value}"
    assert "1: Enrico Letta" not in new_value, f"Second range (6-10) should not contain line 1, got: {new_value}"

    print("✓ File successfully opened with different range - content differs as expected")


def test_agent_uses_search_files_correctly(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
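    """Ask the agent to call semantic_search_files and verify the tool call succeeds."""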
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    sources_list = list(client.folders.list())
    assert len(sources_list) == 1

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload a file into the source
    file_path = "tests/data/long_test.txt"
    print(f"Uploading file: {file_path}")
    file_metadata = upload_file_and_wait(client, source.id, file_path)
    if not isinstance(file_metadata, dict):
        file_metadata = file_metadata.model_dump()
    print(f"File uploaded and processed: {file_metadata['file_name']}")

    # Get uploaded files
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].source_id == source.id

    # Ask agent to use the semantic_search_files tool
    search_files_response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[
            MessageCreate(
                role="user", content="Use ONLY the semantic_search_files tool to search for details regarding the electoral history."
            )
        ],
    )
    print(f"Search file request sent, got {len(search_files_response.messages)} message(s) in response")
    print(search_files_response.messages)

    # Check that semantic_search_files was called
    tool_calls = [msg for msg in search_files_response.messages if msg.message_type == "tool_call_message"]
    assert len(tool_calls) > 0, "No tool calls found"
    assert any(tc.tool_call.name == "semantic_search_files" for tc in tool_calls), "semantic_search_files not called"

    # Check it returned successfully
    tool_returns = [msg for msg in search_files_response.messages if msg.message_type == "tool_return_message"]
    assert len(tool_returns) > 0, "No tool returns found"
    assert all(tr.status == "success" for tr in tool_returns), f"Tool call failed: {tool_returns}"


def test_agent_uses_grep_correctly_basic(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
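    """Ask the agent to call grep_files on a plain-text file and verify the tool call succeeds."""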
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    sources_list = list(client.folders.list())
    assert len(sources_list) == 1

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload a file into the source
    file_path = "tests/data/long_test.txt"
    print(f"Uploading file: {file_path}")
    file_metadata = upload_file_and_wait(client, source.id, file_path)
    if not isinstance(file_metadata, dict):
        file_metadata = file_metadata.model_dump()
    print(f"File uploaded and processed: {file_metadata['file_name']}")

    # Get uploaded files
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].source_id == source.id

    # Ask agent to use the grep_files tool
    search_files_response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[MessageCreate(role="user", content="Use ONLY the grep_files tool to search for `Nunzia De Girolamo`.")],
    )
    print(f"Grep request sent, got {len(search_files_response.messages)} message(s) in response")
    print(search_files_response.messages)

    # Check that grep_files was called
    tool_calls = [msg for msg in search_files_response.messages if msg.message_type == "tool_call_message"]
    assert len(tool_calls) > 0, "No tool calls found"
    assert any(tc.tool_call.name == "grep_files" for tc in tool_calls), "grep_files not called"

    # Check it returned successfully
    tool_returns = [msg for msg in search_files_response.messages if msg.message_type == "tool_return_message"]
    assert len(tool_returns) > 0, "No tool returns found"
    assert all(tr.status == "success" for tr in tool_returns), "Tool call failed"


def test_agent_uses_grep_correctly_advanced(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
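    """Grep for a unique ID in a JSON file and verify match counts and context lines in the return."""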
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    sources_list = list(client.folders.list())
    assert len(sources_list) == 1

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload a file into the source
    file_path = "tests/data/list_tools.json"
    print(f"Uploading file: {file_path}")
    file_metadata = upload_file_and_wait(client, source.id, file_path)
    if not isinstance(file_metadata, dict):
        file_metadata = file_metadata.model_dump()
    print(f"File uploaded and processed: {file_metadata['file_name']}")

    # Get uploaded files
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].source_id == source.id

    # Ask agent to use the grep_files tool
    search_files_response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[
            MessageCreate(role="user", content="Use ONLY the grep_files tool to search for `tool-f5b80b08-5a45-4a0a-b2cd-dd8a0177b7ef`.")
        ],
    )
    print(f"Grep request sent, got {len(search_files_response.messages)} message(s) in response")
    print(search_files_response.messages)

    tool_return_message = next((m for m in search_files_response.messages if m.message_type == "tool_return_message"), None)
    assert tool_return_message is not None, "No ToolReturnMessage found in messages"

    # Basic structural integrity checks
    assert tool_return_message.name == "grep_files"
    assert tool_return_message.status == "success"
    assert "Found 1 total matches across 1 files" in tool_return_message.tool_return
    assert "tool-f5b80b08-5a45-4a0a-b2cd-dd8a0177b7ef" in tool_return_message.tool_return

    # Context line integrity (1 line of context before and after the match by default)
    assert "509:" in tool_return_message.tool_return
    assert "> 510:" in tool_return_message.tool_return  # Match line with > prefix
    assert "511:" in tool_return_message.tool_return


def test_create_agent_with_source_ids_creates_source_blocks_correctly(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test that creating an agent with source_ids parameter correctly creates source blocks."""
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")
    assert len(list(client.folders.list())) == 1

    # Upload a file to the source before attaching
    file_path = "tests/data/long_test.txt"
    upload_file_and_wait(client, source.id, file_path)

    # Get uploaded files to verify
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].source_id == source.id

    # Create agent with source_ids parameter
    temp_agent_state = client.agents.create(
        name="test_agent_with_sources",
        memory_blocks=[
            CreateBlockParam(
                label="human",
                value="username: sarah",
            ),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
        source_ids=[source.id],  # Attach source during creation
    )

    # Verify agent was created successfully
    assert temp_agent_state is not None
    assert temp_agent_state.name == "test_agent_with_sources"

    # Check that source blocks were created correctly
    blocks = temp_agent_state.memory.file_blocks
    assert len(blocks) == 1
    assert any(b.value.startswith("[Viewing file start (out of ") for b in blocks)

    # Verify file tools were automatically attached
    file_tools = {tool.name for tool in temp_agent_state.tools if tool.tool_type == ToolType.LETTA_FILES_CORE}
    assert file_tools == set(FILES_TOOLS)


def test_view_ranges_have_metadata(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
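    """Open a specific line range and verify the view header and line-numbered content."""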
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    sources_list = list(client.folders.list())
    assert len(sources_list) == 1

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload a file into the source
    file_path = "tests/data/1_to_100.py"
    upload_file_and_wait(client, source.id, file_path)

    # Get uploaded files
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].source_id == source.id
    file = files[0]

    # Check that file is opened initially
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    assert len(blocks) == 1
    block = blocks[0]
    assert block.value.startswith("[Viewing file start (out of 100 lines)]")

    # Open a specific range using offset/length
    offset = 49  # 0-indexed for line 50
    length = 5  # 5 lines (50-54)
    open_response = client.agents.messages.create(
        agent_id=agent_state.id,
        messages=[
            MessageCreate(
                role="user",
                content=f"Use ONLY the open_files tool to open the file named test_source/{file.file_name} with offset {offset} and length {length}",
            )
        ],
    )
    print(f"Open request sent, got {len(open_response.messages)} message(s) in response")
    print(open_response.messages)

    # Check that file is opened correctly
    agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
    blocks = agent_state.memory.file_blocks
    assert len(blocks) == 1
    block = blocks[0]
    print(block.value)
    assert (
        block.value
        == """
[Viewing lines 50 to 54 (out of 100 lines)]
50: x50 = 50
51: x51 = 51
52: x52 = 52
53: x53 = 53
54: x54 = 54
""".strip()
    )


def test_duplicate_file_renaming(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test that duplicate files are renamed with count-based suffixes (e.g., test.txt, test_(1).txt, test_(2).txt)"""
    # Create a new source
    source = client.folders.create(name="test_duplicate_source", embedding="openai/text-embedding-3-small")

    # Upload the same file three times
    file_path = "tests/data/test.txt"

    with open(file_path, "rb") as f:
        first_file = client.folders.files.upload(folder_id=source.id, file=f)

    with open(file_path, "rb") as f:
        second_file = client.folders.files.upload(folder_id=source.id, file=f)

    with open(file_path, "rb") as f:
        third_file = client.folders.files.upload(folder_id=source.id, file=f)

    # Get all uploaded files
    files = list(client.folders.files.list(folder_id=source.id, limit=10))
    assert len(files) == 3, f"Expected 3 files, got {len(files)}"

    # Sort files by creation time to ensure predictable order
    files.sort(key=lambda f: f.created_at)

    # Verify filenames follow the count-based pattern
    expected_filenames = ["test.txt", "test_(1).txt", "test_(2).txt"]
    actual_filenames = [f.file_name for f in files]

    assert actual_filenames == expected_filenames, f"Expected {expected_filenames}, got {actual_filenames}"

    # Verify all files have the same original_file_name
    for file in files:
        assert file.original_file_name == "test.txt", f"Expected original_file_name='test.txt', got '{file.original_file_name}'"

    print("✓ Successfully tested duplicate file renaming:")
    for i, file in enumerate(files):
        print(f"  File {i + 1}: original='{file.original_file_name}' → renamed='{file.file_name}'")


def test_duplicate_file_handling_replace(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test that DuplicateFileHandling.REPLACE replaces existing files with the same name"""
    # Create a new source
    source = client.folders.create(name="test_replace_source", embedding="openai/text-embedding-3-small")

    # Create agent and attach source to test memory blocks
    agent_state = client.agents.create(
        name="test_replace_agent",
        memory_blocks=[
            CreateBlockParam(label="human", value="username: sarah"),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
        source_ids=[source.id],
    )

    # Create a temporary file with original content
    original_content = "original file content for testing"
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
        f.write(original_content)
        temp_file_path = f.name
        temp_filename = os.path.basename(f.name)

    try:
        # Upload the file and wait for it to be processed
        upload_file_and_wait(client, source.id, temp_file_path)

        # Verify original file was uploaded
        files = list(client.folders.files.list(folder_id=source.id, limit=10))
        assert len(files) == 1, f"Expected 1 file, got {len(files)}"
        original_file = files[0]
        assert original_file.original_file_name == temp_filename

        # Get agent state and verify original content is in memory blocks
        agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
        file_blocks = agent_state.memory.file_blocks
        assert len(file_blocks) == 1, f"Expected 1 file block, got {len(file_blocks)}"
        original_block_content = file_blocks[0].value
        assert original_content in original_block_content

        # Create replacement content
        replacement_content = "this is the replacement content that should overwrite the original"
        with open(temp_file_path, "w") as f:
            f.write(replacement_content)

        # Upload replacement file with REPLACE duplicate handling
        replacement_file = upload_file_and_wait(client, source.id, temp_file_path, duplicate_handling="replace")

        # Verify we still have only 1 file (replacement, not addition)
        files_after_replace = list(client.folders.files.list(folder_id=source.id, limit=10))
        assert len(files_after_replace) == 1, f"Expected 1 file after replacement, got {len(files_after_replace)}"

        replaced_file = files_after_replace[0]

        # Verify file metadata shows replacement
        assert replaced_file.original_file_name == temp_filename, "Original filename should be preserved"
        assert replaced_file.file_name == temp_filename, "File name should match original"

        # Verify the file ID is different (new file replaced the old one)
        assert replaced_file.id != original_file.id, "Replacement file should have different ID"

        # Verify agent memory blocks contain replacement content
        agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
        updated_file_blocks = agent_state.memory.file_blocks
        assert len(updated_file_blocks) == 1, f"Expected 1 file block after replacement, got {len(updated_file_blocks)}"

        replacement_block_content = updated_file_blocks[0].value
        assert replacement_content in replacement_block_content, f"Expected replacement content in block, got: {replacement_block_content}"
        assert original_content not in replacement_block_content, (
            f"Original content should not be present after replacement: {replacement_block_content}"
        )

        print("✓ Successfully tested DuplicateFileHandling.REPLACE functionality")

    finally:
        # Clean up temporary file
        if os.path.exists(temp_file_path):
            os.unlink(temp_file_path)


def test_upload_file_with_custom_name(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test that uploading a file with a custom name overrides the original filename"""
    # Create agent
    agent_state = client.agents.create(
        name="test_agent_custom_name",
        memory_blocks=[
            CreateBlockParam(
                label="persona",
                value="I am a helpful assistant",
            ),
            CreateBlockParam(
                label="human",
                value="The user is a developer",
            ),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
    )

    # Create source
    source = client.folders.create(name="test_source_custom_name", embedding="openai/text-embedding-3-small")

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Create a temporary file with specific content
    temp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
            f.write("This is a test file for custom naming")
            temp_file_path = f.name

        # Upload file with custom name
        custom_name = "my_custom_file_name.txt"
        file_metadata = upload_file_and_wait(client, source.id, temp_file_path, name=custom_name)
        if not isinstance(file_metadata, dict):
            file_metadata = file_metadata.model_dump()

        # Verify the file uses the custom name
        assert file_metadata["file_name"] == custom_name
        assert file_metadata["original_file_name"] == custom_name

        # Verify file appears in source files list with custom name
        files = list(client.folders.files.list(folder_id=source.id, limit=1))
        assert len(files) == 1
        assert files[0].file_name == custom_name
        assert files[0].original_file_name == custom_name

        # Verify the custom name is used in file blocks
        agent_state = client.agents.retrieve(agent_id=agent_state.id, include=["agent.blocks"])
        file_blocks = agent_state.memory.file_blocks
        assert len(file_blocks) == 1
        # Check that the custom name appears in the block label
        assert custom_name.replace(".txt", "") in file_blocks[0].label

        # Test duplicate handling with custom name - uploading the same file with the same custom name should fail
        with pytest.raises(Exception) as exc_info:
            upload_file_and_wait(client, source.id, temp_file_path, name=custom_name, duplicate_handling="error")
        assert "already exists" in str(exc_info.value).lower()

        # Uploading the same file with a different custom name should succeed
        different_custom_name = "folder_a/folder_b/another_custom_name.txt"
        file_metadata2 = upload_file_and_wait(client, source.id, temp_file_path, name=different_custom_name)
        if not isinstance(file_metadata2, dict):
            file_metadata2 = file_metadata2.model_dump()
        assert file_metadata2["file_name"] == different_custom_name
        assert file_metadata2["original_file_name"] == different_custom_name

        # Verify both files exist
        files = list(client.folders.files.list(folder_id=source.id, limit=10))
        assert len(files) == 2
        file_names = {f.file_name for f in files}
        assert custom_name in file_names
        assert different_custom_name in file_names

    finally:
        # Clean up temporary file
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)


def test_open_files_schema_descriptions(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test that open_files tool schema contains correct descriptions from docstring"""

    # Get the open_files tool
    tools = list(client.tools.list(name="open_files"))
    assert len(tools) == 1, "Expected exactly one open_files tool"

    open_files_tool = tools[0]
    schema = open_files_tool.json_schema

    # Check main function description includes the full multiline docstring with examples
    description = schema["description"]

    # Check main description line
    assert (
        "Open one or more files and load their contents into files section in core memory. Maximum of 5 files can be opened simultaneously."
        in description
    )

    # Check that examples are included
    assert "Examples:" in description
    assert 'FileOpenRequest(file_name="project_utils/config.py")' in description
    assert 'FileOpenRequest(file_name="project_utils/config.py", offset=0, length=50)' in description
    assert "# Lines 1-50" in description
    assert "# Lines 101-200" in description
    assert "# Entire file" in description
    assert "close_all_others=True" in description
    assert "View specific portions of large files (e.g. functions or definitions)" in description

    # Check parameters structure
    assert "parameters" in schema
    assert "properties" in schema["parameters"]
    properties = schema["parameters"]["properties"]

    # Check file_requests parameter
    assert "file_requests" in properties
    file_requests_prop = properties["file_requests"]
    expected_file_requests_desc = "List of file open requests, each specifying file name and optional view range."
    assert file_requests_prop["description"] == expected_file_requests_desc, (
        f"Expected file_requests description: '{expected_file_requests_desc}', got: '{file_requests_prop['description']}'"
    )

    # Check close_all_others parameter
    assert "close_all_others" in properties
    close_all_others_prop = properties["close_all_others"]
    expected_close_all_others_desc = "If True, closes all other currently open files first. Defaults to False."
    assert close_all_others_prop["description"] == expected_close_all_others_desc, (
        f"Expected close_all_others description: '{expected_close_all_others_desc}', got: '{close_all_others_prop['description']}'"
    )

    # Check that file_requests is an array type
    assert file_requests_prop["type"] == "array", f"Expected file_requests type to be 'array', got: '{file_requests_prop['type']}'"

    # Check FileOpenRequest schema within file_requests items
    assert "items" in file_requests_prop
    file_request_items = file_requests_prop["items"]
    assert file_request_items["type"] == "object", "Expected FileOpenRequest to be object type"

    # Check FileOpenRequest properties
    assert "properties" in file_request_items
    file_request_properties = file_request_items["properties"]

    # Check file_name field
    assert "file_name" in file_request_properties
    file_name_prop = file_request_properties["file_name"]
    assert file_name_prop["description"] == "Name of the file to open"
    assert file_name_prop["type"] == "string"

    # Check offset field
    assert "offset" in file_request_properties
    offset_prop = file_request_properties["offset"]
    expected_offset_desc = "Optional offset for starting line number (0-indexed). If not specified, starts from beginning of file."
    assert offset_prop["description"] == expected_offset_desc
    assert offset_prop["type"] == "integer"

    # Check length field
    assert "length" in file_request_properties
    length_prop = file_request_properties["length"]
    expected_length_desc = "Optional number of lines to view from offset (inclusive). If not specified, views to end of file."
    assert length_prop["description"] == expected_length_desc
    assert length_prop["type"] == "integer"


def test_grep_files_schema_descriptions(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test that grep_files tool schema contains correct descriptions from docstring"""

    # Get the grep_files tool
    tools = list(client.tools.list(name="grep_files"))
    assert len(tools) == 1, "Expected exactly one grep_files tool"

    grep_files_tool = tools[0]
    schema = grep_files_tool.json_schema

    # Check main function description includes the full multiline docstring with examples
    description = schema["description"]

    # Check main description line
    assert "Searches file contents for pattern matches with surrounding context." in description

    # Check important details are included
    assert "Results are paginated - shows 20 matches per call" in description
    assert "The response includes:" in description
    assert "A summary of total matches and which files contain them" in description
    assert "The current page of matches (20 at a time)" in description
    assert "Instructions for viewing more matches using the offset parameter" in description

    # Check examples are included
    assert "Example usage:" in description
    assert 'grep_files(pattern="TODO")' in description
    assert 'grep_files(pattern="TODO", offset=20)' in description
    assert "# Shows matches 21-40" in description

    # Check parameters structure
    assert "parameters" in schema
    assert "properties" in schema["parameters"]
    properties = schema["parameters"]["properties"]

    # Check pattern parameter
    assert "pattern" in properties
    pattern_prop = properties["pattern"]
    expected_pattern_desc = "Keyword or regex pattern to search within file contents."
    assert pattern_prop["description"] == expected_pattern_desc, (
        f"Expected pattern description: '{expected_pattern_desc}', got: '{pattern_prop['description']}'"
    )
    assert pattern_prop["type"] == "string"

    # Check include parameter
    assert "include" in properties
    include_prop = properties["include"]
    expected_include_desc = "Optional keyword or regex pattern to filter filenames to include in the search."
    assert include_prop["description"] == expected_include_desc, (
        f"Expected include description: '{expected_include_desc}', got: '{include_prop['description']}'"
    )
    assert include_prop["type"] == "string"

    # Check context_lines parameter
    assert "context_lines" in properties
    context_lines_prop = properties["context_lines"]
    expected_context_lines_desc = (
        "Number of lines of context to show before and after each match.\nEquivalent to `-C` in grep_files. Defaults to 1."
    )
    assert context_lines_prop["description"] == expected_context_lines_desc, (
        f"Expected context_lines description: '{expected_context_lines_desc}', got: '{context_lines_prop['description']}'"
    )
    assert context_lines_prop["type"] == "integer"

    # Check offset parameter
    assert "offset" in properties
    offset_prop = properties["offset"]
    expected_offset_desc = (
        "Number of matches to skip before showing results. Used for pagination.\n"
        "For example, offset=20 shows matches starting from the 21st match.\n"
        "Use offset=0 (or omit) for first page, offset=20 for second page,\n"
        "offset=40 for third page, etc. The tool will tell you the exact\n"
        "offset to use for the next page."
    )
    assert offset_prop["description"] == expected_offset_desc, (
        f"Expected offset description: '{expected_offset_desc}', got: '{offset_prop['description']}'"
    )
    assert offset_prop["type"] == "integer"

    # Check return description in main description
    assert "Returns search results containing:" in description
    assert "Summary with total match count and file distribution" in description
    assert "List of files with match counts per file" in description
    assert "Current page of matches (up to 20)" in description
    assert "Navigation hint for next page if more matches exist" in description


def test_agent_open_file(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
    """Test the client.agents.files.open() endpoint"""
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload a file
    file_path = "tests/data/test.txt"
    file_metadata = upload_file_and_wait(client, source.id, file_path)
    if not isinstance(file_metadata, dict):
        file_metadata = file_metadata.model_dump()

    # Basic test of the open endpoint
    closed_files = client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata["id"])
    assert len(closed_files) == 0

    system = get_raw_system_message(client, agent_state.id)
    assert '<file status="open" name="test_source/test.txt">' in system
    assert "[Viewing file start (out of 1 lines)]" in system


def test_agent_close_file(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
    """Test the client.agents.files.close() endpoint"""
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload a file
    file_path = "tests/data/test.txt"
    file_metadata = upload_file_and_wait(client, source.id, file_path)
    if not isinstance(file_metadata, dict):
        file_metadata = file_metadata.model_dump()

    # First open the file
    client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata["id"])

    # Test the close endpoint
    client.agents.files.close(agent_id=agent_state.id, file_id=file_metadata["id"])

    system = get_raw_system_message(client, agent_state.id)
    assert '<file status="closed" name="test_source/test.txt">' in system


def test_agent_close_all_open_files(disable_pinecone, disable_turbopuffer, client: LettaSDKClient, agent_state: AgentState):
    """Test the client.agents.files.close_all() endpoint"""
    # Create a new source
    source = client.folders.create(name="test_source", embedding="openai/text-embedding-3-small")

    # Attach source to agent
    client.agents.folders.attach(folder_id=source.id, agent_id=agent_state.id)

    # Upload multiple files
    file_paths = ["tests/data/test.txt", "tests/data/test.md"]
    file_metadatas = []
    for file_path in file_paths:
        file_metadata = upload_file_and_wait(client, source.id, file_path)
        if not isinstance(file_metadata, dict):
            file_metadata = file_metadata.model_dump()
        file_metadatas.append(file_metadata)
        # Open each file
        client.agents.files.open(agent_id=agent_state.id, file_id=file_metadata["id"])

    system = get_raw_system_message(client, agent_state.id)
    assert '<file status="open"' in system

    # Test the close_all endpoint
    result = client.agents.files.close_all(agent_id=agent_state.id)

    # Verify result is a list of strings
    assert isinstance(result, list), f"Expected list, got {type(result)}"
    assert all(isinstance(item, str) for item in result), "All items in result should be strings"

    system = get_raw_system_message(client, agent_state.id)
    assert '<file status="open"' not in system


def test_file_processing_timeout(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test that files in non-terminal states are moved to error after timeout"""
    # Create a source
    source = client.folders.create(name="test_timeout_source", embedding="openai/text-embedding-3-small")

    # Upload a file
    file_path = "tests/data/test.txt"
    with open(file_path, "rb") as f:
        file_metadata = client.folders.files.upload(folder_id=source.id, file=f)

    # Get the file ID
    file_id = file_metadata.id

    # Test the is_terminal_state method directly (this doesn't require server mocking)
    assert FileProcessingStatus.COMPLETED.is_terminal_state()
    assert FileProcessingStatus.ERROR.is_terminal_state()
    assert not FileProcessingStatus.PARSING.is_terminal_state()
    assert not FileProcessingStatus.EMBEDDING.is_terminal_state()
    assert not FileProcessingStatus.PENDING.is_terminal_state()

    # For testing the actual timeout logic, we can check the current file status
    current_file = client.get(
        path=f"/v1/sources/{source.id}/files/{file_id}",
        cast_to=dict[str, Any],
    )

    # Convert string status to enum for testing
    if not isinstance(current_file, dict):
        current_file = current_file.model_dump()
    processing_status = current_file["processing_status"]
    status_enum = FileProcessingStatus(processing_status)

    # Verify that files in terminal states are not affected by timeout checks
    if status_enum.is_terminal_state():
        # This is the expected behavior - files that completed processing shouldn't timeout
        print(f"File {file_id} is in terminal state: {processing_status}")
        assert status_enum in [FileProcessingStatus.COMPLETED, FileProcessingStatus.ERROR]
    else:
        # If the file is still processing, it should eventually complete or time out
        # In a real scenario, we'd wait and check, but for unit tests we just verify the logic exists
        print(f"File {file_id} is still processing: {processing_status}")
        assert status_enum in [FileProcessingStatus.PENDING, FileProcessingStatus.PARSING, FileProcessingStatus.EMBEDDING]


@pytest.mark.unit
def test_file_processing_timeout_logic():
    """Test the timeout logic directly without server dependencies"""
    from datetime import timezone

    # Test scenario: file created 35 minutes ago, timeout is 30 minutes
    old_time = datetime.now(timezone.utc) - timedelta(minutes=35)
    current_time = datetime.now(timezone.utc)
    timeout_minutes = 30

    # Calculate timeout threshold
    timeout_threshold = current_time - timedelta(minutes=timeout_minutes)

    # Verify timeout logic
    assert old_time < timeout_threshold, "File created 35 minutes ago should be past 30-minute timeout"

    # Test edge case: file created exactly at timeout
    edge_time = current_time - timedelta(minutes=timeout_minutes)
    assert not (edge_time < timeout_threshold), "File created exactly at timeout should not trigger timeout"

    # Test recent file
    recent_time = current_time - timedelta(minutes=10)
    assert not (recent_time < timeout_threshold), "Recent file should not trigger timeout"
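

# A minimal sketch (hypothetical helper, not the server code) of the timeout predicate the
# test above exercises: a file is considered timed out only when its creation time falls
# strictly before `now - timeout`, so a file created exactly at the threshold is spared.
def _is_past_processing_timeout(created_at: datetime, timeout_minutes: int = 30) -> bool:
    from datetime import timezone

    threshold = datetime.now(timezone.utc) - timedelta(minutes=timeout_minutes)
    return created_at < threshold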


def test_openai_embedding(disable_pinecone, disable_turbopuffer, client: LettaSDKClient):
    """Test creating a source with OpenAI embeddings and uploading a file"""
    source = client.folders.create(name="test_openai_embed_source", embedding="openai/text-embedding-3-small")

    # Verify source was created
    assert source.name == "test_openai_embed_source"

    # Upload test.txt file
    file_path = "tests/data/test.txt"
    file_metadata = upload_file_and_wait(client, source.id, file_path)

    # Verify file was uploaded successfully
    if not isinstance(file_metadata, dict):
        file_metadata = file_metadata.model_dump()
    assert file_metadata["processing_status"] == "completed"
    assert file_metadata["source_id"] == source.id
    assert file_metadata["file_name"] == "test.txt"

    # Verify file appears in source files list
    files = list(client.folders.files.list(folder_id=source.id, limit=1))
    assert len(files) == 1
    assert files[0].id == file_metadata["id"]

    # Cleanup
    client.folders.delete(folder_id=source.id)


# --- Pinecone Tests ---
def test_pinecone_search_files_tool(disable_turbopuffer, client: LettaSDKClient):
    """Test that search_files tool uses Pinecone when enabled"""
    if not should_use_pinecone(verbose=True):
        pytest.skip("Pinecone not configured (missing API key or disabled), skipping Pinecone-specific tests")

    print("Testing Pinecone search_files tool functionality")

    # Create agent with file tools
    agent = client.agents.create(
        name="test_pinecone_agent",
        memory_blocks=[
            CreateBlockParam(label="human", value="username: testuser"),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
    )

    # Create source and attach to agent
    source = client.folders.create(name="test_pinecone_source", embedding="openai/text-embedding-3-small")
    client.agents.folders.attach(folder_id=source.id, agent_id=agent.id)

    # Upload a file with searchable content
    file_path = "tests/data/long_test.txt"
    upload_file_and_wait(client, source.id, file_path)

    # Test semantic search using Pinecone
    search_response = client.agents.messages.create(
        agent_id=agent.id,
        messages=[MessageCreate(role="user", content="Use the semantic_search_files tool to search for 'electoral history' in the files.")],
    )

    # Verify tool was called successfully
    tool_calls = [msg for msg in search_response.messages if msg.message_type == "tool_call_message"]
    assert len(tool_calls) > 0, "No tool calls found"
    assert any(tc.tool_call.name == "semantic_search_files" for tc in tool_calls), "semantic_search_files not called"

    # Verify tool returned results
    tool_returns = [msg for msg in search_response.messages if msg.message_type == "tool_return_message"]
    assert len(tool_returns) > 0, "No tool returns found"
    assert all(tr.status == "success" for tr in tool_returns), "Tool call failed"

    # Check that results contain expected content
    search_results = tool_returns[0].tool_return
    print(search_results)
    assert "electoral" in search_results.lower() or "history" in search_results.lower(), (
        f"Search results should contain relevant content: {search_results}"
    )
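

# The Pinecone and Turbopuffer search tests share the response-inspection pattern
# above. A minimal sketch of it as a reusable helper (hypothetical; the tests in
# this module inline the comprehensions rather than calling this):
def _successful_tool_returns(response: Any, tool_name: str) -> list:
    """Assert `tool_name` was called and that every tool return succeeded."""
    tool_calls = [m for m in response.messages if m.message_type == "tool_call_message"]
    assert any(tc.tool_call.name == tool_name for tc in tool_calls), f"{tool_name} not called"
    tool_returns = [m for m in response.messages if m.message_type == "tool_return_message"]
    assert all(tr.status == "success" for tr in tool_returns), "Tool call failed"
    return tool_returns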


def test_pinecone_list_files_status(disable_turbopuffer, client: LettaSDKClient):
    """Test that list_source_files properly syncs embedding status with Pinecone"""
    if not should_use_pinecone():
        pytest.skip("Pinecone not configured (missing API key or disabled), skipping Pinecone-specific tests")

    # create source
    source = client.folders.create(name="test_list_files_status", embedding="openai/text-embedding-3-small")

    file_paths = ["tests/data/long_test.txt"]
    uploaded_files = []
    for file_path in file_paths:
        # use the new helper that polls via list_files
        file_metadata = upload_file_and_wait_list_files(client, source.id, file_path)
        uploaded_files.append(file_metadata)
        if not isinstance(file_metadata, dict):
            file_metadata = file_metadata.model_dump()
        assert file_metadata["processing_status"] == "completed", f"File {file_path} should be completed"

    # now get files using list_source_files to verify status checking works
    files_list = client.folders.files.list(folder_id=source.id, limit=100)

    # verify all files show completed status and have proper embedding counts
    assert len(files_list) == len(uploaded_files), f"Expected {len(uploaded_files)} files, got {len(files_list)}"

    for file_metadata in files_list:
        if not isinstance(file_metadata, dict):
            file_metadata = file_metadata.model_dump()
        assert file_metadata["processing_status"] == "completed", f"File {file_metadata['file_name']} should show completed status"

        # verify embedding counts for files that have chunks
        if file_metadata["total_chunks"] and file_metadata["total_chunks"] > 0:
            assert file_metadata["chunks_embedded"] == file_metadata["total_chunks"], (
                f"File {file_metadata['file_name']} should have all chunks embedded: {file_metadata['chunks_embedded']}/{file_metadata['total_chunks']}"
            )

    # cleanup
    client.folders.delete(folder_id=source.id)


def test_pinecone_lifecycle_file_and_source_deletion(disable_turbopuffer, client: LettaSDKClient):
    """Test that file and source deletion removes records from Pinecone"""
    from letta.helpers.pinecone_utils import list_pinecone_index_for_files

    if not should_use_pinecone():
        pytest.skip("Pinecone not configured (missing API key or disabled), skipping Pinecone-specific tests")

    print("Testing Pinecone file and source deletion lifecycle")

    # Create source
    source = client.folders.create(name="test_lifecycle_source", embedding="openai/text-embedding-3-small")

    # Upload multiple files and wait for processing
    file_paths = ["tests/data/test.txt", "tests/data/test.md"]
    uploaded_files = []
    for file_path in file_paths:
        file_metadata = upload_file_and_wait(client, source.id, file_path)
        uploaded_files.append(file_metadata)

    # Get temp user for Pinecone operations
    user = User(name="temp", organization_id=DEFAULT_ORG_ID)

    # Test file-level deletion first
    if len(uploaded_files) > 1:
        file_to_delete = uploaded_files[0]

        # Check records for the specific file using list function
        records_before = asyncio.run(list_pinecone_index_for_files(file_to_delete.id, user))
        print(f"Found {len(records_before)} records for file before deletion")

        # Delete the file
        client.folders.files.delete(folder_id=source.id, file_id=file_to_delete.id)

        # Allow time for deletion to propagate
        time.sleep(2)

        # Verify file records are removed
        records_after = asyncio.run(list_pinecone_index_for_files(file_to_delete.id, user))
        print(f"Found {len(records_after)} records for file after deletion")

        assert len(records_after) == 0, f"File records should be removed from Pinecone after deletion, but found {len(records_after)}"

    # Test source-level deletion - check records for the remaining files
    remaining_records = []
    for file_metadata in uploaded_files[1:]:  # Skip the already deleted file
        file_records = asyncio.run(list_pinecone_index_for_files(file_metadata.id, user))
        remaining_records.extend(file_records)

    records_before = len(remaining_records)
    print(f"Found {records_before} records for remaining files before source deletion")

    # Delete the entire source
    client.folders.delete(folder_id=source.id)

    # Allow time for deletion to propagate
    time.sleep(3)

    # Verify all remaining file records are removed
    records_after = []
    for file_metadata in uploaded_files[1:]:
        file_records = asyncio.run(list_pinecone_index_for_files(file_metadata.id, user))
        records_after.extend(file_records)

    print(f"Found {len(records_after)} records for files after source deletion")

    assert len(records_after) == 0, (
        f"All source records should be removed from Pinecone after source deletion, but found {len(records_after)}"
    )
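

# Deletion propagation in the vector store is eventually consistent, hence the
# fixed sleeps above. A polling alternative, sketched under the assumption that
# the caller supplies a zero-records predicate (hypothetical helper, not used by
# these tests):
def _wait_until_empty(is_empty, timeout_s: float = 10.0, interval_s: float = 0.5) -> bool:
    """Poll `is_empty` until it returns True or the timeout elapses."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        if is_empty():
            return True
        time.sleep(interval_s)
    return False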


# --- End Pinecone Tests ---


# --- Turbopuffer Tests ---
def test_turbopuffer_search_files_tool(disable_pinecone, client: LettaSDKClient):
    """Test that search_files tool uses Turbopuffer when enabled"""
    # Create agent, source, and a searchable file (mirrors the Pinecone test above)
    agent = client.agents.create(
        name="test_turbopuffer_agent",
        memory_blocks=[
            CreateBlockParam(label="human", value="username: testuser"),
        ],
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
    )

    source = client.folders.create(name="test_turbopuffer_source", embedding="openai/text-embedding-3-small")
    client.agents.folders.attach(folder_id=source.id, agent_id=agent.id)

    file_path = "tests/data/long_test.txt"
    upload_file_and_wait(client, source.id, file_path)

    search_response = client.agents.messages.create(
        agent_id=agent.id,
        messages=[MessageCreate(role="user", content="Use the semantic_search_files tool to search for 'electoral history' in the files.")],
    )

    # Verify the tool was called and returned successfully
    tool_calls = [msg for msg in search_response.messages if msg.message_type == "tool_call_message"]
    assert len(tool_calls) > 0, "No tool calls found"
    assert any(tc.tool_call.name == "semantic_search_files" for tc in tool_calls), "semantic_search_files not called"

    tool_returns = [msg for msg in search_response.messages if msg.message_type == "tool_return_message"]
    assert len(tool_returns) > 0, "No tool returns found"
    assert all(tr.status == "success" for tr in tool_returns), "Tool call failed"

    # Check that results contain expected content
    search_results = tool_returns[0].tool_return
    print(f"Turbopuffer search results: {search_results}")
    assert "electoral" in search_results.lower() or "history" in search_results.lower(), (
        f"Search results should contain relevant content: {search_results}"
    )

    client.agents.delete(agent_id=agent.id)
    client.folders.delete(folder_id=source.id)


def test_turbopuffer_file_processing_status(disable_pinecone, client: LettaSDKClient):
    """Test that file processing completes successfully with Turbopuffer"""
    print("Testing Turbopuffer file processing status")

    source = client.folders.create(name="test_tpuf_file_status", embedding="openai/text-embedding-3-small")

    file_paths = ["tests/data/long_test.txt", "tests/data/test.md"]
    uploaded_files = []
    for file_path in file_paths:
        file_metadata = upload_file_and_wait(client, source.id, file_path)
        uploaded_files.append(file_metadata)
        if not isinstance(file_metadata, dict):
            file_metadata = file_metadata.model_dump()
        assert file_metadata["processing_status"] == "completed", f"File {file_path} should be completed"

    files_list = client.folders.files.list(folder_id=source.id, limit=100).items

    assert len(files_list) == len(uploaded_files), f"Expected {len(uploaded_files)} files, got {len(files_list)}"

    for file_metadata in files_list:
        if not isinstance(file_metadata, dict):
            file_metadata = file_metadata.model_dump()
        assert file_metadata["processing_status"] == "completed", f"File {file_metadata['file_name']} should show completed status"

        if file_metadata["total_chunks"] and file_metadata["total_chunks"] > 0:
            assert file_metadata["chunks_embedded"] == file_metadata["total_chunks"], (
                f"File {file_metadata['file_name']} should have all chunks embedded: {file_metadata['chunks_embedded']}/{file_metadata['total_chunks']}"
            )

    client.folders.delete(folder_id=source.id)


def test_turbopuffer_lifecycle_file_and_source_deletion(disable_pinecone, client: LettaSDKClient):
    """Test that file and source deletion removes records from Turbopuffer"""
    source = client.folders.create(name="test_tpuf_lifecycle", embedding="openai/text-embedding-3-small")

    file_paths = ["tests/data/test.txt", "tests/data/test.md"]
    uploaded_files = []
    for file_path in file_paths:
        file_metadata = upload_file_and_wait(client, source.id, file_path)
        uploaded_files.append(file_metadata)

    user = User(name="temp", organization_id=DEFAULT_ORG_ID)
    tpuf_client = TurbopufferClient()

    # test file-level deletion
    if len(uploaded_files) > 1:
        file_to_delete = uploaded_files[0]

        passages_before = asyncio.run(
            tpuf_client.query_file_passages(
                source_ids=[source.id], organization_id=user.organization_id, actor=user, file_id=file_to_delete["id"], top_k=100
            )
        )
        print(f"Found {len(passages_before)} passages for file before deletion")
        assert len(passages_before) > 0, "Should have passages before deletion"

        client.folders.files.delete(folder_id=source.id, file_id=file_to_delete["id"])

        # allow time for deletion to propagate
        time.sleep(2)

        passages_after = asyncio.run(
            tpuf_client.query_file_passages(
                source_ids=[source.id], organization_id=user.organization_id, actor=user, file_id=file_to_delete["id"], top_k=100
            )
        )
        print(f"Found {len(passages_after)} passages for file after deletion")
        assert len(passages_after) == 0, f"File passages should be removed from Turbopuffer after deletion, but found {len(passages_after)}"

    # test source-level deletion
    remaining_passages_before = []
    for file_metadata in uploaded_files[1:]:
        passages = asyncio.run(
            tpuf_client.query_file_passages(
                source_ids=[source.id], organization_id=user.organization_id, actor=user, file_id=file_metadata["id"], top_k=100
            )
        )
        remaining_passages_before.extend(passages)

    print(f"Found {len(remaining_passages_before)} passages for remaining files before source deletion")
    assert len(remaining_passages_before) > 0, "Should have passages for remaining files"

    client.folders.delete(folder_id=source.id)

    # allow time for deletion to propagate
    time.sleep(3)

    remaining_passages_after = []
    for file_metadata in uploaded_files[1:]:
        try:
            passages = asyncio.run(
                tpuf_client.query_file_passages(
                    source_ids=[source.id], organization_id=user.organization_id, actor=user, file_id=file_metadata["id"], top_k=100
                )
            )
            remaining_passages_after.extend(passages)
        except Exception as e:
            # querying a deleted source may legitimately raise instead of returning empty
            print(f"Expected error querying deleted source: {e}")

    print(f"Found {len(remaining_passages_after)} passages for files after source deletion")
    assert len(remaining_passages_after) == 0, (
        f"All source passages should be removed from Turbopuffer after source deletion, but found {len(remaining_passages_after)}"
    )


def test_turbopuffer_multiple_sources(disable_pinecone, client: LettaSDKClient):
    """Test that Turbopuffer correctly isolates passages by source in org-scoped namespace"""
    source1 = client.folders.create(name="test_tpuf_source1", embedding="openai/text-embedding-3-small")
    source2 = client.folders.create(name="test_tpuf_source2", embedding="openai/text-embedding-3-small")

    file1_metadata = upload_file_and_wait(client, source1.id, "tests/data/test.txt")
    file2_metadata = upload_file_and_wait(client, source2.id, "tests/data/test.md")

    user = User(name="temp", organization_id=DEFAULT_ORG_ID)
    tpuf_client = TurbopufferClient()

    source1_passages = asyncio.run(
        tpuf_client.query_file_passages(source_ids=[source1.id], organization_id=user.organization_id, actor=user, top_k=100)
    )

    source2_passages = asyncio.run(
        tpuf_client.query_file_passages(source_ids=[source2.id], organization_id=user.organization_id, actor=user, top_k=100)
    )

    print(f"Source1 has {len(source1_passages)} passages")
    print(f"Source2 has {len(source2_passages)} passages")

    assert len(source1_passages) > 0, "Source1 should have passages"
    assert len(source2_passages) > 0, "Source2 should have passages"

    # each query result is a 3-tuple whose first element is the passage; the
    # remaining elements are unused here
    for passage, _, _ in source1_passages:
        assert passage.source_id == source1.id, f"Passage should belong to source1, but has source_id={passage.source_id}"
        assert passage.file_id == file1_metadata["id"], f"Passage should belong to file1, but has file_id={passage.file_id}"

    for passage, _, _ in source2_passages:
        assert passage.source_id == source2.id, f"Passage should belong to source2, but has source_id={passage.source_id}"
        assert passage.file_id == file2_metadata["id"], f"Passage should belong to file2, but has file_id={passage.file_id}"

    # delete source1 and verify source2 is unaffected
    client.folders.delete(folder_id=source1.id)
    time.sleep(2)

    source2_passages_after = asyncio.run(
        tpuf_client.query_file_passages(source_ids=[source2.id], organization_id=user.organization_id, actor=user, top_k=100)
    )

    assert len(source2_passages_after) == len(source2_passages), (
        f"Source2 should still have all passages after source1 deletion: {len(source2_passages_after)} vs {len(source2_passages)}"
    )

    client.folders.delete(folder_id=source2.id)


# --- End Turbopuffer Tests ---