chore(ci): Add uv support and use it for unit tests (#4127)

* cherrypick just relevant commits?

* make work with poetry

* update poetry?

* regen?

* change tests and dev to dependency groups instead of optional extras

* Fix Poetry/UV compatibility issues

- Fix sqlite-vec dependency: Remove optional flag from Poetry section to match main deps
- Regenerate poetry.lock to sync with pyproject.toml changes
- Test both package managers successfully:
  - Poetry: `poetry install --with dev --with test -E postgres -E external-tools -E cloud-tool-sandbox`
  - UV: `uv sync --group dev --group test --extra postgres --extra external-tools --extra cloud-tool-sandbox`

Resolves Poetry lock sync errors and ensures sqlite-vec is available for tests.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* more robust pip install

* Fix fern SDK wheel installation in CI workflow

Replace unreliable command substitution with proper error handling:
- Check if directory exists before attempting to find wheels
- Store wheel file path in variable to avoid empty arguments
- Provide clear error messages when directory/wheels are missing
- Prevents "required arguments were not provided" error in uv pip install

Fixes: error: the following required arguments were not provided: <PACKAGE>

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* debugging

* trigger CI

* ls

* revert whl installation to -e

* programmatic HIT version insertion

* version templating properly

* set var properly

* labelling

* remove version insertion

* ?

* try using sed '2r /dev/stdin'

* version

* try again smh

* not trigger on poetry version

* only add once

* filter only for project not poetry

* hand re-construct the file

* save tail?

* fix docker command

* please please please

* rename test -> tests

* update poetry and rename group to -E

* move async into tests extra and regen lock files and add sqlite extra

* remove loading cached venv from cloud api integration

* add uv dependency to CI runners

* test removing the custom event loop

* regen poetry.lock and try to fix async tests

* wrap async pg exception and event loop tweak in plugins

* remove event loop from plugins test and remove caching from cloud-api-integration-test

* migrate all tests away from event loop for pytest-asyncio

* pin firecrawl

* pin e2b

* take claude's suggestion

* deeper down the claude rabbit hole

* increase timeout for httpbin.org

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Kian Jones
2025-08-26 11:51:31 -07:00
committed by GitHub
parent 5bb13d9baa
commit 45c4dbd5e8
16 changed files with 7674 additions and 2003 deletions

3186
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,3 +1,93 @@
# Standard [project] metadata table (PEP 621). A separate [tool.poetry] table
# further down repeats the name and version for Poetry.
[project]
name = "letta"
version = "0.10.0"
description = "Create LLM agents with long-term memory and custom tools"
authors = [
{name = "Letta Team", email = "contact@letta.com"},
]
license = {text = "Apache License"}
readme = "README.md"
# Accepts Python 3.11 up to, but not including, 3.14.
requires-python = "<3.14,>=3.11"
# Core runtime dependencies, installed regardless of which extras are selected.
dependencies = [
"typer>=0.15.2",
"questionary>=2.0.1",
"pytz>=2023.3.post1",
"tqdm>=4.66.1",
"black[jupyter]>=24.2.0",
"setuptools>=70",
"prettytable>=3.9.0",
"docstring-parser>=0.16,<0.17",
"httpx>=0.28.0",
"numpy>=2.1.0",
"demjson3>=3.0.6",
"pyyaml>=6.0.1",
"sqlalchemy-json>=0.7.0",
"pydantic>=2.10.6",
"html2text>=2020.1.16",
"sqlalchemy[asyncio]>=2.0.41",
"python-box>=7.1.1",
"sqlmodel>=0.0.16",
"python-multipart>=0.0.19",
"sqlalchemy-utils>=0.41.2",
"pydantic-settings>=2.2.1",
"httpx-sse>=0.4.0",
"nltk>=3.8.1",
"jinja2>=3.1.5",
"composio-core>=0.7.7",
"alembic>=1.13.3",
"pyhumps>=3.8.0",
"pathvalidate>=3.2.1",
"sentry-sdk[fastapi]==2.19.1",
"rich>=13.9.4",
"brotli>=1.1.0",
"grpcio>=1.68.1",
"grpcio-tools>=1.68.1",
"llama-index>=0.12.2",
"llama-index-embeddings-openai>=0.3.1",
"anthropic>=0.49.0",
"letta_client>=0.1.276",
"openai>=1.99.9",
# The OpenTelemetry packages are pinned to exact versions (1.30.0 / 0.51b0).
# NOTE(review): presumably these must move together — confirm before bumping one.
"opentelemetry-api==1.30.0",
"opentelemetry-sdk==1.30.0",
"opentelemetry-instrumentation-requests==0.51b0",
"opentelemetry-instrumentation-sqlalchemy==0.51b0",
"opentelemetry-exporter-otlp==1.30.0",
"faker>=36.1.0",
"colorama>=0.4.6",
"marshmallow-sqlalchemy>=1.4.1",
"datamodel-code-generator[http]>=0.25.0",
"mcp[cli]>=1.9.4",
# Exact pin; the same pin is repeated in the `external-tools` extra, so keep the two in sync.
"firecrawl-py==2.16.5",
"apscheduler>=3.11.0",
"aiomultiprocess>=0.9.1",
"matplotlib>=3.10.1",
"tavily-python>=0.7.2",
"mistralai>=1.8.1",
"structlog>=25.4.0",
"certifi>=2025.6.15",
"markitdown[docx,pdf,pptx]>=0.1.2",
"orjson>=3.11.1",
]
# Optional extras. The commit notes select them with `uv sync --extra <name>`
# or `poetry install -E <name>`.
# NOTE(review): `all` is not the union of the other extras. It omits asyncpg,
# websockets, and everything in the sqlite, bedrock, google and
# cloud-tool-sandbox extras, and it is the only place that lists turbopuffer.
# It also lists "pytest" with no version bound while `dev` requires
# "pytest>=8.0.0". Confirm this is intended.
[project.optional-dependencies]
postgres = ["pgvector>=0.2.3", "pg8000>=1.30.3", "psycopg2-binary>=2.9.10", "psycopg2>=2.9.10", "asyncpg>=0.30.0"]
redis = ["redis>=6.2.0"]
pinecone = ["pinecone[asyncio]>=7.3.0"]
dev = ["pytest>=8.0.0", "pytest-asyncio>=0.24.0", "pexpect>=4.9.0", "black>=24.2.0", "pre-commit>=3.5.0", "pyright>=1.1.347", "pytest-order>=1.2.0", "autoflake>=2.3.0", "isort>=5.13.2", "locust>=2.31.5"]
experimental = ["uvloop>=0.21.0; sys_platform != 'win32'", "granian[reload]>=2.3.2", "google-cloud-profiler>=4.1.0"]
server = ["websockets>=12.0", "fastapi>=0.115.6", "uvicorn>=0.24.0.post1"]
# Exact pin on e2b-code-interpreter.
cloud-tool-sandbox = ["e2b-code-interpreter==1.5.2", "modal>=1.1.0"]
# firecrawl-py repeats the exact pin from the core dependency list.
external-tools = ["docker>=7.1.0", "langchain>=0.3.7", "wikipedia>=1.4.0", "langchain-community>=0.3.7", "firecrawl-py==2.16.5"]
tests = ["wikipedia>=1.4.0", "pytest-asyncio>=0.24.0"]
sqlite = ["aiosqlite>=0.21.0", "sqlite-vec>=0.1.7a2"]
bedrock = ["boto3>=1.36.24", "aioboto3>=14.3.0"]
google = ["google-genai>=1.15.0"]
desktop = ["pyright>=1.1.347", "fastapi>=0.115.6", "uvicorn>=0.24.0.post1", "docker>=7.1.0", "langchain>=0.3.7", "wikipedia>=1.4.0", "langchain-community>=0.3.7", "locust>=2.31.5", "sqlite-vec>=0.1.7a2", "pgvector>=0.2.3"]
all = ["pgvector>=0.2.3", "turbopuffer>=0.5.17", "pg8000>=1.30.3", "psycopg2-binary>=2.9.10", "psycopg2>=2.9.10", "pytest", "pytest-asyncio>=0.24.0", "pexpect>=4.9.0", "black>=24.2.0", "pre-commit>=3.5.0", "pyright>=1.1.347", "pytest-order>=1.2.0", "autoflake>=2.3.0", "isort>=5.13.2", "fastapi>=0.115.6", "uvicorn>=0.24.0.post1", "docker>=7.1.0", "langchain>=0.3.7", "wikipedia>=1.4.0", "langchain-community>=0.3.7", "locust>=2.31.5", "uvloop>=0.21.0; sys_platform != 'win32'", "granian[reload]>=2.3.2", "redis>=6.2.0", "pinecone[asyncio]>=7.3.0", "google-cloud-profiler>=4.1.0"]
[project.scripts]
letta = "letta.main:app"
[tool.poetry]
name = "letta"
version = "0.10.0"

View File

@@ -1,10 +1,12 @@
import logging
import os
from datetime import datetime, timezone
from typing import Generator
import pytest
from anthropic.types.beta.messages import BetaMessageBatch, BetaMessageBatchRequestCounts
from letta.server.db import db_registry
from letta.services.organization_manager import OrganizationManager
from letta.services.user_manager import UserManager
from letta.settings import tool_settings
@@ -14,6 +16,36 @@ def pytest_configure(config):
logging.basicConfig(level=logging.DEBUG)
@pytest.fixture(scope="session", autouse=True)
def disable_db_pooling_for_tests() -> Generator[None, None, None]:
    """Disable database connection pooling for the entire test session.

    Sets ``LETTA_DISABLE_SQLALCHEMY_POOLING`` to ``"true"`` for the duration of
    the session. On teardown the environment is put back the way it was found:
    a value that was already present is restored, otherwise the variable is
    removed.

    Yields:
        None: the fixture exists only for its effect on ``os.environ``.
    """
    env_key = "LETTA_DISABLE_SQLALCHEMY_POOLING"
    # Remember a value exported before the session so teardown does not clobber it.
    previous = os.environ.get(env_key)
    os.environ[env_key] = "true"
    try:
        yield
    finally:
        if previous is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous
# Applied to every test without being requested (autouse=True).
# NOTE(review): this is an async generator fixture declared with plain
# @pytest.fixture. pytest-asyncio only drives such fixtures in auto mode
# (strict mode needs @pytest_asyncio.fixture) — confirm the configured asyncio_mode.
# NOTE(review): indentation is not visible in this view, so confirm whether the
# three registry-reset statements below sit inside or after the hasattr guard.
@pytest.fixture(autouse=True)
async def cleanup_db_connections():
"""Cleanup database connections after each test."""
# Everything after the yield is teardown and runs once the test has finished.
yield
try:
if hasattr(db_registry, "_async_engines"):
for engine in db_registry._async_engines.values():
if engine:
try:
await engine.dispose()
except Exception:
# Suppress common teardown errors that don't affect test validity
pass
# Mark the async side of the registry as uninitialized and drop the cached
# engines and session factories.
db_registry._initialized["async"] = False
db_registry._async_engines.clear()
db_registry._async_session_factories.clear()
except Exception:
# Suppress all cleanup errors to avoid confusing test failures
pass
@pytest.fixture
def disable_e2b_api_key() -> Generator[None, None, None]:
"""

View File

@@ -26,13 +26,6 @@ agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph"))
config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json"
@pytest.fixture(scope="module")
def event_loop():
loop = asyncio.new_event_loop()
yield loop
loop.close()
@pytest.fixture()
def server():
config = LettaConfig.load()

View File

@@ -321,7 +321,7 @@ def tool_with_pip_requirements(test_user):
import requests
# Simple usage to verify packages work
response = requests.get("https://httpbin.org/json", timeout=5)
response = requests.get("https://httpbin.org/json", timeout=30)
arr = np.array([1, 2, 3])
return f"Success! Status: {response.status_code}, Array sum: {np.sum(arr)}"
except ImportError as e:

View File

@@ -70,7 +70,7 @@ def client(server_url: str) -> Letta:
yield client_instance
@pytest.fixture(scope="module")
@pytest.fixture(scope="function")
def agent_state(client: Letta) -> AgentState:
"""
Creates and returns an agent state for testing with a pre-configured agent.
@@ -333,7 +333,7 @@ def test_web_search(
], f"Invalid api_key_source: {response_json['api_key_source']}"
@pytest.mark.asyncio
@pytest.mark.asyncio(scope="function")
async def test_web_search_uses_agent_env_var_model():
"""Test that web search uses the model specified in agent tool exec env vars."""

View File

@@ -32,20 +32,6 @@ from letta.services.tool_sandbox.modal_sandbox_v2 import AsyncToolSandboxModalV2
from letta.services.tool_sandbox.modal_version_manager import ModalVersionManager, get_version_manager
from letta.services.user_manager import UserManager
@pytest.fixture
def event_loop():
"""Create an instance of the default event loop for the test session."""
loop = asyncio.new_event_loop()
yield loop
# Cleanup tasks before closing loop
pending = asyncio.all_tasks(loop)
for task in pending:
task.cancel()
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
loop.close()
# ============================================================================
# SHARED FIXTURES
# ============================================================================
@@ -90,12 +76,12 @@ def basic_tool(test_user):
source_code="""
def calculate(operation: str, a: float, b: float) -> float:
'''Perform a calculation on two numbers.
Args:
operation: The operation to perform (add, subtract, multiply, divide)
a: The first number
b: The second number
Returns:
float: The result of the calculation
'''
@@ -145,11 +131,11 @@ import asyncio
async def fetch_data(url: str, delay: float = 0.1) -> Dict:
'''Simulate fetching data from a URL.
Args:
url: The URL to fetch data from
delay: The delay in seconds before returning
Returns:
Dict: A dictionary containing the fetched data
'''
@@ -194,17 +180,17 @@ import hashlib
def process_json(data: str) -> Dict:
'''Process JSON data and return metadata.
Args:
data: The JSON string to process
Returns:
Dict: Metadata about the JSON data
'''
try:
parsed = json.loads(data)
data_hash = hashlib.md5(data.encode()).hexdigest()
return {
"valid": True,
"keys": list(parsed.keys()) if isinstance(parsed, dict) else None,

View File

@@ -1,4 +1,3 @@
import asyncio
from typing import List, Optional
import pytest
@@ -36,14 +35,6 @@ from tests.utils import create_tool_from_func
# ------------------------------
@pytest.fixture(scope="module")
def event_loop():
"""Use a single asyncio loop for the entire test session."""
loop = asyncio.new_event_loop()
yield loop
loop.close()
def _clear_tables():
from letta.server.db import db_context

View File

@@ -49,14 +49,6 @@ EXPECTED_ROLES = ["system", "assistant", "tool", "user", "user"]
# --------------------------------------------------------------------------- #
@pytest.fixture(scope="module")
def event_loop():
"""Use a single asyncio loop for the entire test session."""
loop = asyncio.new_event_loop()
yield loop
loop.close()
@pytest.fixture
def weather_tool(server):
def get_weather(location: str) -> str:

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@ from letta.settings import settings
@pytest.mark.asyncio
async def test_default_experimental_decorator(event_loop):
async def test_default_experimental_decorator():
settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"
@experimental("test_just_pass", fallback_function=lambda: False, kwarg1=3)
@@ -18,7 +18,7 @@ async def test_default_experimental_decorator(event_loop):
@pytest.mark.asyncio
async def test_overwrite_arg_success(event_loop):
async def test_overwrite_arg_success():
settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"
@experimental("test_override_kwarg", fallback_function=lambda *args, **kwargs: False, bool_val=True)
@@ -31,7 +31,7 @@ async def test_overwrite_arg_success(event_loop):
@pytest.mark.asyncio
async def test_overwrite_arg_fail(event_loop):
async def test_overwrite_arg_fail():
# Should fallback to lambda
settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"
@@ -61,7 +61,7 @@ async def test_overwrite_arg_fail(event_loop):
@pytest.mark.asyncio
async def test_redis_flag(event_loop):
async def test_redis_flag():
settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"
@experimental("test_redis_flag", fallback_function=lambda *args, **kwargs: _raise())

View File

@@ -130,7 +130,7 @@ async def test_provider_trace_experimental_step(message, agent_state, default_us
@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Get the weather in San Francisco."])
async def test_provider_trace_experimental_step_stream(message, agent_state, default_user, event_loop):
async def test_provider_trace_experimental_step_stream(message, agent_state, default_user):
experimental_agent = LettaAgent(
agent_id=agent_state.id,
message_manager=MessageManager(),
@@ -169,7 +169,7 @@ async def test_provider_trace_experimental_step_stream(message, agent_state, def
@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Get the weather in San Francisco."])
async def test_provider_trace_step(client, agent_state, default_user, message, event_loop):
async def test_provider_trace_step(client, agent_state, default_user, message):
client.agents.messages.create(agent_id=agent_state.id, messages=[])
response = client.agents.messages.create(
agent_id=agent_state.id,
@@ -186,7 +186,7 @@ async def test_provider_trace_step(client, agent_state, default_user, message, e
@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Get the weather in San Francisco."])
async def test_noop_provider_trace(message, agent_state, default_user, event_loop):
async def test_noop_provider_trace(message, agent_state, default_user):
experimental_agent = LettaAgent(
agent_id=agent_state.id,
message_manager=MessageManager(),

View File

@@ -4,7 +4,7 @@ from letta.data_sources.redis_client import get_redis_client
@pytest.mark.asyncio
async def test_redis_client(event_loop):
async def test_redis_client():
test_values = {"LETTA_TEST_0": [1, 2, 3], "LETTA_TEST_1": ["apple", "pear", "banana"], "LETTA_TEST_2": ["{}", 3.2, "cat"]}
redis_client = await get_redis_client()

View File

@@ -485,7 +485,7 @@ def test_delete_agent_same_org(server: SyncServer, org_id: str, user: User):
@pytest.mark.asyncio
async def test_read_local_llm_configs(server: SyncServer, user: User, event_loop):
async def test_read_local_llm_configs(server: SyncServer, user: User):
configs_base_dir = os.path.join(os.path.expanduser("~"), ".letta", "llm_configs")
clean_up_dir = False
if not os.path.exists(configs_base_dir):
@@ -1016,7 +1016,7 @@ async def test_add_remove_tools_update_agent(server: SyncServer, user_id: str, b
@pytest.mark.asyncio
async def test_messages_with_provider_override(server: SyncServer, user_id: str, event_loop):
async def test_messages_with_provider_override(server: SyncServer, user_id: str):
actor = await server.user_manager.get_actor_or_default_async(actor_id=user_id)
provider = server.provider_manager.create_provider(
request=ProviderCreate(
@@ -1096,7 +1096,7 @@ async def test_messages_with_provider_override(server: SyncServer, user_id: str,
@pytest.mark.asyncio
async def test_unique_handles_for_provider_configs(server: SyncServer, user: User, event_loop):
async def test_unique_handles_for_provider_configs(server: SyncServer, user: User):
models = await server.list_llm_models_async(actor=user)
model_handles = [model.handle for model in models]
assert sorted(model_handles) == sorted(list(set(model_handles))), "All models should have unique handles"

View File

@@ -1,5 +1,3 @@
import asyncio
import pytest
from letta.constants import MAX_FILENAME_LENGTH
@@ -522,19 +520,8 @@ def test_line_chunker_only_start_parameter():
# ---------------------- Alembic Revision TESTS ---------------------- #
@pytest.fixture(scope="module")
def event_loop():
"""
Create an event loop for the entire test session.
Ensures all async tasks use the same loop, avoiding cross-loop errors.
"""
loop = asyncio.new_event_loop()
yield loop
loop.close()
@pytest.mark.asyncio
async def test_get_latest_alembic_revision(event_loop):
async def test_get_latest_alembic_revision():
"""Test that get_latest_alembic_revision returns a valid revision ID from the database."""
from letta.utils import get_latest_alembic_revision
@@ -553,7 +540,7 @@ async def test_get_latest_alembic_revision(event_loop):
@pytest.mark.asyncio
async def test_get_latest_alembic_revision_consistency(event_loop):
async def test_get_latest_alembic_revision_consistency():
"""Test that get_latest_alembic_revision returns the same value on multiple calls."""
from letta.utils import get_latest_alembic_revision

5958
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff