chore(ci): Add uv support and use for unit tests (#4127)

* cherrypick just relevant commits?
* make work with poetry
* update poetry?
* regen?
* change tests and dev to dependency groups instead of optional extras
* Fix Poetry/UV compatibility issues

  - Fix sqlite-vec dependency: Remove optional flag from Poetry section to match main deps
  - Regenerate poetry.lock to sync with pyproject.toml changes
  - Test both package managers successfully:
    - Poetry: `poetry install --with dev --with test -E postgres -E external-tools -E cloud-tool-sandbox`
    - UV: `uv sync --group dev --group test --extra postgres --extra external-tools --extra cloud-tool-sandbox`

  Resolves Poetry lock sync errors and ensures sqlite-vec is available for tests.

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>
* more robust pip install
* Fix fern SDK wheel installation in CI workflow

  Replace unreliable command substitution with proper error handling:
  - Check if directory exists before attempting to find wheels
  - Store wheel file path in variable to avoid empty arguments
  - Provide clear error messages when directory/wheels are missing
  - Prevents "required arguments were not provided" error in uv pip install

  Fixes: error: the following required arguments were not provided: <PACKAGE>

  🤖 Generated with [Claude Code](https://claude.ai/code)

  Co-Authored-By: Claude <noreply@anthropic.com>
* debugging
* trigger CI
* ls
* revert whl installation to -e
* programmatic HIT version insertion
* version templating properly
* set var properly
* labelling
* remove version insertion
* ?
* try using sed '2r /dev/stdin'
* version
* try again smh
* not trigger on poetry version
* only add once
* filter only for project not poetry
* hand re-construct the file
* save tail?
* fix docker command
* please please please
* rename test -> tests
* update poetry and rename group to -E
* move async into tests extra and regen lock files and add sqlite extra
* remove loading cached venv from cloud api integration
* add uv dependency to CI runners
* test removing the custom event loop
* regen poetry.lock and try to fix async tests
* wrap async pg exception and event loop tweak in plugins
* remove event loop from plugins test and remove caching from cloud-api-integration-test
* migrate all tests away from event loop for pytest-asyncio
* pin firecrawl
* pin e2b
* take claude's suggestion
* deeper down the claude rabbit hole
* increase timeout for httpbin.org

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-08-26 11:51:31 -07:00
parent 5bb13d9baa
commit 45c4dbd5e8
16 changed files with 7674 additions and 2003 deletions
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,93 @@
+[project]
+name = "letta"
+version = "0.10.0"
+description = "Create LLM agents with long-term memory and custom tools"
+authors = [
+    {name = "Letta Team", email = "contact@letta.com"},
+]
+license = {text = "Apache License"}
+readme = "README.md"
+requires-python = "<3.14,>=3.11"
+dependencies = [
+    "typer>=0.15.2",
+    "questionary>=2.0.1",
+    "pytz>=2023.3.post1",
+    "tqdm>=4.66.1",
+    "black[jupyter]>=24.2.0",
+    "setuptools>=70",
+    "prettytable>=3.9.0",
+    "docstring-parser>=0.16,<0.17",
+    "httpx>=0.28.0",
+    "numpy>=2.1.0",
+    "demjson3>=3.0.6",
+    "pyyaml>=6.0.1",
+    "sqlalchemy-json>=0.7.0",
+    "pydantic>=2.10.6",
+    "html2text>=2020.1.16",
+    "sqlalchemy[asyncio]>=2.0.41",
+    "python-box>=7.1.1",
+    "sqlmodel>=0.0.16",
+    "python-multipart>=0.0.19",
+    "sqlalchemy-utils>=0.41.2",
+    "pydantic-settings>=2.2.1",
+    "httpx-sse>=0.4.0",
+    "nltk>=3.8.1",
+    "jinja2>=3.1.5",
+    "composio-core>=0.7.7",
+    "alembic>=1.13.3",
+    "pyhumps>=3.8.0",
+    "pathvalidate>=3.2.1",
+    "sentry-sdk[fastapi]==2.19.1",
+    "rich>=13.9.4",
+    "brotli>=1.1.0",
+    "grpcio>=1.68.1",
+    "grpcio-tools>=1.68.1",
+    "llama-index>=0.12.2",
+    "llama-index-embeddings-openai>=0.3.1",
+    "anthropic>=0.49.0",
+    "letta_client>=0.1.276",
+    "openai>=1.99.9",
+    "opentelemetry-api==1.30.0",
+    "opentelemetry-sdk==1.30.0",
+    "opentelemetry-instrumentation-requests==0.51b0",
+    "opentelemetry-instrumentation-sqlalchemy==0.51b0",
+    "opentelemetry-exporter-otlp==1.30.0",
+    "faker>=36.1.0",
+    "colorama>=0.4.6",
+    "marshmallow-sqlalchemy>=1.4.1",
+    "datamodel-code-generator[http]>=0.25.0",
+    "mcp[cli]>=1.9.4",
+    "firecrawl-py==2.16.5",
+    "apscheduler>=3.11.0",
+    "aiomultiprocess>=0.9.1",
+    "matplotlib>=3.10.1",
+    "tavily-python>=0.7.2",
+    "mistralai>=1.8.1",
+    "structlog>=25.4.0",
+    "certifi>=2025.6.15",
+    "markitdown[docx,pdf,pptx]>=0.1.2",
+    "orjson>=3.11.1",
+]
+
+[project.optional-dependencies]
+postgres = ["pgvector>=0.2.3", "pg8000>=1.30.3", "psycopg2-binary>=2.9.10", "psycopg2>=2.9.10", "asyncpg>=0.30.0"]
+redis = ["redis>=6.2.0"]
+pinecone = ["pinecone[asyncio]>=7.3.0"]
+dev = ["pytest>=8.0.0", "pytest-asyncio>=0.24.0", "pexpect>=4.9.0", "black>=24.2.0", "pre-commit>=3.5.0", "pyright>=1.1.347", "pytest-order>=1.2.0", "autoflake>=2.3.0", "isort>=5.13.2", "locust>=2.31.5"]
+experimental = ["uvloop>=0.21.0; sys_platform != 'win32'", "granian[reload]>=2.3.2", "google-cloud-profiler>=4.1.0"]
+server = ["websockets>=12.0", "fastapi>=0.115.6", "uvicorn>=0.24.0.post1"]
+cloud-tool-sandbox = ["e2b-code-interpreter==1.5.2", "modal>=1.1.0"]
+external-tools = ["docker>=7.1.0", "langchain>=0.3.7", "wikipedia>=1.4.0", "langchain-community>=0.3.7", "firecrawl-py==2.16.5"]
+tests = ["wikipedia>=1.4.0", "pytest-asyncio>=0.24.0"]
+sqlite = ["aiosqlite>=0.21.0", "sqlite-vec>=0.1.7a2"]
+bedrock = ["boto3>=1.36.24", "aioboto3>=14.3.0"]
+google = ["google-genai>=1.15.0"]
+desktop = ["pyright>=1.1.347", "fastapi>=0.115.6", "uvicorn>=0.24.0.post1", "docker>=7.1.0", "langchain>=0.3.7", "wikipedia>=1.4.0", "langchain-community>=0.3.7", "locust>=2.31.5", "sqlite-vec>=0.1.7a2", "pgvector>=0.2.3"]
+all = ["pgvector>=0.2.3", "turbopuffer>=0.5.17", "pg8000>=1.30.3", "psycopg2-binary>=2.9.10", "psycopg2>=2.9.10", "pytest", "pytest-asyncio>=0.24.0", "pexpect>=4.9.0", "black>=24.2.0", "pre-commit>=3.5.0", "pyright>=1.1.347", "pytest-order>=1.2.0", "autoflake>=2.3.0", "isort>=5.13.2", "fastapi>=0.115.6", "uvicorn>=0.24.0.post1", "docker>=7.1.0", "langchain>=0.3.7", "wikipedia>=1.4.0", "langchain-community>=0.3.7", "locust>=2.31.5", "uvloop>=0.21.0; sys_platform != 'win32'", "granian[reload]>=2.3.2", "redis>=6.2.0", "pinecone[asyncio]>=7.3.0", "google-cloud-profiler>=4.1.0"]
+
+[project.scripts]
+letta = "letta.main:app"
+
 [tool.poetry]
 name = "letta"
 version = "0.10.0"
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,10 +1,12 @@
 import logging
+import os
 from datetime import datetime, timezone
 from typing import Generator

 import pytest
 from anthropic.types.beta.messages import BetaMessageBatch, BetaMessageBatchRequestCounts

+from letta.server.db import db_registry
 from letta.services.organization_manager import OrganizationManager
 from letta.services.user_manager import UserManager
 from letta.settings import tool_settings
@@ -14,6 +16,36 @@ def pytest_configure(config):
    logging.basicConfig(level=logging.DEBUG)


+@pytest.fixture(scope="session", autouse=True)
+def disable_db_pooling_for_tests():
+    """Disable database connection pooling for the entire test session."""
+    os.environ["LETTA_DISABLE_SQLALCHEMY_POOLING"] = "true"
+    yield
+    if "LETTA_DISABLE_SQLALCHEMY_POOLING" in os.environ:
+        del os.environ["LETTA_DISABLE_SQLALCHEMY_POOLING"]
+
+
+@pytest.fixture(autouse=True)
+async def cleanup_db_connections():
+    """Cleanup database connections after each test."""
+    yield
+    try:
+        if hasattr(db_registry, "_async_engines"):
+            for engine in db_registry._async_engines.values():
+                if engine:
+                    try:
+                        await engine.dispose()
+                    except Exception:
+                        # Suppress common teardown errors that don't affect test validity
+                        pass
+        db_registry._initialized["async"] = False
+        db_registry._async_engines.clear()
+        db_registry._async_session_factories.clear()
+    except Exception:
+        # Suppress all cleanup errors to avoid confusing test failures
+        pass
+
+
@pytest.fixture
 def disable_e2b_api_key() -> Generator[None, None, None]:
    """
--- a/tests/integration_test_agent_tool_graph.py
+++ b/tests/integration_test_agent_tool_graph.py
@@ -26,13 +26,6 @@ agent_uuid = str(uuid.uuid5(namespace, "test_agent_tool_graph"))
 config_file = "tests/configs/llm_model_configs/openai-gpt-4o.json"


-@pytest.fixture(scope="module")
-def event_loop():
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
@pytest.fixture()
 def server():
    config = LettaConfig.load()
--- a/tests/integration_test_async_tool_sandbox.py
+++ b/tests/integration_test_async_tool_sandbox.py
@@ -321,7 +321,7 @@ def tool_with_pip_requirements(test_user):
            import requests

            # Simple usage to verify packages work
-            response = requests.get("https://httpbin.org/json", timeout=5)
+            response = requests.get("https://httpbin.org/json", timeout=30)
            arr = np.array([1, 2, 3])
            return f"Success! Status: {response.status_code}, Array sum: {np.sum(arr)}"
        except ImportError as e:
--- a/tests/integration_test_builtin_tools.py
+++ b/tests/integration_test_builtin_tools.py
@@ -70,7 +70,7 @@ def client(server_url: str) -> Letta:
    yield client_instance


-@pytest.fixture(scope="module")
+@pytest.fixture(scope="function")
 def agent_state(client: Letta) -> AgentState:
    """
    Creates and returns an agent state for testing with a pre-configured agent.
@@ -333,7 +333,7 @@ def test_web_search(
    ], f"Invalid api_key_source: {response_json['api_key_source']}"


-@pytest.mark.asyncio
+@pytest.mark.asyncio(scope="function")
 async def test_web_search_uses_agent_env_var_model():
    """Test that web search uses the model specified in agent tool exec env vars."""

--- a/tests/integration_test_modal_sandbox_v2.py
+++ b/tests/integration_test_modal_sandbox_v2.py
@@ -32,20 +32,6 @@ from letta.services.tool_sandbox.modal_sandbox_v2 import AsyncToolSandboxModalV2
 from letta.services.tool_sandbox.modal_version_manager import ModalVersionManager, get_version_manager
 from letta.services.user_manager import UserManager

-
-@pytest.fixture
-def event_loop():
-    """Create an instance of the default event loop for the test session."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    # Cleanup tasks before closing loop
-    pending = asyncio.all_tasks(loop)
-    for task in pending:
-        task.cancel()
-    loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
-    loop.close()
-
-
 # ============================================================================
 # SHARED FIXTURES
 # ============================================================================
@@ -90,12 +76,12 @@ def basic_tool(test_user):
        source_code="""
 def calculate(operation: str, a: float, b: float) -> float:
    '''Perform a calculation on two numbers.
-    
+
    Args:
        operation: The operation to perform (add, subtract, multiply, divide)
        a: The first number
        b: The second number
-    
+
    Returns:
        float: The result of the calculation
    '''
@@ -145,11 +131,11 @@ import asyncio

 async def fetch_data(url: str, delay: float = 0.1) -> Dict:
    '''Simulate fetching data from a URL.
-    
+
    Args:
        url: The URL to fetch data from
        delay: The delay in seconds before returning
-    
+
    Returns:
        Dict: A dictionary containing the fetched data
    '''
@@ -194,17 +180,17 @@ import hashlib

 def process_json(data: str) -> Dict:
    '''Process JSON data and return metadata.
-    
+
    Args:
        data: The JSON string to process
-    
+
    Returns:
        Dict: Metadata about the JSON data
    '''
    try:
        parsed = json.loads(data)
        data_hash = hashlib.md5(data.encode()).hexdigest()
-        
+
        return {
            "valid": True,
            "keys": list(parsed.keys()) if isinstance(parsed, dict) else None,
--- a/tests/test_agent_serialization_v2.py
+++ b/tests/test_agent_serialization_v2.py
@@ -1,4 +1,3 @@
-import asyncio
 from typing import List, Optional

 import pytest
@@ -36,14 +35,6 @@ from tests.utils import create_tool_from_func
 # ------------------------------


-@pytest.fixture(scope="module")
-def event_loop():
-    """Use a single asyncio loop for the entire test session."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
 def _clear_tables():
    from letta.server.db import db_context

--- a/tests/test_letta_agent_batch.py
+++ b/tests/test_letta_agent_batch.py
@@ -49,14 +49,6 @@ EXPECTED_ROLES = ["system", "assistant", "tool", "user", "user"]
 # --------------------------------------------------------------------------- #


-@pytest.fixture(scope="module")
-def event_loop():
-    """Use a single asyncio loop for the entire test session."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
@pytest.fixture
 def weather_tool(server):
    def get_weather(location: str) -> str:
--- a/tests/test_managers.py
+++ b/tests/test_managers.py
--- a/tests/test_plugins.py
+++ b/tests/test_plugins.py
@@ -6,7 +6,7 @@ from letta.settings import settings


@pytest.mark.asyncio
-async def test_default_experimental_decorator(event_loop):
+async def test_default_experimental_decorator():
    settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"

    @experimental("test_just_pass", fallback_function=lambda: False, kwarg1=3)
@@ -18,7 +18,7 @@ async def test_default_experimental_decorator(event_loop):


@pytest.mark.asyncio
-async def test_overwrite_arg_success(event_loop):
+async def test_overwrite_arg_success():
    settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"

    @experimental("test_override_kwarg", fallback_function=lambda *args, **kwargs: False, bool_val=True)
@@ -31,7 +31,7 @@ async def test_overwrite_arg_success(event_loop):


@pytest.mark.asyncio
-async def test_overwrite_arg_fail(event_loop):
+async def test_overwrite_arg_fail():
    # Should fallback to lambda
    settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"

@@ -61,7 +61,7 @@ async def test_overwrite_arg_fail(event_loop):


@pytest.mark.asyncio
-async def test_redis_flag(event_loop):
+async def test_redis_flag():
    settings.plugin_register = "experimental_check=tests.helpers.plugins_helper:is_experimental_okay"

    @experimental("test_redis_flag", fallback_function=lambda *args, **kwargs: _raise())
--- a/tests/test_provider_trace.py
+++ b/tests/test_provider_trace.py
@@ -130,7 +130,7 @@ async def test_provider_trace_experimental_step(message, agent_state, default_us

@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Get the weather in San Francisco."])
-async def test_provider_trace_experimental_step_stream(message, agent_state, default_user, event_loop):
+async def test_provider_trace_experimental_step_stream(message, agent_state, default_user):
    experimental_agent = LettaAgent(
        agent_id=agent_state.id,
        message_manager=MessageManager(),
@@ -169,7 +169,7 @@ async def test_provider_trace_experimental_step_stream(message, agent_state, def

@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Get the weather in San Francisco."])
-async def test_provider_trace_step(client, agent_state, default_user, message, event_loop):
+async def test_provider_trace_step(client, agent_state, default_user, message):
    client.agents.messages.create(agent_id=agent_state.id, messages=[])
    response = client.agents.messages.create(
        agent_id=agent_state.id,
@@ -186,7 +186,7 @@ async def test_provider_trace_step(client, agent_state, default_user, message, e

@pytest.mark.asyncio
@pytest.mark.parametrize("message", ["Get the weather in San Francisco."])
-async def test_noop_provider_trace(message, agent_state, default_user, event_loop):
+async def test_noop_provider_trace(message, agent_state, default_user):
    experimental_agent = LettaAgent(
        agent_id=agent_state.id,
        message_manager=MessageManager(),
--- a/tests/test_redis_client.py
+++ b/tests/test_redis_client.py
@@ -4,7 +4,7 @@ from letta.data_sources.redis_client import get_redis_client


@pytest.mark.asyncio
-async def test_redis_client(event_loop):
+async def test_redis_client():
    test_values = {"LETTA_TEST_0": [1, 2, 3], "LETTA_TEST_1": ["apple", "pear", "banana"], "LETTA_TEST_2": ["{}", 3.2, "cat"]}
    redis_client = await get_redis_client()

--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -485,7 +485,7 @@ def test_delete_agent_same_org(server: SyncServer, org_id: str, user: User):


@pytest.mark.asyncio
-async def test_read_local_llm_configs(server: SyncServer, user: User, event_loop):
+async def test_read_local_llm_configs(server: SyncServer, user: User):
    configs_base_dir = os.path.join(os.path.expanduser("~"), ".letta", "llm_configs")
    clean_up_dir = False
    if not os.path.exists(configs_base_dir):
@@ -1016,7 +1016,7 @@ async def test_add_remove_tools_update_agent(server: SyncServer, user_id: str, b


@pytest.mark.asyncio
-async def test_messages_with_provider_override(server: SyncServer, user_id: str, event_loop):
+async def test_messages_with_provider_override(server: SyncServer, user_id: str):
    actor = await server.user_manager.get_actor_or_default_async(actor_id=user_id)
    provider = server.provider_manager.create_provider(
        request=ProviderCreate(
@@ -1096,7 +1096,7 @@ async def test_messages_with_provider_override(server: SyncServer, user_id: str,


@pytest.mark.asyncio
-async def test_unique_handles_for_provider_configs(server: SyncServer, user: User, event_loop):
+async def test_unique_handles_for_provider_configs(server: SyncServer, user: User):
    models = await server.list_llm_models_async(actor=user)
    model_handles = [model.handle for model in models]
    assert sorted(model_handles) == sorted(list(set(model_handles))), "All models should have unique handles"
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,5 +1,3 @@
-import asyncio
-
 import pytest

 from letta.constants import MAX_FILENAME_LENGTH
@@ -522,19 +520,8 @@ def test_line_chunker_only_start_parameter():
 # ---------------------- Alembic Revision TESTS ---------------------- #


-@pytest.fixture(scope="module")
-def event_loop():
-    """
-    Create an event loop for the entire test session.
-    Ensures all async tasks use the same loop, avoiding cross-loop errors.
-    """
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
@pytest.mark.asyncio
-async def test_get_latest_alembic_revision(event_loop):
+async def test_get_latest_alembic_revision():
    """Test that get_latest_alembic_revision returns a valid revision ID from the database."""
    from letta.utils import get_latest_alembic_revision

@@ -553,7 +540,7 @@ async def test_get_latest_alembic_revision(event_loop):


@pytest.mark.asyncio
-async def test_get_latest_alembic_revision_consistency(event_loop):
+async def test_get_latest_alembic_revision_consistency():
    """Test that get_latest_alembic_revision returns the same value on multiple calls."""
    from letta.utils import get_latest_alembic_revision

--- a/uv.lock
+++ b/uv.lock