Files
letta-server/tests/integration_test_override_model.py
Ari Webb 193c0e4b74 feat: add override_model to message endpoints (#8763)
* feat: add override_model to message endpoints

* add tests back

* remove from ci
2026-01-19 15:54:42 -08:00

464 lines
15 KiB
Python

"""
Integration tests for the override_model functionality.
Tests the ability to send messages to agents using a different model than the agent's default
configured model, without permanently changing the agent's configuration.
Note: Some type: ignore comments are present because the SDK types haven't been regenerated
to include the new override_model parameter yet.
"""
import logging
import os
import threading
import time
import uuid
from typing import Any, Generator, List
import pytest
import requests
from dotenv import load_dotenv
from letta_client import APIError, AsyncLetta, Letta
from letta_client.types import AgentState, MessageCreateParam
logger = logging.getLogger(__name__)

# Stable OTID reused across every send in this module so repeated sends of the
# same logical user message are correlated/deduplicated consistently.
USER_MESSAGE_OTID: str = str(uuid.uuid4())

# Canonical test payload: a prompt that asks the model for a fixed short reply,
# keeping responses cheap and (mostly) deterministic across providers.
USER_MESSAGE_SIMPLE: List[MessageCreateParam] = [
    MessageCreateParam(
        role="user",
        content="This is an automated test. Please respond with exactly: 'Test successful'",
        otid=USER_MESSAGE_OTID,
    )
]
# ------------------------------
# Fixtures
# ------------------------------
@pytest.fixture(scope="module")
def server_url() -> str:
    """
    Provide the base URL of a running Letta server.

    If LETTA_SERVER_URL is set, that URL is returned as-is (assumed already
    running). Otherwise a server is started in a background daemon thread and
    the health endpoint is polled until it answers or a 60s deadline passes.

    Returns:
        The server base URL, e.g. "http://localhost:8283".

    Raises:
        RuntimeError: if the locally-started server does not become reachable
            within the deadline.
    """

    def _run_server() -> None:
        # Load .env inside the thread so server configuration is in the
        # environment before the app boots.
        load_dotenv()
        from letta.server.rest_api.app import start_server

        start_server(debug=True)

    url: str = os.getenv("LETTA_SERVER_URL", "http://localhost:8283")
    if not os.getenv("LETTA_SERVER_URL"):
        thread = threading.Thread(target=_run_server, daemon=True)
        thread.start()

        # Poll until the server is up (or timeout). Any non-5xx status counts
        # as "up" — even a 404 proves the HTTP server is accepting requests.
        timeout_seconds = 60
        deadline = time.time() + timeout_seconds
        while time.time() < deadline:
            try:
                # Bugfix: bound each probe with a timeout so a hung connection
                # cannot block past the overall deadline (previously the GET
                # had no timeout and could hang indefinitely).
                resp = requests.get(url + "/v1/health", timeout=5)
                if resp.status_code < 500:
                    break
            except requests.exceptions.RequestException:
                pass
            time.sleep(0.1)
        else:
            raise RuntimeError(f"Could not reach {url} within {timeout_seconds}s")
    return url
@pytest.fixture(scope="module")
def client(server_url: str) -> Generator[Letta, None, None]:
    """Yield a synchronous Letta REST client bound to the test server."""
    yield Letta(base_url=server_url)
@pytest.fixture(scope="function")
def async_client(server_url: str) -> Generator[AsyncLetta, None, None]:
    """Yield an asynchronous Letta REST client bound to the test server."""
    yield AsyncLetta(base_url=server_url)
@pytest.fixture(scope="function")
def agent_with_gpt4o_mini(client: Letta) -> Generator[AgentState, None, None]:
    """
    Yield a throwaway agent whose default model is gpt-4o-mini, then delete
    it on teardown (best-effort: a failed delete only logs a warning).
    """
    seed_blocks = [
        {"label": "human", "value": "Test user"},
        {"label": "persona", "value": "You are a helpful assistant."},
    ]
    created = client.agents.create(
        name=f"override_model_test_{uuid.uuid4().hex[:8]}",
        model="openai/gpt-4o-mini",
        embedding="openai/text-embedding-3-small",
        tags=["override_model_test"],
        memory_blocks=seed_blocks,
    )
    yield created
    # Teardown: remove the agent; never fail the test run over cleanup.
    try:
        client.agents.delete(created.id)
    except Exception as e:
        logger.warning(f"Failed to delete agent {created.id}: {e}")
@pytest.fixture(scope="function")
def agent_with_gemini(client: Letta) -> Generator[AgentState, None, None]:
    """
    Yield a throwaway agent whose default model is Gemini 2.0 Flash, then
    delete it on teardown (best-effort: a failed delete only logs a warning).
    """
    seed_blocks = [
        {"label": "human", "value": "Test user"},
        {"label": "persona", "value": "You are a helpful assistant."},
    ]
    created = client.agents.create(
        name=f"override_model_test_gemini_{uuid.uuid4().hex[:8]}",
        model="google_ai/gemini-2.0-flash",
        embedding="openai/text-embedding-3-small",
        tags=["override_model_test"],
        memory_blocks=seed_blocks,
    )
    yield created
    # Teardown: remove the agent; never fail the test run over cleanup.
    try:
        client.agents.delete(created.id)
    except Exception as e:
        logger.warning(f"Failed to delete agent {created.id}: {e}")
# ------------------------------
# Test Cases
# ------------------------------
class TestOverrideModelSync:
    """Blocking (non-streaming) message-send tests for override_model."""

    def test_override_model_changes_model_used(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        A gpt-4o-mini agent overridden with gpt-4o should produce a response
        while keeping gpt-4o-mini as its stored model.
        """
        test_agent = agent_with_gpt4o_mini

        # Confirm the fixture's default model before exercising the override.
        assert test_agent.model is not None
        assert "gpt-4o-mini" in test_agent.model

        # Send one message routed through the override model.
        result = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "openai/gpt-4o"},
        )

        # A populated message list means the send succeeded.
        assert result.messages is not None
        assert len(result.messages) > 0

        # A per-request override must not rewrite the stored configuration.
        refreshed = client.agents.retrieve(test_agent.id)
        assert refreshed.model is not None
        assert "gpt-4o-mini" in refreshed.model, "Agent's model should not be permanently changed"

    def test_override_model_cross_provider(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        Override an OpenAI-configured agent with a Google AI model.
        """
        test_agent = agent_with_gpt4o_mini

        # The fixture agent should identify as OpenAI/GPT.
        assert test_agent.model is not None
        lowered = test_agent.model.lower()
        assert "openai" in lowered or "gpt" in lowered

        # Route a single message through a different provider.
        result = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "google_ai/gemini-2.0-flash"},
        )
        assert result.messages is not None
        assert len(result.messages) > 0

        # Stored model remains the OpenAI default.
        refreshed = client.agents.retrieve(test_agent.id)
        assert refreshed.model is not None
        assert "gpt-4o-mini" in refreshed.model, "Agent's model should not be permanently changed"

    def test_override_model_with_none_uses_default(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        Omitting override_model should fall back to the agent's own model.
        """
        test_agent = agent_with_gpt4o_mini

        result = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
        )
        assert result.messages is not None
        assert len(result.messages) > 0

    def test_override_model_invalid_handle(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        An unknown override_model handle should surface as an API error
        (400/404/422 depending on server implementation).
        """
        test_agent = agent_with_gpt4o_mini

        with pytest.raises(APIError) as exc_info:
            client.agents.messages.create(
                agent_id=test_agent.id,
                messages=USER_MESSAGE_SIMPLE,
                extra_body={"override_model": "invalid/nonexistent-model-xyz"},
            )

        err = exc_info.value
        # SDK types haven't been regenerated yet, hence the ignore.
        assert hasattr(err, "status_code") and err.status_code in [400, 404, 422]  # type: ignore[attr-defined]
class TestOverrideModelStreaming:
    """Streaming variants of the override_model tests."""

    def test_override_model_streaming(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        Streaming requests should honor override_model just like blocking ones.
        """
        test_agent = agent_with_gpt4o_mini

        # messages.create with streaming=True is used here (not create_stream).
        result = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "openai/gpt-4o"},
            streaming=True,
        )

        # The SDK accumulates streamed chunks into a single response object.
        assert result is not None

        # Overriding one request must not rewrite the agent's stored model.
        refreshed = client.agents.retrieve(test_agent.id)
        assert refreshed.model is not None
        assert "gpt-4o-mini" in refreshed.model

    def test_override_model_streaming_cross_provider(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        Streaming should also work when the override crosses providers
        (OpenAI-configured agent, Google AI override).
        """
        test_agent = agent_with_gpt4o_mini

        result = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "google_ai/gemini-2.0-flash"},
            streaming=True,
        )
        assert result is not None
class TestOverrideModelAsync:
    """Async (run-based) message-send tests for override_model."""

    @pytest.mark.asyncio
    async def test_override_model_async(
        self,
        async_client: AsyncLetta,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        create_async should accept override_model, the resulting run should
        complete, and the agent's configured model should stay untouched.
        """
        test_agent = agent_with_gpt4o_mini

        # Kick off an async run with the override applied.
        run = await async_client.agents.messages.create_async(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "openai/gpt-4o"},
        )
        assert run is not None
        assert run.id is not None

        # Poll the run until it hits a terminal state or we exhaust the budget.
        max_wait = 60  # seconds
        poll_interval = 1  # second
        waited = 0
        latest = None
        while waited < max_wait:
            latest = client.runs.retrieve(run.id)
            if latest.status in ["completed", "failed", "cancelled"]:
                break
            time.sleep(poll_interval)
            waited += poll_interval

        # The run must have finished, and finished successfully.
        assert latest is not None
        assert latest.status == "completed", f"Run failed with status: {latest.status}"

        # Agent configuration is unchanged afterwards.
        refreshed = client.agents.retrieve(test_agent.id)
        assert refreshed.model is not None
        assert "gpt-4o-mini" in refreshed.model
class TestOverrideModelConversation:
    """override_model tests routed through conversation endpoints."""

    def test_override_model_conversation(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        Conversation-scoped sends should accept override_model without
        persisting the override onto the agent.
        """
        test_agent = agent_with_gpt4o_mini

        # A conversation must exist before conversation-scoped sends.
        convo = client.conversations.create(agent_id=test_agent.id)
        assert convo is not None
        assert convo.id is not None

        # Send through the conversation with the override applied.
        result = client.conversations.messages.create(
            conversation_id=convo.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "openai/gpt-4o"},
        )
        assert result is not None

        # Stored agent model is unchanged.
        refreshed = client.agents.retrieve(test_agent.id)
        assert refreshed.model is not None
        assert "gpt-4o-mini" in refreshed.model
class TestOverrideModelConsistency:
    """Ensure override_model never leaks into persisted agent state."""

    def test_multiple_override_models_in_sequence(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        Alternate overrides (gpt-4o, then Gemini, then none) across three
        sends; the agent's configured model must survive all of them.
        """
        test_agent = agent_with_gpt4o_mini
        baseline_model = test_agent.model

        # 1) OpenAI override.
        first = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "openai/gpt-4o"},
        )
        assert first.messages is not None

        # 2) Cross-provider override.
        second = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "google_ai/gemini-2.0-flash"},
        )
        assert second.messages is not None

        # 3) No override — should fall back to the configured default.
        third = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
        )
        assert third.messages is not None

        # The stored model is exactly what it was before any of the sends.
        refreshed = client.agents.retrieve(test_agent.id)
        assert refreshed.model == baseline_model

    def test_override_model_does_not_modify_agent_state(
        self,
        client: Letta,
        agent_with_gpt4o_mini: AgentState,
    ) -> None:
        """
        Snapshot the agent before and after an overridden send and compare
        the key identity fields.
        """
        test_agent = agent_with_gpt4o_mini

        # Full state snapshot before the send.
        before = client.agents.retrieve(test_agent.id)

        result = client.agents.messages.create(
            agent_id=test_agent.id,
            messages=USER_MESSAGE_SIMPLE,
            extra_body={"override_model": "openai/gpt-4o"},
        )
        assert result.messages is not None

        # Key fields must be byte-for-byte unchanged.
        after = client.agents.retrieve(test_agent.id)
        assert after.model == before.model
        assert after.name == before.name
        assert after.agent_type == before.agent_type