feat: openrouter byok (#9148)

* feat: openrouter byok

* new client is unnecessary

* revert json diffs
This commit is contained in:
Ari Webb
2026-01-28 12:13:13 -08:00
committed by Caren Thomas
parent d992aa0df4
commit 9ce1249738
13 changed files with 110 additions and 35 deletions

View File

@@ -36204,6 +36204,7 @@
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth"
],
"title": "Model Endpoint Type",
@@ -38696,6 +38697,7 @@
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth"
],
"title": "Model Endpoint Type",
@@ -40313,6 +40315,7 @@
"together",
"vllm",
"sglang",
"openrouter",
"xai",
"zai"
],

View File

@@ -70,7 +70,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
run_id=self.run_id,
step_id=step_id,
)
elif self.llm_config.model_endpoint_type == ProviderType.openai:
elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.openrouter]:
# For non-v1 agents, always use Chat Completions streaming interface
self.interface = OpenAIStreamingInterface(
use_assistant_message=use_assistant_message,

View File

@@ -84,6 +84,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
elif self.llm_config.model_endpoint_type in [
ProviderType.openai,
ProviderType.deepseek,
ProviderType.openrouter,
ProviderType.zai,
ProviderType.chatgpt_oauth,
]:

View File

@@ -25,7 +25,7 @@ PROVIDER_ORDER = {
"xai": 12,
"lmstudio": 13,
"zai": 14,
"openrouter": 15, # Note: OpenRouter uses OpenRouterProvider, not a ProviderType enum
"openrouter": 15,
}
ADMIN_PREFIX = "/v1/admin"

View File

@@ -167,8 +167,8 @@ def create(
printd("unsetting function_call because functions is None")
function_call = None
# openai
if llm_config.model_endpoint_type == "openai":
# openai and openrouter (OpenAI-compatible)
if llm_config.model_endpoint_type in ["openai", "openrouter"]:
if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
# only is a problem if we are *not* using an openai proxy
raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])

View File

@@ -100,6 +100,14 @@ class LLMClient:
put_inner_thoughts_first=put_inner_thoughts_first,
actor=actor,
)
case ProviderType.openrouter:
# OpenRouter uses OpenAI-compatible API, so we can use the OpenAI client directly
from letta.llm_api.openai_client import OpenAIClient
return OpenAIClient(
put_inner_thoughts_first=put_inner_thoughts_first,
actor=actor,
)
case ProviderType.deepseek:
from letta.llm_api.deepseek_client import DeepseekClient

View File

@@ -70,6 +70,7 @@ class ProviderType(str, Enum):
together = "together"
vllm = "vllm"
sglang = "sglang"
openrouter = "openrouter"
xai = "xai"
zai = "zai"

View File

@@ -50,6 +50,7 @@ class LLMConfig(BaseModel):
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth",
] = Field(..., description="The endpoint type for the model.")
model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")

View File

@@ -49,6 +49,7 @@ class Model(LLMConfig, ModelBase):
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth",
] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True)
context_window: int = Field(

View File

@@ -196,6 +196,7 @@ class Provider(ProviderBase):
MistralProvider,
OllamaProvider,
OpenAIProvider,
OpenRouterProvider,
SGLangProvider,
TogetherProvider,
VLLMProvider,
@@ -247,6 +248,8 @@ class Provider(ProviderBase):
return BedrockProvider(**self.model_dump(exclude_none=True))
case ProviderType.minimax:
return MiniMaxProvider(**self.model_dump(exclude_none=True))
case ProviderType.openrouter:
return OpenRouterProvider(**self.model_dump(exclude_none=True))
case _:
raise ValueError(f"Unknown provider type: {self.provider_type}")

View File

@@ -64,12 +64,15 @@ class OpenAIProvider(Provider):
async def _get_models_async(self) -> list[dict]:
from letta.llm_api.openai import openai_get_model_list_async
# Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
# See: https://openrouter.ai/docs/requests
extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None
# Similar to Nebius
extra_params = {"verbose": True} if "nebius.com" in self.base_url else None
# Provider-specific extra parameters for model listing
extra_params = None
if "openrouter.ai" in self.base_url:
# OpenRouter: filter for models with tool calling support
# See: https://openrouter.ai/docs/requests
extra_params = {"supported_parameters": "tools"}
elif "nebius.com" in self.base_url:
# Nebius: use verbose mode for better model info
extra_params = {"verbose": True}
# Decrypt API key before using
api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None

View File

@@ -1,52 +1,106 @@
from typing import Literal
from openai import AsyncOpenAI, AuthenticationError
from pydantic import Field
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.log import get_logger
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.openai import OpenAIProvider
logger = get_logger(__name__)
# ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
# DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
# DEFAULT_EMBEDDING_BATCH_SIZE = 1024
# Default context window for models not in the API response
DEFAULT_CONTEXT_WINDOW = 128000
class OpenRouterProvider(OpenAIProvider):
    """
    OpenRouter provider - https://openrouter.ai/

    OpenRouter is an OpenAI-compatible API gateway that provides access to
    multiple LLM providers (Anthropic, Meta, Mistral, etc.) through a unified API.
    """

    # NOTE(review): provider_type must be ProviderType.openrouter (not openai),
    # otherwise routing in LLMClient/Provider.cast_to_subtype picks the wrong client.
    provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the OpenRouter API.", deprecated=True)
    base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.")

    async def check_api_key(self):
        """Check if the API key is valid by making a test request to the OpenRouter API.

        Raises:
            ValueError: if no API key is configured.
            LLMAuthenticationError: if OpenRouter rejects the key.
            LLMError: for any other failure while contacting OpenRouter.
        """
        # Decrypt the stored key before use; api_key_enc may be unset for base providers.
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        if not api_key:
            raise ValueError("No API key provided")
        try:
            # Use async OpenAI client pointed at OpenRouter's endpoint
            client = AsyncOpenAI(api_key=api_key, base_url=self.base_url)
            # Just list models to verify API key works
            await client.models.list()
        except AuthenticationError as e:
            raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED)
        except Exception as e:
            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)

    def get_model_context_window_size(self, model_name: str) -> int | None:
        """Get the context window size for an OpenRouter model.

        OpenRouter models provide context_length in the API response,
        so this is mainly a fallback used when that field is absent.
        """
        return DEFAULT_CONTEXT_WINDOW

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """
        Return available OpenRouter models that support tool calling.

        OpenRouter provides a models endpoint that supports filtering by supported_parameters.
        We filter for models that support 'tools' to ensure Letta compatibility.
        """
        from letta.llm_api.openai import openai_get_model_list_async

        # Decrypt API key before using
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        # OpenRouter supports filtering models by supported parameters
        # See: https://openrouter.ai/docs/requests
        extra_params = {"supported_parameters": "tools"}
        response = await openai_get_model_list_async(
            self.base_url,
            api_key=api_key,
            extra_params=extra_params,
        )
        # The endpoint returns {"data": [...]}; tolerate a bare list as well.
        data = response.get("data", response)
        configs = []
        for model in data:
            if "id" not in model:
                logger.warning(f"OpenRouter model missing 'id' field: {model}")
                continue
            model_name = model["id"]
            # OpenRouter returns context_length in the model listing
            if "context_length" in model and model["context_length"]:
                context_window_size = model["context_length"]
            else:
                context_window_size = self.get_model_context_window_size(model_name)
                logger.debug(f"Model {model_name} missing context_length, using default: {context_window_size}")
            configs.append(
                LLMConfig(
                    model=model_name,
                    model_endpoint_type="openrouter",
                    model_endpoint=self.base_url,
                    context_window=context_window_size,
                    handle=self.get_handle(model_name),
                    max_tokens=self.get_default_max_output_tokens(model_name),
                    provider_name=self.name,
                    provider_category=self.provider_category,
                )
            )
        return configs

View File

@@ -700,7 +700,7 @@ class ProviderManager:
enabled=True,
model_endpoint_type=llm_config.model_endpoint_type,
max_context_window=llm_config.context_window,
supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek"],
supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek", "openrouter"],
supports_tool_calling=True, # Assume true for LLMs for now
)