From 9ce12497383913319f8a64e1f262f1658c193554 Mon Sep 17 00:00:00 2001
From: Ari Webb <arijwebb@gmail.com>
Date: Wed, 28 Jan 2026 12:13:13 -0800
Subject: [PATCH] feat: openrouter byok (#9148)

* feat: openrouter byok

* new client is unnecessary

* revert json diffs
---
 fern/openapi.json                           |   3 +
 letta/adapters/letta_llm_stream_adapter.py  |   2 +-
 letta/adapters/simple_llm_stream_adapter.py |   1 +
 letta/constants.py                          |   2 +-
 letta/llm_api/llm_api_tools.py              |   4 +-
 letta/llm_api/llm_client.py                 |   8 ++
 letta/schemas/enums.py                      |   1 +
 letta/schemas/llm_config.py                 |   1 +
 letta/schemas/model.py                      |   1 +
 letta/schemas/providers/base.py             |   3 +
 letta/schemas/providers/openai.py           |  15 +--
 letta/schemas/providers/openrouter.py       | 102 +++++++++++++++-----
 letta/services/provider_manager.py          |   2 +-
 13 files changed, 110 insertions(+), 35 deletions(-)

diff --git a/fern/openapi.json b/fern/openapi.json
index 92eac32f..44fbf5ef 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -36204,6 +36204,7 @@
               "deepseek",
               "xai",
               "zai",
+              "openrouter",
               "chatgpt_oauth"
             ],
             "title": "Model Endpoint Type",
@@ -38696,6 +38697,7 @@
               "deepseek",
               "xai",
               "zai",
+              "openrouter",
               "chatgpt_oauth"
             ],
             "title": "Model Endpoint Type",
@@ -40313,6 +40315,7 @@
           "together",
           "vllm",
           "sglang",
+          "openrouter",
           "xai",
           "zai"
         ],
diff --git a/letta/adapters/letta_llm_stream_adapter.py b/letta/adapters/letta_llm_stream_adapter.py
index 5e1fa7a6..4ae64e91 100644
--- a/letta/adapters/letta_llm_stream_adapter.py
+++ b/letta/adapters/letta_llm_stream_adapter.py
@@ -70,7 +70,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
                 run_id=self.run_id,
                 step_id=step_id,
             )
-        elif self.llm_config.model_endpoint_type == ProviderType.openai:
+        elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.openrouter]:
             # For non-v1 agents, always use Chat Completions streaming interface
             self.interface = OpenAIStreamingInterface(
                 use_assistant_message=use_assistant_message,
diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py
index a475c098..2313ff2b 100644
--- a/letta/adapters/simple_llm_stream_adapter.py
+++ b/letta/adapters/simple_llm_stream_adapter.py
@@ -84,6 +84,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
         elif self.llm_config.model_endpoint_type in [
             ProviderType.openai,
             ProviderType.deepseek,
+            ProviderType.openrouter,
             ProviderType.zai,
             ProviderType.chatgpt_oauth,
         ]:
diff --git a/letta/constants.py b/letta/constants.py
index 9d9c94fb..a6b53c6d 100644
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -25,7 +25,7 @@ PROVIDER_ORDER = {
     "xai": 12,
     "lmstudio": 13,
     "zai": 14,
-    "openrouter": 15,  # Note: OpenRouter uses OpenRouterProvider, not a ProviderType enum
+    "openrouter": 15,
 }
 
 ADMIN_PREFIX = "/v1/admin"
diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 0bee61a6..280b3eb0 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -167,8 +167,8 @@ def create(
         printd("unsetting function_call because functions is None")
         function_call = None
 
-    # openai
-    if llm_config.model_endpoint_type == "openai":
+    # openai and openrouter (OpenAI-compatible)
+    if llm_config.model_endpoint_type in ["openai", "openrouter"]:
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
diff --git a/letta/llm_api/llm_client.py b/letta/llm_api/llm_client.py
index c10b67f5..18159743 100644
--- a/letta/llm_api/llm_client.py
+++ b/letta/llm_api/llm_client.py
@@ -100,6 +100,14 @@ class LLMClient:
                     put_inner_thoughts_first=put_inner_thoughts_first,
                     actor=actor,
                 )
+            case ProviderType.openrouter:
+                # OpenRouter uses OpenAI-compatible API, so we can use the OpenAI client directly
+                from letta.llm_api.openai_client import OpenAIClient
+
+                return OpenAIClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
             case ProviderType.deepseek:
                 from letta.llm_api.deepseek_client import DeepseekClient
 
diff --git a/letta/schemas/enums.py b/letta/schemas/enums.py
index d4db5c18..96efb446 100644
--- a/letta/schemas/enums.py
+++ b/letta/schemas/enums.py
@@ -70,6 +70,7 @@ class ProviderType(str, Enum):
     together = "together"
     vllm = "vllm"
     sglang = "sglang"
+    openrouter = "openrouter"
     xai = "xai"
     zai = "zai"
 
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index f440ff8b..4dbc96b0 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -50,6 +50,7 @@ class LLMConfig(BaseModel):
         "deepseek",
         "xai",
         "zai",
+        "openrouter",
         "chatgpt_oauth",
     ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
diff --git a/letta/schemas/model.py b/letta/schemas/model.py
index 4023c0a0..fa657e5e 100644
--- a/letta/schemas/model.py
+++ b/letta/schemas/model.py
@@ -49,6 +49,7 @@ class Model(LLMConfig, ModelBase):
         "deepseek",
         "xai",
         "zai",
+        "openrouter",
         "chatgpt_oauth",
     ] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True)
     context_window: int = Field(
diff --git a/letta/schemas/providers/base.py b/letta/schemas/providers/base.py
index f24794d6..73e4a239 100644
--- a/letta/schemas/providers/base.py
+++ b/letta/schemas/providers/base.py
@@ -196,6 +196,7 @@ class Provider(ProviderBase):
             MistralProvider,
             OllamaProvider,
             OpenAIProvider,
+            OpenRouterProvider,
             SGLangProvider,
             TogetherProvider,
             VLLMProvider,
@@ -247,6 +248,8 @@ class Provider(ProviderBase):
                 return BedrockProvider(**self.model_dump(exclude_none=True))
             case ProviderType.minimax:
                 return MiniMaxProvider(**self.model_dump(exclude_none=True))
+            case ProviderType.openrouter:
+                return OpenRouterProvider(**self.model_dump(exclude_none=True))
             case _:
                 raise ValueError(f"Unknown provider type: {self.provider_type}")
 
diff --git a/letta/schemas/providers/openai.py b/letta/schemas/providers/openai.py
index 80cb5b64..a1bdbb26 100644
--- a/letta/schemas/providers/openai.py
+++ b/letta/schemas/providers/openai.py
@@ -64,12 +64,15 @@ class OpenAIProvider(Provider):
     async def _get_models_async(self) -> list[dict]:
         from letta.llm_api.openai import openai_get_model_list_async
 
-        # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
-        # See: https://openrouter.ai/docs/requests
-        extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None
-
-        # Similar to Nebius
-        extra_params = {"verbose": True} if "nebius.com" in self.base_url else None
+        # Provider-specific extra parameters for model listing
+        extra_params = None
+        if "openrouter.ai" in self.base_url:
+            # OpenRouter: filter for models with tool calling support
+            # See: https://openrouter.ai/docs/requests
+            extra_params = {"supported_parameters": "tools"}
+        elif "nebius.com" in self.base_url:
+            # Nebius: use verbose mode for better model info
+            extra_params = {"verbose": True}
 
         # Decrypt API key before using
         api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
diff --git a/letta/schemas/providers/openrouter.py b/letta/schemas/providers/openrouter.py
index 4423b0d5..7f1ba419 100644
--- a/letta/schemas/providers/openrouter.py
+++ b/letta/schemas/providers/openrouter.py
@@ -1,52 +1,106 @@
 from typing import Literal
 
+from openai import AsyncOpenAI, AuthenticationError
 from pydantic import Field
 
-from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
+from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
 from letta.log import get_logger
-from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.providers.openai import OpenAIProvider
 
 logger = get_logger(__name__)
 
-# ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
-# DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
-# DEFAULT_EMBEDDING_BATCH_SIZE = 1024
+# Fallback context window for listed models whose entry has no usable `context_length`
+DEFAULT_CONTEXT_WINDOW = 128000
 
 
 class OpenRouterProvider(OpenAIProvider):
-    provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
+    """
+    OpenRouter provider - https://openrouter.ai/
+
+    OpenRouter is an OpenAI-compatible API gateway that provides access to
+    multiple LLM providers (Anthropic, Meta, Mistral, etc.) through a unified API.
+    """
+
+    provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.")
     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
     api_key: str | None = Field(None, description="API key for the OpenRouter API.", deprecated=True)
     base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.")
 
-    def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
+    async def check_api_key(self):
+        """Validate the stored API key by listing models; raises ValueError, LLMAuthenticationError, or LLMError."""
+        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
+        if not api_key:
+            raise ValueError("No API key provided")
+
+        try:
+            # NOTE(review): confirm OpenRouter's model-list endpoint rejects invalid keys; if it is public this check always passes
+            client = AsyncOpenAI(api_key=api_key, base_url=self.base_url)
+            await client.models.list()
+        except AuthenticationError as e:
+            # Chain explicitly so the SDK error is recorded as the direct cause (__cause__)
+            raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED) from e
+        except Exception as e:
+            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR) from e
+
+    def get_model_context_window_size(self, model_name: str) -> int | None:
+        """Return the fallback context window size for an OpenRouter model.
+
+        Always returns DEFAULT_CONTEXT_WINDOW; `model_name` is not consulted. Used
+        when a model's listing entry has no usable `context_length`.
         """
-        This handles filtering out LLM Models by provider that meet Letta's requirements.
+        return DEFAULT_CONTEXT_WINDOW
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
         """
+        Return an LLMConfig for each listed OpenRouter model, requesting tool-capable models only.
+
+        The model list is requested with supported_parameters="tools" (see the link below);
+        entries without an "id" are skipped with a warning.
+        """
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
+
+        # OpenRouter supports filtering models by supported parameters
+        # See: https://openrouter.ai/docs/requests
+        extra_params = {"supported_parameters": "tools"}
+
+        response = await openai_get_model_list_async(
+            self.base_url,
+            api_key=api_key,
+            extra_params=extra_params,
+        )
+
+        data = response.get("data", response)
+
         configs = []
         for model in data:
-            check = self._do_model_checks_for_name_and_context_size(model)
-            if check is None:
+            if "id" not in model:
+                logger.warning(f"OpenRouter model missing 'id' field: {model}")
                 continue
-            model_name, context_window_size = check
 
-            handle = self.get_handle(model_name)
+            model_name = model["id"]
 
-            config = LLMConfig(
-                model=model_name,
-                model_endpoint_type="openai",
-                model_endpoint=self.base_url,
-                context_window=context_window_size,
-                handle=handle,
-                max_tokens=self.get_default_max_output_tokens(model_name),
-                provider_name=self.name,
-                provider_category=self.provider_category,
+            # Use the listing's context_length when present and truthy; otherwise fall back to the default
+            if "context_length" in model and model["context_length"]:
+                context_window_size = model["context_length"]
+            else:
+                context_window_size = self.get_model_context_window_size(model_name)
+                logger.debug(f"Model {model_name} missing context_length, using default: {context_window_size}")
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="openrouter",
+                    model_endpoint=self.base_url,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                    max_tokens=self.get_default_max_output_tokens(model_name),
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
             )
 
-            config = self._set_model_parameter_tuned_defaults(model_name, config)
-            configs.append(config)
-
         return configs
diff --git a/letta/services/provider_manager.py b/letta/services/provider_manager.py
index 670c9e4a..1556ecef 100644
--- a/letta/services/provider_manager.py
+++ b/letta/services/provider_manager.py
@@ -700,7 +700,7 @@ class ProviderManager:
                         enabled=True,
                         model_endpoint_type=llm_config.model_endpoint_type,
                         max_context_window=llm_config.context_window,
-                        supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek"],
+                        supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek", "openrouter"],
                         supports_tool_calling=True,  # Assume true for LLMs for now
                     )