From 9ce12497383913319f8a64e1f262f1658c193554 Mon Sep 17 00:00:00 2001 From: Ari Webb Date: Wed, 28 Jan 2026 12:13:13 -0800 Subject: [PATCH] feat: openrouter byok (#9148) * feat: openrouter byok * new client is unnecessary * revert json diffs --- fern/openapi.json | 3 + letta/adapters/letta_llm_stream_adapter.py | 2 +- letta/adapters/simple_llm_stream_adapter.py | 1 + letta/constants.py | 2 +- letta/llm_api/llm_api_tools.py | 4 +- letta/llm_api/llm_client.py | 8 ++ letta/schemas/enums.py | 1 + letta/schemas/llm_config.py | 1 + letta/schemas/model.py | 1 + letta/schemas/providers/base.py | 3 + letta/schemas/providers/openai.py | 15 +-- letta/schemas/providers/openrouter.py | 102 +++++++++++++++----- letta/services/provider_manager.py | 2 +- 13 files changed, 110 insertions(+), 35 deletions(-) diff --git a/fern/openapi.json b/fern/openapi.json index 92eac32f..44fbf5ef 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -36204,6 +36204,7 @@ "deepseek", "xai", "zai", + "openrouter", "chatgpt_oauth" ], "title": "Model Endpoint Type", @@ -38696,6 +38697,7 @@ "deepseek", "xai", "zai", + "openrouter", "chatgpt_oauth" ], "title": "Model Endpoint Type", @@ -40313,6 +40315,7 @@ "together", "vllm", "sglang", + "openrouter", "xai", "zai" ], diff --git a/letta/adapters/letta_llm_stream_adapter.py b/letta/adapters/letta_llm_stream_adapter.py index 5e1fa7a6..4ae64e91 100644 --- a/letta/adapters/letta_llm_stream_adapter.py +++ b/letta/adapters/letta_llm_stream_adapter.py @@ -70,7 +70,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter): run_id=self.run_id, step_id=step_id, ) - elif self.llm_config.model_endpoint_type == ProviderType.openai: + elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.openrouter]: # For non-v1 agents, always use Chat Completions streaming interface self.interface = OpenAIStreamingInterface( use_assistant_message=use_assistant_message, diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py index a475c098..2313ff2b 100644 --- a/letta/adapters/simple_llm_stream_adapter.py +++ b/letta/adapters/simple_llm_stream_adapter.py @@ -84,6 +84,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): elif self.llm_config.model_endpoint_type in [ ProviderType.openai, ProviderType.deepseek, + ProviderType.openrouter, ProviderType.zai, ProviderType.chatgpt_oauth, ]: diff --git a/letta/constants.py b/letta/constants.py index 9d9c94fb..a6b53c6d 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -25,7 +25,7 @@ PROVIDER_ORDER = { "xai": 12, "lmstudio": 13, "zai": 14, - "openrouter": 15, # Note: OpenRouter uses OpenRouterProvider, not a ProviderType enum + "openrouter": 15, } ADMIN_PREFIX = "/v1/admin" diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 0bee61a6..280b3eb0 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -167,8 +167,8 @@ def create( printd("unsetting function_call because functions is None") function_call = None - # openai - if llm_config.model_endpoint_type == "openai": + # openai and openrouter (OpenAI-compatible) + if llm_config.model_endpoint_type in ["openai", "openrouter"]: if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1": # only is a problem if we are *not* using an openai proxy raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"]) diff --git a/letta/llm_api/llm_client.py b/letta/llm_api/llm_client.py index c10b67f5..18159743 100644 --- a/letta/llm_api/llm_client.py +++ b/letta/llm_api/llm_client.py @@ -100,6 +100,14 @@ class LLMClient: put_inner_thoughts_first=put_inner_thoughts_first, actor=actor, ) + case ProviderType.openrouter: + # OpenRouter uses OpenAI-compatible API, so we can use the OpenAI client directly + from letta.llm_api.openai_client import OpenAIClient + + return OpenAIClient( + put_inner_thoughts_first=put_inner_thoughts_first, + actor=actor, + ) case ProviderType.deepseek: from letta.llm_api.deepseek_client import DeepseekClient diff --git a/letta/schemas/enums.py b/letta/schemas/enums.py index d4db5c18..96efb446 100644 --- a/letta/schemas/enums.py +++ b/letta/schemas/enums.py @@ -70,6 +70,7 @@ class ProviderType(str, Enum): together = "together" vllm = "vllm" sglang = "sglang" + openrouter = "openrouter" xai = "xai" zai = "zai" diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index f440ff8b..4dbc96b0 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -50,6 +50,7 @@ class LLMConfig(BaseModel): "deepseek", "xai", "zai", + "openrouter", "chatgpt_oauth", ] = Field(..., description="The endpoint type for the model.") model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.") diff --git a/letta/schemas/model.py b/letta/schemas/model.py index 4023c0a0..fa657e5e 100644 --- a/letta/schemas/model.py +++ b/letta/schemas/model.py @@ -49,6 +49,7 @@ class Model(LLMConfig, ModelBase): "deepseek", "xai", "zai", + "openrouter", "chatgpt_oauth", ] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True) context_window: int = Field( diff --git a/letta/schemas/providers/base.py b/letta/schemas/providers/base.py index f24794d6..73e4a239 100644 --- a/letta/schemas/providers/base.py +++ b/letta/schemas/providers/base.py @@ -196,6 +196,7 @@ class Provider(ProviderBase): MistralProvider, OllamaProvider, OpenAIProvider, + OpenRouterProvider, SGLangProvider, TogetherProvider, VLLMProvider, @@ -247,6 +248,8 @@ class Provider(ProviderBase): return BedrockProvider(**self.model_dump(exclude_none=True)) case ProviderType.minimax: return MiniMaxProvider(**self.model_dump(exclude_none=True)) + case ProviderType.openrouter: + return OpenRouterProvider(**self.model_dump(exclude_none=True)) case _: raise ValueError(f"Unknown provider type: {self.provider_type}") diff --git a/letta/schemas/providers/openai.py b/letta/schemas/providers/openai.py index 80cb5b64..a1bdbb26 100644 --- a/letta/schemas/providers/openai.py +++ b/letta/schemas/providers/openai.py @@ -64,12 +64,15 @@ class OpenAIProvider(Provider): async def _get_models_async(self) -> list[dict]: from letta.llm_api.openai import openai_get_model_list_async - # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)... - # See: https://openrouter.ai/docs/requests - extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None - - # Similar to Nebius - extra_params = {"verbose": True} if "nebius.com" in self.base_url else None + # Provider-specific extra parameters for model listing + extra_params = None + if "openrouter.ai" in self.base_url: + # OpenRouter: filter for models with tool calling support + # See: https://openrouter.ai/docs/requests + extra_params = {"supported_parameters": "tools"} + elif "nebius.com" in self.base_url: + # Nebius: use verbose mode for better model info + extra_params = {"verbose": True} # Decrypt API key before using api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None diff --git a/letta/schemas/providers/openrouter.py b/letta/schemas/providers/openrouter.py index 4423b0d5..7f1ba419 100644 --- a/letta/schemas/providers/openrouter.py +++ b/letta/schemas/providers/openrouter.py @@ -1,52 +1,106 @@ from typing import Literal +from openai import AsyncOpenAI, AuthenticationError from pydantic import Field -from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW +from letta.errors import ErrorCode, LLMAuthenticationError, LLMError from letta.log import get_logger -from letta.schemas.embedding_config import EmbeddingConfig from letta.schemas.enums import ProviderCategory, ProviderType from letta.schemas.llm_config import LLMConfig from letta.schemas.providers.openai import OpenAIProvider logger = get_logger(__name__) -# ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"} -# DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"} -# DEFAULT_EMBEDDING_BATCH_SIZE = 1024 +# Default context window for models not in the API response +DEFAULT_CONTEXT_WINDOW = 128000 class OpenRouterProvider(OpenAIProvider): - provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.") + """ + OpenRouter provider - https://openrouter.ai/ + + OpenRouter is an OpenAI-compatible API gateway that provides access to + multiple LLM providers (Anthropic, Meta, Mistral, etc.) through a unified API. + """ + + provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.") provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)") api_key: str | None = Field(None, description="API key for the OpenRouter API.", deprecated=True) base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.") - def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]: + async def check_api_key(self): + """Check if the API key is valid by making a test request to the OpenRouter API.""" + api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None + if not api_key: + raise ValueError("No API key provided") + + try: + # Use async OpenAI client pointed at OpenRouter's endpoint + client = AsyncOpenAI(api_key=api_key, base_url=self.base_url) + # Just list models to verify API key works + await client.models.list() + except AuthenticationError as e: + raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED) + except Exception as e: + raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR) + + def get_model_context_window_size(self, model_name: str) -> int | None: + """Get the context window size for an OpenRouter model. + + OpenRouter models provide context_length in the API response, + so this is mainly a fallback. """ - This handles filtering out LLM Models by provider that meet Letta's requirements. + return DEFAULT_CONTEXT_WINDOW + + async def list_llm_models_async(self) -> list[LLMConfig]: """ + Return available OpenRouter models that support tool calling. + + OpenRouter provides a models endpoint that supports filtering by supported_parameters. + We filter for models that support 'tools' to ensure Letta compatibility. + """ + from letta.llm_api.openai import openai_get_model_list_async + + api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None + + # OpenRouter supports filtering models by supported parameters + # See: https://openrouter.ai/docs/requests + extra_params = {"supported_parameters": "tools"} + + response = await openai_get_model_list_async( + self.base_url, + api_key=api_key, + extra_params=extra_params, + ) + + data = response.get("data", response) + configs = [] for model in data: - check = self._do_model_checks_for_name_and_context_size(model) - if check is None: + if "id" not in model: + logger.warning(f"OpenRouter model missing 'id' field: {model}") continue - model_name, context_window_size = check - handle = self.get_handle(model_name) + model_name = model["id"] - config = LLMConfig( - model=model_name, - model_endpoint_type="openai", - model_endpoint=self.base_url, - context_window=context_window_size, - handle=handle, - max_tokens=self.get_default_max_output_tokens(model_name), - provider_name=self.name, - provider_category=self.provider_category, + # OpenRouter returns context_length in the model listing + if "context_length" in model and model["context_length"]: + context_window_size = model["context_length"] + else: + context_window_size = self.get_model_context_window_size(model_name) + logger.debug(f"Model {model_name} missing context_length, using default: {context_window_size}") + + configs.append( + LLMConfig( + model=model_name, + model_endpoint_type="openrouter", + model_endpoint=self.base_url, + context_window=context_window_size, + handle=self.get_handle(model_name), + max_tokens=self.get_default_max_output_tokens(model_name), + provider_name=self.name, + provider_category=self.provider_category, + ) ) - config = self._set_model_parameter_tuned_defaults(model_name, config) - configs.append(config) - return configs diff --git a/letta/services/provider_manager.py b/letta/services/provider_manager.py index 670c9e4a..1556ecef 100644 --- a/letta/services/provider_manager.py +++ b/letta/services/provider_manager.py @@ -700,7 +700,7 @@ class ProviderManager: enabled=True, model_endpoint_type=llm_config.model_endpoint_type, max_context_window=llm_config.context_window, - supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek"], + supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek", "openrouter"], supports_tool_calling=True, # Assume true for LLMs for now )