feat: openrouter byok (#9148)

* feat: openrouter byok

* new client is unnecessary

* revert json diffs
This commit is contained in:
Ari Webb
2026-01-28 12:13:13 -08:00
committed by Caren Thomas
parent d992aa0df4
commit 9ce1249738
13 changed files with 110 additions and 35 deletions

View File

@@ -36204,6 +36204,7 @@
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth"
],
"title": "Model Endpoint Type",
@@ -38696,6 +38697,7 @@
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth"
],
"title": "Model Endpoint Type",
@@ -40313,6 +40315,7 @@
"together",
"vllm",
"sglang",
"openrouter",
"xai",
"zai"
],

View File

@@ -70,7 +70,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
run_id=self.run_id,
step_id=step_id,
)
elif self.llm_config.model_endpoint_type == ProviderType.openai:
elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.openrouter]:
# For non-v1 agents, always use Chat Completions streaming interface
self.interface = OpenAIStreamingInterface(
use_assistant_message=use_assistant_message,

View File

@@ -84,6 +84,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
elif self.llm_config.model_endpoint_type in [
ProviderType.openai,
ProviderType.deepseek,
ProviderType.openrouter,
ProviderType.zai,
ProviderType.chatgpt_oauth,
]:

View File

@@ -25,7 +25,7 @@ PROVIDER_ORDER = {
"xai": 12,
"lmstudio": 13,
"zai": 14,
"openrouter": 15, # Note: OpenRouter uses OpenRouterProvider, not a ProviderType enum
"openrouter": 15,
}
ADMIN_PREFIX = "/v1/admin"

View File

@@ -167,8 +167,8 @@ def create(
printd("unsetting function_call because functions is None")
function_call = None
# openai
if llm_config.model_endpoint_type == "openai":
# openai and openrouter (OpenAI-compatible)
if llm_config.model_endpoint_type in ["openai", "openrouter"]:
if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
# only is a problem if we are *not* using an openai proxy
raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])

View File

@@ -100,6 +100,14 @@ class LLMClient:
put_inner_thoughts_first=put_inner_thoughts_first,
actor=actor,
)
case ProviderType.openrouter:
# OpenRouter uses OpenAI-compatible API, so we can use the OpenAI client directly
from letta.llm_api.openai_client import OpenAIClient
return OpenAIClient(
put_inner_thoughts_first=put_inner_thoughts_first,
actor=actor,
)
case ProviderType.deepseek:
from letta.llm_api.deepseek_client import DeepseekClient

View File

@@ -70,6 +70,7 @@ class ProviderType(str, Enum):
together = "together"
vllm = "vllm"
sglang = "sglang"
openrouter = "openrouter"
xai = "xai"
zai = "zai"

View File

@@ -50,6 +50,7 @@ class LLMConfig(BaseModel):
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth",
] = Field(..., description="The endpoint type for the model.")
model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")

View File

@@ -49,6 +49,7 @@ class Model(LLMConfig, ModelBase):
"deepseek",
"xai",
"zai",
"openrouter",
"chatgpt_oauth",
] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True)
context_window: int = Field(

View File

@@ -196,6 +196,7 @@ class Provider(ProviderBase):
MistralProvider,
OllamaProvider,
OpenAIProvider,
OpenRouterProvider,
SGLangProvider,
TogetherProvider,
VLLMProvider,
@@ -247,6 +248,8 @@ class Provider(ProviderBase):
return BedrockProvider(**self.model_dump(exclude_none=True))
case ProviderType.minimax:
return MiniMaxProvider(**self.model_dump(exclude_none=True))
case ProviderType.openrouter:
return OpenRouterProvider(**self.model_dump(exclude_none=True))
case _:
raise ValueError(f"Unknown provider type: {self.provider_type}")

View File

@@ -64,12 +64,15 @@ class OpenAIProvider(Provider):
async def _get_models_async(self) -> list[dict]:
from letta.llm_api.openai import openai_get_model_list_async
# Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
# See: https://openrouter.ai/docs/requests
extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None
# Similar to Nebius
extra_params = {"verbose": True} if "nebius.com" in self.base_url else None
# Provider-specific extra parameters for model listing
extra_params = None
if "openrouter.ai" in self.base_url:
# OpenRouter: filter for models with tool calling support
# See: https://openrouter.ai/docs/requests
extra_params = {"supported_parameters": "tools"}
elif "nebius.com" in self.base_url:
# Nebius: use verbose mode for better model info
extra_params = {"verbose": True}
# Decrypt API key before using
api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None

View File

@@ -1,52 +1,106 @@
from typing import Literal
from openai import AsyncOpenAI, AuthenticationError
from pydantic import Field
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.log import get_logger
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.openai import OpenAIProvider
logger = get_logger(__name__)
# ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
# DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
# DEFAULT_EMBEDDING_BATCH_SIZE = 1024
# Default context window for models not in the API response
DEFAULT_CONTEXT_WINDOW = 128000
class OpenRouterProvider(OpenAIProvider):
    """
    OpenRouter provider - https://openrouter.ai/

    OpenRouter is an OpenAI-compatible API gateway that provides access to
    multiple LLM providers (Anthropic, Meta, Mistral, etc.) through a unified API.
    """

    # NOTE(review): provider_type must be ProviderType.openrouter (not openai),
    # otherwise routing in LLMClient/Provider.cast_to_subtype picks the wrong client.
    provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the OpenRouter API.", deprecated=True)
    base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.")

    async def check_api_key(self):
        """Check if the API key is valid by making a test request to the OpenRouter API.

        Raises:
            ValueError: if no API key is configured.
            LLMAuthenticationError: if OpenRouter rejects the key.
            LLMError: for any other failure while contacting OpenRouter.
        """
        # Decrypt the stored key before use; api_key_enc may be unset for base providers.
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        if not api_key:
            raise ValueError("No API key provided")
        try:
            # Use async OpenAI client pointed at OpenRouter's endpoint
            client = AsyncOpenAI(api_key=api_key, base_url=self.base_url)
            # Just list models to verify API key works
            await client.models.list()
        except AuthenticationError as e:
            raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED)
        except Exception as e:
            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)

    def get_model_context_window_size(self, model_name: str) -> int | None:
        """Get the context window size for an OpenRouter model.

        OpenRouter models provide context_length in the API response,
        so this is mainly a fallback used when that field is absent.
        """
        return DEFAULT_CONTEXT_WINDOW

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """
        Return available OpenRouter models that support tool calling.

        OpenRouter provides a models endpoint that supports filtering by supported_parameters.
        We filter for models that support 'tools' to ensure Letta compatibility.
        """
        from letta.llm_api.openai import openai_get_model_list_async

        # Decrypt API key before using
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        # OpenRouter supports filtering models by supported parameters
        # See: https://openrouter.ai/docs/requests
        extra_params = {"supported_parameters": "tools"}
        response = await openai_get_model_list_async(
            self.base_url,
            api_key=api_key,
            extra_params=extra_params,
        )
        # The endpoint returns {"data": [...]}; tolerate a bare list as well.
        data = response.get("data", response)
        configs = []
        for model in data:
            if "id" not in model:
                logger.warning(f"OpenRouter model missing 'id' field: {model}")
                continue
            model_name = model["id"]
            # OpenRouter returns context_length in the model listing
            if "context_length" in model and model["context_length"]:
                context_window_size = model["context_length"]
            else:
                context_window_size = self.get_model_context_window_size(model_name)
                logger.debug(f"Model {model_name} missing context_length, using default: {context_window_size}")
            configs.append(
                LLMConfig(
                    model=model_name,
                    model_endpoint_type="openrouter",
                    model_endpoint=self.base_url,
                    context_window=context_window_size,
                    handle=self.get_handle(model_name),
                    max_tokens=self.get_default_max_output_tokens(model_name),
                    provider_name=self.name,
                    provider_category=self.provider_category,
                )
            )
        return configs

View File

@@ -700,7 +700,7 @@ class ProviderManager:
enabled=True,
model_endpoint_type=llm_config.model_endpoint_type,
max_context_window=llm_config.context_window,
supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek"],
supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek", "openrouter"],
supports_tool_calling=True, # Assume true for LLMs for now
)