feat: openrouter byok (#9148)
* feat: openrouter byok * new client is unnecessary * revert json diffs
This commit is contained in:
@@ -36204,6 +36204,7 @@
|
||||
"deepseek",
|
||||
"xai",
|
||||
"zai",
|
||||
"openrouter",
|
||||
"chatgpt_oauth"
|
||||
],
|
||||
"title": "Model Endpoint Type",
|
||||
@@ -38696,6 +38697,7 @@
|
||||
"deepseek",
|
||||
"xai",
|
||||
"zai",
|
||||
"openrouter",
|
||||
"chatgpt_oauth"
|
||||
],
|
||||
"title": "Model Endpoint Type",
|
||||
@@ -40313,6 +40315,7 @@
|
||||
"together",
|
||||
"vllm",
|
||||
"sglang",
|
||||
"openrouter",
|
||||
"xai",
|
||||
"zai"
|
||||
],
|
||||
|
||||
@@ -70,7 +70,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
|
||||
run_id=self.run_id,
|
||||
step_id=step_id,
|
||||
)
|
||||
elif self.llm_config.model_endpoint_type == ProviderType.openai:
|
||||
elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.openrouter]:
|
||||
# For non-v1 agents, always use Chat Completions streaming interface
|
||||
self.interface = OpenAIStreamingInterface(
|
||||
use_assistant_message=use_assistant_message,
|
||||
|
||||
@@ -84,6 +84,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
|
||||
elif self.llm_config.model_endpoint_type in [
|
||||
ProviderType.openai,
|
||||
ProviderType.deepseek,
|
||||
ProviderType.openrouter,
|
||||
ProviderType.zai,
|
||||
ProviderType.chatgpt_oauth,
|
||||
]:
|
||||
|
||||
@@ -25,7 +25,7 @@ PROVIDER_ORDER = {
|
||||
"xai": 12,
|
||||
"lmstudio": 13,
|
||||
"zai": 14,
|
||||
"openrouter": 15, # Note: OpenRouter uses OpenRouterProvider, not a ProviderType enum
|
||||
"openrouter": 15,
|
||||
}
|
||||
|
||||
ADMIN_PREFIX = "/v1/admin"
|
||||
|
||||
@@ -167,8 +167,8 @@ def create(
|
||||
printd("unsetting function_call because functions is None")
|
||||
function_call = None
|
||||
|
||||
# openai
|
||||
if llm_config.model_endpoint_type == "openai":
|
||||
# openai and openrouter (OpenAI-compatible)
|
||||
if llm_config.model_endpoint_type in ["openai", "openrouter"]:
|
||||
if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
|
||||
# only is a problem if we are *not* using an openai proxy
|
||||
raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
|
||||
|
||||
@@ -100,6 +100,14 @@ class LLMClient:
|
||||
put_inner_thoughts_first=put_inner_thoughts_first,
|
||||
actor=actor,
|
||||
)
|
||||
case ProviderType.openrouter:
|
||||
# OpenRouter uses OpenAI-compatible API, so we can use the OpenAI client directly
|
||||
from letta.llm_api.openai_client import OpenAIClient
|
||||
|
||||
return OpenAIClient(
|
||||
put_inner_thoughts_first=put_inner_thoughts_first,
|
||||
actor=actor,
|
||||
)
|
||||
case ProviderType.deepseek:
|
||||
from letta.llm_api.deepseek_client import DeepseekClient
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ class ProviderType(str, Enum):
|
||||
together = "together"
|
||||
vllm = "vllm"
|
||||
sglang = "sglang"
|
||||
openrouter = "openrouter"
|
||||
xai = "xai"
|
||||
zai = "zai"
|
||||
|
||||
|
||||
@@ -50,6 +50,7 @@ class LLMConfig(BaseModel):
|
||||
"deepseek",
|
||||
"xai",
|
||||
"zai",
|
||||
"openrouter",
|
||||
"chatgpt_oauth",
|
||||
] = Field(..., description="The endpoint type for the model.")
|
||||
model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
|
||||
|
||||
@@ -49,6 +49,7 @@ class Model(LLMConfig, ModelBase):
|
||||
"deepseek",
|
||||
"xai",
|
||||
"zai",
|
||||
"openrouter",
|
||||
"chatgpt_oauth",
|
||||
] = Field(..., description="Deprecated: Use 'provider_type' field instead. The endpoint type for the model.", deprecated=True)
|
||||
context_window: int = Field(
|
||||
|
||||
@@ -196,6 +196,7 @@ class Provider(ProviderBase):
|
||||
MistralProvider,
|
||||
OllamaProvider,
|
||||
OpenAIProvider,
|
||||
OpenRouterProvider,
|
||||
SGLangProvider,
|
||||
TogetherProvider,
|
||||
VLLMProvider,
|
||||
@@ -247,6 +248,8 @@ class Provider(ProviderBase):
|
||||
return BedrockProvider(**self.model_dump(exclude_none=True))
|
||||
case ProviderType.minimax:
|
||||
return MiniMaxProvider(**self.model_dump(exclude_none=True))
|
||||
case ProviderType.openrouter:
|
||||
return OpenRouterProvider(**self.model_dump(exclude_none=True))
|
||||
case _:
|
||||
raise ValueError(f"Unknown provider type: {self.provider_type}")
|
||||
|
||||
|
||||
@@ -64,12 +64,15 @@ class OpenAIProvider(Provider):
|
||||
async def _get_models_async(self) -> list[dict]:
|
||||
from letta.llm_api.openai import openai_get_model_list_async
|
||||
|
||||
# Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
|
||||
# Provider-specific extra parameters for model listing
|
||||
extra_params = None
|
||||
if "openrouter.ai" in self.base_url:
|
||||
# OpenRouter: filter for models with tool calling support
|
||||
# See: https://openrouter.ai/docs/requests
|
||||
extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None
|
||||
|
||||
# Similar to Nebius
|
||||
extra_params = {"verbose": True} if "nebius.com" in self.base_url else None
|
||||
extra_params = {"supported_parameters": "tools"}
|
||||
elif "nebius.com" in self.base_url:
|
||||
# Nebius: use verbose mode for better model info
|
||||
extra_params = {"verbose": True}
|
||||
|
||||
# Decrypt API key before using
|
||||
api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
|
||||
|
||||
@@ -1,52 +1,106 @@
|
||||
from typing import Literal
|
||||
|
||||
from openai import AsyncOpenAI, AuthenticationError
|
||||
from pydantic import Field
|
||||
|
||||
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
|
||||
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.embedding_config import EmbeddingConfig
|
||||
from letta.schemas.enums import ProviderCategory, ProviderType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.providers.openai import OpenAIProvider
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
|
||||
# DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro", "chat"}
|
||||
# DEFAULT_EMBEDDING_BATCH_SIZE = 1024
|
||||
# Default context window for models not in the API response
|
||||
DEFAULT_CONTEXT_WINDOW = 128000
|
||||
|
||||
|
||||
class OpenRouterProvider(OpenAIProvider):
|
||||
provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
|
||||
"""
|
||||
OpenRouter provider - https://openrouter.ai/
|
||||
|
||||
OpenRouter is an OpenAI-compatible API gateway that provides access to
|
||||
multiple LLM providers (Anthropic, Meta, Mistral, etc.) through a unified API.
|
||||
"""
|
||||
|
||||
provider_type: Literal[ProviderType.openrouter] = Field(ProviderType.openrouter, description="The type of the provider.")
|
||||
provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
|
||||
api_key: str | None = Field(None, description="API key for the OpenRouter API.", deprecated=True)
|
||||
base_url: str = Field("https://openrouter.ai/api/v1", description="Base URL for the OpenRouter API.")
|
||||
|
||||
def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
|
||||
async def check_api_key(self):
|
||||
"""Check if the API key is valid by making a test request to the OpenRouter API."""
|
||||
api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
|
||||
if not api_key:
|
||||
raise ValueError("No API key provided")
|
||||
|
||||
try:
|
||||
# Use async OpenAI client pointed at OpenRouter's endpoint
|
||||
client = AsyncOpenAI(api_key=api_key, base_url=self.base_url)
|
||||
# Just list models to verify API key works
|
||||
await client.models.list()
|
||||
except AuthenticationError as e:
|
||||
raise LLMAuthenticationError(message=f"Failed to authenticate with OpenRouter: {e}", code=ErrorCode.UNAUTHENTICATED)
|
||||
except Exception as e:
|
||||
raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
|
||||
|
||||
def get_model_context_window_size(self, model_name: str) -> int | None:
|
||||
"""Get the context window size for an OpenRouter model.
|
||||
|
||||
OpenRouter models provide context_length in the API response,
|
||||
so this is mainly a fallback.
|
||||
"""
|
||||
This handles filtering out LLM Models by provider that meet Letta's requirements.
|
||||
return DEFAULT_CONTEXT_WINDOW
|
||||
|
||||
async def list_llm_models_async(self) -> list[LLMConfig]:
|
||||
"""
|
||||
Return available OpenRouter models that support tool calling.
|
||||
|
||||
OpenRouter provides a models endpoint that supports filtering by supported_parameters.
|
||||
We filter for models that support 'tools' to ensure Letta compatibility.
|
||||
"""
|
||||
from letta.llm_api.openai import openai_get_model_list_async
|
||||
|
||||
api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
|
||||
|
||||
# OpenRouter supports filtering models by supported parameters
|
||||
# See: https://openrouter.ai/docs/requests
|
||||
extra_params = {"supported_parameters": "tools"}
|
||||
|
||||
response = await openai_get_model_list_async(
|
||||
self.base_url,
|
||||
api_key=api_key,
|
||||
extra_params=extra_params,
|
||||
)
|
||||
|
||||
data = response.get("data", response)
|
||||
|
||||
configs = []
|
||||
for model in data:
|
||||
check = self._do_model_checks_for_name_and_context_size(model)
|
||||
if check is None:
|
||||
if "id" not in model:
|
||||
logger.warning(f"OpenRouter model missing 'id' field: {model}")
|
||||
continue
|
||||
model_name, context_window_size = check
|
||||
|
||||
handle = self.get_handle(model_name)
|
||||
model_name = model["id"]
|
||||
|
||||
config = LLMConfig(
|
||||
# OpenRouter returns context_length in the model listing
|
||||
if "context_length" in model and model["context_length"]:
|
||||
context_window_size = model["context_length"]
|
||||
else:
|
||||
context_window_size = self.get_model_context_window_size(model_name)
|
||||
logger.debug(f"Model {model_name} missing context_length, using default: {context_window_size}")
|
||||
|
||||
configs.append(
|
||||
LLMConfig(
|
||||
model=model_name,
|
||||
model_endpoint_type="openai",
|
||||
model_endpoint_type="openrouter",
|
||||
model_endpoint=self.base_url,
|
||||
context_window=context_window_size,
|
||||
handle=handle,
|
||||
handle=self.get_handle(model_name),
|
||||
max_tokens=self.get_default_max_output_tokens(model_name),
|
||||
provider_name=self.name,
|
||||
provider_category=self.provider_category,
|
||||
)
|
||||
|
||||
config = self._set_model_parameter_tuned_defaults(model_name, config)
|
||||
configs.append(config)
|
||||
)
|
||||
|
||||
return configs
|
||||
|
||||
@@ -700,7 +700,7 @@ class ProviderManager:
|
||||
enabled=True,
|
||||
model_endpoint_type=llm_config.model_endpoint_type,
|
||||
max_context_window=llm_config.context_window,
|
||||
supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek"],
|
||||
supports_token_streaming=llm_config.model_endpoint_type in ["openai", "anthropic", "deepseek", "openrouter"],
|
||||
supports_tool_calling=True, # Assume true for LLMs for now
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user