letta-server/letta/schemas/providers/openai.py
Kevin Lin 4e2bf3ecd6 feat: add gpt-5.3-chat-latest model support (#9746)
Add OpenAI's GPT-5.3 Chat model (128K context, 16K output) with pricing
specs, and remove the "chat" keyword filter so chat variants are listed.

🐾 Generated with [Letta Code](https://letta.com)

Co-authored-by: Letta Code <noreply@letta.com>
2026-03-03 18:34:15 -08:00

from typing import Literal

from openai import AsyncOpenAI, AuthenticationError, PermissionDeniedError
from pydantic import Field

from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
from letta.errors import ErrorCode, LLMAuthenticationError, LLMError, LLMPermissionDeniedError
from letta.log import get_logger
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.base import Provider

logger = get_logger(__name__)

ALLOWED_PREFIXES = {"gpt-4", "gpt-5", "o1", "o3", "o4"}
DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}
DEFAULT_EMBEDDING_BATCH_SIZE = 1024
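# Example: "gpt-5.3-chat-latest" starts with "gpt-5" and contains no disallowed
# keyword, so it is listed; "o1-mini" matches a disallowed keyword and is skipped.
# (Illustrative; the actual filtering happens in _list_llm_models below.)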


class OpenAIProvider(Provider):
    provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    api_key: str | None = Field(None, description="API key for the OpenAI API.", deprecated=True)
    base_url: str = Field("https://api.openai.com/v1", description="Base URL for the OpenAI API.")

    async def check_api_key(self):
        # Decrypt API key before using
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        if not api_key:
            raise ValueError("No API key provided")
        try:
            # Use async OpenAI client to check API key validity
            client = AsyncOpenAI(api_key=api_key, base_url=self.base_url)
            # Just list models to verify the API key works
            await client.models.list()
        except AuthenticationError as e:
            raise LLMAuthenticationError(message=f"Failed to authenticate with OpenAI: {e}", code=ErrorCode.UNAUTHENTICATED)
        except PermissionDeniedError as e:
            raise LLMPermissionDeniedError(message=f"Permission denied by OpenAI: {e}", code=ErrorCode.PERMISSION_DENIED)
        except AttributeError as e:
            if "_set_private_attributes" in str(e):
                raise LLMError(
                    message=f"OpenAI-compatible endpoint at {self.base_url} returned an unexpected non-JSON response. Verify the base URL and that the endpoint is reachable.",
                    code=ErrorCode.INTERNAL_SERVER_ERROR,
                )
            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
        except Exception as e:
            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)

    @staticmethod
    def _openai_default_max_output_tokens(model_name: str) -> int:
        """Return a sensible max-output-tokens default for OpenAI models.

        gpt-5.2* / gpt-5.3* support 128k output tokens, except the
        `-chat` variants, which are capped at 16k.
        """
        import re

        if re.match(r"^gpt-5\.[23]", model_name) and "-chat" not in model_name:
            return 128000
        return 16384
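
    # Illustrative results from the helper above: "gpt-5.3" matches the regex
    # and lacks "-chat" -> 128000; "gpt-5.3-chat-latest" contains "-chat" -> 16384;
    # "gpt-4o" does not match gpt-5.2/5.3 -> 16384.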

    def get_default_max_output_tokens(self, model_name: str) -> int:
        """Get the default max output tokens for OpenAI models (sync fallback)."""
        return self._openai_default_max_output_tokens(model_name)

    async def get_default_max_output_tokens_async(self, model_name: str) -> int:
        """Get the default max output tokens for OpenAI models.

        Uses litellm model specifications with a simple fallback.
        """
        from letta.model_specs.litellm_model_specs import get_max_output_tokens

        # Try litellm specs
        max_output = await get_max_output_tokens(model_name)
        if max_output is not None:
            return max_output
        return self._openai_default_max_output_tokens(model_name)

    async def _get_models_async(self) -> list[dict]:
        from letta.llm_api.openai import openai_get_model_list_async

        # Provider-specific extra parameters for model listing
        extra_params = None
        if "openrouter.ai" in self.base_url:
            # OpenRouter: filter for models with tool-calling support
            # See: https://openrouter.ai/docs/requests
            extra_params = {"supported_parameters": "tools"}
        elif "nebius.com" in self.base_url:
            # Nebius: use verbose mode for better model info
            extra_params = {"verbose": True}
        # Decrypt API key before using
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        response = await openai_get_model_list_async(
            self.base_url,
            api_key=api_key,
            extra_params=extra_params,
            # fix_url=True,  # NOTE: make sure together ends with /v1
        )
        # TODO (cliandy): this is brittle, as TogetherAI seems to return a bare list instead of a dict with a 'data' field
        data = response.get("data", response)
        assert isinstance(data, list)
        return data
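
    # Expected response shape from the official endpoint:
    #   {"object": "list", "data": [{"id": "gpt-4o", "object": "model", ...}, ...]}
    # Some OpenAI-compatible endpoints return a bare list, hence the fallback above.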

    async def list_llm_models_async(self) -> list[LLMConfig]:
        data = await self._get_models_async()
        return await self._list_llm_models(data)

    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
        """Return known OpenAI embedding models.

        Note: we intentionally do not attempt to fetch embedding models from the remote endpoint here.
        The OpenAI "models" list does not reliably expose embedding metadata needed for filtering,
        and in tests we frequently point OPENAI_BASE_URL at a local mock server.
        """
        return [
            EmbeddingConfig(
                embedding_model="text-embedding-ada-002",
                embedding_endpoint_type="openai",
                embedding_endpoint=self.base_url,
                embedding_dim=1536,
                embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
                handle=self.get_handle("text-embedding-ada-002", is_embedding=True),
                batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
            ),
            EmbeddingConfig(
                embedding_model="text-embedding-3-small",
                embedding_endpoint_type="openai",
                embedding_endpoint=self.base_url,
                embedding_dim=1536,
                embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
                handle=self.get_handle("text-embedding-3-small", is_embedding=True),
                batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
            ),
            EmbeddingConfig(
                embedding_model="text-embedding-3-large",
                embedding_endpoint_type="openai",
                embedding_endpoint=self.base_url,
                embedding_dim=3072,
                embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
                handle=self.get_handle("text-embedding-3-large", is_embedding=True),
                batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
            ),
        ]

    async def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
        """Filter each provider's model list down to the LLM models that meet Letta's requirements."""
        configs = []
        for model in data:
            check = await self._do_model_checks_for_name_and_context_size_async(model)
            if check is None:
                continue
            model_name, context_window_size = check

            # ===== Provider filtering =====
            # TogetherAI: includes the type, which we can use to filter out embedding models
            if "api.together.ai" in self.base_url or "api.together.xyz" in self.base_url:
                if "type" in model and model["type"] not in ["chat", "language"]:
                    continue
                # For TogetherAI, we also need to skip models that don't support JSON mode / function calling:
                # requests.exceptions.HTTPError: HTTP error occurred: 400 Client Error: Bad Request for url: https://api.together.ai/v1/chat/completions | Status code: 400, Message: {
                #   "error": {
                #     "message": "mistralai/Mixtral-8x7B-v0.1 is not supported for JSON mode/function calling",
                #     "type": "invalid_request_error",
                #     "param": null,
                #     "code": "constraints_model"
                #   }
                # }
                if "config" not in model:
                    continue

            # Nebius: includes the type, which we can use to filter for text models
            if "nebius.com" in self.base_url:
                model_type = model.get("architecture", {}).get("modality")
                if model_type not in ["text->text", "text+image->text"]:
                    continue

            # OpenAI
            # NOTE: o1-mini and o1-preview do not support tool calling
            # NOTE: o1-mini does not support system messages
            # NOTE: o1-pro is only available in the Responses API
            if self.base_url == "https://api.openai.com/v1":
                if any(keyword in model_name for keyword in DISALLOWED_KEYWORDS) or not any(
                    model_name.startswith(prefix) for prefix in ALLOWED_PREFIXES
                ):
                    continue

            # Set the model handle based on the base URL
            # Note: "openai-proxy" just means the model is served through the OpenAIProvider
            if self.base_url != "https://api.openai.com/v1":
                handle = self.get_handle(model_name, base_name="openai-proxy")
            else:
                handle = self.get_handle(model_name)

            config = LLMConfig(
                model=model_name,
                model_endpoint_type="openai",
                model_endpoint=self.base_url,
                context_window=context_window_size,
                handle=handle,
                max_tokens=await self.get_default_max_output_tokens_async(model_name),
                provider_name=self.name,
                provider_category=self.provider_category,
            )
            config = self._set_model_parameter_tuned_defaults(model_name, config)
            configs.append(config)

        # For OpenAI, sort in reverse order
        if self.base_url == "https://api.openai.com/v1":
            configs.sort(key=lambda x: x.model, reverse=True)
        return configs
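
    # Illustrative resulting config for the official endpoint, using the
    # gpt-5.3-chat specs from the commit message above (128K context, 16K output):
    #   LLMConfig(model="gpt-5.3-chat-latest", model_endpoint_type="openai",
    #             model_endpoint="https://api.openai.com/v1",
    #             context_window=128000, max_tokens=16384, ...)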

    def _do_model_checks_for_name_and_context_size(self, model: dict, length_key: str = "context_length") -> tuple[str, int] | None:
        """Sync version - uses sync get_model_context_window_size (for subclasses with hardcoded values)."""
        if "id" not in model:
            logger.warning("Model missing 'id' field for provider: %s and model: %s", self.provider_type, model)
            return None
        model_name = model["id"]
        context_window_size = self.get_model_context_window_size(model_name)
        if not context_window_size:
            logger.info("No context window size found for model: %s", model_name)
            return None
        return model_name, context_window_size

    async def _do_model_checks_for_name_and_context_size_async(
        self, model: dict, length_key: str = "context_length"
    ) -> tuple[str, int] | None:
        """Async version - uses async get_model_context_window_size_async (for litellm lookup)."""
        if "id" not in model:
            logger.warning("Model missing 'id' field for provider: %s and model: %s", self.provider_type, model)
            return None
        model_name = model["id"]
        context_window_size = await self.get_model_context_window_size_async(model_name)
        if not context_window_size:
            logger.info("No context window size found for model: %s", model_name)
            return None
        return model_name, context_window_size

    @staticmethod
    def _set_model_parameter_tuned_defaults(model_name: str, llm_config: LLMConfig):
        """Tune LLMConfig parameters to improve model performance."""
        # gpt-4o-mini has started to regress with pretty bad emoji spam loops (2025-07)
        if "gpt-4o" in model_name or "gpt-4.1-mini" in model_name or model_name == "letta-free":
            llm_config.frequency_penalty = 1.0
        return llm_config

    def get_model_context_window_size(self, model_name: str) -> int | None:
        """Get the context window size for a model (sync fallback)."""
        return LLM_MAX_CONTEXT_WINDOW["DEFAULT"]

    async def get_model_context_window_size_async(self, model_name: str) -> int | None:
        """Get the context window size for a model.

        Uses litellm model specifications, which cover all OpenAI models.
        """
        from letta.model_specs.litellm_model_specs import get_context_window

        context_window = await get_context_window(model_name)
        if context_window is not None:
            return context_window
        # Simple fallback
        logger.debug(
            "Model %s not found in litellm specs. Using default of %s",
            model_name,
            LLM_MAX_CONTEXT_WINDOW["DEFAULT"],
        )
        return LLM_MAX_CONTEXT_WINDOW["DEFAULT"]

    def get_model_context_window(self, model_name: str) -> int | None:
        return self.get_model_context_window_size(model_name)

    async def get_model_context_window_async(self, model_name: str) -> int | None:
        return await self.get_model_context_window_size_async(model_name)
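

# Minimal usage sketch (illustrative, not part of this module; assumes a provider
# configured through the base Provider fields so that `api_key_enc` can yield a
# plaintext key, as check_api_key and _get_models_async expect):
#
#     provider = OpenAIProvider(name="openai", ...)
#     await provider.check_api_key()
#     llm_configs = await provider.list_llm_models_async()
#     embedding_configs = await provider.list_embedding_models_async()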