* fix(core): handle PermissionDeniedError in provider API key validation Fixed OpenAI PermissionDeniedError being raised as unknown error when validating provider API keys. The check_api_key methods in OpenAI-based providers (OpenAI, OpenRouter, Azure, Together) now properly catch and re-raise PermissionDeniedError as LLMPermissionDeniedError. 🐛 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix(core): handle Unicode surrogates in OpenAI requests Sanitize invalid UTF-16 surrogates before sending requests to OpenAI API. Fixes UnicodeEncodeError when message content contains unpaired surrogates from corrupted emoji data or malformed Unicode sequences. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix(core): handle MCP tool schema validation errors gracefully Catch fastmcp.exceptions.ToolError in execute_mcp_tool endpoint and convert to LettaInvalidArgumentError (400) instead of letting it propagate as 500 error. This is an expected user error when tool arguments don't match the MCP tool's schema. Fixes Datadog issue 8f2d874a-f8e5-11f0-9b25-da7ad0900000 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix(core): handle ExceptionGroup-wrapped ToolError in MCP executor When MCP tools fail with validation errors (e.g., missing required parameters), fastmcp raises ToolError exceptions that may be wrapped in ExceptionGroup by Python's async TaskGroup. The exception handler now unwraps single-exception groups before checking if the error should be handled gracefully. Fixes Calendly API "organization parameter missing" errors being logged to Datadog instead of returning friendly error messages to users. 
🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: handle missing agent in create_conversation to prevent foreign key violation * Update .gitignore --------- Co-authored-by: Letta <noreply@letta.com>
187 lines
8.4 KiB
Python
187 lines
8.4 KiB
Python
from collections import defaultdict
|
|
from typing import ClassVar, Literal
|
|
|
|
import httpx
|
|
from openai import AsyncAzureOpenAI, AuthenticationError, PermissionDeniedError
|
|
from pydantic import Field, field_validator
|
|
|
|
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
|
|
from letta.errors import ErrorCode, LLMAuthenticationError, LLMPermissionDeniedError
|
|
from letta.schemas.embedding_config import EmbeddingConfig
|
|
from letta.schemas.enums import ProviderCategory, ProviderType
|
|
from letta.schemas.llm_config import LLMConfig
|
|
from letta.schemas.providers.base import Provider
|
|
|
|
# Context window size (in tokens) for known Azure OpenAI model names.
# Hard-coded because the Azure endpoints used in this module do not report it;
# names missing from this table are resolved by the caller's own fallback.
AZURE_MODEL_TO_CONTEXT_LENGTH: dict[str, int] = {
    "babbage-002": 16384,
    "davinci-002": 16384,
    "gpt-35-turbo-0613": 4096,
    "gpt-35-turbo-1106": 16385,
    "gpt-35-turbo-0125": 16385,
    "gpt-4-0613": 8192,
    "gpt-4o-mini-2024-07-18": 128000,
    "gpt-4o-mini": 128000,
    "gpt-4o": 128000,
}
|
|
|
|
|
|
class AzureProvider(Provider):
    """Provider for models deployed on an Azure OpenAI resource.

    Builds the Azure REST endpoints for the resource at ``base_url``, lists the
    models that are both available and deployed there, and turns them into
    ``LLMConfig`` / ``EmbeddingConfig`` entries.
    """

    # Default applied to the `api_version` field when none (or None) is supplied.
    LATEST_API_VERSION: ClassVar[str] = "2024-09-01-preview"

    provider_type: Literal[ProviderType.azure] = Field(ProviderType.azure, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    # Note: 2024-09-01-preview was set here until 2025-07-16.
    # set manually, see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
    # NOTE(review): nothing in this class reads `latest_api_version`; the `api_version`
    # default still comes from LATEST_API_VERSION above - confirm which one is authoritative.
    latest_api_version: str = "2025-04-01-preview"
    base_url: str = Field(
        ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
    )
    api_key: str | None = Field(None, description="API key for the Azure API.", deprecated=True)
    api_version: str = Field(default=LATEST_API_VERSION, description="API version for the Azure API")

    @field_validator("api_version", mode="before")
    @classmethod
    def replace_none_with_default(cls, v):
        """Fall back to ``LATEST_API_VERSION`` when ``api_version`` is given as ``None``."""
        return v if v is not None else cls.LATEST_API_VERSION

    @staticmethod
    def _has_capability(model: dict, capability: str) -> bool:
        """Return True when ``model["capabilities"][capability]`` is set to ``True``.

        A missing or ``None`` ``capabilities`` entry counts as "not capable"
        instead of raising ``AttributeError``.
        """
        capabilities = model.get("capabilities") or {}
        return capabilities.get(capability) is True

    def get_azure_chat_completions_endpoint(self, model: str) -> str:
        """Return the chat-completions URL for the deployment named ``model``."""
        return f"{self.base_url}/openai/deployments/{model}/chat/completions?api-version={self.api_version}"

    def get_azure_embeddings_endpoint(self, model: str) -> str:
        """Return the embeddings URL for the deployment named ``model``."""
        return f"{self.base_url}/openai/deployments/{model}/embeddings?api-version={self.api_version}"

    def get_azure_model_list_endpoint(self) -> str:
        """Return the URL that lists the models available to this resource."""
        return f"{self.base_url}/openai/models?api-version={self.api_version}"

    def get_azure_deployment_list_endpoint(self) -> str:
        """Return the URL that lists the deployments of this resource."""
        # Please note that it has to be 2023-03-15-preview
        # That's the only api version that works with this deployments endpoint
        return f"{self.base_url}/openai/deployments?api-version=2023-03-15-preview"

    async def azure_openai_get_deployed_model_list(self) -> list:
        """Return the available models that are also deployed, one entry per model ID.

        https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP

        Returns:
            A list of model dicts. The list is empty when listing the available
            models fails for any reason other than authentication/permission.

        Raises:
            AuthenticationError, PermissionDeniedError: Re-raised unchanged from
                the OpenAI client so callers can report them properly.
            RuntimeError: If the deployments request times out or returns an
                error status.
        """
        # `api_key_enc` is not declared in this class - presumably provided by `Provider`.
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        client = AsyncAzureOpenAI(api_key=api_key, api_version=self.api_version, azure_endpoint=self.base_url)

        # 1. Get all the models available to this resource
        try:
            models_list = await client.models.list()
        except (AuthenticationError, PermissionDeniedError):
            # Re-raise auth/permission errors so they're properly handled upstream
            raise
        except Exception:
            # Deliberate best-effort: any other failure means "no models".
            return []

        all_available_models = [model.to_dict() for model in models_list.data]

        # https://xxx.openai.azure.com/openai/models?api-version=xxx
        headers = {"Content-Type": "application/json"}
        if api_key is not None:
            headers["api-key"] = f"{api_key}"

        # 2. Get all the deployed models
        url = self.get_azure_deployment_list_endpoint()
        try:
            # Azure API can be slow (8+ seconds), use a generous timeout
            timeout = httpx.Timeout(15.0, connect=10.0)
            async with httpx.AsyncClient(timeout=timeout) as http_client:
                response = await http_client.get(url, headers=headers)
                response.raise_for_status()
        except httpx.TimeoutException as e:
            raise RuntimeError(f"Azure API timeout after 15s: {e}") from e
        except httpx.HTTPStatusError as e:
            raise RuntimeError(f"Failed to retrieve model list: {e}") from e

        deployed_model_names = {m["id"] for m in response.json().get("data", [])}

        # 3. Only return the models in available models if they have been deployed
        deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]

        # 4. Remove redundant deployments, only include the ones with the latest deployment
        # Keep, for each model ID, the entry with the most recent `created_at`.
        latest_models: dict = {}
        for model in deployed_models:
            model_id = model["id"]
            created_at = model["created_at"]
            if model_id not in latest_models or created_at > latest_models[model_id]["created_at"]:
                latest_models[model_id] = model

        # Extract the unique models
        return list(latest_models.values())

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """Return an ``LLMConfig`` for every deployed model that supports chat completion."""
        model_list = await self.azure_openai_get_deployed_model_list()
        # Extract models that support text generation
        model_options = [m for m in model_list if self._has_capability(m, "chat_completion")]

        configs = []
        for model_option in model_options:
            model_name = model_option["id"]
            context_window_size = self.get_model_context_window(model_name)
            model_endpoint = self.get_azure_chat_completions_endpoint(model_name)
            configs.append(
                LLMConfig(
                    model=model_name,
                    model_endpoint_type="azure",
                    model_endpoint=model_endpoint,
                    context_window=context_window_size,
                    handle=self.get_handle(model_name),
                    max_tokens=self.get_default_max_output_tokens(model_name),
                    provider_name=self.name,
                    provider_category=self.provider_category,
                )
            )
        return configs

    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
        """Return an ``EmbeddingConfig`` for every deployed embedding model.

        A model qualifies when it reports the ``embeddings`` capability and its
        ID contains the substring ``"embedding"``.
        """

        def valid_embedding_model(m: dict, require_embedding_in_name: bool = True) -> bool:
            if require_embedding_in_name and "embedding" not in m["id"]:
                return False
            return self._has_capability(m, "embeddings")

        model_list = await self.azure_openai_get_deployed_model_list()
        # Extract models that support embeddings
        model_options = [m for m in model_list if valid_embedding_model(m)]

        configs = []
        for model_option in model_options:
            model_name = model_option["id"]
            model_endpoint = self.get_azure_embeddings_endpoint(model_name)
            configs.append(
                EmbeddingConfig(
                    embedding_model=model_name,
                    embedding_endpoint_type="azure",
                    embedding_endpoint=model_endpoint,
                    embedding_dim=768,  # TODO generated 1536?
                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # old note: max is 2048
                    handle=self.get_handle(model_name, is_embedding=True),
                    batch_size=1024,
                )
            )
        return configs

    def get_model_context_window(self, model_name: str) -> int | None:
        """Return the context window for ``model_name``.

        Looks in ``AZURE_MODEL_TO_CONTEXT_LENGTH`` first, then in
        ``LLM_MAX_CONTEXT_WINDOW``, and finally falls back to 4096.
        """
        # Hard coded as there are no API endpoints for this
        llm_default = LLM_MAX_CONTEXT_WINDOW.get(model_name, 4096)
        return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model_name, llm_default)

    async def check_api_key(self):
        """Validate the stored API key by listing the deployed LLM models.

        Raises:
            ValueError: If no API key is stored.
            LLMPermissionDeniedError: If Azure rejects the key with a permission error.
            LLMAuthenticationError: If listing the models fails for any other reason.
        """
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        if not api_key:
            raise ValueError("No API key provided")

        try:
            await self.list_llm_models_async()
        except (LLMAuthenticationError, LLMPermissionDeniedError):
            # Re-raise specific LLM errors as-is
            raise
        except PermissionDeniedError as e:
            # The model lister re-raises the raw OpenAI exception; translate it here so a
            # permission problem is not misreported as an authentication failure.
            raise LLMPermissionDeniedError(message=f"Permission denied by Azure: {e}", code=ErrorCode.PERMISSION_DENIED) from e
        except Exception as e:
            raise LLMAuthenticationError(message=f"Failed to authenticate with Azure: {e}", code=ErrorCode.UNAUTHENTICATED) from e
|