Files
letta-server/letta/schemas/providers/azure.py
Kian Jones be60697a62 fix(core): handle protocol errors and foreign key violations (#9308)
* fix(core): handle PermissionDeniedError in provider API key validation

Fixed OpenAI PermissionDeniedError being raised as unknown error when
validating provider API keys. The check_api_key methods in OpenAI-based
providers (OpenAI, OpenRouter, Azure, Together) now properly catch and
re-raise PermissionDeniedError as LLMPermissionDeniedError.

🐛 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle Unicode surrogates in OpenAI requests

Sanitize invalid UTF-16 surrogates before sending requests to OpenAI API.
Fixes UnicodeEncodeError when message content contains unpaired surrogates
from corrupted emoji data or malformed Unicode sequences.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle MCP tool schema validation errors gracefully

Catch fastmcp.exceptions.ToolError in execute_mcp_tool endpoint and
convert to LettaInvalidArgumentError (400) instead of letting it
propagate as 500 error. This is an expected user error when tool
arguments don't match the MCP tool's schema.

Fixes Datadog issue 8f2d874a-f8e5-11f0-9b25-da7ad0900000

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): handle ExceptionGroup-wrapped ToolError in MCP executor

When MCP tools fail with validation errors (e.g., missing required parameters),
fastmcp raises ToolError exceptions that may be wrapped in ExceptionGroup by
Python's async TaskGroup. The exception handler now unwraps single-exception
groups before checking if the error should be handled gracefully.

Fixes Calendly API "organization parameter missing" errors being logged to
Datadog instead of returning friendly error messages to users.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: handle missing agent in create_conversation to prevent foreign key violation

* Update .gitignore

---------

Co-authored-by: Letta <noreply@letta.com>
2026-02-24 10:52:06 -08:00

187 lines
8.4 KiB
Python

from collections import defaultdict
from typing import ClassVar, Literal
import httpx
from openai import AsyncAzureOpenAI, AuthenticationError, PermissionDeniedError
from pydantic import Field, field_validator
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_CONTEXT_WINDOW
from letta.errors import ErrorCode, LLMAuthenticationError, LLMPermissionDeniedError
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.base import Provider
# Hard-coded context-window sizes (in tokens) for Azure OpenAI deployments.
# Azure exposes no API endpoint for this, so get_model_context_window() falls
# back to this table (then to LLM_MAX_CONTEXT_WINDOW) by model id.
AZURE_MODEL_TO_CONTEXT_LENGTH = {
    "babbage-002": 16384,
    "davinci-002": 16384,
    "gpt-35-turbo-0613": 4096,
    "gpt-35-turbo-1106": 16385,
    "gpt-35-turbo-0125": 16385,
    "gpt-4-0613": 8192,
    "gpt-4o-mini-2024-07-18": 128000,
    "gpt-4o-mini": 128000,
    "gpt-4o": 128000,
}
class AzureProvider(Provider):
    """Letta provider backed by an Azure OpenAI resource.

    Model discovery cross-references the account's model catalog with its
    actual deployments, because Azure only serves models that have been
    deployed to the resource.
    """

    # Default API version used when the caller passes api_version=None.
    LATEST_API_VERSION: ClassVar[str] = "2024-09-01-preview"

    provider_type: Literal[ProviderType.azure] = Field(ProviderType.azure, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    # Note: 2024-09-01-preview was set here until 2025-07-16.
    # set manually, see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
    # NOTE(review): this field's default differs from LATEST_API_VERSION above —
    # confirm whether the ClassVar should be bumped to match.
    latest_api_version: str = "2025-04-01-preview"
    base_url: str = Field(
        ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
    )
    api_key: str | None = Field(None, description="API key for the Azure API.", deprecated=True)
    api_version: str = Field(default=LATEST_API_VERSION, description="API version for the Azure API")

    @field_validator("api_version", mode="before")
    def replace_none_with_default(cls, v):
        """Substitute the class default when api_version is explicitly None."""
        return v if v is not None else cls.LATEST_API_VERSION

    def get_azure_chat_completions_endpoint(self, model: str):
        """Build the chat-completions URL for the given deployment name."""
        return f"{self.base_url}/openai/deployments/{model}/chat/completions?api-version={self.api_version}"

    def get_azure_embeddings_endpoint(self, model: str):
        """Build the embeddings URL for the given deployment name."""
        return f"{self.base_url}/openai/deployments/{model}/embeddings?api-version={self.api_version}"

    def get_azure_model_list_endpoint(self):
        """Build the account-wide model catalog URL."""
        return f"{self.base_url}/openai/models?api-version={self.api_version}"

    def get_azure_deployment_list_endpoint(self):
        """Build the deployments listing URL.

        Please note that it has to be 2023-03-15-preview:
        that's the only api version that works with this deployments endpoint.
        """
        return f"{self.base_url}/openai/deployments?api-version=2023-03-15-preview"

    async def azure_openai_get_deployed_model_list(self) -> list:
        """Return the deployed models for this Azure resource.

        https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP

        Returns only models that are both available and deployed, deduplicated
        to the most recently created deployment per model id. Returns [] when
        the catalog cannot be listed for non-auth reasons.

        Raises:
            AuthenticationError / PermissionDeniedError: invalid or unauthorized key.
            RuntimeError: the deployments endpoint timed out or returned an error status.
        """
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None

        # 1. Get every model the account could use.
        client = AsyncAzureOpenAI(api_key=api_key, api_version=self.api_version, azure_endpoint=self.base_url)
        try:
            models_list = await client.models.list()
        except (AuthenticationError, PermissionDeniedError):
            # Re-raise auth/permission errors so they're properly handled upstream
            raise
        except Exception:
            # Best-effort: any other listing failure degrades to "no models".
            return []
        all_available_models = [model.to_dict() for model in models_list.data]

        # https://xxx.openai.azure.com/openai/models?api-version=xxx
        headers = {"Content-Type": "application/json"}
        if api_key is not None:
            headers["api-key"] = f"{api_key}"

        # 2. Get all the deployed models
        url = self.get_azure_deployment_list_endpoint()
        try:
            # Azure API can be slow (8+ seconds), use a generous timeout
            timeout = httpx.Timeout(15.0, connect=10.0)
            async with httpx.AsyncClient(timeout=timeout) as http_client:
                response = await http_client.get(url, headers=headers)
                response.raise_for_status()
        except httpx.TimeoutException as e:
            raise RuntimeError(f"Azure API timeout after 15s: {e}") from e
        except httpx.HTTPStatusError as e:
            raise RuntimeError(f"Failed to retrieve model list: {e}") from e

        deployed_models = response.json().get("data", [])
        deployed_model_names = {m["id"] for m in deployed_models}

        # 3. Only return the models in available models if they have been deployed
        deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]

        # 4. Remove redundant deployments, only include the ones with the latest deployment
        # Map each model id to its most recently created entry.
        # (A plain dict suffices; the original used defaultdict() with no factory,
        # which behaves identically to {}.)
        latest_models: dict[str, dict] = {}
        for model in deployed_models:
            model_id = model["id"]
            updated_at = model["created_at"]
            # Keep the entry with the most recent created_at per model id.
            if model_id not in latest_models or updated_at > latest_models[model_id]["created_at"]:
                latest_models[model_id] = model

        # Extract the unique models
        return list(latest_models.values())

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """Return an LLMConfig for every deployed model that supports chat completion."""
        model_list = await self.azure_openai_get_deployed_model_list()
        # Extract models that support text generation.
        # Guard against entries missing "capabilities" (would raise AttributeError on None).
        model_options = [m for m in model_list if (m.get("capabilities") or {}).get("chat_completion") is True]

        configs = []
        for model_option in model_options:
            model_name = model_option["id"]
            context_window_size = self.get_model_context_window(model_name)
            model_endpoint = self.get_azure_chat_completions_endpoint(model_name)
            configs.append(
                LLMConfig(
                    model=model_name,
                    model_endpoint_type="azure",
                    model_endpoint=model_endpoint,
                    context_window=context_window_size,
                    handle=self.get_handle(model_name),
                    max_tokens=self.get_default_max_output_tokens(model_name),
                    provider_name=self.name,
                    provider_category=self.provider_category,
                )
            )
        return configs

    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
        """Return an EmbeddingConfig for every deployed embedding-capable model."""

        def valid_embedding_model(m: dict, require_embedding_in_name: bool = True):
            # Guard against entries missing "capabilities" (would raise AttributeError on None).
            valid_name = True
            if require_embedding_in_name:
                valid_name = "embedding" in m["id"]
            return (m.get("capabilities") or {}).get("embeddings") is True and valid_name

        model_list = await self.azure_openai_get_deployed_model_list()
        # Extract models that support embeddings
        model_options = [m for m in model_list if valid_embedding_model(m)]

        configs = []
        for model_option in model_options:
            model_name = model_option["id"]
            model_endpoint = self.get_azure_embeddings_endpoint(model_name)
            configs.append(
                EmbeddingConfig(
                    embedding_model=model_name,
                    embedding_endpoint_type="azure",
                    embedding_endpoint=model_endpoint,
                    embedding_dim=768,  # TODO generated 1536?
                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # old note: max is 2048
                    handle=self.get_handle(model_name, is_embedding=True),
                    batch_size=1024,
                )
            )
        return configs

    def get_model_context_window(self, model_name: str) -> int | None:
        """Look up the context window for a model (hard coded; no API endpoint exists)."""
        llm_default = LLM_MAX_CONTEXT_WINDOW.get(model_name, 4096)
        return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model_name, llm_default)

    async def check_api_key(self):
        """Validate the stored API key by attempting to list models.

        Raises:
            ValueError: no API key is configured.
            LLMAuthenticationError / LLMPermissionDeniedError: key rejected by Azure.
        """
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None
        if not api_key:
            raise ValueError("No API key provided")
        try:
            await self.list_llm_models_async()
        except (LLMAuthenticationError, LLMPermissionDeniedError):
            # Re-raise specific LLM errors as-is
            raise
        except Exception as e:
            # Chain the cause so the original failure is preserved in tracebacks.
            raise LLMAuthenticationError(message=f"Failed to authenticate with Azure: {e}", code=ErrorCode.UNAUTHENTICATED) from e