diff --git a/letta/llm_api/azure_openai.py b/letta/llm_api/azure_openai.py
deleted file mode 100644
index 52cb6d3a..00000000
--- a/letta/llm_api/azure_openai.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from collections import defaultdict
-
-import requests
-from openai import AzureOpenAI
-
-
-def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
-    return f"{base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
-
-
-def get_azure_embeddings_endpoint(base_url: str, model: str, api_version: str):
-    return f"{base_url}/openai/deployments/{model}/embeddings?api-version={api_version}"
-
-
-def get_azure_model_list_endpoint(base_url: str, api_version: str):
-    return f"{base_url}/openai/models?api-version={api_version}"
-
-
-def get_azure_deployment_list_endpoint(base_url: str):
-    # Please note that it has to be 2023-03-15-preview
-    # That's the only api version that works with this deployments endpoint
-    # TODO: Use the Azure Client library here instead
-    return f"{base_url}/openai/deployments?api-version=2023-03-15-preview"
-
-
-def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_version: str) -> list:
-    """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP"""
-
-    client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=base_url)
-
-    try:
-        models_list = client.models.list()
-    except Exception:
-        return []
-
-    all_available_models = [model.to_dict() for model in models_list.data]
-
-    # https://xxx.openai.azure.com/openai/models?api-version=xxx
-    headers = {"Content-Type": "application/json"}
-    if api_key is not None:
-        headers["api-key"] = f"{api_key}"
-
-    # 2. Get all the deployed models
-    url = get_azure_deployment_list_endpoint(base_url)
-    try:
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()
-    except requests.RequestException as e:
-        raise RuntimeError(f"Failed to retrieve model list: {e}")
-
-    deployed_models = response.json().get("data", [])
-    deployed_model_names = set([m["id"] for m in deployed_models])
-
-    # 3. Only return the models in available models if they have been deployed
-    deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]
-
-    # 4. Remove redundant deployments, only include the ones with the latest deployment
-    # Create a dictionary to store the latest model for each ID
-    latest_models = defaultdict()
-
-    # Iterate through the models and update the dictionary with the most recent model
-    for model in deployed_models:
-        model_id = model["id"]
-        updated_at = model["created_at"]
-
-        # If the model ID is new or the current model has a more recent created_at, update the dictionary
-        if model_id not in latest_models or updated_at > latest_models[model_id]["created_at"]:
-            latest_models[model_id] = model
-
-    # Extract the unique models
-    return list(latest_models.values())
-
-
-def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
-    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
-    # Extract models that support text generation
-    model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
-    return model_options
-
-
-def azure_openai_get_embeddings_model_list(base_url: str, api_key: str, api_version: str, require_embedding_in_name: bool = True) -> list:
-    def valid_embedding_model(m: dict):
-        valid_name = True
-        if require_embedding_in_name:
-            valid_name = "embedding" in m["id"]
-
-        return m.get("capabilities").get("embeddings") == True and valid_name
-
-    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
-    # Extract models that support embeddings
-
-    model_options = [m for m in model_list if valid_embedding_model(m)]
-
-    return model_options
diff --git a/letta/llm_api/azure_openai_constants.py b/letta/llm_api/azure_openai_constants.py
deleted file mode 100644
index ba4248ef..00000000
--- a/letta/llm_api/azure_openai_constants.py
+++ /dev/null
@@ -1,11 +0,0 @@
-AZURE_MODEL_TO_CONTEXT_LENGTH = {
-    "babbage-002": 16384,
-    "davinci-002": 16384,
-    "gpt-35-turbo-0613": 4096,
-    "gpt-35-turbo-1106": 16385,
-    "gpt-35-turbo-0125": 16385,
-    "gpt-4-0613": 8192,
-    "gpt-4o-mini-2024-07-18": 128000,
-    "gpt-4o-mini": 128000,
-    "gpt-4o": 128000,
-}
diff --git a/letta/schemas/providers/azure.py b/letta/schemas/providers/azure.py
index 32f871ca..0da8c5fa 100644
--- a/letta/schemas/providers/azure.py
+++ b/letta/schemas/providers/azure.py
@@ -1,16 +1,31 @@
 from typing import ClassVar, Literal
 
+import requests
+from openai import AzureOpenAI
 from pydantic import Field, field_validator
 
 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
 from letta.errors import ErrorCode, LLMAuthenticationError
-from letta.llm_api.azure_openai import get_azure_chat_completions_endpoint, get_azure_embeddings_endpoint
-from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.providers.base import Provider
 
+AZURE_MODEL_TO_CONTEXT_LENGTH = {
+    "babbage-002": 16384,
+    "davinci-002": 16384,
+    "gpt-35-turbo-0613": 4096,
+    "gpt-35-turbo-1106": 16385,
+    "gpt-35-turbo-0125": 16385,
+    "gpt-4-0613": 8192,
+    "gpt-4o-mini-2024-07-18": 128000,
+    "gpt-4o-mini": 128000,
+    "gpt-4o": 128000,
+}
+
+# Upper bound, in seconds, on the deployments REST call so a stalled endpoint cannot hang model listing
+AZURE_DEPLOYMENT_LIST_TIMEOUT_SECONDS = 30
+
 
 class AzureProvider(Provider):
     LATEST_API_VERSION: ClassVar[str] = "2024-09-01-preview"
@@ -30,16 +45,86 @@ class AzureProvider(Provider):
     def replace_none_with_default(cls, v):
         return v if v is not None else cls.LATEST_API_VERSION
 
+    def get_azure_chat_completions_endpoint(self, model: str) -> str:
+        """Return the chat-completions URL for deployment `model` under this provider's base URL."""
+        return f"{self.base_url}/openai/deployments/{model}/chat/completions?api-version={self.api_version}"
+
+    def get_azure_embeddings_endpoint(self, model: str) -> str:
+        """Return the embeddings URL for deployment `model` under this provider's base URL."""
+        return f"{self.base_url}/openai/deployments/{model}/embeddings?api-version={self.api_version}"
+
+    def get_azure_model_list_endpoint(self) -> str:
+        """Return the `/openai/models` URL under this provider's base URL."""
+        return f"{self.base_url}/openai/models?api-version={self.api_version}"
+
+    def get_azure_deployment_list_endpoint(self) -> str:
+        """Return the `/openai/deployments` URL under this provider's base URL (pinned api version)."""
+        # Please note that it has to be 2023-03-15-preview
+        # That's the only api version that works with this deployments endpoint
+        return f"{self.base_url}/openai/deployments?api-version=2023-03-15-preview"
+
+    def azure_openai_get_deployed_model_list(self) -> list:
+        """Return the available models that are also deployed, one entry per model ID.
+
+        https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP
+
+        Returns:
+            Model dicts (from the SDK's `model.to_dict()`); an empty list when the SDK model listing fails.
+
+        Raises:
+            RuntimeError: if the deployments request fails, times out, or returns an error status.
+        """
+        client = AzureOpenAI(api_key=self.api_key, api_version=self.api_version, azure_endpoint=self.base_url)
+
+        # 1. Get all the available models; this step is best-effort, so any failure yields no models
+        try:
+            models_list = client.models.list()
+        except Exception:
+            return []
+
+        all_available_models = [model.to_dict() for model in models_list.data]
+
+        # https://xxx.openai.azure.com/openai/models?api-version=xxx
+        headers = {"Content-Type": "application/json"}
+        if self.api_key is not None:
+            headers["api-key"] = f"{self.api_key}"
+
+        # 2. Get all the deployed models
+        url = self.get_azure_deployment_list_endpoint()
+        try:
+            response = requests.get(url, headers=headers, timeout=AZURE_DEPLOYMENT_LIST_TIMEOUT_SECONDS)
+            response.raise_for_status()
+        except requests.RequestException as e:
+            raise RuntimeError(f"Failed to retrieve model list: {e}") from e
+
+        deployed_model_names = {m["id"] for m in response.json().get("data", [])}
+
+        # 3. Only return the models in available models if they have been deployed
+        deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]
+
+        # 4. Remove redundant deployments, only include the ones with the latest deployment
+        # Map each model ID to its most recently created entry
+        latest_models = {}
+        for model in deployed_models:
+            model_id = model["id"]
+            # Keep the entry when the ID is new or it has a more recent created_at
+            if model_id not in latest_models or model["created_at"] > latest_models[model_id]["created_at"]:
+                latest_models[model_id] = model
+
+        # Extract the unique models
+        return list(latest_models.values())
+
     async def list_llm_models_async(self) -> list[LLMConfig]:
         # TODO (cliandy): asyncify
-        from letta.llm_api.azure_openai import azure_openai_get_chat_completion_model_list
+        model_list = self.azure_openai_get_deployed_model_list()
+        # Extract models that support text generation; entries without capabilities are skipped
+        model_options = [m for m in model_list if (m.get("capabilities") or {}).get("chat_completion") is True]
 
-        model_options = azure_openai_get_chat_completion_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
         configs = []
         for model_option in model_options:
             model_name = model_option["id"]
             context_window_size = self.get_model_context_window(model_name)
-            model_endpoint = get_azure_chat_completions_endpoint(self.base_url, model_name, self.api_version)
+            model_endpoint = self.get_azure_chat_completions_endpoint(model_name)
             configs.append(
                 LLMConfig(
                     model=model_name,
@@ -55,13 +140,22 @@ class AzureProvider(Provider):
 
     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
         # TODO (cliandy): asyncify dependent function calls
-        from letta.llm_api.azure_openai import azure_openai_get_embeddings_model_list
+        def valid_embedding_model(m: dict, require_embedding_in_name: bool = True):
+            valid_name = True
+            if require_embedding_in_name:
+                valid_name = "embedding" in m["id"]
+
+            return (m.get("capabilities") or {}).get("embeddings") is True and valid_name
+
+        model_list = self.azure_openai_get_deployed_model_list()
+        # Extract models that support embeddings
+
+        model_options = [m for m in model_list if valid_embedding_model(m)]
 
-        model_options = azure_openai_get_embeddings_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
         configs = []
         for model_option in model_options:
             model_name = model_option["id"]
-            model_endpoint = get_azure_embeddings_endpoint(self.base_url, model_name, self.api_version)
+            model_endpoint = self.get_azure_embeddings_endpoint(model_name)
             configs.append(
                 EmbeddingConfig(
                     embedding_model=model_name,