chore: delete legacy azure client (#3911)
This commit is contained in:
@@ -1,94 +0,0 @@
|
||||
from collections import defaultdict
|
||||
|
||||
import requests
|
||||
from openai import AzureOpenAI
|
||||
|
||||
|
||||
def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
    """Return the Azure OpenAI chat-completions URL for a given deployment."""
    path = f"openai/deployments/{model}/chat/completions"
    return f"{base_url}/{path}?api-version={api_version}"
|
||||
|
||||
|
||||
def get_azure_embeddings_endpoint(base_url: str, model: str, api_version: str):
    """Return the Azure OpenAI embeddings URL for a given deployment."""
    path = f"openai/deployments/{model}/embeddings"
    return f"{base_url}/{path}?api-version={api_version}"
|
||||
|
||||
|
||||
def get_azure_model_list_endpoint(base_url: str, api_version: str):
    """Return the Azure OpenAI models-list URL for the resource."""
    return "{}/openai/models?api-version={}".format(base_url, api_version)
|
||||
|
||||
|
||||
def get_azure_deployment_list_endpoint(base_url: str):
    """Return the Azure OpenAI deployments-list URL for the resource.

    NOTE: the api-version is hard-coded to 2023-03-15-preview because that is
    the only api version that works with this deployments endpoint.
    TODO: Use the Azure Client library here instead.
    """
    return base_url + "/openai/deployments?api-version=2023-03-15-preview"
|
||||
|
||||
|
||||
def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_version: str) -> list:
    """Return the deployed Azure OpenAI models, de-duplicated to the newest deployment per id.

    https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP

    Args:
        base_url: Azure OpenAI resource endpoint, e.g. ``https://xxx.openai.azure.com``.
        api_key: Azure API key; also sent as the ``api-key`` header for the raw REST call.
        api_version: API version used for the SDK models-list call.

    Returns:
        A list of model dicts (as returned by ``model.to_dict()``); empty list if the
        SDK models-list call fails (deliberate best-effort behavior).

    Raises:
        RuntimeError: if the deployments-list REST request fails.
    """
    client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=base_url)

    # 1. All models the resource could serve (best-effort: empty on any SDK failure,
    # preserving the original silent-fallback contract for callers).
    try:
        models_list = client.models.list()
    except Exception:
        return []
    all_available_models = [model.to_dict() for model in models_list.data]

    # https://xxx.openai.azure.com/openai/models?api-version=xxx
    headers = {"Content-Type": "application/json"}
    if api_key is not None:
        headers["api-key"] = f"{api_key}"

    # 2. Get all the deployed models. A timeout guards against the request hanging
    # indefinitely (requests has no default timeout).
    url = get_azure_deployment_list_endpoint(base_url)
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to retrieve model list: {e}") from e

    deployed_model_names = {m["id"] for m in response.json().get("data", [])}

    # 3. Only return the models in available models if they have been deployed
    deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]

    # 4. Remove redundant deployments, only include the ones with the latest deployment.
    # A plain dict suffices here — the original used defaultdict() with no factory,
    # which behaves exactly like dict.
    latest_models: dict = {}
    for model in deployed_models:
        model_id = model["id"]
        updated_at = model["created_at"]
        # Keep whichever entry for this id has the most recent created_at.
        if model_id not in latest_models or updated_at > latest_models[model_id]["created_at"]:
            latest_models[model_id] = model

    # Extract the unique models
    return list(latest_models.values())
|
||||
|
||||
|
||||
def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
    """Return the deployed models whose capabilities report chat-completion support.

    Args:
        base_url: Azure OpenAI resource endpoint.
        api_key: Azure API key.
        api_version: API version for the models-list call.

    Returns:
        Model dicts whose ``capabilities.chat_completion`` flag is true.
    """
    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
    # Extract models that support text generation. The `{}` default guards
    # against entries missing a "capabilities" key (was an AttributeError).
    model_options = [m for m in model_list if m.get("capabilities", {}).get("chat_completion") == True]
    return model_options
|
||||
|
||||
|
||||
def azure_openai_get_embeddings_model_list(base_url: str, api_key: str, api_version: str, require_embedding_in_name: bool = True) -> list:
    """Return the deployed models whose capabilities report embeddings support.

    Args:
        base_url: Azure OpenAI resource endpoint.
        api_key: Azure API key.
        api_version: API version for the models-list call.
        require_embedding_in_name: if True, additionally require "embedding"
            to appear in the model id (filters out chat models that happen to
            advertise an embeddings capability).

    Returns:
        Model dicts that pass the embeddings-capability (and optional name) filter.
    """

    def valid_embedding_model(m: dict):
        # Optional name filter on the model id.
        valid_name = True
        if require_embedding_in_name:
            valid_name = "embedding" in m["id"]
        # The `{}` default guards against entries missing a "capabilities"
        # key (was an AttributeError).
        return m.get("capabilities", {}).get("embeddings") == True and valid_name

    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
    # Extract models that support embeddings
    model_options = [m for m in model_list if valid_embedding_model(m)]
    return model_options
|
||||
@@ -1,11 +0,0 @@
|
||||
# Maximum context window (in tokens) per supported Azure OpenAI model.
# NOTE: Azure deployment names use "gpt-35-turbo" (no dot), unlike the
# upstream OpenAI "gpt-3.5-turbo" naming.
AZURE_MODEL_TO_CONTEXT_LENGTH = {
    "babbage-002": 16384,
    "davinci-002": 16384,
    "gpt-35-turbo-0613": 4096,
    "gpt-35-turbo-1106": 16385,
    "gpt-35-turbo-0125": 16385,
    "gpt-4-0613": 8192,
    "gpt-4o-mini-2024-07-18": 128000,
    "gpt-4o-mini": 128000,
    "gpt-4o": 128000,
}
|
||||
@@ -1,16 +1,29 @@
|
||||
from collections import defaultdict
|
||||
from typing import ClassVar, Literal
|
||||
|
||||
import requests
|
||||
from openai import AzureOpenAI
|
||||
from pydantic import Field, field_validator
|
||||
|
||||
from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
|
||||
from letta.errors import ErrorCode, LLMAuthenticationError
|
||||
from letta.llm_api.azure_openai import get_azure_chat_completions_endpoint, get_azure_embeddings_endpoint
|
||||
from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH
|
||||
from letta.schemas.embedding_config import EmbeddingConfig
|
||||
from letta.schemas.enums import ProviderCategory, ProviderType
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.providers.base import Provider
|
||||
|
||||
# Maximum context window (in tokens) per supported Azure OpenAI model.
# NOTE: Azure deployment names use "gpt-35-turbo" (no dot), unlike the
# upstream OpenAI "gpt-3.5-turbo" naming.
AZURE_MODEL_TO_CONTEXT_LENGTH = {
    "babbage-002": 16384,
    "davinci-002": 16384,
    "gpt-35-turbo-0613": 4096,
    "gpt-35-turbo-1106": 16385,
    "gpt-35-turbo-0125": 16385,
    "gpt-4-0613": 8192,
    "gpt-4o-mini-2024-07-18": 128000,
    "gpt-4o-mini": 128000,
    "gpt-4o": 128000,
}
|
||||
|
||||
|
||||
class AzureProvider(Provider):
|
||||
LATEST_API_VERSION: ClassVar[str] = "2024-09-01-preview"
|
||||
@@ -30,16 +43,78 @@ class AzureProvider(Provider):
|
||||
def replace_none_with_default(cls, v):
    """Fall back to the latest known API version when none was supplied."""
    if v is None:
        return cls.LATEST_API_VERSION
    return v
|
||||
|
||||
def get_azure_chat_completions_endpoint(self, model: str):
    """Chat-completions URL for *model* under this provider's resource."""
    return "{}/openai/deployments/{}/chat/completions?api-version={}".format(
        self.base_url, model, self.api_version
    )
|
||||
|
||||
def get_azure_embeddings_endpoint(self, model: str):
    """Embeddings URL for *model* under this provider's resource."""
    return "{}/openai/deployments/{}/embeddings?api-version={}".format(
        self.base_url, model, self.api_version
    )
|
||||
|
||||
def get_azure_model_list_endpoint(self):
    """Models-list URL for this provider's resource."""
    return "{}/openai/models?api-version={}".format(self.base_url, self.api_version)
|
||||
|
||||
def get_azure_deployment_list_endpoint(self):
    """Deployments-list URL for this provider's resource.

    NOTE: the api-version is hard-coded to 2023-03-15-preview because that is
    the only api version that works with this deployments endpoint.
    """
    return self.base_url + "/openai/deployments?api-version=2023-03-15-preview"
|
||||
|
||||
def azure_openai_get_deployed_model_list(self) -> list:
    """Return the deployed Azure OpenAI models, de-duplicated to the newest deployment per id.

    https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP

    Returns:
        A list of model dicts (as returned by ``model.to_dict()``); empty list if the
        SDK models-list call fails (deliberate best-effort behavior).

    Raises:
        RuntimeError: if the deployments-list REST request fails.
    """
    client = AzureOpenAI(api_key=self.api_key, api_version=self.api_version, azure_endpoint=self.base_url)

    # 1. All models the resource could serve (best-effort: empty on any SDK failure,
    # preserving the original silent-fallback contract for callers).
    try:
        models_list = client.models.list()
    except Exception:
        return []
    all_available_models = [model.to_dict() for model in models_list.data]

    # https://xxx.openai.azure.com/openai/models?api-version=xxx
    headers = {"Content-Type": "application/json"}
    if self.api_key is not None:
        headers["api-key"] = f"{self.api_key}"

    # 2. Get all the deployed models. A timeout guards against the request hanging
    # indefinitely (requests has no default timeout).
    url = self.get_azure_deployment_list_endpoint()
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to retrieve model list: {e}") from e

    deployed_model_names = {m["id"] for m in response.json().get("data", [])}

    # 3. Only return the models in available models if they have been deployed
    deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]

    # 4. Remove redundant deployments, only include the ones with the latest deployment.
    # A plain dict suffices here — the original used defaultdict() with no factory,
    # which behaves exactly like dict.
    latest_models: dict = {}
    for model in deployed_models:
        model_id = model["id"]
        updated_at = model["created_at"]
        # Keep whichever entry for this id has the most recent created_at.
        if model_id not in latest_models or updated_at > latest_models[model_id]["created_at"]:
            latest_models[model_id] = model

    # Extract the unique models
    return list(latest_models.values())
|
||||
|
||||
async def list_llm_models_async(self) -> list[LLMConfig]:
|
||||
# TODO (cliandy): asyncify
|
||||
from letta.llm_api.azure_openai import azure_openai_get_chat_completion_model_list
|
||||
model_list = self.azure_openai_get_deployed_model_list()
|
||||
# Extract models that support text generation
|
||||
model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
|
||||
|
||||
model_options = azure_openai_get_chat_completion_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
|
||||
configs = []
|
||||
for model_option in model_options:
|
||||
model_name = model_option["id"]
|
||||
context_window_size = self.get_model_context_window(model_name)
|
||||
model_endpoint = get_azure_chat_completions_endpoint(self.base_url, model_name, self.api_version)
|
||||
model_endpoint = self.get_azure_chat_completions_endpoint(model_name)
|
||||
configs.append(
|
||||
LLMConfig(
|
||||
model=model_name,
|
||||
@@ -55,13 +130,22 @@ class AzureProvider(Provider):
|
||||
|
||||
async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
|
||||
# TODO (cliandy): asyncify dependent function calls
|
||||
from letta.llm_api.azure_openai import azure_openai_get_embeddings_model_list
|
||||
def valid_embedding_model(m: dict, require_embedding_in_name: bool = True):
|
||||
valid_name = True
|
||||
if require_embedding_in_name:
|
||||
valid_name = "embedding" in m["id"]
|
||||
|
||||
return m.get("capabilities").get("embeddings") == True and valid_name
|
||||
|
||||
model_list = self.azure_openai_get_deployed_model_list()
|
||||
# Extract models that support embeddings
|
||||
|
||||
model_options = [m for m in model_list if valid_embedding_model(m)]
|
||||
|
||||
model_options = azure_openai_get_embeddings_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
|
||||
configs = []
|
||||
for model_option in model_options:
|
||||
model_name = model_option["id"]
|
||||
model_endpoint = get_azure_embeddings_endpoint(self.base_url, model_name, self.api_version)
|
||||
model_endpoint = self.get_azure_embeddings_endpoint(model_name)
|
||||
configs.append(
|
||||
EmbeddingConfig(
|
||||
embedding_model=model_name,
|
||||
|
||||
Reference in New Issue
Block a user