diff --git a/letta/llm_api/azure_openai.py b/letta/llm_api/azure_openai.py
deleted file mode 100644
index 52cb6d3a..00000000
--- a/letta/llm_api/azure_openai.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from collections import defaultdict
-
-import requests
-from openai import AzureOpenAI
-
-
-def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
-    return f"{base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
-
-
-def get_azure_embeddings_endpoint(base_url: str, model: str, api_version: str):
-    return f"{base_url}/openai/deployments/{model}/embeddings?api-version={api_version}"
-
-
-def get_azure_model_list_endpoint(base_url: str, api_version: str):
-    return f"{base_url}/openai/models?api-version={api_version}"
-
-
-def get_azure_deployment_list_endpoint(base_url: str):
-    # Please note that it has to be 2023-03-15-preview
-    # That's the only api version that works with this deployments endpoint
-    # TODO: Use the Azure Client library here instead
-    return f"{base_url}/openai/deployments?api-version=2023-03-15-preview"
-
-
-def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_version: str) -> list:
-    """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP"""
-
-    client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=base_url)
-
-    try:
-        models_list = client.models.list()
-    except Exception:
-        return []
-
-    all_available_models = [model.to_dict() for model in models_list.data]
-
-    # https://xxx.openai.azure.com/openai/models?api-version=xxx
-    headers = {"Content-Type": "application/json"}
-    if api_key is not None:
-        headers["api-key"] = f"{api_key}"
-
-    # 2. Get all the deployed models
-    url = get_azure_deployment_list_endpoint(base_url)
-    try:
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()
-    except requests.RequestException as e:
-        raise RuntimeError(f"Failed to retrieve model list: {e}")
-
-    deployed_models = response.json().get("data", [])
-    deployed_model_names = set([m["id"] for m in deployed_models])
-
-    # 3. Only return the models in available models if they have been deployed
-    deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]
-
-    # 4. Remove redundant deployments, only include the ones with the latest deployment
-    # Create a dictionary to store the latest model for each ID
-    latest_models = defaultdict()
-
-    # Iterate through the models and update the dictionary with the most recent model
-    for model in deployed_models:
-        model_id = model["id"]
-        updated_at = model["created_at"]
-
-        # If the model ID is new or the current model has a more recent created_at, update the dictionary
-        if model_id not in latest_models or updated_at > latest_models[model_id]["created_at"]:
-            latest_models[model_id] = model
-
-    # Extract the unique models
-    return list(latest_models.values())
-
-
-def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
-    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
-    # Extract models that support text generation
-    model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
-    return model_options
-
-
-def azure_openai_get_embeddings_model_list(base_url: str, api_key: str, api_version: str, require_embedding_in_name: bool = True) -> list:
-    def valid_embedding_model(m: dict):
-        valid_name = True
-        if require_embedding_in_name:
-            valid_name = "embedding" in m["id"]
-
-        return m.get("capabilities").get("embeddings") == True and valid_name
-
-    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
-    # Extract models that support embeddings
-
-    model_options = [m for m in model_list if valid_embedding_model(m)]
-
-    return model_options
diff --git a/letta/llm_api/azure_openai_constants.py b/letta/llm_api/azure_openai_constants.py
deleted file mode 100644
index ba4248ef..00000000
--- a/letta/llm_api/azure_openai_constants.py
+++ /dev/null
@@ -1,11 +0,0 @@
-AZURE_MODEL_TO_CONTEXT_LENGTH = {
-    "babbage-002": 16384,
-    "davinci-002": 16384,
-    "gpt-35-turbo-0613": 4096,
-    "gpt-35-turbo-1106": 16385,
-    "gpt-35-turbo-0125": 16385,
-    "gpt-4-0613": 8192,
-    "gpt-4o-mini-2024-07-18": 128000,
-    "gpt-4o-mini": 128000,
-    "gpt-4o": 128000,
-}
diff --git a/letta/schemas/providers/azure.py b/letta/schemas/providers/azure.py
index 32f871ca..0da8c5fa 100644
--- a/letta/schemas/providers/azure.py
+++ b/letta/schemas/providers/azure.py
@@ -1,16 +1,29 @@
+from collections import defaultdict
 from typing import ClassVar, Literal
 
+import requests
+from openai import AzureOpenAI
 from pydantic import Field, field_validator
 
 from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
 from letta.errors import ErrorCode, LLMAuthenticationError
-from letta.llm_api.azure_openai import get_azure_chat_completions_endpoint, get_azure_embeddings_endpoint
-from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.providers.base import Provider
 
+AZURE_MODEL_TO_CONTEXT_LENGTH = {  # context length keyed by Azure model name
+    "babbage-002": 16384,
+    "davinci-002": 16384,
+    "gpt-35-turbo-0613": 4096,
+    "gpt-35-turbo-1106": 16385,
+    "gpt-35-turbo-0125": 16385,
+    "gpt-4-0613": 8192,
+    "gpt-4o-mini-2024-07-18": 128000,
+    "gpt-4o-mini": 128000,
+    "gpt-4o": 128000,
+}
+
 
 class AzureProvider(Provider):
     LATEST_API_VERSION: ClassVar[str] = "2024-09-01-preview"
@@ -30,16 +43,78 @@ class AzureProvider(Provider):
     def replace_none_with_default(cls, v):
         return v if v is not None else cls.LATEST_API_VERSION
 
+    def get_azure_chat_completions_endpoint(self, model: str) -> str:  # chat-completions URL for deployment `model`
+        return f"{self.base_url}/openai/deployments/{model}/chat/completions?api-version={self.api_version}"
+
+    def get_azure_embeddings_endpoint(self, model: str) -> str:  # embeddings URL for deployment `model`
+        return f"{self.base_url}/openai/deployments/{model}/embeddings?api-version={self.api_version}"
+
+    def get_azure_model_list_endpoint(self) -> str:  # models-listing URL at self.api_version
+        return f"{self.base_url}/openai/models?api-version={self.api_version}"
+
+    def get_azure_deployment_list_endpoint(self) -> str:
+        """Return the deployments-listing URL under ``self.base_url``."""
+        # Pinned to 2023-03-15-preview (not self.api_version): the only api version that works with this endpoint.
+        return f"{self.base_url}/openai/deployments?api-version=2023-03-15-preview"
+
+    def azure_openai_get_deployed_model_list(self) -> list:
+        """Return the available models that also appear in this resource's deployment list.
+
+        Lists the models through the ``AzureOpenAI`` client, fetches the deployments
+        endpoint over HTTP, and keeps the models whose ``id`` also appears among the
+        deployment entries; when an ``id`` repeats, the entry with the greatest
+        ``created_at`` wins.
+
+        See https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP
+
+        Returns:
+            A list of model dicts (``model.to_dict()``); empty if listing models fails.
+
+        Raises:
+            RuntimeError: If the deployments request fails, times out, or returns an error status.
+        """
+        client = AzureOpenAI(api_key=self.api_key, api_version=self.api_version, azure_endpoint=self.base_url)
+
+        # 1. Get all available models. Best-effort: any failure here yields an empty list.
+        try:
+            models_list = client.models.list()
+        except Exception:
+            return []
+        all_available_models = [model.to_dict() for model in models_list.data]
+
+        # 2. Get all the deployed models
+        headers = {"Content-Type": "application/json"}
+        if self.api_key is not None:
+            headers["api-key"] = f"{self.api_key}"
+        try:
+            # Bounded wait so an unresponsive endpoint cannot hang the caller indefinitely.
+            response = requests.get(self.get_azure_deployment_list_endpoint(), headers=headers, timeout=30)
+            response.raise_for_status()
+        except requests.RequestException as e:
+            raise RuntimeError(f"Failed to retrieve model list: {e}") from e
+        deployed_model_names = {m["id"] for m in response.json().get("data", [])}
+
+        # 3. Keep only deployed models; 4. per id, keep the entry with the latest created_at.
+        latest_models = {}
+        for model in all_available_models:
+            model_id = model["id"]
+            if model_id not in deployed_model_names:
+                continue
+            if model_id not in latest_models or model["created_at"] > latest_models[model_id]["created_at"]:
+                latest_models[model_id] = model
+        return list(latest_models.values())
+
     async def list_llm_models_async(self) -> list[LLMConfig]:
         # TODO (cliandy): asyncify
-        from letta.llm_api.azure_openai import azure_openai_get_chat_completion_model_list
+        model_list = self.azure_openai_get_deployed_model_list()
+        # Extract models that support text generation
+        model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
 
-        model_options = azure_openai_get_chat_completion_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
         configs = []
         for model_option in model_options:
             model_name = model_option["id"]
             context_window_size = self.get_model_context_window(model_name)
-            model_endpoint = get_azure_chat_completions_endpoint(self.base_url, model_name, self.api_version)
+            model_endpoint = self.get_azure_chat_completions_endpoint(model_name)
             configs.append(
                 LLMConfig(
                     model=model_name,
@@ -55,13 +130,22 @@ class AzureProvider(Provider):
 
     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
         # TODO (cliandy): asyncify dependent function calls
-        from letta.llm_api.azure_openai import azure_openai_get_embeddings_model_list
+        def valid_embedding_model(m: dict, require_embedding_in_name: bool = True) -> bool:
+            """Return True if ``m`` reports embeddings support and passes the optional id-name check."""
+            if require_embedding_in_name and "embedding" not in m["id"]:
+                return False
+            # A missing or null "capabilities" entry counts as "no embeddings support" instead of raising.
+            return (m.get("capabilities") or {}).get("embeddings") == True
+
+        model_list = self.azure_openai_get_deployed_model_list()
+        # Extract models that support embeddings
+
+        model_options = [m for m in model_list if valid_embedding_model(m)]
 
-        model_options = azure_openai_get_embeddings_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
         configs = []
         for model_option in model_options:
             model_name = model_option["id"]
-            model_endpoint = get_azure_embeddings_endpoint(self.base_url, model_name, self.api_version)
+            model_endpoint = self.get_azure_embeddings_endpoint(model_name)
             configs.append(
                 EmbeddingConfig(
                     embedding_model=model_name,