diff --git a/memgpt/agent.py b/memgpt/agent.py index 9ba2ff11..e9ace060 100644 --- a/memgpt/agent.py +++ b/memgpt/agent.py @@ -8,7 +8,7 @@ from memgpt.persistence_manager import LocalStateManager from memgpt.config import AgentConfig, MemGPTConfig from .system import get_login_event, package_function_response, package_summarize_message, get_initial_boot_messages from .memory import CoreMemory as Memory, summarize_messages -from .openai_tools import completions_with_backoff as create +from .openai_tools import completions_with_backoff as create, is_context_overflow_error from memgpt.openai_tools import chat_completion_with_backoff from .utils import get_local_time, parse_json, united_diff, printd, count_tokens, get_schema_diff from .constants import ( @@ -649,14 +649,14 @@ class Agent(object): printd(f"step() failed\nuser_message = {user_message}\nerror = {e}") # If we got a context alert, try trimming the messages length, then try again - if "maximum context length" in str(e): + if is_context_overflow_error(e): # A separate API call to run a summarizer self.summarize_messages_inplace() # Try step again return self.step(user_message, first_message=first_message) else: - printd(f"step() failed with openai.InvalidRequestError, but didn't recognize the error message: '{str(e)}'") + printd(f"step() failed with an unrecognized exception: '{str(e)}'") raise e def summarize_messages_inplace(self, cutoff=None, preserve_last_N_messages=True): diff --git a/memgpt/cli/cli.py b/memgpt/cli/cli.py index 17237165..8a942c4f 100644 --- a/memgpt/cli/cli.py +++ b/memgpt/cli/cli.py @@ -6,7 +6,6 @@ import logging import os from prettytable import PrettyTable import questionary -import openai from llama_index import set_global_service_context from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext diff --git a/memgpt/cli/cli_config.py b/memgpt/cli/cli_config.py index eb9cf854..889bfd50 100644 --- a/memgpt/cli/cli_config.py +++ b/memgpt/cli/cli_config.py @@ -1,6 
+1,5 @@ import builtins import questionary -import openai from prettytable import PrettyTable import typer import os diff --git a/memgpt/embeddings.py b/memgpt/embeddings.py index 6c4a4525..8e3cd43c 100644 --- a/memgpt/embeddings.py +++ b/memgpt/embeddings.py @@ -105,9 +105,12 @@ def embedding_model(): ) return model elif endpoint == "azure": + # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings + model = "text-embedding-ada-002" + deployment = config.azure_embedding_deployment if config.azure_embedding_deployment is not None else model return OpenAIEmbedding( - model="text-embedding-ada-002", - deployment_name=config.azure_embedding_deployment, + model=model, + deployment_name=deployment, api_key=config.azure_key, api_base=config.azure_endpoint, api_type="azure", diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py index cfeb718c..f249acb2 100644 --- a/memgpt/local_llm/chat_completion_proxy.py +++ b/memgpt/local_llm/chat_completion_proxy.py @@ -4,18 +4,20 @@ import os import requests import json -from .webui.api import get_webui_completion -from .webui.legacy_api import get_webui_completion as get_webui_completion_legacy -from .lmstudio.api import get_lmstudio_completion -from .llamacpp.api import get_llamacpp_completion -from .koboldcpp.api import get_koboldcpp_completion -from .ollama.api import get_ollama_completion -from .vllm.api import get_vllm_completion -from .llm_chat_completion_wrappers import airoboros, dolphin, zephyr, simple_summary_wrapper -from .constants import DEFAULT_WRAPPER -from .utils import DotDict, get_available_wrappers -from ..prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE -from ..errors import LocalLLMConnectionError, LocalLLMError +from box import Box + +from memgpt.local_llm.webui.api import get_webui_completion +from memgpt.local_llm.webui.legacy_api import get_webui_completion as get_webui_completion_legacy +from memgpt.local_llm.lmstudio.api 
import get_lmstudio_completion +from memgpt.local_llm.llamacpp.api import get_llamacpp_completion +from memgpt.local_llm.koboldcpp.api import get_koboldcpp_completion +from memgpt.local_llm.ollama.api import get_ollama_completion +from memgpt.local_llm.vllm.api import get_vllm_completion +from memgpt.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper +from memgpt.local_llm.constants import DEFAULT_WRAPPER +from memgpt.local_llm.utils import get_available_wrappers +from memgpt.prompts.gpt_summarize import SYSTEM as SUMMARIZE_SYSTEM_MESSAGE +from memgpt.errors import LocalLLMConnectionError, LocalLLMError endpoint = os.getenv("OPENAI_API_BASE") endpoint_type = os.getenv("BACKEND_TYPE") # default None == ChatCompletion @@ -119,25 +121,21 @@ def get_chat_completion( raise LocalLLMError(f"Failed to parse JSON from local LLM response - error: {str(e)}") # unpack with response.choices[0].message.content - response = DotDict( + response = Box( { "model": model, "choices": [ - DotDict( - { - "message": DotDict(chat_completion_result), - "finish_reason": "stop", # TODO vary based on backend response - } - ) - ], - "usage": DotDict( { - # TODO fix, actually use real info - "prompt_tokens": 0, - "completion_tokens": 0, - "total_tokens": 0, + "message": chat_completion_result, + "finish_reason": "stop", # TODO vary based on backend response } - ), + ], + "usage": { + # TODO fix, actually use real info + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + }, } ) return response diff --git a/memgpt/local_llm/utils.py b/memgpt/local_llm/utils.py index c0a7ce3f..386bc63c 100644 --- a/memgpt/local_llm/utils.py +++ b/memgpt/local_llm/utils.py @@ -6,23 +6,6 @@ import memgpt.local_llm.llm_chat_completion_wrappers.dolphin as dolphin import memgpt.local_llm.llm_chat_completion_wrappers.zephyr as zephyr -class DotDict(dict): - """Allow dot access on properties similar to OpenAI response object""" - - def __getattr__(self, attr): - return self.get(attr) - 
- def __setattr__(self, key, value): - self[key] = value - - # following methods necessary for pickling - def __getstate__(self): - return vars(self) - - def __setstate__(self, state): - vars(self).update(state) - - def load_grammar_file(grammar): # Set grammar grammar_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "grammars", f"{grammar}.gbnf") diff --git a/memgpt/main.py b/memgpt/main.py index 75ffd7eb..e493f14d 100644 --- a/memgpt/main.py +++ b/memgpt/main.py @@ -614,6 +614,11 @@ def run_agent_loop(memgpt_agent, first, no_verify=False, cfg=None, strip_ui=Fals with console.status("[bold cyan]Thinking...") as status: new_messages, user_message, skip_next_user_input = process_agent_step(user_message, no_verify) break + except KeyboardInterrupt: + print("User interrupt occured.") + retry = questionary.confirm("Retry agent.step()?").ask() + if not retry: + break except Exception as e: print("An exception ocurred when running agent.step(): ") traceback.print_exc() diff --git a/memgpt/openai_tools.py b/memgpt/openai_tools.py index 8f5622b6..72fd6a9d 100644 --- a/memgpt/openai_tools.py +++ b/memgpt/openai_tools.py @@ -1,9 +1,12 @@ import random import os import time - +import requests import time from typing import Callable, TypeVar +import urllib + +from box import Box from memgpt.local_llm.chat_completion_proxy import get_chat_completion @@ -11,10 +14,250 @@ HOST = os.getenv("OPENAI_API_BASE") HOST_TYPE = os.getenv("BACKEND_TYPE") # default None == ChatCompletion R = TypeVar("R") -import openai -if HOST is not None: - openai.api_base = HOST +def is_context_overflow_error(exception): + from memgpt.utils import printd + + match_string = "maximum context length" + + # Backwards compatability with openai python package/client v0.28 (pre-v1 client migration) + if match_string in str(exception): + printd(f"Found '{match_string}' in str(exception)={(str(exception))}") + return True + + # Based on python requests + OpenAI REST API (/v1) + elif 
isinstance(exception, requests.exceptions.HTTPError): + if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""): + try: + error_details = exception.response.json() + if "error" not in error_details: + printd(f"HTTPError occured, but couldn't find error field: {error_details}") + return False + else: + error_details = error_details["error"] + + # Check for the specific error code + if error_details.get("code") == "context_length_exceeded": + printd(f"HTTPError occured, caught error code {error_details.get('code')}") + return True + # Soft-check for "maximum context length" inside of the message + elif error_details.get("message") and "maximum context length" in error_details.get("message"): + printd(f"HTTPError occured, found '{match_string}' in error message contents ({error_details})") + return True + else: + printd(f"HTTPError occured, but unknown error message: {error_details}") + return False + except ValueError: + # JSON decoding failed + printd(f"HTTPError occurred ({exception}), but no JSON error message.") + + # Generic fail + else: + return False + + +def smart_urljoin(base_url, relative_url): + """urljoin is stupid and wants a trailing / at the end of the endpoint address, or it will chop the suffix off""" + if not base_url.endswith("/"): + base_url += "/" + return urllib.parse.urljoin(base_url, relative_url) + + +def clean_azure_endpoint(raw_endpoint_name): + """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'""" + endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "") + endpoint_address = endpoint_address.replace("http://", "") + endpoint_address = endpoint_address.replace("https://", "") + return endpoint_address + + +def openai_chat_completions_request(url, api_key, data): + """https://platform.openai.com/docs/guides/text-generation?lang=curl""" + from memgpt.utils import printd + + url = smart_urljoin(url, "chat/completions") + headers = 
{"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} + + # If functions == None, strip from the payload + if "functions" in data and data["functions"] is None: + data.pop("functions") + data.pop("function_call", None) # extra safe, should exist always (default="auto") + + printd(f"Sending request to {url}") + try: + # Example code to trigger a rate limit response: + # mock_response = requests.Response() + # mock_response.status_code = 429 + # http_error = requests.exceptions.HTTPError("429 Client Error: Too Many Requests") + # http_error.response = mock_response + # raise http_error + + # Example code to trigger a context overflow response (for an 8k model) + # data["messages"][-1]["content"] = " ".join(["repeat after me this is not a fluke"] * 1000) + + response = requests.post(url, headers=headers, json=data) + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response = {response}") + response = Box(response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + try: + response = response.json() + except: + pass + printd(f"Got HTTPError, exception={http_err}, payload={data}, response={response}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + try: + response = response.json() + except: + pass + printd(f"Got RequestException, exception={req_err}, response={response}") + raise req_err + except Exception as e: + # Handle other potential errors + try: + response = response.json() + except: + pass + printd(f"Got unknown Exception, exception={e}, response={response}") + raise e + + +def openai_embeddings_request(url, api_key, data): + """https://platform.openai.com/docs/api-reference/embeddings/create""" + from memgpt.utils import 
printd + + url = smart_urljoin(url, "embeddings") + headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"} + + printd(f"Sending request to {url}") + try: + response = requests.post(url, headers=headers, json=data) + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response = {response}") + response = Box(response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + try: + response = response.json() + except: + pass + printd(f"Got HTTPError, exception={http_err}, response={response}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + try: + response = response.json() + except: + pass + printd(f"Got RequestException, exception={req_err}, response={response}") + raise req_err + except Exception as e: + # Handle other potential errors + try: + response = response.json() + except: + pass + printd(f"Got unknown Exception, exception={e}, response={response}") + raise e + + +def azure_openai_chat_completions_request(resource_name, deployment_id, api_version, api_key, data): + """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions""" + from memgpt.utils import printd + + resource_name = clean_azure_endpoint(resource_name) + url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}" + headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} + + # If functions == None, strip from the payload + if "functions" in data and data["functions"] is None: + data.pop("functions") + data.pop("function_call", None) # extra safe, should exist always (default="auto") + + printd(f"Sending request to {url}") + try: + response = 
requests.post(url, headers=headers, json=data) + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response = {response}") + # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it + if "content" not in response["choices"][0].get("message"): + response["choices"][0]["message"]["content"] = None + response = Box(response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + try: + response = response.json() + except: + pass + printd(f"Got HTTPError, exception={http_err}, response={response}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + try: + response = response.json() + except: + pass + printd(f"Got RequestException, exception={req_err}, response={response}") + raise req_err + except Exception as e: + # Handle other potential errors + try: + response = response.json() + except: + pass + printd(f"Got unknown Exception, exception={e}, response={response}") + raise e + + +def azure_openai_embeddings_request(resource_name, deployment_id, api_version, api_key, data): + """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings""" + from memgpt.utils import printd + + resource_name = clean_azure_endpoint(resource_name) + url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}" + headers = {"Content-Type": "application/json", "api-key": f"{api_key}"} + + printd(f"Sending request to {url}") + try: + response = requests.post(url, headers=headers, json=data) + response.raise_for_status() # Raises HTTPError for 4XX/5XX status + response = response.json() # convert to dict from string + printd(f"response = {response}") + 
response = Box(response) # convert to 'dot-dict' style which is the openai python client default + return response + except requests.exceptions.HTTPError as http_err: + # Handle HTTP errors (e.g., response 4XX, 5XX) + try: + response = response.json() + except: + pass + printd(f"Got HTTPError, exception={http_err}, response={response}") + raise http_err + except requests.exceptions.RequestException as req_err: + # Handle other requests-related errors (e.g., connection error) + try: + response = response.json() + except: + pass + printd(f"Got RequestException, exception={req_err}, response={response}") + raise req_err + except Exception as e: + # Handle other potential errors + try: + response = response.json() + except: + pass + printd(f"Got unknown Exception, exception={e}, response={response}") + raise e def retry_with_exponential_backoff( @@ -23,11 +266,15 @@ def retry_with_exponential_backoff( exponential_base: float = 2, jitter: bool = True, max_retries: int = 20, - errors: tuple = (openai.error.RateLimitError,), + # List of OpenAI error codes: https://github.com/openai/openai-python/blob/17ac6779958b2b74999c634c4ea4c7b74906027a/src/openai/_client.py#L227-L250 + # 429 = rate limit + error_codes: tuple = (429,), ): """Retry a function with exponential backoff.""" def wrapper(*args, **kwargs): + from memgpt.utils import printd + # Initialize variables num_retries = 0 delay = initial_delay @@ -37,20 +284,25 @@ def retry_with_exponential_backoff( try: return func(*args, **kwargs) - # Retry on specified errors - except errors as e: - # Increment retries - num_retries += 1 + except requests.exceptions.HTTPError as http_err: + # Retry on specified errors + if http_err.response.status_code in error_codes: + # Increment retries + num_retries += 1 - # Check if max retries has been reached - if num_retries > max_retries: - raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") + # Check if max retries has been reached + if num_retries > max_retries: + 
raise Exception(f"Maximum number of retries ({max_retries}) exceeded.") - # Increment the delay - delay *= exponential_base * (1 + jitter * random.random()) + # Increment the delay + delay *= exponential_base * (1 + jitter * random.random()) - # Sleep for the delay - time.sleep(delay) + # Sleep for the delay + printd(f"Got a rate limit error ('{http_err}') on LLM backend request, waiting {int(delay)}s then retrying...") + time.sleep(delay) + else: + # For other HTTP errors, re-raise the exception + raise # Raise exceptions for any errors not specified except Exception as e: @@ -77,15 +329,20 @@ def completions_with_backoff(**kwargs): kwargs.pop("model") if "context_window" in kwargs: kwargs.pop("context_window") - return openai.ChatCompletion.create(**kwargs) + + api_url = "https://api.openai.com/v1" + api_key = os.getenv("OPENAI_API_KEY") + if api_key is None: + raise Exception("OPENAI_API_KEY is not defined - please set it") + return openai_chat_completions_request(api_url, api_key, data=kwargs) @retry_with_exponential_backoff def chat_completion_with_backoff( agent_config, messages, - functions, - function_call, + functions=None, + function_call="auto", ): from memgpt.utils import printd from memgpt.config import MemGPTConfig @@ -95,32 +352,33 @@ def chat_completion_with_backoff( printd(f"Using model {agent_config.model_endpoint_type}, endpoint: {agent_config.model_endpoint}") if agent_config.model_endpoint_type == "openai": # openai - openai.api_base = agent_config.model_endpoint - return openai.ChatCompletion.create( - model=agent_config.model, messages=messages, functions=functions, function_call=function_call, user=config.anon_clientid + return openai_chat_completions_request( + url=agent_config.model_endpoint, # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions + api_key=config.openai_key, # 'sk....' 
+ data=dict( + model=agent_config.model, + messages=messages, + functions=functions, + function_call=function_call, + user=config.anon_clientid, + ), ) elif agent_config.model_endpoint_type == "azure": # azure - openai.api_type = "azure" - openai.api_key = config.azure_key - openai.api_base = config.azure_endpoint - openai.api_version = config.azure_version - if config.azure_deployment is not None: - deployment_id = config.azure_deployment - engine = None - model = config.model - else: - engine = MODEL_TO_AZURE_ENGINE[config.model] - model = None - deployment_id = None - return openai.ChatCompletion.create( - model=model, - messages=messages, - engine=engine, - deployment_id=deployment_id, - functions=functions, - function_call=function_call, - user=client_id, + azure_deployment = config.azure_deployment if config.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[agent_config.model] + return azure_openai_chat_completions_request( + resource_name=config.azure_endpoint, + deployment_id=azure_deployment, + api_version=config.azure_version, + api_key=config.azure_key, + data=dict( + # NOTE: don't pass model to Azure calls, that is the deployment_id + # model=agent_config.model, + messages=messages, + functions=functions, + function_call=function_call, + user=config.anon_clientid, + ), ) else: # local model return get_chat_completion( @@ -146,7 +404,25 @@ def create_embedding_with_backoff(**kwargs): else: kwargs["engine"] = kwargs["model"] kwargs.pop("model") - return openai.Embedding.create(**kwargs) + + api_key = os.getenv("AZURE_OPENAI_KEY") + if api_key is None: + raise Exception("AZURE_OPENAI_KEY is not defined - please set it") + # TODO check + # api_version??? + # resource_name??? + # "engine" instead of "model"??? 
+ return azure_openai_embeddings_request( + resource_name=None, deployment_id=azure_openai_deployment, api_version=None, api_key=api_key, data=kwargs + ) + + else: + # return openai.Embedding.create(**kwargs) + api_url = "https://api.openai.com/v1" + api_key = os.getenv("OPENAI_API_KEY") + if api_key is None: + raise Exception("OPENAI_API_KEY is not defined - please set it") + return openai_embeddings_request(url=api_url, api_key=api_key, data=kwargs) def get_embedding_with_backoff(text, model="text-embedding-ada-002"): @@ -157,11 +433,12 @@ def get_embedding_with_backoff(text, model="text-embedding-ada-002"): MODEL_TO_AZURE_ENGINE = { + "gpt-4-1106-preview": "gpt-4-1106-preview", # TODO check "gpt-4": "gpt-4", "gpt-4-32k": "gpt-4-32k", - "gpt-3.5": "gpt-35-turbo", - "gpt-3.5-turbo": "gpt-35-turbo", - "gpt-3.5-turbo-16k": "gpt-35-turbo-16k", + "gpt-3.5": "gpt-35-turbo", # diff + "gpt-3.5-turbo": "gpt-35-turbo", # diff + "gpt-3.5-turbo-16k": "gpt-35-turbo-16k", # diff } @@ -195,12 +472,6 @@ def configure_azure_support(): print(f"Error: missing Azure OpenAI environment variables. 
Please see README section on Azure.") return - openai.api_type = "azure" - openai.api_key = azure_openai_key - openai.api_base = azure_openai_endpoint - openai.api_version = azure_openai_version - # deployment gets passed into chatcompletion - def check_azure_embeddings(): azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT") diff --git a/poetry.lock b/poetry.lock index c0b4f935..ecb8d852 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2353,26 +2353,31 @@ python-versions = ">=3.8" files = [ {file = "PyMuPDF-1.23.6-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:c4eb71b88a22c1008f764b3121b36a9d25340f9920b870508356050a365d9ca1"}, {file = "PyMuPDF-1.23.6-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:3ce2d3678dbf822cff213b1902f2e59756313e543efd516a2b4f15bb0353bd6c"}, + {file = "PyMuPDF-1.23.6-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:2e27857a15c8a810d0b66455b8c8a79013640b6267a9b4ea808a5fe1f47711f2"}, {file = "PyMuPDF-1.23.6-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:5cd05700c8f18c9dafef63ac2ed3b1099ca06017ca0c32deea13093cea1b8671"}, {file = "PyMuPDF-1.23.6-cp310-none-win32.whl", hash = "sha256:951d280c1daafac2fd6a664b031f7f98b27eb2def55d39c92a19087bd8041c5d"}, {file = "PyMuPDF-1.23.6-cp310-none-win_amd64.whl", hash = "sha256:19d1711d5908c4527ad2deef5af2d066649f3f9a12950faf30be5f7251d18abc"}, {file = "PyMuPDF-1.23.6-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:3f0f9b76bc4f039e7587003cbd40684d93a98441549dd033cab38ca07d61988d"}, {file = "PyMuPDF-1.23.6-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:e047571d799b30459ad7ee0bc6e68900a7f6b928876f956c976f279808814e72"}, + {file = "PyMuPDF-1.23.6-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:1cbcf05c06f314fdf3042ceee674e9a0ac7fae598347d5442e2138c6046d4e82"}, {file = "PyMuPDF-1.23.6-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:e33f8ec5ba7265fe78b30332840b8f454184addfa79f9c27f160f19789aa5ffd"}, {file = "PyMuPDF-1.23.6-cp311-none-win32.whl", hash = 
"sha256:2c141f33e2733e48de8524dfd2de56d889feef0c7773b20a8cd216c03ab24793"}, {file = "PyMuPDF-1.23.6-cp311-none-win_amd64.whl", hash = "sha256:8fd9c4ee1dd4744a515b9190d8ba9133348b0d94c362293ed77726aa1c13b0a6"}, {file = "PyMuPDF-1.23.6-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:4d06751d5cd213e96f84f2faaa71a51cf4d641851e07579247ca1190121f173b"}, {file = "PyMuPDF-1.23.6-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:526b26a5207e923aab65877ad305644402851823a352cb92d362053426899354"}, + {file = "PyMuPDF-1.23.6-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:0f852d125defc26716878b1796f4d68870e9065041d00cf46bde317fd8d30e68"}, {file = "PyMuPDF-1.23.6-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:5bdf7020b90987412381acc42427dd1b7a03d771ee9ec273de003e570164ec1a"}, {file = "PyMuPDF-1.23.6-cp312-none-win32.whl", hash = "sha256:e2d64799c6d9a3735be9e162a5d11061c0b7fbcb1e5fc7446e0993d0f815a93a"}, {file = "PyMuPDF-1.23.6-cp312-none-win_amd64.whl", hash = "sha256:c8ea81964c1433ea163ad4b53c56053a87a9ef6e1bd7a879d4d368a3988b60d1"}, {file = "PyMuPDF-1.23.6-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:761501a4965264e81acdd8f2224f993020bf24474e9b34fcdb5805a6826eda1c"}, {file = "PyMuPDF-1.23.6-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:fd8388e82b6045807d19addf310d8119d32908e89f76cc8bbf8cf1ec36fce947"}, + {file = "PyMuPDF-1.23.6-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:4ac9673a6d6ee7e80cb242dacb43f9ca097b502d9c5e44687dbdffc2bce7961a"}, {file = "PyMuPDF-1.23.6-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:6e319c1f49476e07b9a12017c2d031687617713f8a46b7adcec03c636ed04607"}, {file = "PyMuPDF-1.23.6-cp38-none-win32.whl", hash = "sha256:1103eea4ab727e32b9cb93347b35f71562033018c333a7f3a17d115e980fea4a"}, {file = "PyMuPDF-1.23.6-cp38-none-win_amd64.whl", hash = "sha256:991a37e1cba43775ce094da87cf0bf72172a5532a09644003276bc8bfdfe9f1a"}, {file = "PyMuPDF-1.23.6-cp39-none-macosx_10_9_x86_64.whl", hash = 
"sha256:57725e15872f7ab67a9fb3e06e5384d1047b2121e85755c93a6d4266d3ca8983"}, {file = "PyMuPDF-1.23.6-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:224c341fe254adda97c8f06a4c5838cdbcf609fa89e70b1fb179752533378f2f"}, + {file = "PyMuPDF-1.23.6-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:271bdf6059bb8347f9c9c6b721329bd353a933681b1fc62f43241b410e7ab7ae"}, {file = "PyMuPDF-1.23.6-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:57e22bea69690450197b34dcde16bd9fe0265ac4425b4033535ccc5c044246fb"}, {file = "PyMuPDF-1.23.6-cp39-none-win32.whl", hash = "sha256:2885a26220a32fb45ea443443b72194bb7107d6862d8d546b59e4ad0c8a1f2c9"}, {file = "PyMuPDF-1.23.6-cp39-none-win_amd64.whl", hash = "sha256:361cab1be45481bd3dc4e00ec82628ebc189b4f4b6fd9bd78a00cfeed54e0034"}, @@ -2391,6 +2396,7 @@ python-versions = ">=3.8" files = [ {file = "PyMuPDFb-1.23.6-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:e5af77580aad3d1103aeec57009d156bfca429cecda14a17c573fcbe97bafb30"}, {file = "PyMuPDFb-1.23.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9925816cbe3e05e920f9be925e5752c2eef42b793885b62075bb0f6a69178598"}, + {file = "PyMuPDFb-1.23.6-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:009e2cff166059e13bf71f93919e688f46b8fc11d122433574cfb0cc9134690e"}, {file = "PyMuPDFb-1.23.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7132b30e6ad6ff2013344e3a481b2287fe0be3710d80694807dd6e0d8635f085"}, {file = "PyMuPDFb-1.23.6-py3-none-win32.whl", hash = "sha256:9d24ddadc204e895bee5000ddc7507c801643548e59f5a56aad6d32981d17eeb"}, {file = "PyMuPDFb-1.23.6-py3-none-win_amd64.whl", hash = "sha256:7bef75988e6979b10ca804cf9487f817aae43b0fff1c6e315b3b9ee0cf1cc32f"}, @@ -2418,6 +2424,38 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "python-box" 
+version = "7.1.1" +description = "Advanced Python dictionaries with dot notation access" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-box-7.1.1.tar.gz", hash = "sha256:2a3df244a5a79ac8f8447b5d11b5be0f2747d7b141cb2866060081ae9b53cc50"}, + {file = "python_box-7.1.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:81ed1ec0f0ff2370227fc07277c5baca46d190a4747631bad7eb6ab1630fb7d9"}, + {file = "python_box-7.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8891735b4148e84d348c6eadd2f127152f751c9603e35d43a1f496183a291ac4"}, + {file = "python_box-7.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:0036fd47d388deaca8ebd65aea905f88ee6ef91d1d8ce34898b66f1824afbe80"}, + {file = "python_box-7.1.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:aabf8b9ae5dbc8ba431d8cbe0d4cfe737a25d52d68b0f5f2ff34915c21a2c1db"}, + {file = "python_box-7.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c046608337e723ae4de3206db5d1e1202ed166da2dfdc70c1f9361e72ace5633"}, + {file = "python_box-7.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:f9266795e9c233874fb5b34fa994054b4fb0371881678e6ec45aec17fc95feac"}, + {file = "python_box-7.1.1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:f76b5b7f0cdc07bfdd4200dc24e6e33189bb2ae322137a2b7110fd41891a3157"}, + {file = "python_box-7.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ea13c98e05a3ec0ff26f254986a17290b69b5ade209fad081fd628f8fcfaa08"}, + {file = "python_box-7.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b3f346e332dba16df0b0543d319d9e7ce07d93e5ae152175302894352aa2d28"}, + {file = "python_box-7.1.1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:24c4ec0ee0278f66321100aaa9c615413da27a14ff43d376a2a3b4665e1d9494"}, + {file = "python_box-7.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d95e5eec4fc8f3fc5c9cc7347fc2eb4f9187c853d34c90b1658d1eff96cd4eac"}, + {file = 
"python_box-7.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:a0f1333c42e81529b6f68c192050df9d4505b803be7ac47f114036b98707f7cf"}, + {file = "python_box-7.1.1-py3-none-any.whl", hash = "sha256:63b609555554d7a9d4b6e725f8e78ef1717c67e7d386200e03422ad612338df8"}, +] + +[package.extras] +all = ["msgpack", "ruamel.yaml (>=0.17)", "toml"] +msgpack = ["msgpack"] +pyyaml = ["PyYAML"] +ruamel-yaml = ["ruamel.yaml (>=0.17)"] +toml = ["toml"] +tomli = ["tomli", "tomli-w"] +yaml = ["ruamel.yaml (>=0.17)"] + [[package]] name = "python-dateutil" version = "2.8.2" @@ -3854,4 +3892,4 @@ postgres = ["pg8000", "pgvector", "psycopg", "psycopg-binary", "psycopg2-binary" [metadata] lock-version = "2.0" python-versions = "<3.12,>=3.9" -content-hash = "bdcea0954fdc07fbe7cbb5128c3df6c42250c1fd072b3e8ebf92e4d245981c25" +content-hash = "a1d04a1b10676fcb84fbce5440800706a2ae14cbe2a10bb7d59667b7c36b7709" diff --git a/pyproject.toml b/pyproject.toml index cb8d8421..d06b2793 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ faiss-cpu = { version = "^1.7.4", optional = true } tiktoken = "^0.5.1" pymupdf = "^1.23.5" tqdm = "^4.66.1" -openai = "^0.28.1" black = { version = "^23.10.1", optional = true } pytest = { version = "^7.4.3", optional = true } llama-index = "^0.8.53.post3" @@ -50,6 +49,7 @@ docstring-parser = "^0.15" lancedb = {version = "^0.3.3", optional = true} httpx = "^0.25.2" pyautogen = {version = "0.1.14", optional = true} +python-box = "^7.1.1" [tool.poetry.extras] legacy = ["faiss-cpu", "numpy"]