Files
letta-server/memgpt/openai_tools.py
Sarah Wooders ec2bda4966 Refactor config + determine LLM via config.model_endpoint_type (#422)
* mark deprecated API section

* CLI bug fixes for azure

* check azure before running

* Update README.md

* Update README.md

* bug fix with persona loading

* remove print

* make errors for cli flags more clear

* format

* fix imports

* fix imports

* add prints

* update lock

* update config fields

* cleanup config loading

* commit

* remove asserts

* refactor configure

* put into different functions

* add embedding default

* pass in config

* fixes

* allow overriding openai embedding endpoint

* black

* trying to patch tests (some circular import errors)

* update flags and docs

* patched support for local llms using endpoint and endpoint type passed via configs, not env vars

* missing files

* fix naming

* fix import

* fix two runtime errors

* patch ollama typo, move ollama model question pre-wrapper, modify question phrasing to include link to readthedocs, also have a default ollama model that has a tag included

* disable debug messages

* made error message for failed load more informative

* don't print dynamic linking function warning unless --debug

* updated tests to work with new cli workflow (disabled openai config test for now)

* added skips for tests when vars are missing

* update bad arg

* revise test to soft pass on empty string too

* don't run configure twice

* extend timeout (try to pass against nltk download)

* update defaults

* typo with endpoint type default

* patch runtime errors for when model is None

* catching another case of 'x in model' when model is None (preemptively)

* allow overrides to local llm related config params

* made model wrapper selection from a list vs raw input

* update test for select instead of input

* Fixed bug in endpoint when using local->openai selection, also added validation loop to manual endpoint entry

* updated error messages to be more informative with links to readthedocs

* add back gpt3.5-turbo

---------

Co-authored-by: cpacker <packercharles@gmail.com>
2023-11-14 15:58:19 -08:00

187 lines
6.5 KiB
Python

import os
import random
import time
from typing import Callable, TypeVar

import openai

from memgpt.local_llm.chat_completion_proxy import get_chat_completion

# Optional overrides pulled from the environment (legacy path; the newer
# chat_completion_with_backoff reads these from the agent config instead).
HOST = os.getenv("OPENAI_API_BASE")
HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion

R = TypeVar("R")

# Redirect the openai client at a custom (OpenAI-compatible) endpoint if requested.
if HOST is not None:
    openai.api_base = HOST
def retry_with_exponential_backoff(
    func,
    initial_delay: float = 1,
    exponential_base: float = 2,
    jitter: bool = True,
    max_retries: int = 20,
    errors: tuple = (openai.error.RateLimitError,),
):
    """Retry ``func`` with exponential backoff.

    Retries only on the exception types listed in ``errors`` (by default the
    OpenAI rate-limit error); any other exception propagates immediately.
    The delay grows by ``exponential_base`` each attempt, with optional
    random jitter, until ``max_retries`` is exceeded.
    """
    from functools import wraps  # local import keeps this decorator self-contained

    @wraps(func)  # preserve the wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        num_retries = 0
        delay = initial_delay
        # Loop until a successful response, or max_retries is hit,
        # or a non-retryable exception is raised.
        while True:
            try:
                return func(*args, **kwargs)
            except errors:
                num_retries += 1
                if num_retries > max_retries:
                    raise Exception(f"Maximum number of retries ({max_retries}) exceeded.")
                # Grow the delay; jitter (bool) multiplies as 0/1, so jitter=False
                # yields a deterministic exponential schedule.
                delay *= exponential_base * (1 + jitter * random.random())
                time.sleep(delay)

    return wrapper
# TODO: delete/ignore --legacy
@retry_with_exponential_backoff
def completions_with_backoff(**kwargs):
    """Legacy completion entry point driven by env vars (HOST_TYPE / Azure)."""
    # A configured backend type means a local model proxy handles the call.
    if HOST_TYPE is not None:
        return get_chat_completion(**kwargs)

    # OpenAI / Azure model
    else:
        if using_azure():
            azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
            if azure_openai_deployment is not None:
                kwargs["deployment_id"] = azure_openai_deployment
            else:
                # No deployment id: map the OpenAI model name onto an Azure engine.
                kwargs["engine"] = MODEL_TO_AZURE_ENGINE[kwargs["model"]]
                kwargs.pop("model")
        # context_window is a MemGPT-internal hint; the OpenAI API rejects it.
        if "context_window" in kwargs:
            kwargs.pop("context_window")
        return openai.ChatCompletion.create(**kwargs)
@retry_with_exponential_backoff
def chat_completion_with_backoff(agent_config, **kwargs):
    """Dispatch a chat completion based on ``agent_config.model_endpoint_type``."""
    from memgpt.utils import printd
    from memgpt.config import AgentConfig, MemGPTConfig

    printd(f"Using model {agent_config.model_endpoint_type}, endpoint: {agent_config.model_endpoint}")

    endpoint_type = agent_config.model_endpoint_type
    if endpoint_type == "openai":
        # Plain OpenAI (or OpenAI-compatible) endpoint.
        openai.api_base = agent_config.model_endpoint
        return openai.ChatCompletion.create(**kwargs)

    if endpoint_type == "azure":
        # Credentials currently live in the global config, not the agent config.
        config = MemGPTConfig.load()
        openai.api_type = "azure"
        openai.api_key = config.azure_key
        openai.api_base = config.azure_endpoint
        openai.api_version = config.azure_version
        if config.azure_deployment is not None:
            kwargs["deployment_id"] = config.azure_deployment
        else:
            kwargs["engine"] = MODEL_TO_AZURE_ENGINE[config.model]
        # NOTE(review): Azure requests are addressed via deployment_id/engine,
        # so the OpenAI-style "model" kwarg is dropped either way.
        del kwargs["model"]
        return openai.ChatCompletion.create(**kwargs)

    # Anything else is treated as a local/open LLM endpoint.
    kwargs["context_window"] = agent_config.context_window  # specify for open LLMs
    kwargs["endpoint"] = agent_config.model_endpoint  # specify for open LLMs
    kwargs["endpoint_type"] = agent_config.model_endpoint_type  # specify for open LLMs
    kwargs["wrapper"] = agent_config.model_wrapper  # specify for open LLMs
    return get_chat_completion(**kwargs)
# TODO: deprecate
@retry_with_exponential_backoff
def create_embedding_with_backoff(**kwargs):
    """Call openai.Embedding.create, rewriting kwargs for Azure when needed."""
    if using_azure():
        embeddings_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
        if embeddings_deployment is not None:
            kwargs["deployment_id"] = embeddings_deployment
        else:
            # Azure addresses models via "engine" instead of "model".
            kwargs["engine"] = kwargs.pop("model")
    return openai.Embedding.create(**kwargs)
def get_embedding_with_backoff(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` using the given model."""
    # Newlines are known to degrade embedding quality; flatten them first.
    cleaned = text.replace("\n", " ")
    response = create_embedding_with_backoff(input=[cleaned], model=model)
    return response["data"][0]["embedding"]
# Map OpenAI model names onto Azure engine/deployment names
# (Azure spells "3.5" as "35" in its engine identifiers).
MODEL_TO_AZURE_ENGINE = {
    "gpt-4": "gpt-4",
    "gpt-4-32k": "gpt-4-32k",
    "gpt-3.5": "gpt-35-turbo",
    "gpt-3.5-turbo": "gpt-35-turbo",
    "gpt-3.5-turbo-16k": "gpt-35-turbo-16k",
}
def get_set_azure_env_vars():
    """Return (name, value) pairs for the Azure OpenAI env vars that are set."""
    names = (
        "AZURE_OPENAI_KEY",
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_VERSION",
        "AZURE_OPENAI_DEPLOYMENT",
        "AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT",
    )
    # Keep only the variables that are actually present in the environment.
    return [(name, value) for name in names if (value := os.getenv(name)) is not None]
def using_azure():
    """True when at least one Azure OpenAI environment variable is set."""
    return bool(get_set_azure_env_vars())
def configure_azure_support():
    """Configure the global openai client for Azure from environment variables.

    Prints an error and returns early (without touching openai state) if any
    of the required variables are missing.
    """
    azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
    azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
    azure_openai_version = os.getenv("AZURE_OPENAI_VERSION")
    if None in [
        azure_openai_key,
        azure_openai_endpoint,
        azure_openai_version,
    ]:
        # was an f-string with no placeholders; plain literal is equivalent
        print("Error: missing Azure OpenAI environment variables. Please see README section on Azure.")
        return

    openai.api_type = "azure"
    openai.api_key = azure_openai_key
    openai.api_base = azure_openai_endpoint
    openai.api_version = azure_openai_version
    # deployment gets passed into chatcompletion
def check_azure_embeddings():
    """Validate the Azure embeddings deployment configuration.

    Raises:
        ValueError: if AZURE_OPENAI_DEPLOYMENT is set but
            AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT is not, since embeddings would
            then have no deployment to target.
    """
    azure_openai_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT")
    azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT")
    if azure_openai_deployment is not None and azure_openai_embedding_deployment is None:
        # was an f-string with no placeholders; plain literal is equivalent
        raise ValueError(
            "Error: It looks like you are using Azure deployment ids and computing embeddings, make sure you are setting one for embeddings as well. Please see README section on Azure"
        )