# letta-server/memgpt/local_llm/chat_completion_proxy.py
"""MemGPT sends a ChatCompletion request
Under the hood, we use the functions argument to turn
"""
"""Key idea: create drop-in replacement for agent's ChatCompletion call that runs on an OpenLLM backend"""
import os
import json

import requests

from .webui_settings import SIMPLE
from .llm_chat_completion_wrappers import airoboros

HOST = os.getenv('OPENAI_API_BASE')
HOST_TYPE = os.getenv('BACKEND_TYPE')  # default None == ChatCompletion
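
# Example configuration (hypothetical values; the port depends on how your
# text-generation-webui server was launched):
#   export OPENAI_API_BASE='http://127.0.0.1:5000'
#   export BACKEND_TYPE='webui'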


class DotDict(dict):
    """Allow dot access on properties, similar to the OpenAI response object"""

    def __getattr__(self, attr):
        return self.get(attr)

    def __setattr__(self, key, value):
        self[key] = value
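

# Illustrative only: DotDict enables OpenAI-SDK-style attribute access on plain
# dicts, e.g.:
#   d = DotDict({'message': DotDict({'content': 'hi'})})
#   d.message.content  # -> 'hi'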


async def get_chat_completion(
    model,  # `model` is effectively unused: the model is fixed to whatever you set in your own backend
    messages,
    functions,
    function_call='auto',
):
    if function_call != 'auto':
        raise ValueError(f"function_call == {function_call} not supported (auto only)")

    # TODO: select the wrapper based on `model`; for now the airoboros wrapper is hardcoded
    llm_wrapper = airoboros.Airoboros21Wrapper()

    # First step: turn the message sequence into a prompt that the model expects
    prompt = llm_wrapper.chat_completion_to_prompt(messages, functions)
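    # Wrapper contract (sketch, inferred from usage in this file):
    #   chat_completion_to_prompt(messages, functions) flattens the message history
    #   plus function schemas into a single text prompt, and
    #   output_to_chat_completion_response(text) parses the raw completion back into
    #   an OpenAI-style message dict (role/content/function_call).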

    if HOST_TYPE != 'webui':
        raise ValueError(f"Unsupported backend type: {HOST_TYPE} (only 'webui' is supported)")

    # Copy the base settings so repeated calls don't mutate the shared SIMPLE template
    request = SIMPLE.copy()
    request['prompt'] = prompt

    try:
        URI = f'{HOST}/v1/generate'
        response = requests.post(URI, json=request)
        if response.status_code == 200:
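            # Expected webui /v1/generate response shape (illustrative, inferred from
            # the parsing below): {'results': [{'text': '<completion text>'}]}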
            result = response.json()['results'][0]['text']
            print(f"json API response.text: {result}")
        else:
            raise Exception(f"API call got non-200 response code: {response.status_code}")

        # Second step: map the raw completion text back into ChatCompletion response form
        chat_completion_result = llm_wrapper.output_to_chat_completion_response(result)
        print(json.dumps(chat_completion_result, indent=2))

        # unpack with response.choices[0].message.content
        response = DotDict({
            'model': None,
            'choices': [DotDict({
                'message': DotDict(chat_completion_result),
                'finish_reason': 'stop',  # TODO vary based on webui response
            })],
            'usage': DotDict({
                # TODO report real token counts
                'prompt_tokens': 0,
                'completion_tokens': 0,
                'total_tokens': 0,
            }),
        })
        return response

    except Exception:
        # TODO: handle backend errors (e.g., connection failures) more gracefully
        raise
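

if __name__ == '__main__':
    # Minimal smoke-test sketch (not part of the original module). Assumes
    # OPENAI_API_BASE and BACKEND_TYPE='webui' are set and a text-generation-webui
    # server is reachable; the message/function payloads below are hypothetical
    # examples of what the agent might send.
    import asyncio

    example_messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': 'Hello!'},
    ]
    example_functions = [
        {
            'name': 'send_message',
            'description': 'Send a message to the user',
            'parameters': {
                'type': 'object',
                'properties': {'message': {'type': 'string'}},
                'required': ['message'],
            },
        }
    ]

    reply = asyncio.run(get_chat_completion(None, example_messages, example_functions))
    # Unpack the OpenAI-style response object via DotDict attribute access
    print(reply.choices[0].message.content)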