fix: patches to the API for non-streaming OAI proxy backends (#1653)

This commit is contained in:
Charles Packer
2024-08-16 09:48:28 -07:00
committed by GitHub
parent 1dfadcb21f
commit 1b64ccbbc1
4 changed files with 14 additions and 4 deletions

View File

@@ -272,7 +272,9 @@ def create(
else:
inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False
assert isinstance(inner_thoughts_in_kwargs, bool), type(inner_thoughts_in_kwargs)
if not isinstance(inner_thoughts_in_kwargs, bool):
warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
if inner_thoughts_in_kwargs:
functions = add_inner_thoughts_to_functions(
functions=functions,

View File

@@ -1,4 +1,5 @@
import os
import warnings
from typing import List
import requests
@@ -191,9 +192,13 @@ def num_tokens_from_messages(messages: List[dict], model: str = "gpt-4") -> int:
# print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
return num_tokens_from_messages(messages, model="gpt-4-0613")
else:
raise NotImplementedError(
warnings.warn(
f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
)
return num_tokens_from_messages(messages, model="gpt-4-0613")
# raise NotImplementedError(
# f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
# )
num_tokens = 0
for message in messages:
num_tokens += tokens_per_message

View File

@@ -95,7 +95,10 @@ async def send_message_to_agent(
) -> Union[StreamingResponse, UserMessageResponse]:
"""Split off into a separate function so that it can be imported in the /chat/completion proxy."""
include_final_message = True
# TODO this is a total hack but is required until we move streaming into the model config
if server.server_llm_config.model_endpoint != "https://api.openai.com/v1":
stream_tokens = False
# handle the legacy mode streaming
if stream_legacy:
# NOTE: override

View File

@@ -500,7 +500,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
processed_chunk = {
"function_call": {
# "id": function_call.id,
"id": function_call.id,
"name": function_call.function["name"],
"arguments": function_call.function["arguments"],
},