fix: patches to the API for non-streaming OAI proxy backends (#1653)
This commit is contained in:
@@ -272,7 +272,9 @@ def create(
|
||||
else:
|
||||
inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False
|
||||
|
||||
assert isinstance(inner_thoughts_in_kwargs, bool), type(inner_thoughts_in_kwargs)
|
||||
if not isinstance(inner_thoughts_in_kwargs, bool):
|
||||
warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
|
||||
inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
|
||||
if inner_thoughts_in_kwargs:
|
||||
functions = add_inner_thoughts_to_functions(
|
||||
functions=functions,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import warnings
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
@@ -191,9 +192,13 @@ def num_tokens_from_messages(messages: List[dict], model: str = "gpt-4") -> int:
|
||||
# print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
|
||||
return num_tokens_from_messages(messages, model="gpt-4-0613")
|
||||
else:
|
||||
raise NotImplementedError(
|
||||
warnings.warn(
|
||||
f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
|
||||
)
|
||||
return num_tokens_from_messages(messages, model="gpt-4-0613")
|
||||
# raise NotImplementedError(
|
||||
# f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
|
||||
# )
|
||||
num_tokens = 0
|
||||
for message in messages:
|
||||
num_tokens += tokens_per_message
|
||||
|
||||
@@ -95,7 +95,10 @@ async def send_message_to_agent(
|
||||
) -> Union[StreamingResponse, UserMessageResponse]:
|
||||
"""Split off into a separate function so that it can be imported in the /chat/completion proxy."""
|
||||
|
||||
include_final_message = True
|
||||
# TODO this is a total hack but is required until we move streaming into the model config
|
||||
if server.server_llm_config.model_endpoint != "https://api.openai.com/v1":
|
||||
stream_tokens = False
|
||||
|
||||
# handle the legacy mode streaming
|
||||
if stream_legacy:
|
||||
# NOTE: override
|
||||
|
||||
@@ -500,7 +500,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
|
||||
processed_chunk = {
|
||||
"function_call": {
|
||||
# "id": function_call.id,
|
||||
"id": function_call.id,
|
||||
"name": function_call.function["name"],
|
||||
"arguments": function_call.function["arguments"],
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user