diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index 7cf8b6dc..f43cfcb6 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -272,7 +272,9 @@ def create( else: inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False - assert isinstance(inner_thoughts_in_kwargs, bool), type(inner_thoughts_in_kwargs) + if not isinstance(inner_thoughts_in_kwargs, bool): + warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}") + inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs) if inner_thoughts_in_kwargs: functions = add_inner_thoughts_to_functions( functions=functions, diff --git a/memgpt/local_llm/utils.py b/memgpt/local_llm/utils.py index 496b074e..87ef4754 100644 --- a/memgpt/local_llm/utils.py +++ b/memgpt/local_llm/utils.py @@ -1,4 +1,5 @@ import os +import warnings from typing import List import requests @@ -191,9 +192,13 @@ def num_tokens_from_messages(messages: List[dict], model: str = "gpt-4") -> int: # print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") return num_tokens_from_messages(messages, model="gpt-4-0613") else: - raise NotImplementedError( + warnings.warn( f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" ) + return num_tokens_from_messages(messages, model="gpt-4-0613") + # raise NotImplementedError( + # f"""num_tokens_from_messages() is not implemented for model {model}. 
+ # See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" + # ) num_tokens = 0 for message in messages: num_tokens += tokens_per_message diff --git a/memgpt/server/rest_api/agents/message.py b/memgpt/server/rest_api/agents/message.py index ab5e41b1..4058c1a8 100644 --- a/memgpt/server/rest_api/agents/message.py +++ b/memgpt/server/rest_api/agents/message.py @@ -95,7 +95,10 @@ async def send_message_to_agent( ) -> Union[StreamingResponse, UserMessageResponse]: """Split off into a separate function so that it can be imported in the /chat/completion proxy.""" - include_final_message = True + # TODO this is a total hack but is required until we move streaming into the model config + if server.server_llm_config.model_endpoint != "https://api.openai.com/v1": + stream_tokens = False + # handle the legacy mode streaming if stream_legacy: # NOTE: override diff --git a/memgpt/server/rest_api/interface.py b/memgpt/server/rest_api/interface.py index f5711109..884334f0 100644 --- a/memgpt/server/rest_api/interface.py +++ b/memgpt/server/rest_api/interface.py @@ -500,7 +500,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): processed_chunk = { "function_call": { - # "id": function_call.id, + "id": function_call.id, "name": function_call.function["name"], "arguments": function_call.function["arguments"], },