diff --git a/memgpt/llm_api/llm_api_tools.py b/memgpt/llm_api/llm_api_tools.py index 7cf8b6dc..f43cfcb6 100644 --- a/memgpt/llm_api/llm_api_tools.py +++ b/memgpt/llm_api/llm_api_tools.py @@ -272,7 +272,9 @@ def create( else: inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False - assert isinstance(inner_thoughts_in_kwargs, bool), type(inner_thoughts_in_kwargs) + if not isinstance(inner_thoughts_in_kwargs, bool): + warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}") + inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs) if inner_thoughts_in_kwargs: functions = add_inner_thoughts_to_functions( functions=functions, diff --git a/memgpt/local_llm/utils.py b/memgpt/local_llm/utils.py index 496b074e..87ef4754 100644 --- a/memgpt/local_llm/utils.py +++ b/memgpt/local_llm/utils.py @@ -1,4 +1,5 @@ import os +import warnings from typing import List import requests @@ -191,9 +192,13 @@ def num_tokens_from_messages(messages: List[dict], model: str = "gpt-4") -> int: # print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.") return num_tokens_from_messages(messages, model="gpt-4-0613") else: - raise NotImplementedError( + warnings.warn( f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" ) + return num_tokens_from_messages(messages, model="gpt-4-0613") + # raise NotImplementedError( + # f"""num_tokens_from_messages() is not implemented for model {model}. 
+ # See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""" + # ) num_tokens = 0 for message in messages: num_tokens += tokens_per_message diff --git a/memgpt/server/rest_api/agents/message.py b/memgpt/server/rest_api/agents/message.py index ab5e41b1..4058c1a8 100644 --- a/memgpt/server/rest_api/agents/message.py +++ b/memgpt/server/rest_api/agents/message.py @@ -95,7 +95,10 @@ async def send_message_to_agent( ) -> Union[StreamingResponse, UserMessageResponse]: """Split off into a separate function so that it can be imported in the /chat/completion proxy.""" - include_final_message = True + # TODO this is a total hack but is required until we move streaming into the model config + if server.server_llm_config.model_endpoint != "https://api.openai.com/v1": + stream_tokens = False + # handle the legacy mode streaming if stream_legacy: # NOTE: override diff --git a/memgpt/server/rest_api/interface.py b/memgpt/server/rest_api/interface.py index f5711109..884334f0 100644 --- a/memgpt/server/rest_api/interface.py +++ b/memgpt/server/rest_api/interface.py @@ -500,7 +500,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): processed_chunk = { "function_call": { - # "id": function_call.id, + "id": function_call.id, "name": function_call.function["name"], "arguments": function_call.function["arguments"], },