diff --git a/memgpt/local_llm/chat_completion_proxy.py b/memgpt/local_llm/chat_completion_proxy.py
index a5290717..cab33c89 100644
--- a/memgpt/local_llm/chat_completion_proxy.py
+++ b/memgpt/local_llm/chat_completion_proxy.py
@@ -10,7 +10,7 @@ from .utils import DotDict
 
 HOST = os.getenv("OPENAI_API_BASE")
 HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
-DEBUG = True
+DEBUG = False
 
 
 async def get_chat_completion(
diff --git a/memgpt/local_llm/webui/api.py b/memgpt/local_llm/webui/api.py
index 3cff08e0..2614050d 100644
--- a/memgpt/local_llm/webui/api.py
+++ b/memgpt/local_llm/webui/api.py
@@ -5,8 +5,8 @@ from .settings import SIMPLE
 
 HOST = os.getenv("OPENAI_API_BASE")
 HOST_TYPE = os.getenv("BACKEND_TYPE")  # default None == ChatCompletion
-WEBUI_API_SUFFIX = "/v1/generate"
-DEBUG = True
+WEBUI_API_SUFFIX = "/api/v1/generate"
+DEBUG = False
 
 
 def get_webui_completion(prompt, settings=SIMPLE):
@@ -25,7 +25,7 @@ def get_webui_completion(prompt, settings=SIMPLE):
             if DEBUG:
                 print(f"json API response.text: {result}")
         else:
-            raise Exception(f"API call got non-200 response code")
+            raise Exception(f"API call got non-200 response code for address: {URI}")
     except:
         # TODO handle gracefully
         raise