From 0b9fdcf46cbe133dd6759ffba5a9e5985b11fe0d Mon Sep 17 00:00:00 2001
From: Charles Packer <packercharles@gmail.com>
Date: Thu, 28 Dec 2023 23:01:45 -0800
Subject: [PATCH] fix: added new json test case + added fix for it (also
 refactored clean json func) (#739)

---
 memgpt/local_llm/json_parser.py | 61 +++++++++++++--------------------
 tests/test_json_parsers.py      | 10 ++++++
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/memgpt/local_llm/json_parser.py b/memgpt/local_llm/json_parser.py
index 1eab64c4..4b958477 100644
--- a/memgpt/local_llm/json_parser.py
+++ b/memgpt/local_llm/json_parser.py
@@ -148,42 +148,27 @@ def repair_even_worse_json(json_string):
 def clean_json(raw_llm_output, messages=None, functions=None):
+    """Parse raw LLM output as JSON, trying progressively more invasive repairs until one succeeds.
+
+    Returns the first successful parse; raises LLMJSONParsingError if all fail. `messages`/`functions` are unused.
+    """
     from memgpt.utils import printd
 
-    """Try a bunch of hacks to parse the data coming out of the LLM"""
-    try:
-        # printd("clean json runs:", raw_llm_output)
-        data = json.loads(raw_llm_output)
-    except (json.JSONDecodeError, LLMJSONParsingError):
+    strategies = [  # (label, parser) pairs in trial order; explicit labels because every lambda's __name__ is "<lambda>"
+        ("as-is", json.loads),
+        ("adding }", lambda output: json.loads(output + "}")),
+        ("adding }}", lambda output: json.loads(output + "}}")),
+        ('adding "}}', lambda output: json.loads(output + '"}}')),
+        ("strip, drop trailing commas, adding }", lambda output: json.loads(output.strip().rstrip(",") + "}")),
+        ("strip, drop trailing commas, adding }}", lambda output: json.loads(output.strip().rstrip(",") + "}}")),
+        ('strip, drop trailing commas, adding "}}', lambda output: json.loads(output.strip().rstrip(",") + '"}}')),
+        ("repair_json_string", lambda output: json.loads(repair_json_string(output))),
+        ("repair_even_worse_json", lambda output: json.loads(repair_even_worse_json(output))),
+        ("first_json", lambda output: extract_first_json(output + "}}")),
+        ("pull send_message manually", clean_and_interpret_send_message_json),
+    ]
+    for label, strategy in strategies:
         try:
-            printd("trying adding }")
-            data = json.loads(raw_llm_output + "}")
-        except (json.JSONDecodeError, LLMJSONParsingError):
-            try:
-                printd("trying adding }}")
-                data = json.loads(raw_llm_output + "}}")
-            except (json.JSONDecodeError, LLMJSONParsingError):
-                try:
-                    printd('trying adding "}}')
-                    data = json.loads(raw_llm_output + '"}}')
-                except (json.JSONDecodeError, LLMJSONParsingError):
-                    try:
-                        repaired = repair_json_string(raw_llm_output)
-                        printd("trying repair_json_string:", repaired)
-                        data = json.loads(repaired)
-                    except (json.JSONDecodeError, LLMJSONParsingError):
-                        try:
-                            repaired = repair_even_worse_json(raw_llm_output)
-                            printd("trying repair_even_worse_json:", repaired)
-                            data = json.loads(repaired)
-                        except (json.JSONDecodeError, LLMJSONParsingError):
-                            try:
-                                printd("trying first_json")
-                                data = extract_first_json(raw_llm_output + "}}")
-                            except (json.JSONDecodeError, LLMJSONParsingError):
-                                try:
-                                    printd("trying to pull send_message manually")
-                                    data = clean_and_interpret_send_message_json(raw_llm_output)
-                                except (json.JSONDecodeError, LLMJSONParsingError):
-                                    raise LLMJSONParsingError(
-                                        f"Failed to decode valid MemGPT JSON from LLM output:\n=====\n{raw_llm_output}\n====="
-                                    )
-    return data
+            printd(f"Trying strategy: {label}")
+            return strategy(raw_llm_output)
+        except (json.JSONDecodeError, LLMJSONParsingError) as e:
+            printd(f"Strategy {label} failed with error: {e}")
+    raise LLMJSONParsingError(f"Failed to decode valid MemGPT JSON from LLM output:\n=====\n{raw_llm_output}\n=====")
diff --git a/tests/test_json_parsers.py b/tests/test_json_parsers.py
index b975825e..a0cef1d6 100644
--- a/tests/test_json_parsers.py
+++ b/tests/test_json_parsers.py
@@ -57,6 +57,15 @@ GARBAGEGARBAGEGARBAGEGARBAGE
 GARBAGEGARBAGEGARBAGEGARBAGE
 """
 
+EXAMPLE_ARCHIVAL_SEARCH = """
+
+{
+  "function": "archival_memory_search",
+  "params": {
+    "inner_thoughts": "Looking for WaitingForAction.",
+    "query": "WaitingForAction",
+"""
+
 
 def test_json_parsers():
     """Try various broken JSON and check that the parsers can fix it"""
@@ -67,6 +76,7 @@ def test_json_parsers():
         EXAMPLE_DOUBLE_JSON,
         EXAMPLE_HARD_LINE_FEEDS,
         EXAMPLE_SEND_MESSAGE_PREFIX_OK_REST_BAD,
+        EXAMPLE_ARCHIVAL_SEARCH,
     ]
 
     for string in test_strings: