From 8be67d2fad34d195f019a2be29d8d1f293f9f84c Mon Sep 17 00:00:00 2001
From: Miao
Date: Fri, 21 Mar 2025 01:22:21 +0800
Subject: [PATCH] Fix optimistic json parser strict mode (#2506)

---
Review notes (text between '---' and the first 'diff --git' is not applied):
- default_on_extra_token now prints the parsed data and the leftover text
  instead of doing nothing.
- An unterminated string in strict mode now raises `decode_error` instead of
  calling json.loads on the partial text; lenient mode still returns the
  partial string.
- The "only a sign or just '.'" shortcut in number parsing now applies in
  lenient mode only.
- Tests: the leftover after parsing "true" is expected to be None, checked
  with `is None` (identity comparison for the None singleton); two strict-mode
  tests are added for streamed incomplete strings and unescaped control
  characters.
- The abbreviated post-image blob id on the tests `index` line was computed
  for the original `== None` spelling.

 .../server/rest_api/optimistic_json_parser.py | 10 +++---
 tests/test_optimistic_json_parser.py          | 34 ++++++++++++++++++-
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/letta/server/rest_api/optimistic_json_parser.py b/letta/server/rest_api/optimistic_json_parser.py
index 9379b4e6..452d29e9 100644
--- a/letta/server/rest_api/optimistic_json_parser.py
+++ b/letta/server/rest_api/optimistic_json_parser.py
@@ -32,7 +32,7 @@ class OptimisticJSONParser:
         self.on_extra_token = self.default_on_extra_token
 
     def default_on_extra_token(self, text, data, reminding):
-        pass
+        print(f"Parsed JSON with extra tokens: {data}, remaining: {reminding}")
 
     def parse(self, input_str):
         """
@@ -130,8 +130,8 @@ class OptimisticJSONParser:
         if end == -1:
             # Incomplete string
             if not self.strict:
-                return input_str[1:], ""
-            return json.loads(f'"{input_str[1:]}"'), ""
+                return input_str[1:], ""  # Lenient mode returns partial string
+            raise decode_error  # Raise error for incomplete string in strict mode
 
         str_val = input_str[: end + 1]
         input_str = input_str[end + 1 :]
@@ -152,8 +152,8 @@ class OptimisticJSONParser:
         num_str = input_str[:idx]
         remainder = input_str[idx:]
 
-        # If it's only a sign or just '.', return as-is with empty remainder
-        if not num_str or num_str in {"-", "."}:
+        # If not strict, and it's only a sign or just '.', return as-is with empty remainder
+        if not self.strict and (not num_str or num_str in {"-", "."}):
             return num_str, ""
 
         try:
diff --git a/tests/test_optimistic_json_parser.py b/tests/test_optimistic_json_parser.py
index 4f188854..f7741f7c 100644
--- a/tests/test_optimistic_json_parser.py
+++ b/tests/test_optimistic_json_parser.py
@@ -96,7 +96,7 @@ def test_parse_number_cases(strict_parser):
 def test_parse_boolean_true(strict_parser):
     assert strict_parser.parse("true") is True, "Should parse 'true'."
     # Check leftover
-    assert strict_parser.last_parse_reminding == "", "No extra tokens expected."
+    assert strict_parser.last_parse_reminding is None, "No extra tokens expected."
 
 
 def test_parse_boolean_false(strict_parser):
@@ -246,3 +246,35 @@ def test_multiple_parse_calls(strict_parser):
     result_2 = strict_parser.parse(input_2)
     assert result_2 == [2, 3]
     assert strict_parser.last_parse_reminding.strip() == "trailing2"
+
+
+def test_parse_incomplete_string_streaming_strict(strict_parser):
+    """
+    Test how a strict parser handles an incomplete string received in chunks.
+    """
+    # Simulate streaming chunks
+    chunk1 = '{"message": "This is an incomplete'
+    chunk2 = " string with a newline\\n"
+    chunk3 = 'and more text"}'
+
+    with pytest.raises(json.JSONDecodeError, match="Unterminated string"):
+        strict_parser.parse(chunk1)
+
+    incomplete_json = chunk1 + chunk2
+    with pytest.raises(json.JSONDecodeError, match="Unterminated string"):
+        strict_parser.parse(incomplete_json)
+
+    complete_json = incomplete_json + chunk3
+    result = strict_parser.parse(complete_json)
+    expected = {"message": "This is an incomplete string with a newline\nand more text"}
+    assert result == expected, "Should parse complete JSON correctly"
+
+
+def test_unescaped_control_characters_strict(strict_parser):
+    """
+    Test parsing JSON containing unescaped control characters in strict mode.
+    """
+    input_str = '{"message": "This has a newline\nand tab\t"}'
+
+    with pytest.raises(json.JSONDecodeError, match="Invalid control character"):
+        strict_parser.parse(input_str)