fix: gracefully handle too long responses from llm provider (#2677)

Adds a best-effort repair step for tool-call arguments whose JSON was
truncated when the provider hit its response length limit.
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from abc import abstractmethod
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
||||
|
||||
@@ -186,3 +187,30 @@ class LLMClientBase:
|
||||
An LLMError subclass that represents the error in a provider-agnostic way
|
||||
"""
|
||||
return LLMError(f"Unhandled LLM error: {str(e)}")
|
||||
|
||||
def _fix_truncated_json_response(self, response: ChatCompletionResponse) -> ChatCompletionResponse:
|
||||
"""
|
||||
Fixes truncated JSON responses by ensuring the content is properly formatted.
|
||||
This is a workaround for some providers that may return incomplete JSON.
|
||||
"""
|
||||
if response.choices and response.choices[0].message and response.choices[0].message.tool_calls:
|
||||
tool_call_args_str = response.choices[0].message.tool_calls[0].function.arguments
|
||||
try:
|
||||
json.loads(tool_call_args_str)
|
||||
except json.JSONDecodeError:
|
||||
try:
|
||||
json_str_end = ""
|
||||
quote_count = tool_call_args_str.count('"')
|
||||
if quote_count % 2 != 0:
|
||||
json_str_end = json_str_end + '"'
|
||||
|
||||
open_braces = tool_call_args_str.count("{")
|
||||
close_braces = tool_call_args_str.count("}")
|
||||
missing_braces = open_braces - close_braces
|
||||
json_str_end += "}" * missing_braces
|
||||
fixed_tool_call_args_str = tool_call_args_str[: -len(json_str_end)] + json_str_end
|
||||
json.loads(fixed_tool_call_args_str)
|
||||
response.choices[0].message.tool_calls[0].function.arguments = fixed_tool_call_args_str
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return response
|
||||
|
||||
@@ -280,7 +280,7 @@ class OpenAIClient(LLMClientBase):
|
||||
# OpenAI's response structure directly maps to ChatCompletionResponse
|
||||
# We just need to instantiate the Pydantic model for validation and type safety.
|
||||
chat_completion_response = ChatCompletionResponse(**response_data)
|
||||
|
||||
chat_completion_response = self._fix_truncated_json_response(chat_completion_response)
|
||||
# Unpack inner thoughts if they were embedded in function arguments
|
||||
if llm_config.put_inner_thoughts_in_kwargs:
|
||||
chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
|
||||
|
||||
Reference in New Issue
Block a user