diff --git a/letta/llm_api/llm_client_base.py b/letta/llm_api/llm_client_base.py
index bd4054f4..81ab852b 100644
--- a/letta/llm_api/llm_client_base.py
+++ b/letta/llm_api/llm_client_base.py
@@ -1,3 +1,4 @@
+import json
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
@@ -186,3 +187,32 @@ class LLMClientBase:
         An LLMError subclass that represents the error in a provider-agnostic way
         """
         return LLMError(f"Unhandled LLM error: {str(e)}")
+
+    def _fix_truncated_json_response(self, response: ChatCompletionResponse) -> ChatCompletionResponse:
+        """
+        Best-effort repair of truncated JSON in the first tool call's arguments.
+
+        Some providers cut off generation mid-JSON; this closes an unterminated
+        string and appends any missing closing braces, keeping the response
+        unchanged if the repair still does not parse.
+        """
+        if response.choices and response.choices[0].message and response.choices[0].message.tool_calls:
+            tool_call_args_str = response.choices[0].message.tool_calls[0].function.arguments
+            try:
+                json.loads(tool_call_args_str)
+            except json.JSONDecodeError:
+                try:
+                    json_str_end = ""
+                    quote_count = tool_call_args_str.count('"')
+                    if quote_count % 2 != 0:
+                        json_str_end = json_str_end + '"'
+
+                    open_braces = tool_call_args_str.count("{")
+                    close_braces = tool_call_args_str.count("}")
+                    missing_braces = open_braces - close_braces
+                    json_str_end += "}" * missing_braces
+                    # Append the missing closers to the payload as-is; slicing with
+                    # [:-len(json_str_end)] would wipe the whole string when no closer
+                    # is needed (len 0) and otherwise drop valid trailing characters.
+                    fixed_tool_call_args_str = tool_call_args_str + json_str_end
+                    json.loads(fixed_tool_call_args_str)
+                    response.choices[0].message.tool_calls[0].function.arguments = fixed_tool_call_args_str
+                except json.JSONDecodeError:
+                    pass
+        return response
diff --git a/letta/llm_api/openai_client.py b/letta/llm_api/openai_client.py
index 737466e8..ec289803 100644
--- a/letta/llm_api/openai_client.py
+++ b/letta/llm_api/openai_client.py
@@ -280,7 +280,7 @@ class OpenAIClient(LLMClientBase):
         # OpenAI's response structure directly maps to ChatCompletionResponse
         # We just need to instantiate the Pydantic model for validation and type safety.
         chat_completion_response = ChatCompletionResponse(**response_data)
-
+        chat_completion_response = self._fix_truncated_json_response(chat_completion_response)
         # Unpack inner thoughts if they were embedded in function arguments
         if llm_config.put_inner_thoughts_in_kwargs:
             chat_completion_response = unpack_all_inner_thoughts_from_kwargs(