From 89932ff90c18eb3de475bfa66dce15b8fdd114c6 Mon Sep 17 00:00:00 2001 From: cthomas Date: Thu, 28 Aug 2025 11:12:28 -0700 Subject: [PATCH] feat: clean up legacy deepseek api call logic [LET-4087] (#4275) feat: clean up legacy deepseek api call logic Co-authored-by: Shubham Naik --- letta/llm_api/deepseek.py | 303 ------------------------------- letta/llm_api/deepseek_client.py | 300 +++++++++++++++++++++++++++++- letta/llm_api/llm_api_tools.py | 49 ----- 3 files changed, 298 insertions(+), 354 deletions(-) delete mode 100644 letta/llm_api/deepseek.py diff --git a/letta/llm_api/deepseek.py b/letta/llm_api/deepseek.py deleted file mode 100644 index 5d4eb9e1..00000000 --- a/letta/llm_api/deepseek.py +++ /dev/null @@ -1,303 +0,0 @@ -import json -import re -import warnings -from typing import List, Optional - -from letta.schemas.llm_config import LLMConfig -from letta.schemas.message import Message as _Message -from letta.schemas.openai.chat_completion_request import AssistantMessage, ChatCompletionRequest, ChatMessage -from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall -from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, ToolMessage, UserMessage, cast_message_to_subtype -from letta.schemas.openai.chat_completion_response import ChatCompletionResponse -from letta.schemas.openai.openai import Function, ToolCall -from letta.utils import get_tool_call_id - - -def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage: - """ - Merge `ToolMessage` objects into the previous message. - """ - previous_message.content += ( - f" content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}" - ) - return previous_message - - -def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage: - """ - For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field. - """ - - if "tool_calls" in assistant_message.dict().keys(): - assistant_message.content = "".join( - [ - # f" name: {tool_call.function.name}, function: {tool_call.function}" - f" {json.dumps(tool_call.function.dict())} " - for tool_call in assistant_message.tool_calls - ] - ) - del assistant_message.tool_calls - return assistant_message - - -def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]: - """ - Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message. - Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling. - - This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message - at the end. - - """ - deepseek_messages = [] - for idx, message in enumerate(messages): - # First message is the system prompt, add it - if idx == 0 and message.role == "system": - deepseek_messages.append(message) - continue - if message.role == "user": - if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system": - # User message, add it - deepseek_messages.append(UserMessage(content=message.content)) - else: - # add to the content of the previous message - deepseek_messages[-1].content += message.content - elif message.role == "assistant": - if deepseek_messages[-1].role == "user": - # Assistant message, remove tool calls and add them to the content - deepseek_messages.append(handle_assistant_message(message)) - else: - # add to the content of the previous message - deepseek_messages[-1].content += message.content - elif message.role == "tool" and deepseek_messages[-1].role == "assistant": - # Tool message, add it to the last assistant message - merged_message = merge_tool_message(deepseek_messages[-1], message) - deepseek_messages[-1] = merged_message - else: - print(f"Skipping message: {message}") - - # This needs to end on a user message, add a dummy message if the last was assistant - if deepseek_messages[-1].role == "assistant": - deepseek_messages.append(UserMessage(content="")) - return deepseek_messages - - -def build_deepseek_chat_completions_request( - llm_config: LLMConfig, - messages: List[_Message], - user_id: Optional[str], - functions: Optional[list], - function_call: Optional[str], - use_tool_naming: bool, - max_tokens: Optional[int], -) -> ChatCompletionRequest: - # if functions and llm_config.put_inner_thoughts_in_kwargs: - # # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first - # # TODO(fix) - # inner_thoughts_desc = ( - # INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION - # ) - # functions = add_inner_thoughts_to_functions( - # functions=functions, - # inner_thoughts_key=INNER_THOUGHTS_KWARG, - # inner_thoughts_description=inner_thoughts_desc, - # ) - - openai_message_list = [cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=False)) for m in messages] - - if llm_config.model: - model = llm_config.model - else: - warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}") - model = None - if use_tool_naming: - if function_call is None: - tool_choice = None - elif function_call not in ["none", "auto", "required"]: - tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call)) - else: - tool_choice = function_call - - def add_functions_to_system_message(system_message: ChatMessage): - system_message.content += f" {''.join(json.dumps(f) for f in functions)} " - system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.' - - if llm_config.model == "deepseek-reasoner": # R1 currently doesn't support function calling natively - add_functions_to_system_message( - openai_message_list[0] - ) # Inject additional instructions to the system prompt with the available functions - - openai_message_list = map_messages_to_deepseek_format(openai_message_list) - - data = ChatCompletionRequest( - model=model, - messages=openai_message_list, - user=str(user_id), - max_completion_tokens=max_tokens, - temperature=llm_config.temperature, - ) - else: - data = ChatCompletionRequest( - model=model, - messages=openai_message_list, - tools=[Tool(type="function", function=f) for f in functions] if functions else None, - tool_choice=tool_choice, - user=str(user_id), - max_completion_tokens=max_tokens, - temperature=llm_config.temperature, - ) - else: - data = ChatCompletionRequest( - model=model, - messages=openai_message_list, - functions=functions, - function_call=function_call, - user=str(user_id), - max_completion_tokens=max_tokens, - temperature=llm_config.temperature, - ) - - return data - - -def convert_deepseek_response_to_chatcompletion( - response: ChatCompletionResponse, -) -> ChatCompletionResponse: - """ - Example response from DeepSeek: - - ChatCompletion( - id='bc7f7d25-82e4-443a-b217-dfad2b66da8e', - choices=[ - Choice( - finish_reason='stop', - index=0, - logprobs=None, - message=ChatCompletionMessage( - content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}', - refusal=None, - role='assistant', - audio=None, - function_call=None, - tool_calls=None, - reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.' - ) - ) - ], - created=1738266449, - model='deepseek-reasoner', - object='chat.completion', - service_tier=None, - system_fingerprint='fp_7e73fd9a08', - usage=CompletionUsage( - completion_tokens=111, - prompt_tokens=1270, - total_tokens=1381, - completion_tokens_details=CompletionTokensDetails( - accepted_prediction_tokens=None, - audio_tokens=None, - reasoning_tokens=72, - rejected_prediction_tokens=None - ), - prompt_tokens_details=PromptTokensDetails( - audio_tokens=None, - cached_tokens=1088 - ), - prompt_cache_hit_tokens=1088, - prompt_cache_miss_tokens=182 - ) - ) - """ - - def convert_dict_quotes(input_dict: dict): - """ - Convert a dictionary with single-quoted keys to double-quoted keys, - properly handling boolean values and nested structures. - - Args: - input_dict (dict): Input dictionary with single-quoted keys - - Returns: - str: JSON string with double-quoted keys - """ - # First convert the dictionary to a JSON string to handle booleans properly - json_str = json.dumps(input_dict) - - # Function to handle complex string replacements - def replace_quotes(match): - key = match.group(1) - # Escape any existing double quotes in the key - key = key.replace('"', '\\"') - return f'"{key}":' - - # Replace single-quoted keys with double-quoted keys - # This regex looks for single-quoted keys followed by a colon - def strip_json_block(text): - # Check if text starts with ```json or similar - if text.strip().startswith("```"): - # Split by \n to remove the first and last lines - lines = text.split("\n")[1:-1] - return "\n".join(lines) - return text - - pattern = r"'([^']*)':" - converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str)) - - # Parse the string back to ensure valid JSON format - try: - json.loads(converted_str) - return converted_str - except json.JSONDecodeError as e: - raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}") - - def extract_json_block(text): - # Find the first { - start = text.find("{") - if start == -1: - return text - - # Track nested braces to find the matching closing brace - brace_count = 0 - end = start - - for i in range(start, len(text)): - if text[i] == "{": - brace_count += 1 - elif text[i] == "}": - brace_count -= 1 - if brace_count == 0: - end = i + 1 - break - - return text[start:end] - - content = response.choices[0].message.content - try: - content_dict = json.loads(extract_json_block(content)) - - if type(content_dict["arguments"]) == str: - content_dict["arguments"] = json.loads(content_dict["arguments"]) - - tool_calls = [ - ToolCall( - id=get_tool_call_id(), - type="function", - function=Function( - name=content_dict["name"], - arguments=convert_dict_quotes(content_dict["arguments"]), - ), - ) - ] - except (json.JSONDecodeError, TypeError, KeyError) as e: - print(e) - tool_calls = response.choices[0].message.tool_calls - raise ValueError(f"Failed to create valid JSON {content}") - - # Move the "reasoning_content" into the "content" field - response.choices[0].message.content = response.choices[0].message.reasoning_content - response.choices[0].message.tool_calls = tool_calls - - # Remove the "reasoning_content" field - response.choices[0].message.reasoning_content = None - - return response diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py index 7c4d9d74..4e678f60 100644 --- a/letta/llm_api/deepseek_client.py +++ b/letta/llm_api/deepseek_client.py @@ -1,19 +1,315 @@ import json import os +import re +import warnings from typing import List, Optional from openai import AsyncOpenAI, AsyncStream, OpenAI from openai.types.chat.chat_completion import ChatCompletion from openai.types.chat.chat_completion_chunk import ChatCompletionChunk -from letta.llm_api.deepseek import convert_deepseek_response_to_chatcompletion, map_messages_to_deepseek_format from letta.llm_api.openai_client import OpenAIClient from letta.otel.tracing import trace_method from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as PydanticMessage -from letta.schemas.openai.chat_completion_request import ChatMessage, cast_message_to_subtype +from letta.schemas.message import Message as _Message +from letta.schemas.openai.chat_completion_request import AssistantMessage, ChatCompletionRequest, ChatMessage +from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall +from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, ToolMessage, UserMessage, cast_message_to_subtype from letta.schemas.openai.chat_completion_response import ChatCompletionResponse +from letta.schemas.openai.openai import Function, ToolCall from letta.settings import model_settings +from letta.utils import get_tool_call_id + + +def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage: + """ + Merge `ToolMessage` objects into the previous message. + """ + previous_message.content += ( + f" content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}" + ) + return previous_message + + +def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage: + """ + For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field. + """ + + if "tool_calls" in assistant_message.dict().keys(): + assistant_message.content = "".join( + [ + # f" name: {tool_call.function.name}, function: {tool_call.function}" + f" {json.dumps(tool_call.function.dict())} " + for tool_call in assistant_message.tool_calls + ] + ) + del assistant_message.tool_calls + return assistant_message + + +def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]: + """ + Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message. + Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling. + + This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message + at the end. + + """ + deepseek_messages = [] + for idx, message in enumerate(messages): + # First message is the system prompt, add it + if idx == 0 and message.role == "system": + deepseek_messages.append(message) + continue + if message.role == "user": + if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system": + # User message, add it + deepseek_messages.append(UserMessage(content=message.content)) + else: + # add to the content of the previous message + deepseek_messages[-1].content += message.content + elif message.role == "assistant": + if deepseek_messages[-1].role == "user": + # Assistant message, remove tool calls and add them to the content + deepseek_messages.append(handle_assistant_message(message)) + else: + # add to the content of the previous message + deepseek_messages[-1].content += message.content + elif message.role == "tool" and deepseek_messages[-1].role == "assistant": + # Tool message, add it to the last assistant message + merged_message = merge_tool_message(deepseek_messages[-1], message) + deepseek_messages[-1] = merged_message + else: + print(f"Skipping message: {message}") + + # This needs to end on a user message, add a dummy message if the last was assistant + if deepseek_messages[-1].role == "assistant": + deepseek_messages.append(UserMessage(content="")) + return deepseek_messages + + +def build_deepseek_chat_completions_request( + llm_config: LLMConfig, + messages: List[_Message], + user_id: Optional[str], + functions: Optional[list], + function_call: Optional[str], + use_tool_naming: bool, + max_tokens: Optional[int], +) -> ChatCompletionRequest: + # if functions and llm_config.put_inner_thoughts_in_kwargs: + # # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first + # # TODO(fix) + # inner_thoughts_desc = ( + # INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION + # ) + # functions = add_inner_thoughts_to_functions( + # functions=functions, + # inner_thoughts_key=INNER_THOUGHTS_KWARG, + # inner_thoughts_description=inner_thoughts_desc, + # ) + + openai_message_list = [cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=False)) for m in messages] + + if llm_config.model: + model = llm_config.model + else: + warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}") + model = None + if use_tool_naming: + if function_call is None: + tool_choice = None + elif function_call not in ["none", "auto", "required"]: + tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call)) + else: + tool_choice = function_call + + def add_functions_to_system_message(system_message: ChatMessage): + system_message.content += f" {''.join(json.dumps(f) for f in functions)} " + system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.' + + if llm_config.model == "deepseek-reasoner": # R1 currently doesn't support function calling natively + add_functions_to_system_message( + openai_message_list[0] + ) # Inject additional instructions to the system prompt with the available functions + + openai_message_list = map_messages_to_deepseek_format(openai_message_list) + + data = ChatCompletionRequest( + model=model, + messages=openai_message_list, + user=str(user_id), + max_completion_tokens=max_tokens, + temperature=llm_config.temperature, + ) + else: + data = ChatCompletionRequest( + model=model, + messages=openai_message_list, + tools=[Tool(type="function", function=f) for f in functions] if functions else None, + tool_choice=tool_choice, + user=str(user_id), + max_completion_tokens=max_tokens, + temperature=llm_config.temperature, + ) + else: + data = ChatCompletionRequest( + model=model, + messages=openai_message_list, + functions=functions, + function_call=function_call, + user=str(user_id), + max_completion_tokens=max_tokens, + temperature=llm_config.temperature, + ) + + return data + + +def convert_deepseek_response_to_chatcompletion( + response: ChatCompletionResponse, +) -> ChatCompletionResponse: + """ + Example response from DeepSeek (NOTE: as of 8/28/25, deepseek api does populate tool call in response): + + ChatCompletion( + id='bc7f7d25-82e4-443a-b217-dfad2b66da8e', + choices=[ + Choice( + finish_reason='stop', + index=0, + logprobs=None, + message=ChatCompletionMessage( + content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}', + refusal=None, + role='assistant', + audio=None, + function_call=None, + tool_calls=None, + reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.' + ) + ) + ], + created=1738266449, + model='deepseek-reasoner', + object='chat.completion', + service_tier=None, + system_fingerprint='fp_7e73fd9a08', + usage=CompletionUsage( + completion_tokens=111, + prompt_tokens=1270, + total_tokens=1381, + completion_tokens_details=CompletionTokensDetails( + accepted_prediction_tokens=None, + audio_tokens=None, + reasoning_tokens=72, + rejected_prediction_tokens=None + ), + prompt_tokens_details=PromptTokensDetails( + audio_tokens=None, + cached_tokens=1088 + ), + prompt_cache_hit_tokens=1088, + prompt_cache_miss_tokens=182 + ) + ) + """ + + def convert_dict_quotes(input_dict: dict): + """ + Convert a dictionary with single-quoted keys to double-quoted keys, + properly handling boolean values and nested structures. + + Args: + input_dict (dict): Input dictionary with single-quoted keys + + Returns: + str: JSON string with double-quoted keys + """ + # First convert the dictionary to a JSON string to handle booleans properly + json_str = json.dumps(input_dict) + + # Function to handle complex string replacements + def replace_quotes(match): + key = match.group(1) + # Escape any existing double quotes in the key + key = key.replace('"', '\\"') + return f'"{key}":' + + # Replace single-quoted keys with double-quoted keys + # This regex looks for single-quoted keys followed by a colon + def strip_json_block(text): + # Check if text starts with ```json or similar + if text.strip().startswith("```"): + # Split by \n to remove the first and last lines + lines = text.split("\n")[1:-1] + return "\n".join(lines) + return text + + pattern = r"'([^']*)':" + converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str)) + + # Parse the string back to ensure valid JSON format + try: + json.loads(converted_str) + return converted_str + except json.JSONDecodeError as e: + raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}") + + def extract_json_block(text): + # Find the first { + start = text.find("{") + if start == -1: + return text + + # Track nested braces to find the matching closing brace + brace_count = 0 + end = start + + for i in range(start, len(text)): + if text[i] == "{": + brace_count += 1 + elif text[i] == "}": + brace_count -= 1 + if brace_count == 0: + end = i + 1 + break + + return text[start:end] + + content = response.choices[0].message.content + try: + content_dict = json.loads(extract_json_block(content)) + + if type(content_dict["arguments"]) == str: + content_dict["arguments"] = json.loads(content_dict["arguments"]) + + tool_calls = [ + ToolCall( + id=get_tool_call_id(), + type="function", + function=Function( + name=content_dict["name"], + arguments=convert_dict_quotes(content_dict["arguments"]), + ), + ) + ] + except (json.JSONDecodeError, TypeError, KeyError) as e: + print(e) + tool_calls = response.choices[0].message.tool_calls + raise ValueError(f"Failed to create valid JSON {content}") + + # Move the "reasoning_content" into the "content" field + response.choices[0].message.content = response.choices[0].message.reasoning_content + response.choices[0].message.tool_calls = tool_calls + + # Remove the "reasoning_content" field + response.choices[0].message.reasoning_content = None + + return response class DeepseekClient(OpenAIClient): diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py index 3dd8cd74..3050bec6 100644 --- a/letta/llm_api/llm_api_tools.py +++ b/letta/llm_api/llm_api_tools.py @@ -7,7 +7,6 @@ import requests from letta.constants import CLI_WARNING_PREFIX from letta.errors import LettaConfigurationError, RateLimitExceededError -from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion from letta.llm_api.helpers import unpack_all_inner_thoughts_from_kwargs from letta.llm_api.openai import ( build_openai_chat_completions_request, @@ -245,54 +244,6 @@ def create( return response - elif llm_config.model_endpoint_type == "deepseek": - if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "": - # only is a problem if we are *not* using an openai proxy - raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"]) - - data = build_deepseek_chat_completions_request( - llm_config, - messages, - user_id, - functions, - function_call, - use_tool_naming, - llm_config.max_tokens, - ) - if stream: # Client requested token streaming - data.stream = True - assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance( - stream_interface, AgentRefreshStreamingInterface - ), type(stream_interface) - response = openai_chat_completions_process_stream( - url=llm_config.model_endpoint, - api_key=model_settings.deepseek_api_key, - chat_completion_request=data, - stream_interface=stream_interface, - name=name, - # TODO should we toggle for R1 vs V3? - expect_reasoning_content=True, - ) - else: # Client did not request token streaming (expect a blocking backend response) - data.stream = False - if isinstance(stream_interface, AgentChunkStreamingInterface): - stream_interface.stream_start() - try: - response = openai_chat_completions_request( - url=llm_config.model_endpoint, - api_key=model_settings.deepseek_api_key, - chat_completion_request=data, - ) - finally: - if isinstance(stream_interface, AgentChunkStreamingInterface): - stream_interface.stream_end() - """ - if llm_config.put_inner_thoughts_in_kwargs: - response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG) - """ - response = convert_deepseek_response_to_chatcompletion(response) - return response - # local model else: if stream: