From d1536df6f6485c10fb491f1c8884fe64e9035d47 Mon Sep 17 00:00:00 2001 From: Devansh Jain <31609257+devanshrj@users.noreply.github.com> Date: Mon, 8 Dec 2025 17:25:15 -0800 Subject: [PATCH] chore: Update deepseek client for v3.2 models (#6556) * support for v3.2 models * streaming + context window fix * fix for no assistant text from deepseek --- letta/adapters/simple_llm_stream_adapter.py | 2 +- .../interfaces/openai_streaming_interface.py | 9 +- letta/llm_api/deepseek_client.py | 365 ++---------------- .../schemas/openai/chat_completion_request.py | 4 + letta/schemas/providers/deepseek.py | 4 +- .../llm_model_configs/deepseek-reasoner.json | 2 +- 6 files changed, 54 insertions(+), 332 deletions(-) diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py index 3089f94c..91d5e211 100644 --- a/letta/adapters/simple_llm_stream_adapter.py +++ b/letta/adapters/simple_llm_stream_adapter.py @@ -75,7 +75,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): run_id=self.run_id, step_id=step_id, ) - elif self.llm_config.model_endpoint_type == ProviderType.openai: + elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.deepseek]: # Decide interface based on payload shape use_responses = "input" in request_data and "messages" not in request_data # No support for Responses API proxy diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py index 65fdfd39..aa7dac9a 100644 --- a/letta/interfaces/openai_streaming_interface.py +++ b/letta/interfaces/openai_streaming_interface.py @@ -625,7 +625,14 @@ class SimpleOpenAIStreamingInterface: if reasoning_content: combined_reasoning = "".join(reasoning_content) - merged_messages.append(ReasoningContent(is_native=True, reasoning=combined_reasoning, signature=None)) + # Only reroute reasoning into content for DeepSeek streams when no assistant text was emitted + # and no tool calls were produced (i.e., a 
reasoning-only final answer). + is_deepseek = bool(self.model and self.model.startswith("deepseek")) + produced_tool_calls = bool(self._tool_calls_acc) + if is_deepseek and not concat_content_parts and not produced_tool_calls: + concat_content_parts.append(combined_reasoning) + else: + merged_messages.append(ReasoningContent(is_native=True, reasoning=combined_reasoning, signature=None)) if concat_content_parts: merged_messages.append(TextContent(text="".join(concat_content_parts))) diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py index e5b2844e..0703445d 100644 --- a/letta/llm_api/deepseek_client.py +++ b/letta/llm_api/deepseek_client.py @@ -1,6 +1,4 @@ -import json import os -import re from typing import List, Optional from openai import AsyncOpenAI, AsyncStream, OpenAI @@ -13,315 +11,29 @@ from letta.otel.tracing import trace_method from letta.schemas.enums import AgentType from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message as PydanticMessage +from letta.schemas.openai.chat_completion_response import ChatCompletionResponse +from letta.settings import model_settings logger = get_logger(__name__) -from letta.schemas.openai.chat_completion_request import ( - AssistantMessage, - ChatCompletionRequest, - ChatMessage, - FunctionCall as ToolFunctionChoiceFunctionCall, - Tool, - ToolFunctionChoice, - ToolMessage, - UserMessage, - cast_message_to_subtype, -) -from letta.schemas.openai.chat_completion_response import ChatCompletionResponse -from letta.schemas.openai.openai import Function, ToolCall -from letta.settings import model_settings -from letta.utils import get_tool_call_id -def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage: +def _strip_reasoning_content_for_new_user_turn(messages: List[dict]) -> List[dict]: """ - Merge `ToolMessage` objects into the previous message. 
+ DeepSeek thinking mode wants reasoning_content during the active turn (e.g., before tool calls finish), + but it should be dropped once a new user question begins. """ - previous_message.content += ( - f" content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}" - ) - return previous_message + if not messages or messages[-1].get("role") != "user": + return messages - -def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage: - """ - For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field. - """ - - if "tool_calls" in assistant_message.dict().keys(): - assistant_message.content = "".join( - [ - # f" name: {tool_call.function.name}, function: {tool_call.function}" - f" {json.dumps(tool_call.function.dict())} " - for tool_call in assistant_message.tool_calls - ] - ) - del assistant_message.tool_calls - return assistant_message - - -def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List["_Message"]: - """ - Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message. - Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling. - - This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message - at the end. 
- - """ - deepseek_messages = [] - for idx, message in enumerate(messages): - # First message is the system prompt, add it - if idx == 0 and message.role == "system": - deepseek_messages.append(message) - continue - if message.role == "user": - if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system": - # User message, add it - deepseek_messages.append(UserMessage(content=message.content)) - else: - # add to the content of the previous message - deepseek_messages[-1].content += message.content - elif message.role == "assistant": - if deepseek_messages[-1].role == "user": - # Assistant message, remove tool calls and add them to the content - deepseek_messages.append(handle_assistant_message(message)) - else: - # add to the content of the previous message - deepseek_messages[-1].content += message.content - elif message.role == "tool" and deepseek_messages[-1].role == "assistant": - # Tool message, add it to the last assistant message - merged_message = merge_tool_message(deepseek_messages[-1], message) - deepseek_messages[-1] = merged_message - else: - logger.warning(f"Skipping message: {message}") - - # This needs to end on a user message, add a dummy message if the last was assistant - if deepseek_messages[-1].role == "assistant": - deepseek_messages.append(UserMessage(content="")) - return deepseek_messages - - -def build_deepseek_chat_completions_request( - llm_config: LLMConfig, - messages: List["_Message"], - user_id: Optional[str], - functions: Optional[list], - function_call: Optional[str], - use_tool_naming: bool, - max_tokens: Optional[int], -) -> ChatCompletionRequest: - # if functions and llm_config.put_inner_thoughts_in_kwargs: - # # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first - # # TODO(fix) - # inner_thoughts_desc = ( - # INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION - # ) - # functions = 
add_inner_thoughts_to_functions( - # functions=functions, - # inner_thoughts_key=INNER_THOUGHTS_KWARG, - # inner_thoughts_description=inner_thoughts_desc, - # ) - - openai_message_list = [ - cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False) - ] - - if llm_config.model: - model = llm_config.model - else: - logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}") - model = None - if use_tool_naming: - if function_call is None: - tool_choice = None - elif function_call not in ["none", "auto", "required"]: - tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call)) - else: - tool_choice = function_call - - def add_functions_to_system_message(system_message: ChatMessage): - system_message.content += f" {''.join(json.dumps(f) for f in functions)} " - system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.' 
- - if llm_config.model == "deepseek-reasoner": # R1 currently doesn't support function calling natively - add_functions_to_system_message( - openai_message_list[0] - ) # Inject additional instructions to the system prompt with the available functions - - openai_message_list = map_messages_to_deepseek_format(openai_message_list) - - data = ChatCompletionRequest( - model=model, - messages=openai_message_list, - user=str(user_id), - max_completion_tokens=max_tokens, - temperature=llm_config.temperature, - ) - else: - data = ChatCompletionRequest( - model=model, - messages=openai_message_list, - tools=[Tool(type="function", function=f) for f in functions] if functions else None, - tool_choice=tool_choice, - user=str(user_id), - max_completion_tokens=max_tokens, - temperature=llm_config.temperature, - ) - else: - data = ChatCompletionRequest( - model=model, - messages=openai_message_list, - functions=functions, - function_call=function_call, - user=str(user_id), - max_completion_tokens=max_tokens, - temperature=llm_config.temperature, - ) - - return data - - -def convert_deepseek_response_to_chatcompletion( - response: ChatCompletionResponse, -) -> ChatCompletionResponse: - """ - Example response from DeepSeek (NOTE: as of 8/28/25, deepseek api does populate tool call in response): - - ChatCompletion( - id='bc7f7d25-82e4-443a-b217-dfad2b66da8e', - choices=[ - Choice( - finish_reason='stop', - index=0, - logprobs=None, - message=ChatCompletionMessage( - content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}', - refusal=None, - role='assistant', - audio=None, - function_call=None, - tool_calls=None, - reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. 
Let me ask them how they\'re doing and mention whales to keep the conversation going.' - ) - ) - ], - created=1738266449, - model='deepseek-reasoner', - object='chat.completion', - service_tier=None, - system_fingerprint='fp_7e73fd9a08', - usage=CompletionUsage( - completion_tokens=111, - prompt_tokens=1270, - total_tokens=1381, - completion_tokens_details=CompletionTokensDetails( - accepted_prediction_tokens=None, - audio_tokens=None, - reasoning_tokens=72, - rejected_prediction_tokens=None - ), - prompt_tokens_details=PromptTokensDetails( - audio_tokens=None, - cached_tokens=1088 - ), - prompt_cache_hit_tokens=1088, - prompt_cache_miss_tokens=182 - ) - ) - """ - - def convert_dict_quotes(input_dict: dict): - """ - Convert a dictionary with single-quoted keys to double-quoted keys, - properly handling boolean values and nested structures. - - Args: - input_dict (dict): Input dictionary with single-quoted keys - - Returns: - str: JSON string with double-quoted keys - """ - # First convert the dictionary to a JSON string to handle booleans properly - json_str = json.dumps(input_dict) - - # Function to handle complex string replacements - def replace_quotes(match): - key = match.group(1) - # Escape any existing double quotes in the key - key = key.replace('"', '\\"') - return f'"{key}":' - - # Replace single-quoted keys with double-quoted keys - # This regex looks for single-quoted keys followed by a colon - def strip_json_block(text): - # Check if text starts with ```json or similar - if text.strip().startswith("```"): - # Split by \n to remove the first and last lines - lines = text.split("\n")[1:-1] - return "\n".join(lines) - return text - - pattern = r"'([^']*)':" - converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str)) - - # Parse the string back to ensure valid JSON format - try: - json.loads(converted_str) - return converted_str - except json.JSONDecodeError as e: - raise ValueError(f"Failed to create valid JSON with double quotes: 
{str(e)}") - - def extract_json_block(text): - # Find the first { - start = text.find("{") - if start == -1: - return text - - # Track nested braces to find the matching closing brace - brace_count = 0 - end = start - - for i in range(start, len(text)): - if text[i] == "{": - brace_count += 1 - elif text[i] == "}": - brace_count -= 1 - if brace_count == 0: - end = i + 1 - break - - return text[start:end] - - content = response.choices[0].message.content - try: - content_dict = json.loads(extract_json_block(content)) - - if type(content_dict["arguments"]) == str: - content_dict["arguments"] = json.loads(content_dict["arguments"]) - - tool_calls = [ - ToolCall( - id=get_tool_call_id(), - type="function", - function=Function( - name=content_dict["name"], - arguments=convert_dict_quotes(content_dict["arguments"]), - ), - ) - ] - except (json.JSONDecodeError, TypeError, KeyError) as e: - logger.error(f"Failed to parse DeepSeek response: {e}") - tool_calls = response.choices[0].message.tool_calls - raise ValueError(f"Failed to create valid JSON {content}") - - # Move the "reasoning_content" into the "content" field - response.choices[0].message.content = response.choices[0].message.reasoning_content - response.choices[0].message.tool_calls = tool_calls - - # Remove the "reasoning_content" field - response.choices[0].message.reasoning_content = None - - return response + cleaned: List[dict] = [] + for msg in messages: + if msg.get("role") == "assistant": + msg = dict(msg) + msg.pop("reasoning_content", None) + msg.pop("reasoning_content_signature", None) + msg.pop("redacted_reasoning_content", None) + cleaned.append(msg) + return cleaned class DeepseekClient(OpenAIClient): @@ -342,27 +54,30 @@ class DeepseekClient(OpenAIClient): requires_subsequent_tool_call: bool = False, tool_return_truncation_chars: Optional[int] = None, ) -> dict: - # Override put_inner_thoughts_in_kwargs to False for DeepSeek + # DeepSeek thinking mode surfaces reasoning_content; keep it for active 
turns, drop for new user turns. llm_config.put_inner_thoughts_in_kwargs = False - data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call) + data = super().build_request_data( + agent_type, + messages, + llm_config, + tools, + force_tool_call, + requires_subsequent_tool_call, + tool_return_truncation_chars, + ) - def add_functions_to_system_message(system_message: ChatMessage): - system_message.content += f" {''.join(json.dumps(f) for f in tools)} " - system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.' + if "messages" in data: + for msg in data["messages"]: + if msg.get("role") == "assistant" and msg.get("tool_calls") and msg.get("reasoning_content") is None: + # DeepSeek requires reasoning_content whenever tool_calls are present in thinking mode. + msg["reasoning_content"] = "" + data["messages"] = _strip_reasoning_content_for_new_user_turn(data["messages"]) - openai_message_list = [ - cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False) - ] - - if llm_config.model == "deepseek-reasoner": # R1 currently doesn't support function calling natively - add_functions_to_system_message( - openai_message_list[0] - ) # Inject additional instructions to the system prompt with the available functions - - openai_message_list = map_messages_to_deepseek_format(openai_message_list) - - data["messages"] = [m.dict() for m in openai_message_list] + # DeepSeek reasoning models ignore/ reject some sampling params; avoid sending them. 
+ if llm_config.model and "reasoner" in llm_config.model: + for unsupported in ("temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"): + data.pop(unsupported, None) return data @@ -408,10 +123,6 @@ class DeepseekClient(OpenAIClient): llm_config: LLMConfig, ) -> ChatCompletionResponse: """ - Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model. - Handles potential extraction of inner thoughts if they were added via kwargs. + Use native tool-calling and reasoning_content in DeepSeek responses; no custom parsing needed. """ - response = ChatCompletionResponse(**response_data) - if response.choices[0].message.tool_calls: - return await super().convert_response_to_chat_completion(response_data, input_messages, llm_config) - return convert_deepseek_response_to_chatcompletion(response) + return await super().convert_response_to_chat_completion(response_data, input_messages, llm_config) diff --git a/letta/schemas/openai/chat_completion_request.py b/letta/schemas/openai/chat_completion_request.py index da1c2632..2e755634 100644 --- a/letta/schemas/openai/chat_completion_request.py +++ b/letta/schemas/openai/chat_completion_request.py @@ -31,6 +31,10 @@ class AssistantMessage(BaseModel): role: str = "assistant" name: Optional[str] = None tool_calls: Optional[List[ToolCall]] = None + reasoning_content: Optional[str] = None + reasoning_content_signature: Optional[str] = None + redacted_reasoning_content: Optional[str] = None + omitted_reasoning_content: Optional[bool] = None class ToolMessage(BaseModel): diff --git a/letta/schemas/providers/deepseek.py b/letta/schemas/providers/deepseek.py index ac0144e3..be2ef0b1 100644 --- a/letta/schemas/providers/deepseek.py +++ b/letta/schemas/providers/deepseek.py @@ -25,9 +25,9 @@ class DeepSeekProvider(OpenAIProvider): # DeepSeek doesn't return context window in the model listing, # so these are hardcoded from their website if model_name == "deepseek-reasoner": - return 
64000 + return 128000 elif model_name == "deepseek-chat": - return 64000 + return 128000 else: return None diff --git a/tests/configs/llm_model_configs/deepseek-reasoner.json b/tests/configs/llm_model_configs/deepseek-reasoner.json index 99dac148..db9ed806 100644 --- a/tests/configs/llm_model_configs/deepseek-reasoner.json +++ b/tests/configs/llm_model_configs/deepseek-reasoner.json @@ -2,6 +2,6 @@ "model": "deepseek-reasoner", "model_endpoint_type": "deepseek", "model_endpoint": "https://api.deepseek.com/v1", - "context_window": 64000, + "context_window": 128000, "put_inner_thoughts_in_kwargs": false }