From d1536df6f6485c10fb491f1c8884fe64e9035d47 Mon Sep 17 00:00:00 2001
From: Devansh Jain <31609257+devanshrj@users.noreply.github.com>
Date: Mon, 8 Dec 2025 17:25:15 -0800
Subject: [PATCH] chore: Update deepseek client for v3.2 models (#6556)

* support for v3.2 models

* streaming + context window fix

* fix for no assistant text from deepseek
---
 letta/adapters/simple_llm_stream_adapter.py   |   2 +-
 .../interfaces/openai_streaming_interface.py  |   9 +-
 letta/llm_api/deepseek_client.py              | 365 ++----------------
 .../schemas/openai/chat_completion_request.py |   4 +
 letta/schemas/providers/deepseek.py           |   4 +-
 .../llm_model_configs/deepseek-reasoner.json  |   2 +-
 6 files changed, 54 insertions(+), 332 deletions(-)

diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py
index 3089f94c..91d5e211 100644
--- a/letta/adapters/simple_llm_stream_adapter.py
+++ b/letta/adapters/simple_llm_stream_adapter.py
@@ -75,7 +75,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
                 run_id=self.run_id,
                 step_id=step_id,
             )
-        elif self.llm_config.model_endpoint_type == ProviderType.openai:
+        elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.deepseek]:
             # Decide interface based on payload shape
             use_responses = "input" in request_data and "messages" not in request_data
             # No support for Responses API proxy
diff --git a/letta/interfaces/openai_streaming_interface.py b/letta/interfaces/openai_streaming_interface.py
index 65fdfd39..aa7dac9a 100644
--- a/letta/interfaces/openai_streaming_interface.py
+++ b/letta/interfaces/openai_streaming_interface.py
@@ -625,7 +625,14 @@ class SimpleOpenAIStreamingInterface:
 
         if reasoning_content:
             combined_reasoning = "".join(reasoning_content)
-            merged_messages.append(ReasoningContent(is_native=True, reasoning=combined_reasoning, signature=None))
+            # Only reroute reasoning into content for DeepSeek streams when no assistant text was emitted
+            # and no tool calls were produced (i.e., a reasoning-only final answer).
+            is_deepseek = bool(self.model and self.model.startswith("deepseek"))
+            produced_tool_calls = bool(self._tool_calls_acc)
+            if is_deepseek and not concat_content_parts and not produced_tool_calls:
+                concat_content_parts.append(combined_reasoning)
+            else:
+                merged_messages.append(ReasoningContent(is_native=True, reasoning=combined_reasoning, signature=None))
 
         if concat_content_parts:
             merged_messages.append(TextContent(text="".join(concat_content_parts)))
diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py
index e5b2844e..0703445d 100644
--- a/letta/llm_api/deepseek_client.py
+++ b/letta/llm_api/deepseek_client.py
@@ -1,6 +1,4 @@
-import json
 import os
-import re
 from typing import List, Optional
 
 from openai import AsyncOpenAI, AsyncStream, OpenAI
@@ -13,315 +11,29 @@ from letta.otel.tracing import trace_method
 from letta.schemas.enums import AgentType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.settings import model_settings
 
 logger = get_logger(__name__)
-from letta.schemas.openai.chat_completion_request import (
-    AssistantMessage,
-    ChatCompletionRequest,
-    ChatMessage,
-    FunctionCall as ToolFunctionChoiceFunctionCall,
-    Tool,
-    ToolFunctionChoice,
-    ToolMessage,
-    UserMessage,
-    cast_message_to_subtype,
-)
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
-from letta.schemas.openai.openai import Function, ToolCall
-from letta.settings import model_settings
-from letta.utils import get_tool_call_id
 
 
-def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
+def _strip_reasoning_content_for_new_user_turn(messages: List[dict]) -> List[dict]:
     """
-    Merge `ToolMessage` objects into the previous message.
+    DeepSeek thinking mode wants reasoning_content during the active turn (e.g., before tool calls finish),
+    but it should be dropped once a new user question begins.
     """
-    previous_message.content += (
-        f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
-    )
-    return previous_message
+    if not messages or messages[-1].get("role") != "user":
+        return messages
 
-
-def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
-    """
-    For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field.
-    """
-
-    if "tool_calls" in assistant_message.dict().keys():
-        assistant_message.content = "".join(
-            [
-                # f"<ToolCall> name: {tool_call.function.name}, function: {tool_call.function}</ToolCall>"
-                f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>"
-                for tool_call in assistant_message.tool_calls
-            ]
-        )
-        del assistant_message.tool_calls
-    return assistant_message
-
-
-def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List["_Message"]:
-    """
-    Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
-    Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
-
-    This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message
-    at the end.
-
-    """
-    deepseek_messages = []
-    for idx, message in enumerate(messages):
-        # First message is the system prompt, add it
-        if idx == 0 and message.role == "system":
-            deepseek_messages.append(message)
-            continue
-        if message.role == "user":
-            if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system":
-                # User message, add it
-                deepseek_messages.append(UserMessage(content=message.content))
-            else:
-                # add to the content of the previous message
-                deepseek_messages[-1].content += message.content
-        elif message.role == "assistant":
-            if deepseek_messages[-1].role == "user":
-                # Assistant message, remove tool calls and add them to the content
-                deepseek_messages.append(handle_assistant_message(message))
-            else:
-                # add to the content of the previous message
-                deepseek_messages[-1].content += message.content
-        elif message.role == "tool" and deepseek_messages[-1].role == "assistant":
-            # Tool message, add it to the last assistant message
-            merged_message = merge_tool_message(deepseek_messages[-1], message)
-            deepseek_messages[-1] = merged_message
-        else:
-            logger.warning(f"Skipping message: {message}")
-
-    # This needs to end on a user message, add a dummy message if the last was assistant
-    if deepseek_messages[-1].role == "assistant":
-        deepseek_messages.append(UserMessage(content=""))
-    return deepseek_messages
-
-
-def build_deepseek_chat_completions_request(
-    llm_config: LLMConfig,
-    messages: List["_Message"],
-    user_id: Optional[str],
-    functions: Optional[list],
-    function_call: Optional[str],
-    use_tool_naming: bool,
-    max_tokens: Optional[int],
-) -> ChatCompletionRequest:
-    # if functions and llm_config.put_inner_thoughts_in_kwargs:
-    #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
-    #     # TODO(fix)
-    #     inner_thoughts_desc = (
-    #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
-    #     )
-    #     functions = add_inner_thoughts_to_functions(
-    #         functions=functions,
-    #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
-    #         inner_thoughts_description=inner_thoughts_desc,
-    #     )
-
-    openai_message_list = [
-        cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
-    ]
-
-    if llm_config.model:
-        model = llm_config.model
-    else:
-        logger.warning(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
-        model = None
-    if use_tool_naming:
-        if function_call is None:
-            tool_choice = None
-        elif function_call not in ["none", "auto", "required"]:
-            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
-        else:
-            tool_choice = function_call
-
-        def add_functions_to_system_message(system_message: ChatMessage):
-            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
-            system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
-
-        if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
-            add_functions_to_system_message(
-                openai_message_list[0]
-            )  # Inject additional instructions to the system prompt with the available functions
-
-            openai_message_list = map_messages_to_deepseek_format(openai_message_list)
-
-            data = ChatCompletionRequest(
-                model=model,
-                messages=openai_message_list,
-                user=str(user_id),
-                max_completion_tokens=max_tokens,
-                temperature=llm_config.temperature,
-            )
-        else:
-            data = ChatCompletionRequest(
-                model=model,
-                messages=openai_message_list,
-                tools=[Tool(type="function", function=f) for f in functions] if functions else None,
-                tool_choice=tool_choice,
-                user=str(user_id),
-                max_completion_tokens=max_tokens,
-                temperature=llm_config.temperature,
-            )
-    else:
-        data = ChatCompletionRequest(
-            model=model,
-            messages=openai_message_list,
-            functions=functions,
-            function_call=function_call,
-            user=str(user_id),
-            max_completion_tokens=max_tokens,
-            temperature=llm_config.temperature,
-        )
-
-    return data
-
-
-def convert_deepseek_response_to_chatcompletion(
-    response: ChatCompletionResponse,
-) -> ChatCompletionResponse:
-    """
-        Example response from DeepSeek (NOTE: as of 8/28/25, deepseek api does populate tool call in response):
-
-        ChatCompletion(
-        id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
-        choices=[
-            Choice(
-                finish_reason='stop',
-                index=0,
-                logprobs=None,
-                message=ChatCompletionMessage(
-                    content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
-                    refusal=None,
-                    role='assistant',
-                    audio=None,
-                    function_call=None,
-                    tool_calls=None,
-                    reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
-                )
-            )
-        ],
-        created=1738266449,
-        model='deepseek-reasoner',
-        object='chat.completion',
-        service_tier=None,
-        system_fingerprint='fp_7e73fd9a08',
-        usage=CompletionUsage(
-            completion_tokens=111,
-            prompt_tokens=1270,
-            total_tokens=1381,
-            completion_tokens_details=CompletionTokensDetails(
-                accepted_prediction_tokens=None,
-                audio_tokens=None,
-                reasoning_tokens=72,
-                rejected_prediction_tokens=None
-            ),
-            prompt_tokens_details=PromptTokensDetails(
-                audio_tokens=None,
-                cached_tokens=1088
-            ),
-            prompt_cache_hit_tokens=1088,
-            prompt_cache_miss_tokens=182
-        )
-    )
-    """
-
-    def convert_dict_quotes(input_dict: dict):
-        """
-        Convert a dictionary with single-quoted keys to double-quoted keys,
-        properly handling boolean values and nested structures.
-
-        Args:
-            input_dict (dict): Input dictionary with single-quoted keys
-
-        Returns:
-            str: JSON string with double-quoted keys
-        """
-        # First convert the dictionary to a JSON string to handle booleans properly
-        json_str = json.dumps(input_dict)
-
-        # Function to handle complex string replacements
-        def replace_quotes(match):
-            key = match.group(1)
-            # Escape any existing double quotes in the key
-            key = key.replace('"', '\\"')
-            return f'"{key}":'
-
-        # Replace single-quoted keys with double-quoted keys
-        # This regex looks for single-quoted keys followed by a colon
-        def strip_json_block(text):
-            # Check if text starts with ```json or similar
-            if text.strip().startswith("```"):
-                # Split by \n to remove the first and last lines
-                lines = text.split("\n")[1:-1]
-                return "\n".join(lines)
-            return text
-
-        pattern = r"'([^']*)':"
-        converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str))
-
-        # Parse the string back to ensure valid JSON format
-        try:
-            json.loads(converted_str)
-            return converted_str
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}")
-
-    def extract_json_block(text):
-        # Find the first {
-        start = text.find("{")
-        if start == -1:
-            return text
-
-        # Track nested braces to find the matching closing brace
-        brace_count = 0
-        end = start
-
-        for i in range(start, len(text)):
-            if text[i] == "{":
-                brace_count += 1
-            elif text[i] == "}":
-                brace_count -= 1
-                if brace_count == 0:
-                    end = i + 1
-                    break
-
-        return text[start:end]
-
-    content = response.choices[0].message.content
-    try:
-        content_dict = json.loads(extract_json_block(content))
-
-        if type(content_dict["arguments"]) == str:
-            content_dict["arguments"] = json.loads(content_dict["arguments"])
-
-        tool_calls = [
-            ToolCall(
-                id=get_tool_call_id(),
-                type="function",
-                function=Function(
-                    name=content_dict["name"],
-                    arguments=convert_dict_quotes(content_dict["arguments"]),
-                ),
-            )
-        ]
-    except (json.JSONDecodeError, TypeError, KeyError) as e:
-        logger.error(f"Failed to parse DeepSeek response: {e}")
-        tool_calls = response.choices[0].message.tool_calls
-        raise ValueError(f"Failed to create valid JSON {content}")
-
-    # Move the "reasoning_content" into the "content" field
-    response.choices[0].message.content = response.choices[0].message.reasoning_content
-    response.choices[0].message.tool_calls = tool_calls
-
-    # Remove the "reasoning_content" field
-    response.choices[0].message.reasoning_content = None
-
-    return response
+    cleaned: List[dict] = []
+    for msg in messages:
+        if msg.get("role") == "assistant":
+            msg = dict(msg)
+            msg.pop("reasoning_content", None)
+            msg.pop("reasoning_content_signature", None)
+            msg.pop("redacted_reasoning_content", None)
+        cleaned.append(msg)
+    return cleaned
 
 
 class DeepseekClient(OpenAIClient):
@@ -342,27 +54,30 @@ class DeepseekClient(OpenAIClient):
         requires_subsequent_tool_call: bool = False,
         tool_return_truncation_chars: Optional[int] = None,
     ) -> dict:
-        # Override put_inner_thoughts_in_kwargs to False for DeepSeek
+        # DeepSeek thinking mode surfaces reasoning_content; keep it for active turns, drop for new user turns.
         llm_config.put_inner_thoughts_in_kwargs = False
 
-        data = super().build_request_data(agent_type, messages, llm_config, tools, force_tool_call, requires_subsequent_tool_call)
+        data = super().build_request_data(
+            agent_type,
+            messages,
+            llm_config,
+            tools,
+            force_tool_call,
+            requires_subsequent_tool_call,
+            tool_return_truncation_chars,
+        )
 
-        def add_functions_to_system_message(system_message: ChatMessage):
-            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in tools)} </available functions>"
-            system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+        if "messages" in data:
+            for msg in data["messages"]:
+                if msg.get("role") == "assistant" and msg.get("tool_calls") and msg.get("reasoning_content") is None:
+                    # DeepSeek requires reasoning_content whenever tool_calls are present in thinking mode.
+                    msg["reasoning_content"] = ""
+            data["messages"] = _strip_reasoning_content_for_new_user_turn(data["messages"])
 
-        openai_message_list = [
-            cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
-        ]
-
-        if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
-            add_functions_to_system_message(
-                openai_message_list[0]
-            )  # Inject additional instructions to the system prompt with the available functions
-
-            openai_message_list = map_messages_to_deepseek_format(openai_message_list)
-
-        data["messages"] = [m.dict() for m in openai_message_list]
+        # DeepSeek reasoning models ignore or reject some sampling and logprob params; avoid sending them.
+        if llm_config.model and "reasoner" in llm_config.model:
+            for unsupported in ("temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs"):
+                data.pop(unsupported, None)
 
         return data
 
@@ -408,10 +123,6 @@ class DeepseekClient(OpenAIClient):
         llm_config: LLMConfig,
     ) -> ChatCompletionResponse:
         """
-        Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
-        Handles potential extraction of inner thoughts if they were added via kwargs.
+        Use native tool-calling and reasoning_content in DeepSeek responses; no custom parsing needed.
         """
-        response = ChatCompletionResponse(**response_data)
-        if response.choices[0].message.tool_calls:
-            return await super().convert_response_to_chat_completion(response_data, input_messages, llm_config)
-        return convert_deepseek_response_to_chatcompletion(response)
+        return await super().convert_response_to_chat_completion(response_data, input_messages, llm_config)
diff --git a/letta/schemas/openai/chat_completion_request.py b/letta/schemas/openai/chat_completion_request.py
index da1c2632..2e755634 100644
--- a/letta/schemas/openai/chat_completion_request.py
+++ b/letta/schemas/openai/chat_completion_request.py
@@ -31,6 +31,10 @@ class AssistantMessage(BaseModel):
     role: str = "assistant"
     name: Optional[str] = None
     tool_calls: Optional[List[ToolCall]] = None
+    reasoning_content: Optional[str] = None
+    reasoning_content_signature: Optional[str] = None
+    redacted_reasoning_content: Optional[str] = None
+    omitted_reasoning_content: Optional[bool] = None
 
 
 class ToolMessage(BaseModel):
diff --git a/letta/schemas/providers/deepseek.py b/letta/schemas/providers/deepseek.py
index ac0144e3..be2ef0b1 100644
--- a/letta/schemas/providers/deepseek.py
+++ b/letta/schemas/providers/deepseek.py
@@ -25,9 +25,9 @@ class DeepSeekProvider(OpenAIProvider):
         # DeepSeek doesn't return context window in the model listing,
         # so these are hardcoded from their website
         if model_name == "deepseek-reasoner":
-            return 64000
+            return 128000
         elif model_name == "deepseek-chat":
-            return 64000
+            return 128000
         else:
             return None
 
diff --git a/tests/configs/llm_model_configs/deepseek-reasoner.json b/tests/configs/llm_model_configs/deepseek-reasoner.json
index 99dac148..db9ed806 100644
--- a/tests/configs/llm_model_configs/deepseek-reasoner.json
+++ b/tests/configs/llm_model_configs/deepseek-reasoner.json
@@ -2,6 +2,6 @@
   "model": "deepseek-reasoner",
   "model_endpoint_type": "deepseek",
   "model_endpoint": "https://api.deepseek.com/v1",
-  "context_window": 64000,
+  "context_window": 128000,
   "put_inner_thoughts_in_kwargs": false
 }