feat: support deepseek models (#821)

Co-authored-by: Charles Packer <packercharles@gmail.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
Co-authored-by: Shubham Naik <shub@memgpt.ai>
Co-authored-by: Shubham Naik <shub@letta.com>
2025-02-18 15:28:01 -08:00
parent 6d49dc1ac5
commit 98f0062416
18 changed files with 709 additions and 20 deletions
--- a/letta/local_llm/chat_completion_proxy.py
+++ b/letta/local_llm/chat_completion_proxy.py
@@ -14,7 +14,7 @@ from letta.local_llm.grammars.gbnf_grammar_generator import create_dynamic_model
 from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
 from letta.local_llm.llamacpp.api import get_llamacpp_completion
 from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
-from letta.local_llm.lmstudio.api import get_lmstudio_completion
+from letta.local_llm.lmstudio.api import get_lmstudio_completion, get_lmstudio_completion_chatcompletions
 from letta.local_llm.ollama.api import get_ollama_completion
 from letta.local_llm.utils import count_tokens, get_available_wrappers
 from letta.local_llm.vllm.api import get_vllm_completion
@@ -141,11 +141,24 @@ def get_chat_completion(
            f"Failed to convert ChatCompletion messages into prompt string with wrapper {str(llm_wrapper)} - error: {str(e)}"
        )

+    # get the schema for the model
+
+    """
+    if functions_python is not None:
+        model_schema = generate_schema(functions)
+    else:
+        model_schema = None
+    """
+
+    # Run the LLM
    try:
+        result_reasoning = None
        if endpoint_type == "webui":
            result, usage = get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
        elif endpoint_type == "webui-legacy":
            result, usage = get_webui_completion_legacy(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+        elif endpoint_type == "lmstudio-chatcompletions":
+            result, usage, result_reasoning = get_lmstudio_completion_chatcompletions(endpoint, auth_type, auth_key, model, messages)
        elif endpoint_type == "lmstudio":
            result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions")
        elif endpoint_type == "lmstudio-legacy":
@@ -214,7 +227,7 @@ def get_chat_completion(
                index=0,
                message=Message(
                    role=chat_completion_result["role"],
-                    content=chat_completion_result["content"],
+                    content=result_reasoning if result_reasoning is not None else chat_completion_result["content"],
                    tool_calls=(
                        [ToolCall(id=get_tool_call_id(), type="function", function=chat_completion_result["function_call"])]
                        if "function_call" in chat_completion_result