feat: support deepseek models (#821)
Co-authored-by: Charles Packer <packercharles@gmail.com> Co-authored-by: Sarah Wooders <sarahwooders@gmail.com> Co-authored-by: Shubham Naik <shub@memgpt.ai> Co-authored-by: Shubham Naik <shub@letta.com>
This commit is contained in:
@@ -492,7 +492,10 @@ class Agent(BaseAgent):
|
||||
try:
|
||||
raw_function_args = function_call.arguments
|
||||
function_args = parse_json(raw_function_args)
|
||||
except Exception:
|
||||
if not isinstance(function_args, dict):
|
||||
raise ValueError(f"Function arguments are not a dictionary: {function_args} (raw={raw_function_args})")
|
||||
except Exception as e:
|
||||
print(e)
|
||||
error_msg = f"Error parsing JSON for function '{function_name}' arguments: {function_call.arguments}"
|
||||
function_response = "None" # more like "never ran?"
|
||||
messages = self._handle_function_error_response(
|
||||
|
||||
@@ -86,6 +86,8 @@ NON_USER_MSG_PREFIX = "[This is an automated system message hidden from the user
|
||||
# The max amount of tokens supported by the underlying model (eg 8k for gpt-4 and Mistral 7B)
|
||||
LLM_MAX_TOKENS = {
|
||||
"DEFAULT": 8192,
|
||||
"deepseek-chat": 64000,
|
||||
"deepseek-reasoner": 64000,
|
||||
## OpenAI models: https://platform.openai.com/docs/models/overview
|
||||
# "o1-preview
|
||||
"chatgpt-4o-latest": 128000,
|
||||
|
||||
303
letta/llm_api/deepseek.py
Normal file
303
letta/llm_api/deepseek.py
Normal file
@@ -0,0 +1,303 @@
|
||||
import json
|
||||
import re
|
||||
import warnings
|
||||
from typing import List, Optional
|
||||
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.message import Message as _Message
|
||||
from letta.schemas.openai.chat_completion_request import AssistantMessage, ChatCompletionRequest, ChatMessage
|
||||
from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
|
||||
from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, ToolMessage, UserMessage, cast_message_to_subtype
|
||||
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
||||
from letta.schemas.openai.openai import Function, ToolCall
|
||||
from letta.utils import get_tool_call_id
|
||||
|
||||
|
||||
def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
    """
    Fold a `ToolMessage` into the text content of the message that precedes it.

    The tool result is serialized as a `<ToolMessage>...</ToolMessage>` tag and
    appended to `previous_message.content`. `previous_message` is modified in
    place and is also returned.

    Args:
        previous_message: Message whose content receives the serialized tool result.
        tool_message: Tool result to serialize (its `content`, `role` and `tool_call_id` are used).

    Returns:
        The same `previous_message` object that was passed in.
    """
    serialized = (
        f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
    )
    if previous_message.content is None:
        # Nothing to append to yet: `None += str` would raise a TypeError.
        previous_message.content = serialized
    else:
        previous_message.content += serialized
    return previous_message
|
||||
|
||||
|
||||
def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
    """
    Inline an assistant message's tool calls into its text content.

    When the message carries tool calls, its `content` is REPLACED by the
    concatenation of one `<ToolCall> {json} </ToolCall>` tag per call (the JSON
    is the dump of `tool_call.function.dict()`), and the `tool_calls` attribute
    is deleted from the message. The message is modified in place.

    A message whose `tool_calls` attribute is missing or `None` is returned
    unchanged.

    Args:
        assistant_message: Assistant message to rewrite.

    Returns:
        The same `assistant_message` object that was passed in.
    """
    # Check the value rather than key presence in `.dict()`: a declared-but-unset
    # field is still a key, and iterating over `None` would raise a TypeError.
    tool_calls = getattr(assistant_message, "tool_calls", None)
    if tool_calls is None:
        return assistant_message

    assistant_message.content = "".join(
        f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>" for tool_call in tool_calls
    )
    del assistant_message.tool_calls
    return assistant_message
|
||||
|
||||
|
||||
def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
    """
    Rewrite a chat history so that it satisfies the DeepSeek API constraints.

    DeepSeek API has the following constraints: messages must be interleaved between user and assistant messages,
    ending on a user message.
    Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.

    What this function does:
      * a leading system message is kept as-is;
      * consecutive messages that would break the alternation have their content appended to the previous message;
      * assistant messages have their tool calls inlined into their content (see `handle_assistant_message`);
      * tool messages are merged into the preceding assistant message (see `merge_tool_message`);
      * any other message is skipped (and reported on stdout);
      * a dummy empty user message is appended when the history would otherwise end on an assistant message.

    Args:
        messages: Chat history; message objects may be modified in place.

    Returns:
        The rewritten list of messages. An empty input yields an empty list.
    """
    deepseek_messages = []
    for idx, message in enumerate(messages):
        # First message is the system prompt, add it
        if idx == 0 and message.role == "system":
            deepseek_messages.append(message)
            continue

        # `None` means nothing has been emitted yet (e.g. the history has no system prompt);
        # looking at `deepseek_messages[-1]` directly would raise an IndexError in that case.
        previous = deepseek_messages[-1] if deepseek_messages else None
        previous_role = previous.role if previous is not None else None

        if message.role == "user":
            if previous_role in (None, "assistant", "system"):
                # User message, add it
                deepseek_messages.append(UserMessage(content=message.content))
            else:
                # Two user turns in a row: add to the content of the previous message
                previous.content = (previous.content or "") + (message.content or "")
        elif message.role == "assistant":
            if previous_role in (None, "user"):
                # Assistant message, remove tool calls and add them to the content
                deepseek_messages.append(handle_assistant_message(message))
            else:
                # add to the content of the previous message (missing content counts as empty text)
                # NOTE(review): tool calls on `message` are not carried over in this branch - confirm this is intended
                previous.content = (previous.content or "") + (message.content or "")
        elif message.role == "tool" and previous_role == "assistant":
            # Tool message, add it to the last assistant message
            deepseek_messages[-1] = merge_tool_message(previous, message)
        else:
            print(f"Skipping message: {message}")

    # This needs to end on a user message, add a dummy message if the last was assistant
    if deepseek_messages and deepseek_messages[-1].role == "assistant":
        deepseek_messages.append(UserMessage(content=""))
    return deepseek_messages
|
||||
|
||||
|
||||
def build_deepseek_chat_completions_request(
    llm_config: LLMConfig,
    messages: List[_Message],
    user_id: Optional[str],
    functions: Optional[list],
    function_call: Optional[str],
    use_tool_naming: bool,
    max_tokens: Optional[int],
) -> ChatCompletionRequest:
    """
    Build the `ChatCompletionRequest` sent to a DeepSeek endpoint.

    Three request shapes are produced:
      * `use_tool_naming` is false: `functions` / `function_call` are passed through unchanged.
      * `use_tool_naming` is true and the model is "deepseek-reasoner": no tools are sent. The function
        schemas and a JSON-answer instruction are appended to the first message's content, and the
        messages are rewritten with `map_messages_to_deepseek_format`.
      * `use_tool_naming` is true for any other model: `functions` are wrapped as `Tool` objects and
        `function_call` is translated into a `tool_choice`.

    Args:
        llm_config: Model configuration; `model` and `temperature` are read from it.
        messages: Conversation messages; each is converted with `to_openai_dict(put_inner_thoughts_in_kwargs=False)`.
        user_id: Forwarded to the request as `str(user_id)`.
        functions: Function schemas available to the model (may be None).
        function_call: None, one of "none" / "auto" / "required", or the name of a function to force.
        use_tool_naming: Selects the `tools` / `tool_choice` request format over `functions` / `function_call`.
        max_tokens: Forwarded to the request as `max_completion_tokens`.

    Returns:
        The assembled `ChatCompletionRequest`.
    """
    # if functions and llm_config.put_inner_thoughts_in_kwargs:
    #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
    #     # TODO(fix)
    #     inner_thoughts_desc = (
    #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
    #     )
    #     functions = add_inner_thoughts_to_functions(
    #         functions=functions,
    #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
    #         inner_thoughts_description=inner_thoughts_desc,
    #     )

    openai_message_list = [cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=False)) for m in messages]

    if llm_config.model:
        model = llm_config.model
    else:
        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
        model = None

    if not use_tool_naming:
        return ChatCompletionRequest(
            model=model,
            messages=openai_message_list,
            functions=functions,
            function_call=function_call,
            user=str(user_id),
            max_completion_tokens=max_tokens,
            temperature=llm_config.temperature,
        )

    if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
        # Inject additional instructions to the system prompt with the available functions.
        # The first message is assumed to be the system prompt - TODO confirm against callers.
        system_message = openai_message_list[0]
        # `functions` is Optional: treat None as "no functions" instead of failing on iteration.
        system_message.content += f"<available functions> {''.join(json.dumps(f) for f in (functions or []))} </available functions>"
        system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'

        openai_message_list = map_messages_to_deepseek_format(openai_message_list)

        return ChatCompletionRequest(
            model=model,
            messages=openai_message_list,
            user=str(user_id),
            max_completion_tokens=max_tokens,
            temperature=llm_config.temperature,
        )

    if function_call is None:
        tool_choice = None
    elif function_call not in ["none", "auto", "required"]:
        # Anything that is not a keyword is taken to be the name of the function to force
        tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
    else:
        tool_choice = function_call

    return ChatCompletionRequest(
        model=model,
        messages=openai_message_list,
        tools=[Tool(type="function", function=f) for f in functions] if functions else None,
        tool_choice=tool_choice,
        user=str(user_id),
        max_completion_tokens=max_tokens,
        temperature=llm_config.temperature,
    )
|
||||
|
||||
|
||||
def convert_deepseek_response_to_chatcompletion(
    response: ChatCompletionResponse,
) -> ChatCompletionResponse:
    """
    Convert a DeepSeek text completion into a tool-calling response, in place.

    The first choice's `content` is expected to contain a JSON object with the keys
    "name" and "arguments" ("arguments" may be an object or a JSON-encoded string).
    That object becomes a single `ToolCall`; the message's `reasoning_content` is
    moved into `content`, and `reasoning_content` is then cleared.

    NOTE(review): the sample below uses the key "function", while this parser reads
    "name" - confirm which format the model is actually prompted to produce.

    Args:
        response: Response whose first choice is rewritten.

    Returns:
        The same `response` object that was passed in.

    Raises:
        ValueError: If no usable JSON object with "name" and "arguments" can be read from the content.

    Example response from DeepSeek:

    ChatCompletion(
        id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
        choices=[
            Choice(
                finish_reason='stop',
                index=0,
                logprobs=None,
                message=ChatCompletionMessage(
                    content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
                    refusal=None,
                    role='assistant',
                    audio=None,
                    function_call=None,
                    tool_calls=None,
                    reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
                )
            )
        ],
        created=1738266449,
        model='deepseek-reasoner',
        object='chat.completion',
        service_tier=None,
        system_fingerprint='fp_7e73fd9a08',
        usage=CompletionUsage(
            completion_tokens=111,
            prompt_tokens=1270,
            total_tokens=1381,
            completion_tokens_details=CompletionTokensDetails(
                accepted_prediction_tokens=None,
                audio_tokens=None,
                reasoning_tokens=72,
                rejected_prediction_tokens=None
            ),
            prompt_tokens_details=PromptTokensDetails(
                audio_tokens=None,
                cached_tokens=1088
            ),
            prompt_cache_hit_tokens=1088,
            prompt_cache_miss_tokens=182
        )
    )
    """
    message = response.choices[0].message
    content = message.content

    try:
        content_dict = _decode_first_json_object(content)

        arguments = content_dict["arguments"]
        if isinstance(arguments, str):
            # Some responses encode the arguments as a JSON string rather than an object
            arguments = json.loads(arguments)

        tool_calls = [
            ToolCall(
                id=get_tool_call_id(),
                type="function",
                function=Function(
                    name=content_dict["name"],
                    # json.dumps already emits double-quoted keys; post-processing the dump with a
                    # single-quote regex could only corrupt string values such as "the 'a': b".
                    arguments=json.dumps(arguments),
                ),
            )
        ]
    except (json.JSONDecodeError, TypeError, KeyError) as e:
        raise ValueError(f"Failed to create valid JSON {content}") from e

    # Move the "reasoning_content" into the "content" field
    message.content = message.reasoning_content
    message.tool_calls = tool_calls

    # Remove the "reasoning_content" field
    message.reasoning_content = None

    return response


def _decode_first_json_object(text):
    """Decode the JSON value that starts at the first "{" of `text`, ignoring anything after it.

    `json.JSONDecoder.raw_decode` is used instead of manual brace counting so that
    braces inside string values (e.g. {"message": "hi }"}) do not cut the object short.

    Raises:
        TypeError: If `text` is not a string (e.g. the response had no content).
        json.JSONDecodeError: If `text` has no "{" or the text starting there is not valid JSON.
    """
    if not isinstance(text, str):
        raise TypeError(f"Expected response content to be a string, got {type(text).__name__}")

    start = text.find("{")
    if start == -1:
        raise json.JSONDecodeError("No JSON object found in response content", text, 0)

    decoded, _ = json.JSONDecoder().raw_decode(text, start)
    return decoded
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
from typing import List, Optional, Union
|
||||
@@ -13,6 +14,7 @@ from letta.llm_api.anthropic import (
|
||||
)
|
||||
from letta.llm_api.aws_bedrock import has_valid_aws_credentials
|
||||
from letta.llm_api.azure_openai import azure_openai_chat_completions_request
|
||||
from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
|
||||
from letta.llm_api.google_ai import convert_tools_to_google_ai_format, google_ai_chat_completions_request
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
|
||||
from letta.llm_api.openai import (
|
||||
@@ -30,7 +32,7 @@ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
||||
from letta.settings import ModelSettings
|
||||
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
|
||||
|
||||
LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
|
||||
LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq", "deepseek"]
|
||||
|
||||
|
||||
def retry_with_exponential_backoff(
|
||||
@@ -453,10 +455,62 @@ def create(
|
||||
),
|
||||
)
|
||||
|
||||
elif llm_config.model_endpoint_type == "deepseek":
|
||||
if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
|
||||
# only is a problem if we are *not* using an openai proxy
|
||||
raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"])
|
||||
|
||||
data = build_deepseek_chat_completions_request(
|
||||
llm_config,
|
||||
messages,
|
||||
user_id,
|
||||
functions,
|
||||
function_call,
|
||||
use_tool_naming,
|
||||
llm_config.max_tokens,
|
||||
)
|
||||
if stream: # Client requested token streaming
|
||||
data.stream = True
|
||||
assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
|
||||
stream_interface, AgentRefreshStreamingInterface
|
||||
), type(stream_interface)
|
||||
response = openai_chat_completions_process_stream(
|
||||
url=llm_config.model_endpoint,
|
||||
api_key=model_settings.deepseek_api_key,
|
||||
chat_completion_request=data,
|
||||
stream_interface=stream_interface,
|
||||
)
|
||||
else: # Client did not request token streaming (expect a blocking backend response)
|
||||
data.stream = False
|
||||
if isinstance(stream_interface, AgentChunkStreamingInterface):
|
||||
stream_interface.stream_start()
|
||||
try:
|
||||
response = openai_chat_completions_request(
|
||||
url=llm_config.model_endpoint,
|
||||
api_key=model_settings.deepseek_api_key,
|
||||
chat_completion_request=data,
|
||||
)
|
||||
finally:
|
||||
if isinstance(stream_interface, AgentChunkStreamingInterface):
|
||||
stream_interface.stream_end()
|
||||
"""
|
||||
if llm_config.put_inner_thoughts_in_kwargs:
|
||||
response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
|
||||
"""
|
||||
response = convert_deepseek_response_to_chatcompletion(response)
|
||||
return response
|
||||
|
||||
# local model
|
||||
else:
|
||||
if stream:
|
||||
raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
|
||||
|
||||
if "DeepSeek-R1".lower() in llm_config.model.lower(): # TODO: move this to the llm_config.
|
||||
messages[0].content[0].text += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
|
||||
messages[0].content[
|
||||
0
|
||||
].text += f'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
|
||||
|
||||
return get_chat_completion(
|
||||
model=llm_config.model,
|
||||
messages=messages,
|
||||
|
||||
@@ -166,6 +166,11 @@ def openai_chat_completions_process_stream(
|
||||
create_message_id: bool = True,
|
||||
create_message_datetime: bool = True,
|
||||
override_tool_call_id: bool = True,
|
||||
# if we expect reasoning content in the response,
|
||||
# then we should emit reasoning_content as "inner_thoughts"
|
||||
# however, we don't necessarily want to put these
|
||||
# expect_reasoning_content: bool = False,
|
||||
expect_reasoning_content: bool = True,
|
||||
) -> ChatCompletionResponse:
|
||||
"""Process a streaming completion response, and return a ChatCompletionRequest at the end.
|
||||
|
||||
@@ -250,6 +255,7 @@ def openai_chat_completions_process_stream(
|
||||
chat_completion_chunk,
|
||||
message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
|
||||
message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
|
||||
expect_reasoning_content=expect_reasoning_content,
|
||||
)
|
||||
elif isinstance(stream_interface, AgentRefreshStreamingInterface):
|
||||
stream_interface.process_refresh(chat_completion_response)
|
||||
@@ -290,6 +296,13 @@ def openai_chat_completions_process_stream(
|
||||
else:
|
||||
accum_message.content += content_delta
|
||||
|
||||
if expect_reasoning_content and message_delta.reasoning_content is not None:
|
||||
reasoning_content_delta = message_delta.reasoning_content
|
||||
if accum_message.reasoning_content is None:
|
||||
accum_message.reasoning_content = reasoning_content_delta
|
||||
else:
|
||||
accum_message.reasoning_content += reasoning_content_delta
|
||||
|
||||
# TODO(charles) make sure this works for parallel tool calling?
|
||||
if message_delta.tool_calls is not None:
|
||||
tool_calls_delta = message_delta.tool_calls
|
||||
|
||||
@@ -14,7 +14,7 @@ from letta.local_llm.grammars.gbnf_grammar_generator import create_dynamic_model
|
||||
from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
|
||||
from letta.local_llm.llamacpp.api import get_llamacpp_completion
|
||||
from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
|
||||
from letta.local_llm.lmstudio.api import get_lmstudio_completion
|
||||
from letta.local_llm.lmstudio.api import get_lmstudio_completion, get_lmstudio_completion_chatcompletions
|
||||
from letta.local_llm.ollama.api import get_ollama_completion
|
||||
from letta.local_llm.utils import count_tokens, get_available_wrappers
|
||||
from letta.local_llm.vllm.api import get_vllm_completion
|
||||
@@ -141,11 +141,24 @@ def get_chat_completion(
|
||||
f"Failed to convert ChatCompletion messages into prompt string with wrapper {str(llm_wrapper)} - error: {str(e)}"
|
||||
)
|
||||
|
||||
# get the schema for the model
|
||||
|
||||
"""
|
||||
if functions_python is not None:
|
||||
model_schema = generate_schema(functions)
|
||||
else:
|
||||
model_schema = None
|
||||
"""
|
||||
|
||||
# Run the LLM
|
||||
try:
|
||||
result_reasoning = None
|
||||
if endpoint_type == "webui":
|
||||
result, usage = get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
|
||||
elif endpoint_type == "webui-legacy":
|
||||
result, usage = get_webui_completion_legacy(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
|
||||
elif endpoint_type == "lmstudio-chatcompletions":
|
||||
result, usage, result_reasoning = get_lmstudio_completion_chatcompletions(endpoint, auth_type, auth_key, model, messages)
|
||||
elif endpoint_type == "lmstudio":
|
||||
result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions")
|
||||
elif endpoint_type == "lmstudio-legacy":
|
||||
@@ -214,7 +227,7 @@ def get_chat_completion(
|
||||
index=0,
|
||||
message=Message(
|
||||
role=chat_completion_result["role"],
|
||||
content=chat_completion_result["content"],
|
||||
content=result_reasoning if result_reasoning is not None else chat_completion_result["content"],
|
||||
tool_calls=(
|
||||
[ToolCall(id=get_tool_call_id(), type="function", function=chat_completion_result["function_call"])]
|
||||
if "function_call" in chat_completion_result
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from letta.local_llm.settings.settings import get_completions_settings
|
||||
@@ -6,6 +7,73 @@ from letta.utils import count_tokens
|
||||
|
||||
LMSTUDIO_API_CHAT_SUFFIX = "/v1/chat/completions"
|
||||
LMSTUDIO_API_COMPLETIONS_SUFFIX = "/v1/completions"
|
||||
LMSTUDIO_API_CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"
|
||||
|
||||
|
||||
def get_lmstudio_completion_chatcompletions(endpoint, auth_type, auth_key, model, messages):
    """
    Query the LM Studio chat-completions endpoint and try to obtain a JSON function call.

    A first request is sent without any output constraint. If its content is JSON
    containing the keys "function" and "params", it is returned directly. Otherwise
    the model's reasoning is appended to the conversation and a second request is
    sent with a `json_schema` response format requiring those two keys.

    This is the request we need to send

    {
        "model": "deepseek-r1-distill-qwen-7b",
        "messages": [
            { "role": "system", "content": "Always answer in rhymes. Today is Thursday" },
            { "role": "user", "content": "What day is it today?" },
            { "role": "user", "content": "What day is it today?" }],
        "temperature": 0.7,
        "max_tokens": -1,
        "stream": false
    }

    Args:
        endpoint: Base address of the server; the chat-completions suffix is appended to it.
        auth_type: Forwarded to `post_json_auth_request`.
        auth_key: Forwarded to `post_json_auth_request`.
        model: Model name placed in the request.
        messages: Chat messages placed in the request; the list itself is not modified.

    Returns:
        A `(result, usage, result_reasoning)` tuple: the message content, the usage dict
        (numeric counters summed over both requests when a second one succeeds), and the
        `reasoning_content` of the first response (None when absent).

    Raises:
        RuntimeError: If the first request does not return HTTP 200.
    """
    from letta.utils import printd

    URI = endpoint + LMSTUDIO_API_CHAT_COMPLETIONS_SUFFIX
    # Work on a copy: the follow-up messages appended below must not leak into the caller's list.
    request = {"model": model, "messages": list(messages)}

    response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
    if response.status_code != 200:
        # Without a successful first response there is no result/usage/reasoning to work with.
        raise RuntimeError(f"API call got non-200 response code (code={response.status_code}, msg={response.text}) for address: {URI}")

    # Get the reasoning from the model
    result_full = response.json()
    result_reasoning = result_full["choices"][0]["message"].get("reasoning_content")
    result = result_full["choices"][0]["message"]["content"]
    usage = result_full["usage"]

    # See if result is json
    try:
        function_call = json.loads(result)
        if "function" in function_call and "params" in function_call:
            return result, usage, result_reasoning
        else:
            print("Did not get json on without json constraint, attempting with json decoding")
    except Exception as e:
        print(f"Did not get json on without json constraint, attempting with json decoding: {e}")

    request["messages"].append({"role": "assistant", "content": result_reasoning})
    request["messages"].append({"role": "user", "content": ""})  # last message must be user
    # Now run with json decoding to get the function
    request["response_format"] = {
        "type": "json_schema",
        "json_schema": {
            "name": "function_call",
            "strict": "true",
            "schema": {
                "type": "object",
                "properties": {"function": {"type": "string"}, "params": {"type": "object"}},
                "required": ["function", "params"],
            },
        },
    }

    response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
    if response.status_code == 200:
        result_full = response.json()
        printd(f"JSON API response:\n{result_full}")
        result = result_full["choices"][0]["message"]["content"]
        # add usage with previous call, merge with prev usage
        # (only numeric counters are summed; nested detail objects keep the first call's values)
        for key, value in (result_full.get("usage") or {}).items():
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                usage[key] = (usage.get(key) or 0) + value
    else:
        # Best effort: fall back to the unconstrained result from the first request
        print(f"JSON-constrained request failed (code={response.status_code}), returning unconstrained result")

    return result, usage, result_reasoning
|
||||
|
||||
|
||||
def get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions"):
|
||||
@@ -24,7 +92,8 @@ def get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_windo
|
||||
# This controls how LM studio handles context overflow
|
||||
# In Letta we handle this ourselves, so this should be disabled
|
||||
# "context_overflow_policy": 0,
|
||||
"lmstudio": {"context_overflow_policy": 0}, # 0 = stop at limit
|
||||
# "lmstudio": {"context_overflow_policy": 0}, # 0 = stop at limit
|
||||
# "lmstudio": {"context_overflow_policy": "stopAtLimit"}, # https://github.com/letta-ai/letta/issues/1782
|
||||
"stream": False,
|
||||
"model": "local model",
|
||||
}
|
||||
@@ -72,6 +141,11 @@ def get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_windo
|
||||
elif api == "completions":
|
||||
result = result_full["choices"][0]["text"]
|
||||
usage = result_full.get("usage", None)
|
||||
elif api == "chat/completions":
|
||||
result = result_full["choices"][0]["content"]
|
||||
result_full["choices"][0]["reasoning_content"]
|
||||
usage = result_full.get("usage", None)
|
||||
|
||||
else:
|
||||
# Example error: msg={"error":"Context length exceeded. Tokens in context: 8000, Context length: 8000"}
|
||||
if "context length" in str(response.text).lower():
|
||||
|
||||
@@ -33,6 +33,7 @@ class LLMConfig(BaseModel):
|
||||
"webui-legacy",
|
||||
"lmstudio",
|
||||
"lmstudio-legacy",
|
||||
"lmstudio-chatcompletions",
|
||||
"llamacpp",
|
||||
"koboldcpp",
|
||||
"vllm",
|
||||
@@ -40,6 +41,7 @@ class LLMConfig(BaseModel):
|
||||
"mistral",
|
||||
"together", # completions endpoint
|
||||
"bedrock",
|
||||
"deepseek",
|
||||
] = Field(..., description="The endpoint type for the model.")
|
||||
model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
|
||||
model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
|
||||
|
||||
@@ -39,6 +39,7 @@ class Message(BaseModel):
|
||||
tool_calls: Optional[List[ToolCall]] = None
|
||||
role: str
|
||||
function_call: Optional[FunctionCall] = None # Deprecated
|
||||
reasoning_content: Optional[str] = None # Used in newer reasoning APIs
|
||||
|
||||
|
||||
class Choice(BaseModel):
|
||||
@@ -115,6 +116,7 @@ class MessageDelta(BaseModel):
|
||||
"""
|
||||
|
||||
content: Optional[str] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: Optional[List[ToolCallDelta]] = None
|
||||
role: Optional[str] = None
|
||||
function_call: Optional[FunctionCallDelta] = None # Deprecated
|
||||
|
||||
@@ -211,6 +211,75 @@ class OpenAIProvider(Provider):
|
||||
return None
|
||||
|
||||
|
||||
class DeepSeekProvider(OpenAIProvider):
    """
    Provider for the DeepSeek ChatCompletions API.

    The API is similar to OpenAI's reasoning API, but with slight differences:
    * For example, DeepSeek's API requires perfect interleaving of user/assistant
    * It also does not support native function calling
    """

    name: str = "deepseek"
    base_url: str = Field("https://api.deepseek.com/v1", description="Base URL for the DeepSeek API.")
    api_key: str = Field(..., description="API key for the DeepSeek API.")

    def get_model_context_window_size(self, model_name: str) -> Optional[int]:
        """Return the hardcoded context window for a known DeepSeek model, or None for any other name."""
        # DeepSeek doesn't return context window in the model listing,
        # so these are hardcoded from their website
        if model_name in ("deepseek-reasoner", "deepseek-chat"):
            return 64000
        return None

    def list_llm_models(self) -> List[LLMConfig]:
        """
        Fetch the model listing from `base_url` and build one `LLMConfig` per usable model.

        Models for which no context window size can be determined are skipped with a warning.
        """
        from letta.llm_api.openai import openai_get_model_list

        listing = openai_get_model_list(self.base_url, api_key=self.api_key)
        # The listing is either wrapped in a "data" key or is the collection itself
        entries = listing["data"] if "data" in listing else listing

        llm_configs = []
        for entry in entries:
            assert "id" in entry, f"DeepSeek model missing 'id' field: {entry}"
            model_name = entry["id"]

            # In case DeepSeek starts supporting it in the future:
            # (context length is returned in OpenRouter as "context_length")
            if "context_length" in entry:
                window = entry["context_length"]
            else:
                window = self.get_model_context_window_size(model_name)

            if not window:
                warnings.warn(f"Couldn't find context window size for model {model_name}")
                continue

            llm_configs.append(
                LLMConfig(
                    model=model_name,
                    model_endpoint_type="deepseek",
                    model_endpoint=self.base_url,
                    context_window=window,
                    handle=self.get_handle(model_name),
                    # Not used for deepseek-reasoner, but otherwise is true
                    put_inner_thoughts_in_kwargs=model_name != "deepseek-reasoner",
                )
            )

        return llm_configs

    def list_embedding_models(self) -> List[EmbeddingConfig]:
        """Return an empty list: no embeddings supported."""
        return []
|
||||
|
||||
|
||||
class LMStudioOpenAIProvider(OpenAIProvider):
|
||||
name: str = "lmstudio-openai"
|
||||
base_url: str = Field(..., description="Base URL for the LMStudio OpenAI API.")
|
||||
|
||||
@@ -317,6 +317,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
self.debug = False
|
||||
self.timeout = 10 * 60 # 10 minute timeout
|
||||
|
||||
# for expect_reasoning_content, we should accumulate `content`
|
||||
self.expect_reasoning_content_buffer = None
|
||||
|
||||
def _reset_inner_thoughts_json_reader(self):
|
||||
# A buffer for accumulating function arguments (we want to buffer keys and run checks on each one)
|
||||
self.function_args_reader = JSONInnerThoughtsExtractor(inner_thoughts_key=self.inner_thoughts_kwarg, wait_for_first_key=True)
|
||||
@@ -387,6 +390,39 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
# Wipe the inner thoughts buffers
|
||||
self._reset_inner_thoughts_json_reader()
|
||||
|
||||
# If we were in reasoning mode and accumulated a json block, attempt to release it as chunks
|
||||
# if self.expect_reasoning_content_buffer is not None:
|
||||
# try:
|
||||
# # NOTE: this is hardcoded for our DeepSeek API integration
|
||||
# json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
|
||||
|
||||
# if "name" in json_reasoning_content:
|
||||
# self._push_to_buffer(
|
||||
# ToolCallMessage(
|
||||
# id=message_id,
|
||||
# date=message_date,
|
||||
# tool_call=ToolCallDelta(
|
||||
# name=json_reasoning_content["name"],
|
||||
# arguments=None,
|
||||
# tool_call_id=None,
|
||||
# ),
|
||||
# )
|
||||
# )
|
||||
# if "arguments" in json_reasoning_content:
|
||||
# self._push_to_buffer(
|
||||
# ToolCallMessage(
|
||||
# id=message_id,
|
||||
# date=message_date,
|
||||
# tool_call=ToolCallDelta(
|
||||
# name=None,
|
||||
# arguments=json_reasoning_content["arguments"],
|
||||
# tool_call_id=None,
|
||||
# ),
|
||||
# )
|
||||
# )
|
||||
# except Exception as e:
|
||||
# print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
|
||||
|
||||
def step_complete(self):
|
||||
"""Signal from the agent that one 'step' finished (step = LLM response + tool execution)"""
|
||||
if not self.multi_step:
|
||||
@@ -410,7 +446,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
return
|
||||
|
||||
def _process_chunk_to_letta_style(
|
||||
self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime
|
||||
self,
|
||||
chunk: ChatCompletionChunkResponse,
|
||||
message_id: str,
|
||||
message_date: datetime,
|
||||
# if we expect `reasoning_content`, then that's what gets mapped to ReasoningMessage
|
||||
# and `content` needs to be handled outside the interface
|
||||
expect_reasoning_content: bool = False,
|
||||
) -> Optional[Union[ReasoningMessage, ToolCallMessage, AssistantMessage]]:
|
||||
"""
|
||||
Example data from non-streaming response looks like:
|
||||
@@ -426,6 +468,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
|
||||
if (
|
||||
message_delta.content is None
|
||||
and (expect_reasoning_content and message_delta.reasoning_content is None)
|
||||
and message_delta.tool_calls is None
|
||||
and message_delta.function_call is None
|
||||
and choice.finish_reason is None
|
||||
@@ -435,17 +478,68 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
return None
|
||||
|
||||
# inner thoughts
|
||||
if message_delta.content is not None:
|
||||
if message_delta.content == "":
|
||||
print("skipping empty content")
|
||||
processed_chunk = None
|
||||
if expect_reasoning_content and message_delta.reasoning_content is not None:
|
||||
processed_chunk = ReasoningMessage(
|
||||
id=message_id,
|
||||
date=message_date,
|
||||
reasoning=message_delta.reasoning_content,
|
||||
)
|
||||
elif expect_reasoning_content and message_delta.content is not None:
|
||||
# "ignore" content if we expect reasoning content
|
||||
if self.expect_reasoning_content_buffer is None:
|
||||
self.expect_reasoning_content_buffer = message_delta.content
|
||||
else:
|
||||
processed_chunk = ReasoningMessage(
|
||||
self.expect_reasoning_content_buffer += message_delta.content
|
||||
|
||||
# we expect this to be pure JSON
|
||||
# OptimisticJSONParser
|
||||
|
||||
# If we can pull a name out, pull it
|
||||
|
||||
try:
|
||||
# NOTE: this is hardcoded for our DeepSeek API integration
|
||||
json_reasoning_content = json.loads(self.expect_reasoning_content_buffer)
|
||||
print(f"json_reasoning_content: {json_reasoning_content}")
|
||||
|
||||
processed_chunk = ToolCallMessage(
|
||||
id=message_id,
|
||||
date=message_date,
|
||||
reasoning=message_delta.content,
|
||||
tool_call=ToolCallDelta(
|
||||
name=json_reasoning_content.get("name"),
|
||||
arguments=json.dumps(json_reasoning_content.get("arguments")),
|
||||
tool_call_id=None,
|
||||
),
|
||||
)
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Failed to interpret reasoning content ({self.expect_reasoning_content_buffer}) as JSON: {e}")
|
||||
|
||||
return None
|
||||
# Else,
|
||||
# return None
|
||||
# processed_chunk = ToolCallMessage(
|
||||
# id=message_id,
|
||||
# date=message_date,
|
||||
# tool_call=ToolCallDelta(
|
||||
# # name=tool_call_delta.get("name"),
|
||||
# name=None,
|
||||
# arguments=message_delta.content,
|
||||
# # tool_call_id=tool_call_delta.get("id"),
|
||||
# tool_call_id=None,
|
||||
# ),
|
||||
# )
|
||||
# return processed_chunk
|
||||
|
||||
# TODO eventually output as tool call outputs?
|
||||
# print(f"Hiding content delta stream: '{message_delta.content}'")
|
||||
# return None
|
||||
elif message_delta.content is not None:
|
||||
processed_chunk = ReasoningMessage(
|
||||
id=message_id,
|
||||
date=message_date,
|
||||
reasoning=message_delta.content,
|
||||
)
|
||||
|
||||
# tool calls
|
||||
elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
|
||||
tool_call = message_delta.tool_calls[0]
|
||||
@@ -890,7 +984,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
|
||||
return processed_chunk
|
||||
|
||||
def process_chunk(self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime):
|
||||
def process_chunk(
|
||||
self,
|
||||
chunk: ChatCompletionChunkResponse,
|
||||
message_id: str,
|
||||
message_date: datetime,
|
||||
expect_reasoning_content: bool = False,
|
||||
):
|
||||
"""Process a streaming chunk from an OpenAI-compatible server.
|
||||
|
||||
Example data from non-streaming response looks like:
|
||||
@@ -910,7 +1010,12 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
||||
# processed_chunk = self._process_chunk_to_openai_style(chunk)
|
||||
raise NotImplementedError("OpenAI proxy streaming temporarily disabled")
|
||||
else:
|
||||
processed_chunk = self._process_chunk_to_letta_style(chunk=chunk, message_id=message_id, message_date=message_date)
|
||||
processed_chunk = self._process_chunk_to_letta_style(
|
||||
chunk=chunk,
|
||||
message_id=message_id,
|
||||
message_date=message_date,
|
||||
expect_reasoning_content=expect_reasoning_content,
|
||||
)
|
||||
if processed_chunk is None:
|
||||
return
|
||||
|
||||
|
||||
@@ -48,6 +48,7 @@ from letta.schemas.providers import (
|
||||
AnthropicBedrockProvider,
|
||||
AnthropicProvider,
|
||||
AzureProvider,
|
||||
DeepSeekProvider,
|
||||
GoogleAIProvider,
|
||||
GoogleVertexProvider,
|
||||
GroqProvider,
|
||||
@@ -305,6 +306,8 @@ class SyncServer(Server):
|
||||
else model_settings.lmstudio_base_url + "/v1"
|
||||
)
|
||||
self._enabled_providers.append(LMStudioOpenAIProvider(base_url=lmstudio_url))
|
||||
if model_settings.deepseek_api_key:
|
||||
self._enabled_providers.append(DeepSeekProvider(api_key=model_settings.deepseek_api_key))
|
||||
|
||||
def load_agent(self, agent_id: str, actor: User, interface: Union[AgentInterface, None] = None) -> Agent:
|
||||
"""Updated method to load agents from persisted storage"""
|
||||
@@ -1182,11 +1185,12 @@ class SyncServer(Server):
|
||||
# Disable token streaming if the endpoint type is not one that supports it (OpenAI, Anthropic, or DeepSeek)
|
||||
# TODO: cleanup this logic
|
||||
llm_config = letta_agent.agent_state.llm_config
|
||||
supports_token_streaming = ["openai", "anthropic", "deepseek"]
|
||||
if stream_tokens and (
|
||||
llm_config.model_endpoint_type not in ["openai", "anthropic"] or "inference.memgpt.ai" in llm_config.model_endpoint
|
||||
llm_config.model_endpoint_type not in supports_token_streaming or "inference.memgpt.ai" in llm_config.model_endpoint
|
||||
):
|
||||
warnings.warn(
|
||||
f"Token streaming is only supported for models with type 'openai' or 'anthropic' in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
|
||||
f"Token streaming is only supported for models with type {' or '.join(supports_token_streaming)} in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
|
||||
)
|
||||
stream_tokens = False
|
||||
|
||||
|
||||
@@ -60,6 +60,9 @@ class ModelSettings(BaseSettings):
|
||||
openai_api_key: Optional[str] = None
|
||||
openai_api_base: str = "https://api.openai.com/v1"
|
||||
|
||||
# deepseek
|
||||
deepseek_api_key: Optional[str] = None
|
||||
|
||||
# groq
|
||||
groq_api_key: Optional[str] = None
|
||||
|
||||
|
||||
@@ -824,12 +824,16 @@ def parse_json(string) -> dict:
|
||||
result = None
|
||||
try:
|
||||
result = json_loads(string)
|
||||
if not isinstance(result, dict):
|
||||
raise ValueError(f"JSON from string input ({string}) is not a dictionary (type {type(result)}): {result}")
|
||||
return result
|
||||
except Exception as e:
|
||||
print(f"Error parsing json with json package: {e}")
|
||||
|
||||
try:
|
||||
result = demjson.decode(string)
|
||||
if not isinstance(result, dict):
|
||||
raise ValueError(f"JSON from string input ({string}) is not a dictionary (type {type(result)}): {result}")
|
||||
return result
|
||||
except demjson.JSONDecodeError as e:
|
||||
print(f"Error parsing json with demjson package: {e}")
|
||||
|
||||
7
tests/configs/llm_model_configs/deepseek-reasoner.json
Normal file
7
tests/configs/llm_model_configs/deepseek-reasoner.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"model": "deepseek-reasoner",
|
||||
"model_endpoint_type": "deepseek",
|
||||
"model_endpoint": "https://api.deepseek.com/v1",
|
||||
"context_window": 64000,
|
||||
"put_inner_thoughts_in_kwargs": false
|
||||
}
|
||||
@@ -83,7 +83,7 @@ def setup_agent(
|
||||
# ======================================================================================================================
|
||||
|
||||
|
||||
def check_first_response_is_valid_for_llm_endpoint(filename: str) -> ChatCompletionResponse:
|
||||
def check_first_response_is_valid_for_llm_endpoint(filename: str, validate_inner_monologue_contents: bool = True) -> ChatCompletionResponse:
|
||||
"""
|
||||
Checks that the first response is valid:
|
||||
|
||||
@@ -126,7 +126,11 @@ def check_first_response_is_valid_for_llm_endpoint(filename: str) -> ChatComplet
|
||||
assert_contains_valid_function_call(choice.message, validator_func)
|
||||
|
||||
# Assert that the message has an inner monologue
|
||||
assert_contains_correct_inner_monologue(choice, agent_state.llm_config.put_inner_thoughts_in_kwargs)
|
||||
assert_contains_correct_inner_monologue(
|
||||
choice,
|
||||
agent_state.llm_config.put_inner_thoughts_in_kwargs,
|
||||
validate_inner_monologue_contents=validate_inner_monologue_contents,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
@@ -470,7 +474,11 @@ def assert_inner_monologue_is_valid(message: Message) -> None:
|
||||
raise InvalidInnerMonologueError(messages=[message], explanation=f"{phrase} is in monologue")
|
||||
|
||||
|
||||
def assert_contains_correct_inner_monologue(choice: Choice, inner_thoughts_in_kwargs: bool) -> None:
|
||||
def assert_contains_correct_inner_monologue(
|
||||
choice: Choice,
|
||||
inner_thoughts_in_kwargs: bool,
|
||||
validate_inner_monologue_contents: bool = True,
|
||||
) -> None:
|
||||
"""
|
||||
Helper function to check that the inner monologue exists and is valid.
|
||||
"""
|
||||
@@ -483,4 +491,5 @@ def assert_contains_correct_inner_monologue(choice: Choice, inner_thoughts_in_kw
|
||||
if not monologue or monologue is None or monologue == "":
|
||||
raise MissingInnerMonologueError(messages=[message])
|
||||
|
||||
assert_inner_monologue_is_valid(message)
|
||||
if validate_inner_monologue_contents:
|
||||
assert_inner_monologue_is_valid(message)
|
||||
|
||||
@@ -315,6 +315,19 @@ def test_vertex_gemini_pro_20_returns_valid_first_message():
|
||||
print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
# ======================================================================================================================
|
||||
# DEEPSEEK TESTS
|
||||
# ======================================================================================================================
|
||||
@pytest.mark.deepseek_basic
def test_deepseek_reasoner_returns_valid_first_message():
    """Check that the first response obtained with the deepseek-reasoner config is valid."""
    config_path = os.path.join(llm_config_dir, "deepseek-reasoner.json")
    # Reasoner models can produce fairly "meta" inner monologue (e.g. analysis that
    # talks about functions), so the content checks on the monologue are turned off.
    response = check_first_response_is_valid_for_llm_endpoint(
        config_path,
        validate_inner_monologue_contents=False,
    )
    # Log out successful response
    print(f"Got successful response from client: \n\n{response}")
|
||||
|
||||
|
||||
# ======================================================================================================================
|
||||
# TOGETHER TESTS
|
||||
# ======================================================================================================================
|
||||
|
||||
@@ -4,6 +4,7 @@ from letta.schemas.providers import (
|
||||
AnthropicBedrockProvider,
|
||||
AnthropicProvider,
|
||||
AzureProvider,
|
||||
DeepSeekProvider,
|
||||
GoogleAIProvider,
|
||||
GoogleVertexProvider,
|
||||
GroqProvider,
|
||||
@@ -23,6 +24,14 @@ def test_openai():
|
||||
print(models)
|
||||
|
||||
|
||||
def test_deepseek():
    """List the LLM models exposed by the DeepSeek provider.

    Reads the API key from the DEEPSEEK_API_KEY environment variable and
    fails if it is not set.
    """
    deepseek_key = os.getenv("DEEPSEEK_API_KEY")
    assert deepseek_key is not None
    available_models = DeepSeekProvider(api_key=deepseek_key).list_llm_models()
    print(available_models)
|
||||
|
||||
|
||||
def test_anthropic():
|
||||
api_key = os.getenv("ANTHROPIC_API_KEY")
|
||||
assert api_key is not None
|
||||
|
||||
Reference in New Issue
Block a user