feat: fix streaming put_inner_thoughts_in_kwargs (#1913)

This commit is contained in:
Charles Packer
2024-10-21 17:07:20 -07:00
committed by GitHub
parent e940511a6f
commit 1a93b85bfd
6 changed files with 677 additions and 126 deletions

View File

@@ -9,7 +9,11 @@ from httpx_sse._exceptions import SSEError
from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
from letta.errors import LLMError
from letta.llm_api.helpers import add_inner_thoughts_to_functions, make_post_request
from letta.llm_api.helpers import (
add_inner_thoughts_to_functions,
convert_to_structured_output,
make_post_request,
)
from letta.local_llm.constants import (
INNER_THOUGHTS_KWARG,
INNER_THOUGHTS_KWARG_DESCRIPTION,
@@ -112,7 +116,7 @@ def build_openai_chat_completions_request(
use_tool_naming: bool,
max_tokens: Optional[int],
) -> ChatCompletionRequest:
if llm_config.put_inner_thoughts_in_kwargs:
if functions and llm_config.put_inner_thoughts_in_kwargs:
functions = add_inner_thoughts_to_functions(
functions=functions,
inner_thoughts_key=INNER_THOUGHTS_KWARG,
@@ -154,8 +158,8 @@ def build_openai_chat_completions_request(
)
# https://platform.openai.com/docs/guides/text-generation/json-mode
# only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
data.response_format = {"type": "json_object"}
# if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
# data.response_format = {"type": "json_object"}
if "inference.memgpt.ai" in llm_config.model_endpoint:
# override user id for inference.memgpt.ai
@@ -362,6 +366,8 @@ def openai_chat_completions_process_stream(
chat_completion_response.usage.completion_tokens = n_chunks
chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
assert len(chat_completion_response.choices) > 0, chat_completion_response
# printd(chat_completion_response)
return chat_completion_response
@@ -461,6 +467,13 @@ def openai_chat_completions_request_stream(
data.pop("tools")
data.pop("tool_choice", None) # extra safe, should exist always (default="auto")
if "tools" in data:
for tool in data["tools"]:
# tool["strict"] = True
tool["function"] = convert_to_structured_output(tool["function"])
# print(f"\n\n\n\nData[tools]: {json.dumps(data['tools'], indent=2)}")
printd(f"Sending request to {url}")
try:
return _sse_post(url=url, data=data, headers=headers)