fix: Fix streaming when there are child tool rules (#1638)
This commit is contained in:
@@ -25,6 +25,7 @@ from letta.llm_api.aws_bedrock import get_bedrock_client
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions
|
||||
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
||||
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
||||
from letta.log import get_logger
|
||||
from letta.schemas.message import Message as _Message
|
||||
from letta.schemas.message import MessageRole as _MessageRole
|
||||
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
|
||||
@@ -44,6 +45,8 @@ from letta.settings import model_settings
|
||||
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
|
||||
from letta.tracing import log_event
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
BASE_URL = "https://api.anthropic.com/v1"
|
||||
|
||||
|
||||
@@ -620,9 +623,9 @@ def _prepare_anthropic_request(
|
||||
data: ChatCompletionRequest,
|
||||
inner_thoughts_xml_tag: Optional[str] = "thinking",
|
||||
# if true, prefix fill the generation with the thinking tag
|
||||
prefix_fill: bool = True,
|
||||
prefix_fill: bool = False,
|
||||
# if true, put COT inside the tool calls instead of inside the content
|
||||
put_inner_thoughts_in_kwargs: bool = False,
|
||||
put_inner_thoughts_in_kwargs: bool = True,
|
||||
bedrock: bool = False,
|
||||
# extended thinking related fields
|
||||
# https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
|
||||
@@ -634,7 +637,9 @@ def _prepare_anthropic_request(
|
||||
assert (
|
||||
max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
|
||||
), "max tokens must be greater than thinking budget"
|
||||
assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
|
||||
if put_inner_thoughts_in_kwargs:
|
||||
logger.warning("Extended thinking not compatible with put_inner_thoughts_in_kwargs")
|
||||
put_inner_thoughts_in_kwargs = False
|
||||
# assert not prefix_fill, "extended thinking not compatible with prefix_fill"
|
||||
# Silently disable prefix_fill for now
|
||||
prefix_fill = False
|
||||
|
||||
@@ -322,6 +322,7 @@ def create(
|
||||
|
||||
# Force tool calling
|
||||
tool_call = None
|
||||
llm_config.put_inner_thoughts_in_kwargs = True
|
||||
if functions is None:
|
||||
# Special case for summarization path
|
||||
tools = None
|
||||
@@ -356,6 +357,7 @@ def create(
|
||||
if stream: # Client requested token streaming
|
||||
assert isinstance(stream_interface, (AgentChunkStreamingInterface, AgentRefreshStreamingInterface)), type(stream_interface)
|
||||
|
||||
stream_interface.inner_thoughts_in_kwargs = True
|
||||
response = anthropic_chat_completions_process_stream(
|
||||
chat_completion_request=chat_completion_request,
|
||||
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
||||
|
||||
Reference in New Issue
Block a user