chore: clean up legacy anthropic path (#3905)
This commit is contained in:
@@ -7,11 +7,7 @@ import requests
|
||||
|
||||
from letta.constants import CLI_WARNING_PREFIX
|
||||
from letta.errors import LettaConfigurationError, RateLimitExceededError
|
||||
from letta.llm_api.anthropic import (
|
||||
anthropic_bedrock_chat_completions_request,
|
||||
anthropic_chat_completions_process_stream,
|
||||
anthropic_chat_completions_request,
|
||||
)
|
||||
from letta.llm_api.anthropic import anthropic_bedrock_chat_completions_request
|
||||
from letta.llm_api.aws_bedrock import has_valid_aws_credentials
|
||||
from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
|
||||
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
|
||||
@@ -311,89 +307,6 @@ def create(
|
||||
|
||||
return response
|
||||
|
||||
elif llm_config.model_endpoint_type == "anthropic":
|
||||
if not use_tool_naming:
|
||||
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
|
||||
|
||||
if llm_config.enable_reasoner:
|
||||
llm_config.put_inner_thoughts_in_kwargs = False
|
||||
|
||||
# Force tool calling
|
||||
tool_call = None
|
||||
if functions is None:
|
||||
# Special case for summarization path
|
||||
tools = None
|
||||
tool_choice = None
|
||||
elif force_tool_call is not None:
|
||||
# tool_call = {"type": "function", "function": {"name": force_tool_call}}
|
||||
tool_choice = {"type": "tool", "name": force_tool_call}
|
||||
tools = [{"type": "function", "function": f} for f in functions if f["name"] == force_tool_call]
|
||||
assert functions is not None
|
||||
|
||||
# need to have this setting to be able to put inner thoughts in kwargs
|
||||
llm_config.put_inner_thoughts_in_kwargs = True
|
||||
else:
|
||||
if llm_config.put_inner_thoughts_in_kwargs:
|
||||
# tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
|
||||
tool_choice = {"type": "any", "disable_parallel_tool_use": True}
|
||||
else:
|
||||
tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
|
||||
tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
|
||||
|
||||
chat_completion_request = ChatCompletionRequest(
|
||||
model=llm_config.model,
|
||||
messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
|
||||
tools=tools,
|
||||
tool_choice=tool_choice,
|
||||
max_tokens=llm_config.max_tokens, # Note: max_tokens is required for Anthropic API
|
||||
temperature=llm_config.temperature,
|
||||
stream=stream,
|
||||
)
|
||||
|
||||
# Handle streaming
|
||||
if stream: # Client requested token streaming
|
||||
assert isinstance(stream_interface, (AgentChunkStreamingInterface, AgentRefreshStreamingInterface)), type(stream_interface)
|
||||
|
||||
stream_interface.inner_thoughts_in_kwargs = True
|
||||
response = anthropic_chat_completions_process_stream(
|
||||
chat_completion_request=chat_completion_request,
|
||||
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
||||
stream_interface=stream_interface,
|
||||
extended_thinking=llm_config.enable_reasoner,
|
||||
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
||||
provider_name=llm_config.provider_name,
|
||||
provider_category=llm_config.provider_category,
|
||||
name=name,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
else:
|
||||
# Client did not request token streaming (expect a blocking backend response)
|
||||
response = anthropic_chat_completions_request(
|
||||
data=chat_completion_request,
|
||||
put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
||||
extended_thinking=llm_config.enable_reasoner,
|
||||
max_reasoning_tokens=llm_config.max_reasoning_tokens,
|
||||
provider_name=llm_config.provider_name,
|
||||
provider_category=llm_config.provider_category,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
if llm_config.put_inner_thoughts_in_kwargs:
|
||||
response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
|
||||
|
||||
telemetry_manager.create_provider_trace(
|
||||
actor=actor,
|
||||
provider_trace_create=ProviderTraceCreate(
|
||||
request_json=chat_completion_request.model_json_schema(),
|
||||
response_json=response.model_json_schema(),
|
||||
step_id=step_id,
|
||||
organization_id=actor.organization_id,
|
||||
),
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
# elif llm_config.model_endpoint_type == "cohere":
|
||||
# if stream:
|
||||
# raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
|
||||
|
||||
Reference in New Issue
Block a user