chore: clean up legacy anthropic path (#3905)

2025-08-13 15:09:13 -07:00
parent 15d8cc224c
commit edf39c565f
2 changed files with 2 additions and 414 deletions
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -7,11 +7,7 @@ import requests

 from letta.constants import CLI_WARNING_PREFIX
 from letta.errors import LettaConfigurationError, RateLimitExceededError
-from letta.llm_api.anthropic import (
-    anthropic_bedrock_chat_completions_request,
-    anthropic_chat_completions_process_stream,
-    anthropic_chat_completions_request,
-)
+from letta.llm_api.anthropic import anthropic_bedrock_chat_completions_request
 from letta.llm_api.aws_bedrock import has_valid_aws_credentials
 from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
@@ -311,89 +307,6 @@ def create(

        return response

-    elif llm_config.model_endpoint_type == "anthropic":
-        if not use_tool_naming:
-            raise NotImplementedError("Only tool calling supported on Anthropic API requests")
-
-        if llm_config.enable_reasoner:
-            llm_config.put_inner_thoughts_in_kwargs = False
-
-        # Force tool calling
-        tool_call = None
-        if functions is None:
-            # Special case for summarization path
-            tools = None
-            tool_choice = None
-        elif force_tool_call is not None:
-            # tool_call = {"type": "function", "function": {"name": force_tool_call}}
-            tool_choice = {"type": "tool", "name": force_tool_call}
-            tools = [{"type": "function", "function": f} for f in functions if f["name"] == force_tool_call]
-            assert functions is not None
-
-            # need to have this setting to be able to put inner thoughts in kwargs
-            llm_config.put_inner_thoughts_in_kwargs = True
-        else:
-            if llm_config.put_inner_thoughts_in_kwargs:
-                # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
-                tool_choice = {"type": "any", "disable_parallel_tool_use": True}
-            else:
-                tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
-
-        chat_completion_request = ChatCompletionRequest(
-            model=llm_config.model,
-            messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-            tools=tools,
-            tool_choice=tool_choice,
-            max_tokens=llm_config.max_tokens,  # Note: max_tokens is required for Anthropic API
-            temperature=llm_config.temperature,
-            stream=stream,
-        )
-
-        # Handle streaming
-        if stream:  # Client requested token streaming
-            assert isinstance(stream_interface, (AgentChunkStreamingInterface, AgentRefreshStreamingInterface)), type(stream_interface)
-
-            stream_interface.inner_thoughts_in_kwargs = True
-            response = anthropic_chat_completions_process_stream(
-                chat_completion_request=chat_completion_request,
-                put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
-                stream_interface=stream_interface,
-                extended_thinking=llm_config.enable_reasoner,
-                max_reasoning_tokens=llm_config.max_reasoning_tokens,
-                provider_name=llm_config.provider_name,
-                provider_category=llm_config.provider_category,
-                name=name,
-                user_id=user_id,
-            )
-
-        else:
-            # Client did not request token streaming (expect a blocking backend response)
-            response = anthropic_chat_completions_request(
-                data=chat_completion_request,
-                put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
-                extended_thinking=llm_config.enable_reasoner,
-                max_reasoning_tokens=llm_config.max_reasoning_tokens,
-                provider_name=llm_config.provider_name,
-                provider_category=llm_config.provider_category,
-                user_id=user_id,
-            )
-
-        if llm_config.put_inner_thoughts_in_kwargs:
-            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-
-        telemetry_manager.create_provider_trace(
-            actor=actor,
-            provider_trace_create=ProviderTraceCreate(
-                request_json=chat_completion_request.model_json_schema(),
-                response_json=response.model_json_schema(),
-                step_id=step_id,
-                organization_id=actor.organization_id,
-            ),
-        )
-
-        return response
-
    # elif llm_config.model_endpoint_type == "cohere":
    #     if stream:
    #         raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")