diff --git a/fern/examples/agent_config.py b/fern/examples/agent_config.py index 09ac1234..84cb9779 100644 --- a/fern/examples/agent_config.py +++ b/fern/examples/agent_config.py @@ -32,7 +32,7 @@ azure_openai_agent = client.agents.create( # anthropic anthropic_agent = client.agents.create( - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", # note: anthropic does not support embeddings so you will need another provider embedding="openai/text-embedding-3-small", # optional configuration diff --git a/fern/examples/data_sources.py b/fern/examples/data_sources.py index dc8b61f8..018bb6e0 100644 --- a/fern/examples/data_sources.py +++ b/fern/examples/data_sources.py @@ -50,7 +50,7 @@ print(f"Passages in source: {passages}") agent = client.agents.create( name="my_agent", memory_blocks=[], - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", embedding=embedding_configs[0].handle, tags=["worker"], ) diff --git a/fern/examples/memory.py b/fern/examples/memory.py index 38618e5e..0b656486 100644 --- a/fern/examples/memory.py +++ b/fern/examples/memory.py @@ -8,7 +8,7 @@ agent = client.agents.create( {"label": "persona", "value": "I am a memory agent"}, {"label": "human", "value": "Name: Bob", "limit": 10000}, ], - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", embedding="openai/text-embedding-3-small", tags=["worker"], ) @@ -25,7 +25,7 @@ block = client.blocks.create( shared_block_agent = client.agents.create( name="shared_block_agent", memory_blocks=[block.id], - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", embedding="openai/text-embedding-3-small", tags=["worker"], ) diff --git a/fern/examples/simple_multiagent.py b/fern/examples/simple_multiagent.py index 5f6490c9..d04a4ef2 100644 --- a/fern/examples/simple_multiagent.py +++ b/fern/examples/simple_multiagent.py @@ -10,7 +10,7 @@ try: memory_blocks=[ 
{"label": "persona", "value": "I am the supervisor, and I can communicate with worker agents with the tag `worker`"} ], - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", embedding="openai/text-embedding-3-small", tags=["supervisor"], tools=["send_message_to_agents_matching_all_tags"], @@ -28,7 +28,7 @@ try: worker_agent = client.agents.create( name="worker_agent", memory_blocks=[{"label": "persona", "value": f"I am the worker, my supervisor agent has ID {supervisor_agent.id}"}], - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", embedding="openai/text-embedding-3-small", tool_ids=[tool.id], tags=["worker"], diff --git a/fern/examples/tool_rules.py b/fern/examples/tool_rules.py index 098d993d..041265a4 100644 --- a/fern/examples/tool_rules.py +++ b/fern/examples/tool_rules.py @@ -16,7 +16,7 @@ client = Letta(base_url="http://localhost:8283") search_agent = client.agents.create( name="search_agent", memory_blocks=[], - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", embedding="openai/text-embedding-3-small", tags=["worker"], tool_rules=[ diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py index 8099155b..deba4b53 100644 --- a/letta/llm_api/deepseek_client.py +++ b/letta/llm_api/deepseek_client.py @@ -59,7 +59,7 @@ def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMe return assistant_message -def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]: +def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List["_Message"]: """ Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message. Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling. 
@@ -103,7 +103,7 @@ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Messag def build_deepseek_chat_completions_request( llm_config: LLMConfig, - messages: List[_Message], + messages: List["_Message"], user_id: Optional[str], functions: Optional[list], function_call: Optional[str], diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py index 5a12c9c3..4f08b380 100644 --- a/letta/server/rest_api/app.py +++ b/letta/server/rest_api/app.py @@ -26,6 +26,7 @@ from letta.errors import ( AgentFileImportError, AgentNotFoundForExportError, BedrockPermissionError, + HandleNotFoundError, LettaAgentNotFoundError, LettaExpiredError, LettaInvalidArgumentError, @@ -369,6 +370,7 @@ def create_application() -> "FastAPI": app.add_exception_handler(LettaAgentNotFoundError, _error_handler_404_agent) app.add_exception_handler(LettaUserNotFoundError, _error_handler_404_user) app.add_exception_handler(AgentNotFoundForExportError, _error_handler_404) + app.add_exception_handler(HandleNotFoundError, _error_handler_404) # 410 Expired errors app.add_exception_handler(LettaExpiredError, _error_handler_410) diff --git a/letta/server/server.py b/letta/server/server.py index 064755b3..ef860f75 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -430,6 +430,10 @@ class SyncServer(object): log_event(name="start get_cached_llm_config", attributes=config_params) request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params) log_event(name="end get_cached_llm_config", attributes=config_params) + if request.model and isinstance(request.model, str): + assert request.llm_config.handle == request.model, ( + f"LLM config handle {request.llm_config.handle} does not match request handle {request.model}" + ) if request.reasoning is None: request.reasoning = request.llm_config.enable_reasoner or request.llm_config.put_inner_thoughts_in_kwargs @@ -1031,92 +1035,72 @@ class 
SyncServer(object): max_reasoning_tokens: Optional[int] = None, enable_reasoner: Optional[bool] = None, ) -> LLMConfig: - try: - provider_name, model_name = handle.split("/", 1) - provider = await self.get_provider_from_name_async(provider_name, actor) - - all_llm_configs = await provider.list_llm_models_async() - llm_configs = [config for config in all_llm_configs if config.handle == handle] - if not llm_configs: - llm_configs = [config for config in all_llm_configs if config.model == model_name] - if not llm_configs: - available_handles = [config.handle for config in all_llm_configs] - raise HandleNotFoundError(handle, available_handles) - except ValueError as e: - llm_configs = [config for config in self.get_local_llm_configs() if config.handle == handle] - if not llm_configs: - llm_configs = [config for config in self.get_local_llm_configs() if config.model == model_name] - if not llm_configs: - raise e - - if len(llm_configs) == 1: - llm_config = llm_configs[0] - elif len(llm_configs) > 1: - raise LettaInvalidArgumentError( - f"Multiple LLM models with name {model_name} supported by {provider_name}", argument_name="model_name" - ) - else: - llm_config = llm_configs[0] + """String match the `handle` to the available configs""" + matched_llm_config = None + available_handles = [] + for provider in self._enabled_providers: + llm_configs = await provider.list_llm_models_async() + for llm_config in llm_configs: + available_handles.append(llm_config.handle) + if llm_config.handle == handle: + matched_llm_config = llm_config + break + # first match wins: stop querying the remaining providers once the handle is resolved + if matched_llm_config is not None: + break + if not matched_llm_config: + raise HandleNotFoundError(handle, available_handles) if context_window_limit is not None: - if context_window_limit > llm_config.context_window: + if context_window_limit > matched_llm_config.context_window: raise LettaInvalidArgumentError( - f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})", + f"Context window limit ({context_window_limit}) is greater 
than maximum of ({matched_llm_config.context_window})", argument_name="context_window_limit", ) - llm_config.context_window = context_window_limit + matched_llm_config.context_window = context_window_limit else: - llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit) + matched_llm_config.context_window = min(matched_llm_config.context_window, model_settings.global_max_context_window_limit) if max_tokens is not None: - llm_config.max_tokens = max_tokens + matched_llm_config.max_tokens = max_tokens if max_reasoning_tokens is not None: if not max_tokens or max_reasoning_tokens > max_tokens: raise LettaInvalidArgumentError( f"Max reasoning tokens ({max_reasoning_tokens}) must be less than max tokens ({max_tokens})", argument_name="max_reasoning_tokens", ) - llm_config.max_reasoning_tokens = max_reasoning_tokens + matched_llm_config.max_reasoning_tokens = max_reasoning_tokens if enable_reasoner is not None: - llm_config.enable_reasoner = enable_reasoner - if enable_reasoner and llm_config.model_endpoint_type == "anthropic": - llm_config.put_inner_thoughts_in_kwargs = False + matched_llm_config.enable_reasoner = enable_reasoner + if enable_reasoner and matched_llm_config.model_endpoint_type == "anthropic": + matched_llm_config.put_inner_thoughts_in_kwargs = False - return llm_config + return matched_llm_config @trace_method async def get_embedding_config_from_handle_async( self, actor: User, handle: str, embedding_chunk_size: int = constants.DEFAULT_EMBEDDING_CHUNK_SIZE ) -> EmbeddingConfig: - try: - provider_name, model_name = handle.split("/", 1) - provider = await self.get_provider_from_name_async(provider_name, actor) - - all_embedding_configs = await provider.list_embedding_models_async() - embedding_configs = [config for config in all_embedding_configs if config.handle == handle] - if not embedding_configs: - raise LettaInvalidArgumentError( - f"Embedding model {model_name} is not supported by {provider_name}", 
argument_name="model_name" - ) - except LettaInvalidArgumentError as e: - # search local configs - embedding_configs = [config for config in self.get_local_embedding_configs() if config.handle == handle] - if not embedding_configs: - raise e - - if len(embedding_configs) == 1: - embedding_config = embedding_configs[0] - elif len(embedding_configs) > 1: - raise LettaInvalidArgumentError( - f"Multiple embedding models with name {model_name} supported by {provider_name}", argument_name="model_name" - ) - else: - embedding_config = embedding_configs[0] + """String match the `handle` to the available embedding configs; raises HandleNotFoundError if none match""" + matched_embedding_config = None + available_handles = [] + for provider in self._enabled_providers: + embedding_configs = await provider.list_embedding_models_async() + for embedding_config in embedding_configs: + available_handles.append(embedding_config.handle) + if embedding_config.handle == handle: + matched_embedding_config = embedding_config + break + # first match wins: stop querying the remaining providers once the handle is resolved + if matched_embedding_config is not None: + break + if not matched_embedding_config: + raise HandleNotFoundError(handle, available_handles) if embedding_chunk_size: - embedding_config.embedding_chunk_size = embedding_chunk_size + matched_embedding_config.embedding_chunk_size = embedding_chunk_size - return embedding_config + return matched_embedding_config async def get_provider_from_name_async(self, provider_name: str, actor: User) -> Provider: all_providers = await self.get_enabled_providers_async(actor) diff --git a/tests/integration_test_batch_api_cron_jobs.py b/tests/integration_test_batch_api_cron_jobs.py index b0a79e50..9826f167 100644 --- a/tests/integration_test_batch_api_cron_jobs.py +++ b/tests/integration_test_batch_api_cron_jobs.py @@ -134,7 +134,7 @@ def create_failed_response(custom_id: str) -> BetaMessageBatchIndividualResponse # --- Test Setup Helpers --- # -async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-3-5-sonnet-20241022"): +async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-sonnet-4-20250514"): """Create a test agent with standardized configuration.""" dummy_llm_config = LLMConfig( 
model="claude-3-7-sonnet-latest", diff --git a/tests/integration_test_voice_agent.py b/tests/integration_test_voice_agent.py index e3149966..7a593267 100644 --- a/tests/integration_test_voice_agent.py +++ b/tests/integration_test_voice_agent.py @@ -225,7 +225,7 @@ def _assert_valid_chunk(chunk, idx, chunks): @pytest.mark.asyncio(loop_scope="module") -@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet-20241022"]) +@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-sonnet-4-20250514"]) @pytest.mark.parametrize( "message", ["How are you?", "Use the roll_dice tool to roll a die for me", "Use the run_code tool to calculate 2+2"] ) diff --git a/tests/test_letta_agent_batch.py b/tests/test_letta_agent_batch.py index b7845af8..02e16965 100644 --- a/tests/test_letta_agent_batch.py +++ b/tests/test_letta_agent_batch.py @@ -35,9 +35,9 @@ from tests.utils import create_tool_from_func # Model identifiers used in tests MODELS = { - "sonnet": "anthropic/claude-3-5-sonnet-20241022", - "haiku": "anthropic/claude-3-5-haiku-20241022", - "opus": "anthropic/claude-3-opus-20240229", + "sonnet": "anthropic/claude-sonnet-4-20250514", + "haiku": "anthropic/claude-haiku-4-5-20251001", + "opus": "anthropic/claude-opus-4-1-20250805", } # Expected message roles in batch requests @@ -933,9 +933,9 @@ async def test_step_until_request_prepares_and_submits_batch_correctly( # Map of agent IDs to their expected models expected_models = { - agent_sonnet.id: "claude-3-5-sonnet-20241022", - agent_haiku.id: "claude-3-5-haiku-20241022", - agent_opus.id: "claude-3-opus-20240229", + agent_sonnet.id: "claude-sonnet-4-20250514", + agent_haiku.id: "claude-haiku-4-5-20251001", + agent_opus.id: "claude-opus-4-1-20250805", } # Set up spy function for the Anthropic client diff --git a/tests/test_llm_clients.py b/tests/test_llm_clients.py index 8ce7ebb3..311b776e 100644 --- a/tests/test_llm_clients.py +++ b/tests/test_llm_clients.py @@ -17,7 +17,7 @@ def 
llm_config(): model_endpoint_type="anthropic", model_endpoint="https://api.anthropic.com/v1", context_window=32000, - handle="anthropic/claude-3-5-sonnet-20241022", + handle="anthropic/claude-sonnet-4-20250514", put_inner_thoughts_in_kwargs=False, max_tokens=4096, enable_reasoner=True, diff --git a/tests/test_sdk_client.py b/tests/test_sdk_client.py index a7196c7c..97e7f5d8 100644 --- a/tests/test_sdk_client.py +++ b/tests/test_sdk_client.py @@ -2331,7 +2331,7 @@ def test_create_agent(client: LettaSDKClient) -> None: label="human", ) ], - model="anthropic/claude-3-5-sonnet-20241022", + model="anthropic/claude-sonnet-4-20250514", embedding="openai/text-embedding-ada-002", ) assert agent is not None diff --git a/tests/test_sonnet_nonnative_reasoning_buffering.py b/tests/test_sonnet_nonnative_reasoning_buffering.py index ed7628af..7ca306dd 100755 --- a/tests/test_sonnet_nonnative_reasoning_buffering.py +++ b/tests/test_sonnet_nonnative_reasoning_buffering.py @@ -126,7 +126,6 @@ def detect_burst_chunks(chunks: List[Tuple[float, any]], burst_threshold: float @pytest.mark.parametrize( "model,expected_buffering", [ - ("anthropic/claude-3-5-sonnet-20241022", False), # With fine-grained streaming beta, should stream better ("anthropic/claude-sonnet-4-20250514", False), # Sonnet 4 should NOT show buffering (has native reasoning) ("openai/gpt-4.1", False), # GPT-4.1 should NOT show buffering (uses native reasoning) ],