diff --git a/fern/examples/agent_config.py b/fern/examples/agent_config.py
index 09ac1234..84cb9779 100644
--- a/fern/examples/agent_config.py
+++ b/fern/examples/agent_config.py
@@ -32,7 +32,7 @@ azure_openai_agent = client.agents.create(
 
 # anthropic
 anthropic_agent = client.agents.create(
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-20250514",
     # note: anthropic does not support embeddings so you will need another provider
     embedding="openai/text-embedding-3-small",
     # optional configuration
diff --git a/fern/examples/data_sources.py b/fern/examples/data_sources.py
index dc8b61f8..018bb6e0 100644
--- a/fern/examples/data_sources.py
+++ b/fern/examples/data_sources.py
@@ -50,7 +50,7 @@ print(f"Passages in source: {passages}")
 agent = client.agents.create(
     name="my_agent",
     memory_blocks=[],
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-20250514",
     embedding=embedding_configs[0].handle,
     tags=["worker"],
 )
diff --git a/fern/examples/memory.py b/fern/examples/memory.py
index 38618e5e..0b656486 100644
--- a/fern/examples/memory.py
+++ b/fern/examples/memory.py
@@ -8,7 +8,7 @@ agent = client.agents.create(
         {"label": "persona", "value": "I am a memory agent"},
         {"label": "human", "value": "Name: Bob", "limit": 10000},
     ],
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-20250514",
     embedding="openai/text-embedding-3-small",
     tags=["worker"],
 )
@@ -25,7 +25,7 @@ block = client.blocks.create(
 shared_block_agent = client.agents.create(
     name="shared_block_agent",
     memory_blocks=[block.id],
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-20250514",
     embedding="openai/text-embedding-3-small",
     tags=["worker"],
 )
diff --git a/fern/examples/simple_multiagent.py b/fern/examples/simple_multiagent.py
index 5f6490c9..d04a4ef2 100644
--- a/fern/examples/simple_multiagent.py
+++ b/fern/examples/simple_multiagent.py
@@ -10,7 +10,7 @@ try:
         memory_blocks=[
             {"label": "persona", "value": "I am the supervisor, and I can communicate with worker agents with the tag `worker`"}
         ],
-        model="anthropic/claude-3-5-sonnet-20241022",
+        model="anthropic/claude-sonnet-4-20250514",
         embedding="openai/text-embedding-3-small",
         tags=["supervisor"],
         tools=["send_message_to_agents_matching_all_tags"],
@@ -28,7 +28,7 @@ try:
     worker_agent = client.agents.create(
         name="worker_agent",
         memory_blocks=[{"label": "persona", "value": f"I am the worker, my supervisor agent has ID {supervisor_agent.id}"}],
-        model="anthropic/claude-3-5-sonnet-20241022",
+        model="anthropic/claude-sonnet-4-20250514",
         embedding="openai/text-embedding-3-small",
         tool_ids=[tool.id],
         tags=["worker"],
diff --git a/fern/examples/tool_rules.py b/fern/examples/tool_rules.py
index 098d993d..041265a4 100644
--- a/fern/examples/tool_rules.py
+++ b/fern/examples/tool_rules.py
@@ -16,7 +16,7 @@ client = Letta(base_url="http://localhost:8283")
 search_agent = client.agents.create(
     name="search_agent",
     memory_blocks=[],
-    model="anthropic/claude-3-5-sonnet-20241022",
+    model="anthropic/claude-sonnet-4-20250514",
     embedding="openai/text-embedding-3-small",
     tags=["worker"],
     tool_rules=[
diff --git a/letta/llm_api/deepseek_client.py b/letta/llm_api/deepseek_client.py
index 8099155b..deba4b53 100644
--- a/letta/llm_api/deepseek_client.py
+++ b/letta/llm_api/deepseek_client.py
@@ -59,7 +59,7 @@ def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMe
     return assistant_message
 
 
-def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
+def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List["_Message"]:
     """
     Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
     Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
@@ -103,7 +103,7 @@ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Messag
 
 def build_deepseek_chat_completions_request(
     llm_config: LLMConfig,
-    messages: List[_Message],
+    messages: List["_Message"],
     user_id: Optional[str],
     functions: Optional[list],
     function_call: Optional[str],
diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py
index 5a12c9c3..4f08b380 100644
--- a/letta/server/rest_api/app.py
+++ b/letta/server/rest_api/app.py
@@ -26,6 +26,7 @@ from letta.errors import (
     AgentFileImportError,
     AgentNotFoundForExportError,
     BedrockPermissionError,
+    HandleNotFoundError,
     LettaAgentNotFoundError,
     LettaExpiredError,
     LettaInvalidArgumentError,
@@ -369,6 +370,7 @@ def create_application() -> "FastAPI":
     app.add_exception_handler(LettaAgentNotFoundError, _error_handler_404_agent)
     app.add_exception_handler(LettaUserNotFoundError, _error_handler_404_user)
     app.add_exception_handler(AgentNotFoundForExportError, _error_handler_404)
+    app.add_exception_handler(HandleNotFoundError, _error_handler_404)
 
     # 410 Expired errors
     app.add_exception_handler(LettaExpiredError, _error_handler_410)
diff --git a/letta/server/server.py b/letta/server/server.py
index 064755b3..ef860f75 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -430,6 +430,11 @@ class SyncServer(object):
             log_event(name="start get_cached_llm_config", attributes=config_params)
             request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
             log_event(name="end get_cached_llm_config", attributes=config_params)
+            # Raise explicitly instead of `assert`: assertions are stripped under `python -O`, which would skip this check.
+            if request.model and isinstance(request.model, str) and request.llm_config.handle != request.model:
+                raise LettaInvalidArgumentError(
+                    f"LLM config handle {request.llm_config.handle} does not match request handle {request.model}", argument_name="model"
+                )
 
         if request.reasoning is None:
             request.reasoning = request.llm_config.enable_reasoner or request.llm_config.put_inner_thoughts_in_kwargs
@@ -1031,92 +1036,63 @@ class SyncServer(object):
         max_reasoning_tokens: Optional[int] = None,
         enable_reasoner: Optional[bool] = None,
     ) -> LLMConfig:
-        try:
-            provider_name, model_name = handle.split("/", 1)
-            provider = await self.get_provider_from_name_async(provider_name, actor)
-
-            all_llm_configs = await provider.list_llm_models_async()
-            llm_configs = [config for config in all_llm_configs if config.handle == handle]
-            if not llm_configs:
-                llm_configs = [config for config in all_llm_configs if config.model == model_name]
-            if not llm_configs:
-                available_handles = [config.handle for config in all_llm_configs]
-                raise HandleNotFoundError(handle, available_handles)
-        except ValueError as e:
-            llm_configs = [config for config in self.get_local_llm_configs() if config.handle == handle]
-            if not llm_configs:
-                llm_configs = [config for config in self.get_local_llm_configs() if config.model == model_name]
-            if not llm_configs:
-                raise e
-
-        if len(llm_configs) == 1:
-            llm_config = llm_configs[0]
-        elif len(llm_configs) > 1:
-            raise LettaInvalidArgumentError(
-                f"Multiple LLM models with name {model_name} supported by {provider_name}", argument_name="model_name"
-            )
-        else:
-            llm_config = llm_configs[0]
+        """String match the `handle` to the available configs"""
+        matched_llm_config = None
+        available_handles = []
+        for provider in self._enabled_providers:
+            llm_configs = await provider.list_llm_models_async()
+            for llm_config in llm_configs:
+                available_handles.append(llm_config.handle)
+                if llm_config.handle == handle:
+                    matched_llm_config = llm_config
+                    break
+        if not matched_llm_config:
+            raise HandleNotFoundError(handle, available_handles)
 
         if context_window_limit is not None:
-            if context_window_limit > llm_config.context_window:
+            if context_window_limit > matched_llm_config.context_window:
                 raise LettaInvalidArgumentError(
-                    f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})",
+                    f"Context window limit ({context_window_limit}) is greater than maximum of ({matched_llm_config.context_window})",
                     argument_name="context_window_limit",
                 )
-            llm_config.context_window = context_window_limit
+            matched_llm_config.context_window = context_window_limit
         else:
-            llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
+            matched_llm_config.context_window = min(matched_llm_config.context_window, model_settings.global_max_context_window_limit)
 
         if max_tokens is not None:
-            llm_config.max_tokens = max_tokens
+            matched_llm_config.max_tokens = max_tokens
         if max_reasoning_tokens is not None:
             if not max_tokens or max_reasoning_tokens > max_tokens:
                 raise LettaInvalidArgumentError(
                     f"Max reasoning tokens ({max_reasoning_tokens}) must be less than max tokens ({max_tokens})",
                     argument_name="max_reasoning_tokens",
                 )
-            llm_config.max_reasoning_tokens = max_reasoning_tokens
+            matched_llm_config.max_reasoning_tokens = max_reasoning_tokens
         if enable_reasoner is not None:
-            llm_config.enable_reasoner = enable_reasoner
-            if enable_reasoner and llm_config.model_endpoint_type == "anthropic":
-                llm_config.put_inner_thoughts_in_kwargs = False
+            matched_llm_config.enable_reasoner = enable_reasoner
+            if enable_reasoner and matched_llm_config.model_endpoint_type == "anthropic":
+                matched_llm_config.put_inner_thoughts_in_kwargs = False
 
-        return llm_config
+        return matched_llm_config
 
     @trace_method
     async def get_embedding_config_from_handle_async(
         self, actor: User, handle: str, embedding_chunk_size: int = constants.DEFAULT_EMBEDDING_CHUNK_SIZE
     ) -> EmbeddingConfig:
-        try:
-            provider_name, model_name = handle.split("/", 1)
-            provider = await self.get_provider_from_name_async(provider_name, actor)
-
-            all_embedding_configs = await provider.list_embedding_models_async()
-            embedding_configs = [config for config in all_embedding_configs if config.handle == handle]
-            if not embedding_configs:
-                raise LettaInvalidArgumentError(
-                    f"Embedding model {model_name} is not supported by {provider_name}", argument_name="model_name"
-                )
-        except LettaInvalidArgumentError as e:
-            # search local configs
-            embedding_configs = [config for config in self.get_local_embedding_configs() if config.handle == handle]
-            if not embedding_configs:
-                raise e
-
-        if len(embedding_configs) == 1:
-            embedding_config = embedding_configs[0]
-        elif len(embedding_configs) > 1:
-            raise LettaInvalidArgumentError(
-                f"Multiple embedding models with name {model_name} supported by {provider_name}", argument_name="model_name"
-            )
-        else:
-            embedding_config = embedding_configs[0]
+        matched_embedding_config = None
+        available_handles = []
+        for provider in self._enabled_providers:
+            for embedding_config in await provider.list_embedding_models_async():
+                available_handles.append(embedding_config.handle)
+                if matched_embedding_config is None and embedding_config.handle == handle:  # keep the first exact match
+                    matched_embedding_config = embedding_config
+        if matched_embedding_config is None:  # unknown handle: report it rather than dereferencing None below
+            raise HandleNotFoundError(handle, available_handles)
 
         if embedding_chunk_size:
-            embedding_config.embedding_chunk_size = embedding_chunk_size
+            matched_embedding_config.embedding_chunk_size = embedding_chunk_size
 
-        return embedding_config
+        return matched_embedding_config
 
     async def get_provider_from_name_async(self, provider_name: str, actor: User) -> Provider:
         all_providers = await self.get_enabled_providers_async(actor)
diff --git a/tests/integration_test_batch_api_cron_jobs.py b/tests/integration_test_batch_api_cron_jobs.py
index b0a79e50..9826f167 100644
--- a/tests/integration_test_batch_api_cron_jobs.py
+++ b/tests/integration_test_batch_api_cron_jobs.py
@@ -134,7 +134,7 @@ def create_failed_response(custom_id: str) -> BetaMessageBatchIndividualResponse
 # --- Test Setup Helpers --- #
 
 
-async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-3-5-sonnet-20241022"):
+async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-sonnet-4-20250514"):
     """Create a test agent with standardized configuration."""
     dummy_llm_config = LLMConfig(
         model="claude-3-7-sonnet-latest",
diff --git a/tests/integration_test_voice_agent.py b/tests/integration_test_voice_agent.py
index e3149966..7a593267 100644
--- a/tests/integration_test_voice_agent.py
+++ b/tests/integration_test_voice_agent.py
@@ -225,7 +225,7 @@ def _assert_valid_chunk(chunk, idx, chunks):
 
 
 @pytest.mark.asyncio(loop_scope="module")
-@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet-20241022"])
+@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-sonnet-4-20250514"])
 @pytest.mark.parametrize(
     "message", ["How are you?", "Use the roll_dice tool to roll a die for me", "Use the run_code tool to calculate 2+2"]
 )
diff --git a/tests/test_letta_agent_batch.py b/tests/test_letta_agent_batch.py
index b7845af8..02e16965 100644
--- a/tests/test_letta_agent_batch.py
+++ b/tests/test_letta_agent_batch.py
@@ -35,9 +35,9 @@ from tests.utils import create_tool_from_func
 
 # Model identifiers used in tests
 MODELS = {
-    "sonnet": "anthropic/claude-3-5-sonnet-20241022",
-    "haiku": "anthropic/claude-3-5-haiku-20241022",
-    "opus": "anthropic/claude-3-opus-20240229",
+    "sonnet": "anthropic/claude-sonnet-4-20250514",
+    "haiku": "anthropic/claude-haiku-4-5-20251001",
+    "opus": "anthropic/claude-opus-4-1-20250805",
 }
 
 # Expected message roles in batch requests
@@ -933,9 +933,9 @@ async def test_step_until_request_prepares_and_submits_batch_correctly(
 
     # Map of agent IDs to their expected models
     expected_models = {
-        agent_sonnet.id: "claude-3-5-sonnet-20241022",
-        agent_haiku.id: "claude-3-5-haiku-20241022",
-        agent_opus.id: "claude-3-opus-20240229",
+        agent_sonnet.id: "claude-sonnet-4-20250514",
+        agent_haiku.id: "claude-haiku-4-5-20251001",
+        agent_opus.id: "claude-opus-4-1-20250805",
     }
 
     # Set up spy function for the Anthropic client
diff --git a/tests/test_llm_clients.py b/tests/test_llm_clients.py
index 8ce7ebb3..311b776e 100644
--- a/tests/test_llm_clients.py
+++ b/tests/test_llm_clients.py
@@ -17,7 +17,7 @@ def llm_config():
         model_endpoint_type="anthropic",
         model_endpoint="https://api.anthropic.com/v1",
         context_window=32000,
-        handle="anthropic/claude-3-5-sonnet-20241022",
+        handle="anthropic/claude-sonnet-4-20250514",
         put_inner_thoughts_in_kwargs=False,
         max_tokens=4096,
         enable_reasoner=True,
diff --git a/tests/test_sdk_client.py b/tests/test_sdk_client.py
index a7196c7c..97e7f5d8 100644
--- a/tests/test_sdk_client.py
+++ b/tests/test_sdk_client.py
@@ -2331,7 +2331,7 @@ def test_create_agent(client: LettaSDKClient) -> None:
                 label="human",
             )
         ],
-        model="anthropic/claude-3-5-sonnet-20241022",
+        model="anthropic/claude-sonnet-4-20250514",
         embedding="openai/text-embedding-ada-002",
     )
     assert agent is not None
diff --git a/tests/test_sonnet_nonnative_reasoning_buffering.py b/tests/test_sonnet_nonnative_reasoning_buffering.py
index ed7628af..7ca306dd 100755
--- a/tests/test_sonnet_nonnative_reasoning_buffering.py
+++ b/tests/test_sonnet_nonnative_reasoning_buffering.py
@@ -126,7 +126,6 @@ def detect_burst_chunks(chunks: List[Tuple[float, any]], burst_threshold: float
 @pytest.mark.parametrize(
     "model,expected_buffering",
     [
-        ("anthropic/claude-3-5-sonnet-20241022", False),  # With fine-grained streaming beta, should stream better
         ("anthropic/claude-sonnet-4-20250514", False),  # Sonnet 4 should NOT show buffering (has native reasoning)
         ("openai/gpt-4.1", False),  # GPT-4.1 should NOT show buffering (uses native reasoning)
     ],