feat: patch model listing to actually match handle [LET-5888] (#5754)

This commit is contained in:
Sarah Wooders
2025-10-27 15:30:48 -07:00
committed by Caren Thomas
parent 042a32d371
commit e7fff12da0
14 changed files with 60 additions and 82 deletions

View File

@@ -32,7 +32,7 @@ azure_openai_agent = client.agents.create(
# anthropic
anthropic_agent = client.agents.create(
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
# note: anthropic does not support embeddings so you will need another provider
embedding="openai/text-embedding-3-small",
# optional configuration

View File

@@ -50,7 +50,7 @@ print(f"Passages in source: {passages}")
agent = client.agents.create(
name="my_agent",
memory_blocks=[],
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
embedding=embedding_configs[0].handle,
tags=["worker"],
)

View File

@@ -8,7 +8,7 @@ agent = client.agents.create(
{"label": "persona", "value": "I am a memory agent"},
{"label": "human", "value": "Name: Bob", "limit": 10000},
],
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
embedding="openai/text-embedding-3-small",
tags=["worker"],
)
@@ -25,7 +25,7 @@ block = client.blocks.create(
shared_block_agent = client.agents.create(
name="shared_block_agent",
memory_blocks=[block.id],
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
embedding="openai/text-embedding-3-small",
tags=["worker"],
)

View File

@@ -10,7 +10,7 @@ try:
memory_blocks=[
{"label": "persona", "value": "I am the supervisor, and I can communicate with worker agents with the tag `worker`"}
],
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
embedding="openai/text-embedding-3-small",
tags=["supervisor"],
tools=["send_message_to_agents_matching_all_tags"],
@@ -28,7 +28,7 @@ try:
worker_agent = client.agents.create(
name="worker_agent",
memory_blocks=[{"label": "persona", "value": f"I am the worker, my supervisor agent has ID {supervisor_agent.id}"}],
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
embedding="openai/text-embedding-3-small",
tool_ids=[tool.id],
tags=["worker"],

View File

@@ -16,7 +16,7 @@ client = Letta(base_url="http://localhost:8283")
search_agent = client.agents.create(
name="search_agent",
memory_blocks=[],
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
embedding="openai/text-embedding-3-small",
tags=["worker"],
tool_rules=[

View File

@@ -59,7 +59,7 @@ def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMe
return assistant_message
def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List["_Message"]:
"""
Deepseek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
@@ -103,7 +103,7 @@ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Messag
def build_deepseek_chat_completions_request(
llm_config: LLMConfig,
messages: List[_Message],
messages: List["_Message"],
user_id: Optional[str],
functions: Optional[list],
function_call: Optional[str],

View File

@@ -26,6 +26,7 @@ from letta.errors import (
AgentFileImportError,
AgentNotFoundForExportError,
BedrockPermissionError,
HandleNotFoundError,
LettaAgentNotFoundError,
LettaExpiredError,
LettaInvalidArgumentError,
@@ -369,6 +370,7 @@ def create_application() -> "FastAPI":
app.add_exception_handler(LettaAgentNotFoundError, _error_handler_404_agent)
app.add_exception_handler(LettaUserNotFoundError, _error_handler_404_user)
app.add_exception_handler(AgentNotFoundForExportError, _error_handler_404)
app.add_exception_handler(HandleNotFoundError, _error_handler_404)
# 410 Expired errors
app.add_exception_handler(LettaExpiredError, _error_handler_410)

View File

@@ -430,6 +430,11 @@ class SyncServer(object):
log_event(name="start get_cached_llm_config", attributes=config_params)
request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
log_event(name="end get_cached_llm_config", attributes=config_params)
if request.model and isinstance(request.model, str):
assert request.llm_config.handle == request.model, (
f"LLM config handle {request.llm_config.handle} does not match request handle {request.model}"
)
print("GOT LLM CONFIG", request.llm_config)
if request.reasoning is None:
request.reasoning = request.llm_config.enable_reasoner or request.llm_config.put_inner_thoughts_in_kwargs
@@ -1031,92 +1036,63 @@ class SyncServer(object):
max_reasoning_tokens: Optional[int] = None,
enable_reasoner: Optional[bool] = None,
) -> LLMConfig:
try:
provider_name, model_name = handle.split("/", 1)
provider = await self.get_provider_from_name_async(provider_name, actor)
all_llm_configs = await provider.list_llm_models_async()
llm_configs = [config for config in all_llm_configs if config.handle == handle]
if not llm_configs:
llm_configs = [config for config in all_llm_configs if config.model == model_name]
if not llm_configs:
available_handles = [config.handle for config in all_llm_configs]
raise HandleNotFoundError(handle, available_handles)
except ValueError as e:
llm_configs = [config for config in self.get_local_llm_configs() if config.handle == handle]
if not llm_configs:
llm_configs = [config for config in self.get_local_llm_configs() if config.model == model_name]
if not llm_configs:
raise e
if len(llm_configs) == 1:
llm_config = llm_configs[0]
elif len(llm_configs) > 1:
raise LettaInvalidArgumentError(
f"Multiple LLM models with name {model_name} supported by {provider_name}", argument_name="model_name"
)
else:
llm_config = llm_configs[0]
"""String match the `handle` to the available configs"""
matched_llm_config = None
available_handles = []
for provider in self._enabled_providers:
llm_configs = await provider.list_llm_models_async()
for llm_config in llm_configs:
available_handles.append(llm_config.handle)
if llm_config.handle == handle:
matched_llm_config = llm_config
break
if not matched_llm_config:
raise HandleNotFoundError(handle, available_handles)
if context_window_limit is not None:
if context_window_limit > llm_config.context_window:
if context_window_limit > matched_llm_config.context_window:
raise LettaInvalidArgumentError(
f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})",
f"Context window limit ({context_window_limit}) is greater than maximum of ({matched_llm_config.context_window})",
argument_name="context_window_limit",
)
llm_config.context_window = context_window_limit
matched_llm_config.context_window = context_window_limit
else:
llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
matched_llm_config.context_window = min(matched_llm_config.context_window, model_settings.global_max_context_window_limit)
if max_tokens is not None:
llm_config.max_tokens = max_tokens
matched_llm_config.max_tokens = max_tokens
if max_reasoning_tokens is not None:
if not max_tokens or max_reasoning_tokens > max_tokens:
raise LettaInvalidArgumentError(
f"Max reasoning tokens ({max_reasoning_tokens}) must be less than max tokens ({max_tokens})",
argument_name="max_reasoning_tokens",
)
llm_config.max_reasoning_tokens = max_reasoning_tokens
matched_llm_config.max_reasoning_tokens = max_reasoning_tokens
if enable_reasoner is not None:
llm_config.enable_reasoner = enable_reasoner
if enable_reasoner and llm_config.model_endpoint_type == "anthropic":
llm_config.put_inner_thoughts_in_kwargs = False
matched_llm_config.enable_reasoner = enable_reasoner
if enable_reasoner and matched_llm_config.model_endpoint_type == "anthropic":
matched_llm_config.put_inner_thoughts_in_kwargs = False
return llm_config
return matched_llm_config
@trace_method
async def get_embedding_config_from_handle_async(
self, actor: User, handle: str, embedding_chunk_size: int = constants.DEFAULT_EMBEDDING_CHUNK_SIZE
) -> EmbeddingConfig:
try:
provider_name, model_name = handle.split("/", 1)
provider = await self.get_provider_from_name_async(provider_name, actor)
all_embedding_configs = await provider.list_embedding_models_async()
embedding_configs = [config for config in all_embedding_configs if config.handle == handle]
if not embedding_configs:
raise LettaInvalidArgumentError(
f"Embedding model {model_name} is not supported by {provider_name}", argument_name="model_name"
)
except LettaInvalidArgumentError as e:
# search local configs
embedding_configs = [config for config in self.get_local_embedding_configs() if config.handle == handle]
if not embedding_configs:
raise e
if len(embedding_configs) == 1:
embedding_config = embedding_configs[0]
elif len(embedding_configs) > 1:
raise LettaInvalidArgumentError(
f"Multiple embedding models with name {model_name} supported by {provider_name}", argument_name="model_name"
)
else:
embedding_config = embedding_configs[0]
matched_embedding_config = None
available_handles = []
for provider in self._enabled_providers:
embedding_configs = await provider.list_embedding_models_async()
for embedding_config in embedding_configs:
available_handles.append(embedding_config.handle)
if embedding_config.handle == handle:
matched_embedding_config = embedding_config
break
if embedding_chunk_size:
embedding_config.embedding_chunk_size = embedding_chunk_size
matched_embedding_config.embedding_chunk_size = embedding_chunk_size
return embedding_config
return matched_embedding_config
async def get_provider_from_name_async(self, provider_name: str, actor: User) -> Provider:
all_providers = await self.get_enabled_providers_async(actor)

View File

@@ -134,7 +134,7 @@ def create_failed_response(custom_id: str) -> BetaMessageBatchIndividualResponse
# --- Test Setup Helpers --- #
async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-3-5-sonnet-20241022"):
async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-sonnet-4-20250514"):
"""Create a test agent with standardized configuration."""
dummy_llm_config = LLMConfig(
model="claude-3-7-sonnet-latest",

View File

@@ -225,7 +225,7 @@ def _assert_valid_chunk(chunk, idx, chunks):
@pytest.mark.asyncio(loop_scope="module")
@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet-20241022"])
@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-sonnet-4-20250514"])
@pytest.mark.parametrize(
"message", ["How are you?", "Use the roll_dice tool to roll a die for me", "Use the run_code tool to calculate 2+2"]
)

View File

@@ -35,9 +35,9 @@ from tests.utils import create_tool_from_func
# Model identifiers used in tests
MODELS = {
"sonnet": "anthropic/claude-3-5-sonnet-20241022",
"haiku": "anthropic/claude-3-5-haiku-20241022",
"opus": "anthropic/claude-3-opus-20240229",
"sonnet": "anthropic/claude-sonnet-4-20250514",
"haiku": "anthropic/claude-haiku-4-5-20251001",
"opus": "anthropic/claude-opus-4-1-20250805",
}
# Expected message roles in batch requests
@@ -933,9 +933,9 @@ async def test_step_until_request_prepares_and_submits_batch_correctly(
# Map of agent IDs to their expected models
expected_models = {
agent_sonnet.id: "claude-3-5-sonnet-20241022",
agent_haiku.id: "claude-3-5-haiku-20241022",
agent_opus.id: "claude-3-opus-20240229",
agent_sonnet.id: "claude-sonnet-4-20250514",
agent_haiku.id: "claude-haiku-4-5-20251001",
agent_opus.id: "claude-opus-4-1-20250805",
}
# Set up spy function for the Anthropic client

View File

@@ -17,7 +17,7 @@ def llm_config():
model_endpoint_type="anthropic",
model_endpoint="https://api.anthropic.com/v1",
context_window=32000,
handle="anthropic/claude-3-5-sonnet-20241022",
handle="anthropic/claude-sonnet-4-20250514",
put_inner_thoughts_in_kwargs=False,
max_tokens=4096,
enable_reasoner=True,

View File

@@ -2331,7 +2331,7 @@ def test_create_agent(client: LettaSDKClient) -> None:
label="human",
)
],
model="anthropic/claude-3-5-sonnet-20241022",
model="anthropic/claude-sonnet-4-20250514",
embedding="openai/text-embedding-ada-002",
)
assert agent is not None

View File

@@ -126,7 +126,7 @@ def detect_burst_chunks(chunks: List[Tuple[float, any]], burst_threshold: float
@pytest.mark.parametrize(
"model,expected_buffering",
[
("anthropic/claude-3-5-sonnet-20241022", False), # With fine-grained streaming beta, should stream better
("anthropic/claude-sonnet-4-20250514", False), # With fine-grained streaming beta, should stream better
("anthropic/claude-sonnet-4-20250514", False), # Sonnet 4 should NOT show buffering (has native reasoning)
("openai/gpt-4.1", False), # GPT-4.1 should NOT show buffering (uses native reasoning)
],