feat: patch model listing to actually match handle [LET-5888] (#5754)
This commit is contained in:
committed by
Caren Thomas
parent
042a32d371
commit
e7fff12da0
@@ -32,7 +32,7 @@ azure_openai_agent = client.agents.create(
|
||||
|
||||
# anthropic
|
||||
anthropic_agent = client.agents.create(
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
# note: anthropic does not support embeddings so you will need another provider
|
||||
embedding="openai/text-embedding-3-small",
|
||||
# optional configuration
|
||||
|
||||
@@ -50,7 +50,7 @@ print(f"Passages in source: {passages}")
|
||||
agent = client.agents.create(
|
||||
name="my_agent",
|
||||
memory_blocks=[],
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
embedding=embedding_configs[0].handle,
|
||||
tags=["worker"],
|
||||
)
|
||||
|
||||
@@ -8,7 +8,7 @@ agent = client.agents.create(
|
||||
{"label": "persona", "value": "I am a memory agent"},
|
||||
{"label": "human", "value": "Name: Bob", "limit": 10000},
|
||||
],
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
embedding="openai/text-embedding-3-small",
|
||||
tags=["worker"],
|
||||
)
|
||||
@@ -25,7 +25,7 @@ block = client.blocks.create(
|
||||
shared_block_agent = client.agents.create(
|
||||
name="shared_block_agent",
|
||||
memory_blocks=[block.id],
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
embedding="openai/text-embedding-3-small",
|
||||
tags=["worker"],
|
||||
)
|
||||
|
||||
@@ -10,7 +10,7 @@ try:
|
||||
memory_blocks=[
|
||||
{"label": "persona", "value": "I am the supervisor, and I can communicate with worker agents with the tag `worker`"}
|
||||
],
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
embedding="openai/text-embedding-3-small",
|
||||
tags=["supervisor"],
|
||||
tools=["send_message_to_agents_matching_all_tags"],
|
||||
@@ -28,7 +28,7 @@ try:
|
||||
worker_agent = client.agents.create(
|
||||
name="worker_agent",
|
||||
memory_blocks=[{"label": "persona", "value": f"I am the worker, my supervisor agent has ID {supervisor_agent.id}"}],
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
embedding="openai/text-embedding-3-small",
|
||||
tool_ids=[tool.id],
|
||||
tags=["worker"],
|
||||
|
||||
@@ -16,7 +16,7 @@ client = Letta(base_url="http://localhost:8283")
|
||||
search_agent = client.agents.create(
|
||||
name="search_agent",
|
||||
memory_blocks=[],
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
embedding="openai/text-embedding-3-small",
|
||||
tags=["worker"],
|
||||
tool_rules=[
|
||||
|
||||
@@ -59,7 +59,7 @@ def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMe
|
||||
return assistant_message
|
||||
|
||||
|
||||
def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
|
||||
def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List["_Message"]:
|
||||
"""
|
||||
DeepSeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
|
||||
Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
|
||||
@@ -103,7 +103,7 @@ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Messag
|
||||
|
||||
def build_deepseek_chat_completions_request(
|
||||
llm_config: LLMConfig,
|
||||
messages: List[_Message],
|
||||
messages: List["_Message"],
|
||||
user_id: Optional[str],
|
||||
functions: Optional[list],
|
||||
function_call: Optional[str],
|
||||
|
||||
@@ -26,6 +26,7 @@ from letta.errors import (
|
||||
AgentFileImportError,
|
||||
AgentNotFoundForExportError,
|
||||
BedrockPermissionError,
|
||||
HandleNotFoundError,
|
||||
LettaAgentNotFoundError,
|
||||
LettaExpiredError,
|
||||
LettaInvalidArgumentError,
|
||||
@@ -369,6 +370,7 @@ def create_application() -> "FastAPI":
|
||||
app.add_exception_handler(LettaAgentNotFoundError, _error_handler_404_agent)
|
||||
app.add_exception_handler(LettaUserNotFoundError, _error_handler_404_user)
|
||||
app.add_exception_handler(AgentNotFoundForExportError, _error_handler_404)
|
||||
app.add_exception_handler(HandleNotFoundError, _error_handler_404)
|
||||
|
||||
# 410 Expired errors
|
||||
app.add_exception_handler(LettaExpiredError, _error_handler_410)
|
||||
|
||||
@@ -430,6 +430,11 @@ class SyncServer(object):
|
||||
log_event(name="start get_cached_llm_config", attributes=config_params)
|
||||
request.llm_config = await self.get_cached_llm_config_async(actor=actor, **config_params)
|
||||
log_event(name="end get_cached_llm_config", attributes=config_params)
|
||||
if request.model and isinstance(request.model, str):
|
||||
assert request.llm_config.handle == request.model, (
|
||||
f"LLM config handle {request.llm_config.handle} does not match request handle {request.model}"
|
||||
)
|
||||
print("GOT LLM CONFIG", request.llm_config)
|
||||
|
||||
if request.reasoning is None:
|
||||
request.reasoning = request.llm_config.enable_reasoner or request.llm_config.put_inner_thoughts_in_kwargs
|
||||
@@ -1031,92 +1036,63 @@ class SyncServer(object):
|
||||
max_reasoning_tokens: Optional[int] = None,
|
||||
enable_reasoner: Optional[bool] = None,
|
||||
) -> LLMConfig:
|
||||
try:
|
||||
provider_name, model_name = handle.split("/", 1)
|
||||
provider = await self.get_provider_from_name_async(provider_name, actor)
|
||||
|
||||
all_llm_configs = await provider.list_llm_models_async()
|
||||
llm_configs = [config for config in all_llm_configs if config.handle == handle]
|
||||
if not llm_configs:
|
||||
llm_configs = [config for config in all_llm_configs if config.model == model_name]
|
||||
if not llm_configs:
|
||||
available_handles = [config.handle for config in all_llm_configs]
|
||||
raise HandleNotFoundError(handle, available_handles)
|
||||
except ValueError as e:
|
||||
llm_configs = [config for config in self.get_local_llm_configs() if config.handle == handle]
|
||||
if not llm_configs:
|
||||
llm_configs = [config for config in self.get_local_llm_configs() if config.model == model_name]
|
||||
if not llm_configs:
|
||||
raise e
|
||||
|
||||
if len(llm_configs) == 1:
|
||||
llm_config = llm_configs[0]
|
||||
elif len(llm_configs) > 1:
|
||||
raise LettaInvalidArgumentError(
|
||||
f"Multiple LLM models with name {model_name} supported by {provider_name}", argument_name="model_name"
|
||||
)
|
||||
else:
|
||||
llm_config = llm_configs[0]
|
||||
"""String match the `handle` to the available configs"""
|
||||
matched_llm_config = None
|
||||
available_handles = []
|
||||
for provider in self._enabled_providers:
|
||||
llm_configs = await provider.list_llm_models_async()
|
||||
for llm_config in llm_configs:
|
||||
available_handles.append(llm_config.handle)
|
||||
if llm_config.handle == handle:
|
||||
matched_llm_config = llm_config
|
||||
break
|
||||
if not matched_llm_config:
|
||||
raise HandleNotFoundError(handle, available_handles)
|
||||
|
||||
if context_window_limit is not None:
|
||||
if context_window_limit > llm_config.context_window:
|
||||
if context_window_limit > matched_llm_config.context_window:
|
||||
raise LettaInvalidArgumentError(
|
||||
f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})",
|
||||
f"Context window limit ({context_window_limit}) is greater than maximum of ({matched_llm_config.context_window})",
|
||||
argument_name="context_window_limit",
|
||||
)
|
||||
llm_config.context_window = context_window_limit
|
||||
matched_llm_config.context_window = context_window_limit
|
||||
else:
|
||||
llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
|
||||
matched_llm_config.context_window = min(matched_llm_config.context_window, model_settings.global_max_context_window_limit)
|
||||
|
||||
if max_tokens is not None:
|
||||
llm_config.max_tokens = max_tokens
|
||||
matched_llm_config.max_tokens = max_tokens
|
||||
if max_reasoning_tokens is not None:
|
||||
if not max_tokens or max_reasoning_tokens > max_tokens:
|
||||
raise LettaInvalidArgumentError(
|
||||
f"Max reasoning tokens ({max_reasoning_tokens}) must be less than max tokens ({max_tokens})",
|
||||
argument_name="max_reasoning_tokens",
|
||||
)
|
||||
llm_config.max_reasoning_tokens = max_reasoning_tokens
|
||||
matched_llm_config.max_reasoning_tokens = max_reasoning_tokens
|
||||
if enable_reasoner is not None:
|
||||
llm_config.enable_reasoner = enable_reasoner
|
||||
if enable_reasoner and llm_config.model_endpoint_type == "anthropic":
|
||||
llm_config.put_inner_thoughts_in_kwargs = False
|
||||
matched_llm_config.enable_reasoner = enable_reasoner
|
||||
if enable_reasoner and matched_llm_config.model_endpoint_type == "anthropic":
|
||||
matched_llm_config.put_inner_thoughts_in_kwargs = False
|
||||
|
||||
return llm_config
|
||||
return matched_llm_config
|
||||
|
||||
@trace_method
async def get_embedding_config_from_handle_async(
    self, actor: User, handle: str, embedding_chunk_size: int = constants.DEFAULT_EMBEDDING_CHUNK_SIZE
) -> EmbeddingConfig:
    """String match the `handle` to the embedding configs of the enabled providers.

    Args:
        actor: The requesting user. Not consulted by the lookup itself; kept
            so the signature stays compatible with existing callers.
        handle: Exact handle to look up (compared with ``==`` against each
            config's ``handle`` attribute).
        embedding_chunk_size: When truthy, written onto the matched config's
            ``embedding_chunk_size`` before it is returned.

    Returns:
        The matching embedding config. It is the same object the provider
        listed, so the chunk-size override is applied to it in place.

    Raises:
        HandleNotFoundError: If no enabled provider lists ``handle``. The
            error carries every handle that was seen during the scan.
    """
    matched_embedding_config = None
    available_handles = []
    for provider in self._enabled_providers:
        embedding_configs = await provider.list_embedding_models_async()
        for embedding_config in embedding_configs:
            available_handles.append(embedding_config.handle)
            if embedding_config.handle == handle:
                matched_embedding_config = embedding_config
                # NOTE: this only leaves the inner loop; remaining providers
                # are still scanned, so a later provider exposing the same
                # handle replaces this match.
                break

    # Without this guard a missing handle would surface as an AttributeError
    # on None below (or a None return) instead of a not-found error.
    if matched_embedding_config is None:
        raise HandleNotFoundError(handle, available_handles)

    if embedding_chunk_size:
        matched_embedding_config.embedding_chunk_size = embedding_chunk_size

    return matched_embedding_config
|
||||
|
||||
async def get_provider_from_name_async(self, provider_name: str, actor: User) -> Provider:
|
||||
all_providers = await self.get_enabled_providers_async(actor)
|
||||
|
||||
@@ -134,7 +134,7 @@ def create_failed_response(custom_id: str) -> BetaMessageBatchIndividualResponse
|
||||
# --- Test Setup Helpers --- #
|
||||
|
||||
|
||||
async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-3-5-sonnet-20241022"):
|
||||
async def create_test_agent(name, actor, test_id: Optional[str] = None, model="anthropic/claude-sonnet-4-20250514"):
|
||||
"""Create a test agent with standardized configuration."""
|
||||
dummy_llm_config = LLMConfig(
|
||||
model="claude-3-7-sonnet-latest",
|
||||
|
||||
@@ -225,7 +225,7 @@ def _assert_valid_chunk(chunk, idx, chunks):
|
||||
|
||||
|
||||
@pytest.mark.asyncio(loop_scope="module")
|
||||
@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-3-5-sonnet-20241022"])
|
||||
@pytest.mark.parametrize("model", ["openai/gpt-4o-mini", "anthropic/claude-sonnet-4-20250514"])
|
||||
@pytest.mark.parametrize(
|
||||
"message", ["How are you?", "Use the roll_dice tool to roll a die for me", "Use the run_code tool to calculate 2+2"]
|
||||
)
|
||||
|
||||
@@ -35,9 +35,9 @@ from tests.utils import create_tool_from_func
|
||||
|
||||
# Model identifiers used in tests
|
||||
MODELS = {
|
||||
"sonnet": "anthropic/claude-3-5-sonnet-20241022",
|
||||
"haiku": "anthropic/claude-3-5-haiku-20241022",
|
||||
"opus": "anthropic/claude-3-opus-20240229",
|
||||
"sonnet": "anthropic/claude-sonnet-4-20250514",
|
||||
"haiku": "anthropic/claude-haiku-4-5-20251001",
|
||||
"opus": "anthropic/claude-opus-4-1-20250805",
|
||||
}
|
||||
|
||||
# Expected message roles in batch requests
|
||||
@@ -933,9 +933,9 @@ async def test_step_until_request_prepares_and_submits_batch_correctly(
|
||||
|
||||
# Map of agent IDs to their expected models
|
||||
expected_models = {
|
||||
agent_sonnet.id: "claude-3-5-sonnet-20241022",
|
||||
agent_haiku.id: "claude-3-5-haiku-20241022",
|
||||
agent_opus.id: "claude-3-opus-20240229",
|
||||
agent_sonnet.id: "claude-sonnet-4-20250514",
|
||||
agent_haiku.id: "claude-haiku-4-5-20251001",
|
||||
agent_opus.id: "claude-opus-4-1-20250805",
|
||||
}
|
||||
|
||||
# Set up spy function for the Anthropic client
|
||||
|
||||
@@ -17,7 +17,7 @@ def llm_config():
|
||||
model_endpoint_type="anthropic",
|
||||
model_endpoint="https://api.anthropic.com/v1",
|
||||
context_window=32000,
|
||||
handle="anthropic/claude-3-5-sonnet-20241022",
|
||||
handle="anthropic/claude-sonnet-4-20250514",
|
||||
put_inner_thoughts_in_kwargs=False,
|
||||
max_tokens=4096,
|
||||
enable_reasoner=True,
|
||||
|
||||
@@ -2331,7 +2331,7 @@ def test_create_agent(client: LettaSDKClient) -> None:
|
||||
label="human",
|
||||
)
|
||||
],
|
||||
model="anthropic/claude-3-5-sonnet-20241022",
|
||||
model="anthropic/claude-sonnet-4-20250514",
|
||||
embedding="openai/text-embedding-ada-002",
|
||||
)
|
||||
assert agent is not None
|
||||
|
||||
@@ -126,7 +126,7 @@ def detect_burst_chunks(chunks: List[Tuple[float, any]], burst_threshold: float
|
||||
@pytest.mark.parametrize(
|
||||
"model,expected_buffering",
|
||||
[
|
||||
("anthropic/claude-3-5-sonnet-20241022", False), # With fine-grained streaming beta, should stream better
|
||||
("anthropic/claude-sonnet-4-20250514", False), # With fine-grained streaming beta, should stream better
|
||||
("anthropic/claude-sonnet-4-20250514", False), # Sonnet 4 should NOT show buffering (has native reasoning)
|
||||
("openai/gpt-4.1", False), # GPT-4.1 should NOT show buffering (uses native reasoning)
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user