From 828c89c76fe5ffff2df95717cae1dc6e5f03d710 Mon Sep 17 00:00:00 2001 From: jnjpng Date: Thu, 19 Feb 2026 14:02:49 -0800 Subject: [PATCH] fix: populate max_tokens when listing LLM models (#9559) list_llm_models_async was constructing LLMConfig without max_tokens, causing the GET /models/ endpoint to return null for max_tokens. Now calls typed_provider.get_default_max_output_tokens() for both base and BYOK provider paths, matching get_llm_config_from_handle. --- letta/server/server.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/letta/server/server.py b/letta/server/server.py index 8612d87d..38cc3948 100644 --- a/letta/server/server.py +++ b/letta/server/server.py @@ -1268,11 +1268,14 @@ class SyncServer(object): # Build LLMConfig objects from database provider_cache: Dict[str, Provider] = {} + typed_provider_cache: Dict[str, Any] = {} for model in provider_models: # Get provider details (with caching to avoid N+1 queries) if model.provider_id not in provider_cache: provider_cache[model.provider_id] = await self.provider_manager.get_provider_async(model.provider_id, actor) + typed_provider_cache[model.provider_id] = provider_cache[model.provider_id].cast_to_subtype() provider = provider_cache[model.provider_id] + typed_provider = typed_provider_cache[model.provider_id] # Skip non-base providers (they're handled separately) if provider.provider_category != ProviderCategory.base: @@ -1287,11 +1290,13 @@ class SyncServer(object): # For bedrock, use schema default for base_url since DB may have NULL # TODO: can maybe do this for all models but want to isolate change so we don't break any other providers if provider.provider_type == ProviderType.bedrock: - typed_provider = provider.cast_to_subtype() model_endpoint = typed_provider.base_url else: model_endpoint = provider.base_url + # Get provider-specific default max_tokens + max_tokens = typed_provider.get_default_max_output_tokens(model.name) + llm_config = LLMConfig( model=model.name, model_endpoint_type=model.model_endpoint_type, @@ -1300,6 +1305,7 @@ class SyncServer(object): handle=model.handle, provider_name=provider.name, provider_category=provider.provider_category, + max_tokens=max_tokens, ) llm_models.append(llm_config) @@ -1354,6 +1360,7 @@ class SyncServer(object): enabled=True, ) for model in provider_llm_models: + max_tokens = typed_provider.get_default_max_output_tokens(model.name) llm_config = LLMConfig( model=model.name, model_endpoint_type=model.model_endpoint_type, @@ -1362,6 +1369,7 @@ class SyncServer(object): handle=model.handle, provider_name=provider.name, provider_category=ProviderCategory.byok, + max_tokens=max_tokens, ) llm_models.append(llm_config) except Exception as e: