fix: populate max_tokens when listing LLM models (#9559)

list_llm_models_async was constructing LLMConfig without max_tokens,
causing the GET /models/ endpoint to return null for max_tokens.
The listing path now calls typed_provider.get_default_max_output_tokens()
on both the base and BYOK provider paths, matching the behavior of
get_llm_config_from_handle.
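A minimal runnable sketch of the behavior change (the stand-in classes and
the 4096 default are made up for illustration; the real fields live on
Letta's LLMConfig and provider subtypes shown in the diff below):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class LLMConfig:
    """Hypothetical stand-in for the real LLMConfig (fields trimmed)."""
    model: str
    max_tokens: Optional[int] = None  # was always left unset before the fix

class FakeTypedProvider:
    """Hypothetical provider subtype, for illustration only."""
    def get_default_max_output_tokens(self, model_name: str) -> int:
        return 4096  # made-up default; real values are provider-specific

typed_provider = FakeTypedProvider()
model_name = "some-model"

# Before the fix: max_tokens was never passed, so it serialized as null.
before = LLMConfig(model=model_name)
assert before.max_tokens is None

# After the fix: populate it from the provider's per-model default.
after = LLMConfig(
    model=model_name,
    max_tokens=typed_provider.get_default_max_output_tokens(model_name),
)
assert after.max_tokens == 4096
```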
Author:       jnjpng
Date:         2026-02-19 14:02:49 -08:00
Committed by: Caren Thomas
Parent:       bd5b5fa9f3
Commit:       828c89c76f


@@ -1268,11 +1268,14 @@ class SyncServer(object):
 
         # Build LLMConfig objects from database
         provider_cache: Dict[str, Provider] = {}
+        typed_provider_cache: Dict[str, Any] = {}
         for model in provider_models:
             # Get provider details (with caching to avoid N+1 queries)
             if model.provider_id not in provider_cache:
                 provider_cache[model.provider_id] = await self.provider_manager.get_provider_async(model.provider_id, actor)
+                typed_provider_cache[model.provider_id] = provider_cache[model.provider_id].cast_to_subtype()
             provider = provider_cache[model.provider_id]
+            typed_provider = typed_provider_cache[model.provider_id]
 
             # Skip non-base providers (they're handled separately)
             if provider.provider_category != ProviderCategory.base:
@@ -1287,11 +1290,13 @@ class SyncServer(object):
             # For bedrock, use schema default for base_url since DB may have NULL
             # TODO: can maybe do this for all models but want to isolate change so we don't break any other providers
             if provider.provider_type == ProviderType.bedrock:
                 typed_provider = provider.cast_to_subtype()
                 model_endpoint = typed_provider.base_url
             else:
                 model_endpoint = provider.base_url
 
+            # Get provider-specific default max_tokens
+            max_tokens = typed_provider.get_default_max_output_tokens(model.name)
             llm_config = LLMConfig(
                 model=model.name,
                 model_endpoint_type=model.model_endpoint_type,
@@ -1300,6 +1305,7 @@ class SyncServer(object):
                 handle=model.handle,
                 provider_name=provider.name,
                 provider_category=provider.provider_category,
+                max_tokens=max_tokens,
             )
             llm_models.append(llm_config)
 
@@ -1354,6 +1360,7 @@ class SyncServer(object):
                     enabled=True,
                 )
                 for model in provider_llm_models:
+                    max_tokens = typed_provider.get_default_max_output_tokens(model.name)
                     llm_config = LLMConfig(
                         model=model.name,
                         model_endpoint_type=model.model_endpoint_type,
@@ -1362,6 +1369,7 @@ class SyncServer(object):
                         handle=model.handle,
                         provider_name=provider.name,
                         provider_category=ProviderCategory.byok,
+                        max_tokens=max_tokens,
                     )
                     llm_models.append(llm_config)
             except Exception as e:
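For context, the diff resolves get_default_max_output_tokens through the
cached cast_to_subtype() result rather than the raw provider row. A rough
sketch of the shape this implies (the class hierarchy and both token values
here are assumptions for illustration, not Letta's actual defaults):

```python
from typing import Optional

class Provider:
    """Sketch of the base provider row as stored in the database."""

    def get_default_max_output_tokens(self, model_name: str) -> Optional[int]:
        # Hypothetical generic fallback; real defaults are provider-specific.
        return 4096

class BedrockProvider(Provider):
    """Hypothetical subtype of the kind cast_to_subtype() might return."""

    def get_default_max_output_tokens(self, model_name: str) -> Optional[int]:
        # A subtype can key the default off the model name; this value
        # is made up for illustration.
        return 8192
```

Caching the cast once per provider_id, as the first hunk does, means each
provider row is fetched and cast a single time even when many models share
it (the "avoid N+1 queries" comment in the diff); subsequent models hit the
typed_provider_cache dict instead of repeating the cast.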