fix: populate max_tokens when listing LLM models (#9559)
list_llm_models_async was constructing LLMConfig without max_tokens, so the GET /models/ endpoint returned null for max_tokens. It now calls typed_provider.get_default_max_output_tokens() on both the base and BYOK provider paths, matching get_llm_config_from_handle.
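For context, here is a minimal sketch of the caching pattern the fix introduces. Everything in it is illustrative: the Provider stand-in and the load callback are hypothetical, and only cast_to_subtype() and get_default_max_output_tokens() mirror the interfaces actually used in the diff below.

from typing import Any, Callable, Dict

class Provider:
    """Hypothetical stand-in for the provider row loaded from the database."""

    def cast_to_subtype(self) -> "Provider":
        # The real method returns the provider-specific subclass
        # (e.g. a bedrock provider with its own typed defaults).
        return self

    def get_default_max_output_tokens(self, model_name: str) -> int:
        # Stand-in value; the real subtype returns a per-model default.
        return 4096

provider_cache: Dict[str, Provider] = {}
typed_provider_cache: Dict[str, Any] = {}

def get_typed_provider(provider_id: str, load: Callable[[str], Provider]) -> Any:
    # Cache the raw provider and its subtype cast together, so each provider
    # is loaded and cast exactly once even when many models share it
    # (the same N+1 avoidance the diff applies inside the model loop).
    if provider_id not in provider_cache:
        provider_cache[provider_id] = load(provider_id)
        typed_provider_cache[provider_id] = provider_cache[provider_id].cast_to_subtype()
    return typed_provider_cache[provider_id]

# Usage: repeated lookups for the same provider reuse one load + one cast.
typed = get_typed_provider("prov-1", lambda _id: Provider())
print(typed.get_default_max_output_tokens("some-model"))  # -> 4096

Hoisting cast_to_subtype() into the cache also lets the bedrock branch reuse the typed provider instead of re-casting inside the loop, which is why the bedrock-only cast is deleted in the diff.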
@@ -1268,11 +1268,14 @@ class SyncServer(object):
 
         # Build LLMConfig objects from database
         provider_cache: Dict[str, Provider] = {}
+        typed_provider_cache: Dict[str, Any] = {}
         for model in provider_models:
             # Get provider details (with caching to avoid N+1 queries)
             if model.provider_id not in provider_cache:
                 provider_cache[model.provider_id] = await self.provider_manager.get_provider_async(model.provider_id, actor)
+                typed_provider_cache[model.provider_id] = provider_cache[model.provider_id].cast_to_subtype()
             provider = provider_cache[model.provider_id]
+            typed_provider = typed_provider_cache[model.provider_id]
 
             # Skip non-base providers (they're handled separately)
             if provider.provider_category != ProviderCategory.base:
@@ -1287,11 +1290,13 @@ class SyncServer(object):
             # For bedrock, use schema default for base_url since DB may have NULL
             # TODO: can maybe do this for all models but want to isolate change so we don't break any other providers
             if provider.provider_type == ProviderType.bedrock:
-                typed_provider = provider.cast_to_subtype()
                 model_endpoint = typed_provider.base_url
             else:
                 model_endpoint = provider.base_url
 
+            # Get provider-specific default max_tokens
+            max_tokens = typed_provider.get_default_max_output_tokens(model.name)
+
             llm_config = LLMConfig(
                 model=model.name,
                 model_endpoint_type=model.model_endpoint_type,
@@ -1300,6 +1305,7 @@ class SyncServer(object):
                 handle=model.handle,
                 provider_name=provider.name,
                 provider_category=provider.provider_category,
+                max_tokens=max_tokens,
             )
             llm_models.append(llm_config)
 
@@ -1354,6 +1360,7 @@ class SyncServer(object):
                     enabled=True,
                 )
                 for model in provider_llm_models:
+                    max_tokens = typed_provider.get_default_max_output_tokens(model.name)
                     llm_config = LLMConfig(
                         model=model.name,
                         model_endpoint_type=model.model_endpoint_type,
@@ -1362,6 +1369,7 @@ class SyncServer(object):
                         handle=model.handle,
                         provider_name=provider.name,
                         provider_category=ProviderCategory.byok,
+                        max_tokens=max_tokens,
                     )
                     llm_models.append(llm_config)
             except Exception as e:
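A quick way to verify the fix end to end is to list models and check that max_tokens is populated. This is a hypothetical smoke check: the base URL, port, and exact response shape are assumptions, not taken from this commit; only the GET /models/ endpoint and the previously-null max_tokens field come from the commit message.

import requests

# Assumes a locally running server; adjust the base URL and auth as needed.
resp = requests.get("http://localhost:8283/v1/models/")
resp.raise_for_status()
for model in resp.json():
    # Before this fix, max_tokens came back null for every model.
    assert model.get("max_tokens") is not None, f"max_tokens missing for {model.get('handle')}"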