fix: ollama support byom (#2602)

This commit is contained in:
Andy Li
2025-06-03 15:40:21 -07:00
committed by GitHub
parent 2d9cbf3830
commit 8fdfe9d66a
3 changed files with 34 additions and 1 deletion

View File

@@ -869,6 +869,38 @@ class OllamaProvider(OpenAIProvider):
..., description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
)
async def list_llm_models_async(self) -> List[LLMConfig]:
    """Async version of list_llm_models below.

    Queries the Ollama `/api/tags` endpoint for locally available models and
    converts each entry into an LLMConfig. Models whose context window cannot
    be determined are skipped (with a console note) rather than failing the
    whole listing.

    Returns:
        List[LLMConfig]: one config per usable local Ollama model.

    Raises:
        Exception: if the tags endpoint responds with a non-200 status.
    """
    endpoint = f"{self.base_url}/api/tags"

    import aiohttp

    async with aiohttp.ClientSession() as session:
        async with session.get(endpoint) as response:
            if response.status != 200:
                # BUG FIX: aiohttp's `ClientResponse.text` is a coroutine
                # method and must be awaited — the original interpolated the
                # unbound method object into the error message.
                error_body = await response.text()
                raise Exception(f"Failed to list Ollama models: {error_body}")
            response_json = await response.json()

    configs = []
    for model in response_json["models"]:
        context_window = self.get_model_context_window(model["name"])
        if context_window is None:
            # Skip models we can't size: downstream config requires a window.
            print(f"Ollama model {model['name']} has no context window")
            continue
        configs.append(
            LLMConfig(
                model=model["name"],
                model_endpoint_type="ollama",
                model_endpoint=self.base_url,
                model_wrapper=self.default_prompt_formatter,
                context_window=context_window,
                handle=self.get_handle(model["name"]),
                provider_name=self.name,
                provider_category=self.provider_category,
            )
        )
    return configs
def list_llm_models(self) -> List[LLMConfig]:
# https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
import requests

View File

@@ -1593,7 +1593,7 @@ class SyncServer(Server):
actor=actor,
)
async def get_provider_models(provider):
async def get_provider_models(provider: Provider) -> list[LLMConfig]:
try:
return await provider.list_llm_models_async()
except Exception as e:

View File

@@ -147,6 +147,7 @@ all_configs = [
"gemini-2.5-flash-vertex.json",
"gemini-2.5-pro-vertex.json",
"together-qwen-2.5-72b-instruct.json",
"ollama.json",
]
# Honor an explicit config override via LLM_CONFIG_FILE; otherwise run
# against every config in all_configs.
requested = os.getenv("LLM_CONFIG_FILE")
if requested:
    filenames = [requested]
else:
    filenames = all_configs