fix: ollama support byom (#2602)

This commit is contained in:
Andy Li
2025-06-03 15:40:21 -07:00
committed by GitHub
parent 2d9cbf3830
commit 8fdfe9d66a
3 changed files with 34 additions and 1 deletion

View File

@@ -869,6 +869,38 @@ class OllamaProvider(OpenAIProvider):
..., description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
)
async def list_llm_models_async(self) -> List[LLMConfig]:
    """Async version of list_llm_models below.

    Queries the Ollama `/api/tags` endpoint for locally available models and
    converts each entry into an LLMConfig. Models whose context window cannot
    be determined are skipped (with a console note) rather than failing the
    whole listing.

    Returns:
        List[LLMConfig]: one config per usable local Ollama model.

    Raises:
        Exception: if the tags endpoint responds with a non-200 status.
    """
    endpoint = f"{self.base_url}/api/tags"

    import aiohttp

    async with aiohttp.ClientSession() as session:
        async with session.get(endpoint) as response:
            if response.status != 200:
                # BUG FIX: aiohttp's `ClientResponse.text` is a coroutine
                # method and must be awaited — the original interpolated the
                # unbound method object into the error message.
                error_body = await response.text()
                raise Exception(f"Failed to list Ollama models: {error_body}")
            response_json = await response.json()

    configs = []
    for model in response_json["models"]:
        context_window = self.get_model_context_window(model["name"])
        if context_window is None:
            # Skip models we can't size: downstream config requires a window.
            print(f"Ollama model {model['name']} has no context window")
            continue
        configs.append(
            LLMConfig(
                model=model["name"],
                model_endpoint_type="ollama",
                model_endpoint=self.base_url,
                model_wrapper=self.default_prompt_formatter,
                context_window=context_window,
                handle=self.get_handle(model["name"]),
                provider_name=self.name,
                provider_category=self.provider_category,
            )
        )
    return configs
def list_llm_models(self) -> List[LLMConfig]:
# https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
import requests

View File

@@ -1593,7 +1593,7 @@ class SyncServer(Server):
actor=actor,
)
async def get_provider_models(provider):
async def get_provider_models(provider: Provider) -> list[LLMConfig]:
try:
return await provider.list_llm_models_async()
except Exception as e:

View File

@@ -147,6 +147,7 @@ all_configs = [
"gemini-2.5-flash-vertex.json",
"gemini-2.5-pro-vertex.json",
"together-qwen-2.5-72b-instruct.json",
"ollama.json",
]
# Honor an explicit config override via LLM_CONFIG_FILE; otherwise run
# against every config in all_configs.
requested = os.getenv("LLM_CONFIG_FILE")
if requested:
    filenames = [requested]
else:
    filenames = all_configs