From fcaa6c78a84708a203707aff03e0585cb0331c88 Mon Sep 17 00:00:00 2001 From: jnjpng Date: Wed, 6 Aug 2025 14:37:55 -0700 Subject: [PATCH] fix: fix and update vllm tests Co-authored-by: Jin Peng Co-authored-by: Kian Jones --- letta/local_llm/vllm/api.py | 3 +++ letta/schemas/providers/vllm.py | 8 ++++---- tests/configs/llm_model_configs/vllm.json | 5 +++-- tests/integration_test_send_message.py | 1 + tests/test_providers.py | 8 ++++++-- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/letta/local_llm/vllm/api.py b/letta/local_llm/vllm/api.py index 48c48b32..dde863c8 100644 --- a/letta/local_llm/vllm/api.py +++ b/letta/local_llm/vllm/api.py @@ -32,6 +32,9 @@ def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_wi if not endpoint.startswith(("http://", "https://")): raise ValueError(f"Endpoint ({endpoint}) must begin with http:// or https://") + if not endpoint.endswith("/v1"): + endpoint = endpoint.rstrip("/") + "/v1" + try: URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/")) response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key) diff --git a/letta/schemas/providers/vllm.py b/letta/schemas/providers/vllm.py index 2f261c3e..0481807e 100644 --- a/letta/schemas/providers/vllm.py +++ b/letta/schemas/providers/vllm.py @@ -27,12 +27,12 @@ class VLLMProvider(Provider): async def list_llm_models_async(self) -> list[LLMConfig]: from letta.llm_api.openai import openai_get_model_list_async - # TODO (cliandy): previously unsupported with vLLM; confirm if this is still the case or not - response = await openai_get_model_list_async(self.base_url, api_key=self.api_key) - + base_url = self.base_url.rstrip("/") + "/v1" if not self.base_url.endswith("/v1") else self.base_url + response = await openai_get_model_list_async(base_url, api_key=self.api_key) data = response.get("data", response) configs = [] + for model in data: model_name = model["id"] @@ -40,7 +40,7 @@ class VLLMProvider(Provider): LLMConfig( model=model_name, model_endpoint_type="openai", # TODO (cliandy): this was previous vllm for the completions provider, why? - model_endpoint=self.base_url, + model_endpoint=base_url, model_wrapper=self.default_prompt_formatter, context_window=model["max_model_len"], handle=self.get_handle(model_name), diff --git a/tests/configs/llm_model_configs/vllm.json b/tests/configs/llm_model_configs/vllm.json index 54440ac4..5b1d2f00 100644 --- a/tests/configs/llm_model_configs/vllm.json +++ b/tests/configs/llm_model_configs/vllm.json @@ -1,7 +1,8 @@ { "context_window": 8192, - "model_endpoint_type": "vllm", - "model_endpoint": "http://127.0.0.1:8000", + "model_endpoint_type": "openai", + "provider_name": "vllm", + "model_endpoint": "http://127.0.0.1:8000/v1", "model": "Qwen/Qwen3-32B-AWQ", "put_inner_thoughts_in_kwargs": true } diff --git a/tests/integration_test_send_message.py b/tests/integration_test_send_message.py index b30aa42c..4d62ab09 100644 --- a/tests/integration_test_send_message.py +++ b/tests/integration_test_send_message.py @@ -113,6 +113,7 @@ USER_MESSAGE_BASE64_IMAGE: List[MessageCreate] = [ limited_configs = [ "ollama.json", "together-qwen-2.5-72b-instruct.json", + "vllm.json", ] all_configs = [ diff --git a/tests/test_providers.py b/tests/test_providers.py index 71230218..0b7c50e7 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -172,9 +172,13 @@ async def test_ollama(): @pytest.mark.skipif(model_settings.vllm_api_base is None, reason="Only run if VLLM_API_BASE is set.") @pytest.mark.asyncio async def test_vllm(): - provider = VLLMProvider(base_url=model_settings.vllm_api_base) + provider = VLLMProvider(name="vllm", base_url=model_settings.vllm_api_base) models = await provider.list_llm_models_async() - print(models) + assert len(models) > 0 + assert models[0].handle == f"{provider.name}/{models[0].model}" + + embedding_models = await provider.list_embedding_models_async() + assert len(embedding_models) == 0 # embedding models currently not supported by vLLM # TODO: Add back in, difficulty adding this to CI properly, need boto credentials