fix: update and fix vLLM tests

Co-authored-by: Jin Peng <jinjpeng@Jins-MacBook-Pro.local>
Co-authored-by: Kian Jones <kian@letta.com>
Author: jnjpng
Date: 2025-08-06 14:37:55 -07:00
Committed by: GitHub
Parent: 30b35764a6
Commit: fcaa6c78a8
5 changed files with 17 additions and 8 deletions


@@ -32,6 +32,9 @@ def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_wi
     if not endpoint.startswith(("http://", "https://")):
         raise ValueError(f"Endpoint ({endpoint}) must begin with http:// or https://")
+    if not endpoint.endswith("/v1"):
+        endpoint = endpoint.rstrip("/") + "/v1"
     try:
         URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
         response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
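
The rstrip/strip dance around urljoin is easy to get wrong, so here is a minimal standalone sketch of the normalization added above. The helper name and the suffix value are assumptions for illustration; the actual value of WEBUI_API_SUFFIX is not shown in this diff.

from urllib.parse import urljoin

def normalize_vllm_endpoint(endpoint: str, suffix: str = "completions") -> str:
    # Hypothetical helper mirroring the diff's logic; `suffix` stands in for
    # WEBUI_API_SUFFIX, whose value is not visible here.
    if not endpoint.startswith(("http://", "https://")):
        raise ValueError(f"Endpoint ({endpoint}) must begin with http:// or https://")
    if not endpoint.endswith("/v1"):
        endpoint = endpoint.rstrip("/") + "/v1"
    # The trailing "/" matters: urljoin("http://h/v1", "x") resolves to "http://h/x",
    # while urljoin("http://h/v1/", "x") resolves to "http://h/v1/x".
    return urljoin(endpoint.strip("/") + "/", suffix.strip("/"))

assert normalize_vllm_endpoint("http://127.0.0.1:8000") == "http://127.0.0.1:8000/v1/completions"
assert normalize_vllm_endpoint("http://127.0.0.1:8000/v1") == "http://127.0.0.1:8000/v1/completions"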


@@ -27,12 +27,12 @@ class VLLMProvider(Provider):
     async def list_llm_models_async(self) -> list[LLMConfig]:
         from letta.llm_api.openai import openai_get_model_list_async

         # TODO (cliandy): previously unsupported with vLLM; confirm if this is still the case or not
-        response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+        base_url = self.base_url.rstrip("/") + "/v1" if not self.base_url.endswith("/v1") else self.base_url
+        response = await openai_get_model_list_async(base_url, api_key=self.api_key)
         data = response.get("data", response)

         configs = []
         for model in data:
             model_name = model["id"]
@@ -40,7 +40,7 @@ class VLLMProvider(Provider):
                 LLMConfig(
                     model=model_name,
                     model_endpoint_type="openai",  # TODO (cliandy): this was previously vllm for the completions provider, why?
-                    model_endpoint=self.base_url,
+                    model_endpoint=base_url,
                     model_wrapper=self.default_prompt_formatter,
                     context_window=model["max_model_len"],
                     handle=self.get_handle(model_name),
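
For reference, a sketch of what the updated listing logic produces from a vLLM /v1/models payload. The response shape (an OpenAI-style "data" list with vLLM's max_model_len extension) is inferred from the field accesses in the diff, not confirmed by it.

# Canned /v1/models payload standing in for a live server (shape assumed).
sample_response = {"data": [{"id": "Qwen/Qwen3-32B-AWQ", "max_model_len": 8192}]}

base_url = "http://127.0.0.1:8000"
base_url = base_url.rstrip("/") + "/v1" if not base_url.endswith("/v1") else base_url

for model in sample_response.get("data", sample_response):
    print(model["id"], model["max_model_len"], base_url)
# -> Qwen/Qwen3-32B-AWQ 8192 http://127.0.0.1:8000/v1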


@@ -1,7 +1,8 @@
 {
     "context_window": 8192,
-    "model_endpoint_type": "vllm",
-    "model_endpoint": "http://127.0.0.1:8000",
+    "model_endpoint_type": "openai",
+    "provider_name": "vllm",
+    "model_endpoint": "http://127.0.0.1:8000/v1",
     "model": "Qwen/Qwen3-32B-AWQ",
     "put_inner_thoughts_in_kwargs": true
 }
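
A quick sanity check on the updated config, using only the stdlib (no letta imports assumed):

import json

with open("vllm.json") as f:  # path assumed relative to the test configs directory
    cfg = json.load(f)

assert cfg["model_endpoint_type"] == "openai"
assert cfg["provider_name"] == "vllm"
assert cfg["model_endpoint"].endswith("/v1")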


@@ -113,6 +113,7 @@ USER_MESSAGE_BASE64_IMAGE: List[MessageCreate] = [
 limited_configs = [
     "ollama.json",
     "together-qwen-2.5-72b-instruct.json",
+    "vllm.json",
 ]
 all_configs = [


@@ -172,9 +172,13 @@ async def test_ollama():
 @pytest.mark.skipif(model_settings.vllm_api_base is None, reason="Only run if VLLM_API_BASE is set.")
 @pytest.mark.asyncio
 async def test_vllm():
-    provider = VLLMProvider(base_url=model_settings.vllm_api_base)
+    provider = VLLMProvider(name="vllm", base_url=model_settings.vllm_api_base)
     models = await provider.list_llm_models_async()
     print(models)
     assert len(models) > 0
+    assert models[0].handle == f"{provider.name}/{models[0].model}"
+
+    embedding_models = await provider.list_embedding_models_async()
+    assert len(embedding_models) == 0  # embedding models currently not supported by vLLM


 # TODO: Add back in, difficulty adding this to CI properly, need boto credentials
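
Per the skipif marker, this test only runs when VLLM_API_BASE is set. A minimal way to exercise the same calls outside pytest, assuming a local vLLM server and an import path suggested by (but not confirmed in) this diff:

import asyncio

from letta.schemas.providers import VLLMProvider  # import path assumed

async def main():
    # Assumes a vLLM server on localhost:8000; adjust base_url as needed.
    provider = VLLMProvider(name="vllm", base_url="http://127.0.0.1:8000")
    models = await provider.list_llm_models_async()
    for m in models:
        print(m.handle, m.context_window)

asyncio.run(main())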