fix: update and repair vLLM tests
Co-authored-by: Jin Peng <jinjpeng@Jins-MacBook-Pro.local>
Co-authored-by: Kian Jones <kian@letta.com>
This commit is contained in:
@@ -32,6 +32,9 @@ def get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_wi
|
||||
if not endpoint.startswith(("http://", "https://")):
|
||||
raise ValueError(f"Endpoint ({endpoint}) must begin with http:// or https://")
|
||||
|
||||
if not endpoint.endswith("/v1"):
|
||||
endpoint = endpoint.rstrip("/") + "/v1"
|
||||
|
||||
try:
|
||||
URI = urljoin(endpoint.strip("/") + "/", WEBUI_API_SUFFIX.strip("/"))
|
||||
response = post_json_auth_request(uri=URI, json_payload=request, auth_type=auth_type, auth_key=auth_key)
|
||||
|
||||
@@ -27,12 +27,12 @@ class VLLMProvider(Provider):
|
||||
async def list_llm_models_async(self) -> list[LLMConfig]:
|
||||
from letta.llm_api.openai import openai_get_model_list_async
|
||||
|
||||
# TODO (cliandy): previously unsupported with vLLM; confirm if this is still the case or not
|
||||
response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
|
||||
|
||||
base_url = self.base_url.rstrip("/") + "/v1" if not self.base_url.endswith("/v1") else self.base_url
|
||||
response = await openai_get_model_list_async(base_url, api_key=self.api_key)
|
||||
data = response.get("data", response)
|
||||
|
||||
configs = []
|
||||
|
||||
for model in data:
|
||||
model_name = model["id"]
|
||||
|
||||
@@ -40,7 +40,7 @@ class VLLMProvider(Provider):
|
||||
LLMConfig(
|
||||
model=model_name,
|
||||
model_endpoint_type="openai", # TODO (cliandy): this was previous vllm for the completions provider, why?
|
||||
model_endpoint=self.base_url,
|
||||
model_endpoint=base_url,
|
||||
model_wrapper=self.default_prompt_formatter,
|
||||
context_window=model["max_model_len"],
|
||||
handle=self.get_handle(model_name),
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
{
|
||||
"context_window": 8192,
|
||||
"model_endpoint_type": "vllm",
|
||||
"model_endpoint": "http://127.0.0.1:8000",
|
||||
"model_endpoint_type": "openai",
|
||||
"provider_name": "vllm",
|
||||
"model_endpoint": "http://127.0.0.1:8000/v1",
|
||||
"model": "Qwen/Qwen3-32B-AWQ",
|
||||
"put_inner_thoughts_in_kwargs": true
|
||||
}
|
||||
|
||||
@@ -113,6 +113,7 @@ USER_MESSAGE_BASE64_IMAGE: List[MessageCreate] = [
|
||||
limited_configs = [
|
||||
"ollama.json",
|
||||
"together-qwen-2.5-72b-instruct.json",
|
||||
"vllm.json",
|
||||
]
|
||||
|
||||
all_configs = [
|
||||
|
||||
@@ -172,9 +172,13 @@ async def test_ollama():
|
||||
@pytest.mark.skipif(model_settings.vllm_api_base is None, reason="Only run if VLLM_API_BASE is set.")
|
||||
@pytest.mark.asyncio
|
||||
async def test_vllm():
|
||||
provider = VLLMProvider(base_url=model_settings.vllm_api_base)
|
||||
provider = VLLMProvider(name="vllm", base_url=model_settings.vllm_api_base)
|
||||
models = await provider.list_llm_models_async()
|
||||
print(models)
|
||||
assert len(models) > 0
|
||||
assert models[0].handle == f"{provider.name}/{models[0].model}"
|
||||
|
||||
embedding_models = await provider.list_embedding_models_async()
|
||||
assert len(embedding_models) == 0 # embedding models currently not supported by vLLM
|
||||
|
||||
|
||||
# TODO: Add back in, difficulty adding this to CI properly, need boto credentials
|
||||
|
||||
Reference in New Issue
Block a user