From aac047c2193f3a64244a79ace4a95288393a80ac Mon Sep 17 00:00:00 2001
From: Kelvin He <62563309+hitpoint6@users.noreply.github.com>
Date: Mon, 7 Oct 2024 17:20:41 -0700
Subject: [PATCH] Docker compose vllm (#1821)

---
 docker-compose-vllm.yaml | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 docker-compose-vllm.yaml

diff --git a/docker-compose-vllm.yaml b/docker-compose-vllm.yaml
new file mode 100644
index 00000000..f6ab57e6
--- /dev/null
+++ b/docker-compose-vllm.yaml
@@ -0,0 +1,35 @@
+version: '3.8'
+
+services:
+  letta:
+    image: lettaai/letta:latest
+    ports:
+      - "8083:8083"
+    environment:
+      - LETTA_LLM_ENDPOINT=http://vllm:8000
+      - LETTA_LLM_ENDPOINT_TYPE=vllm
+      - LETTA_LLM_MODEL=${LETTA_LLM_MODEL} # Replace with your model
+      - LETTA_LLM_CONTEXT_WINDOW=8192
+    depends_on:
+      - vllm
+
+  vllm:
+    image: vllm/vllm-openai:latest
+    runtime: nvidia
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    environment:
+      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
+    volumes:
+      - ~/.cache/huggingface:/root/.cache/huggingface
+    ports:
+      - "8000:8000"
+    # Replace with your model; NOTE(review): 8000 < LETTA_LLM_CONTEXT_WINDOW (8192) -- confirm intended
+    command: >
+      --model ${LETTA_LLM_MODEL} --max-model-len=8000
+    ipc: host