Files
letta-server/letta/schemas/providers/sglang.py

63 lines
2.7 KiB
Python

"""
SGLang provider for Letta.
SGLang is a high-performance inference engine that exposes OpenAI-compatible API endpoints.
"""
from typing import Literal
from pydantic import Field
from letta.schemas.embedding_config import EmbeddingConfig
from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.llm_config import LLMConfig
from letta.schemas.providers.base import Provider
class SGLangProvider(Provider):
    """Provider that lists the models served by an SGLang server.

    Models are discovered through the server's OpenAI-compatible model-list
    endpoint and are exposed as ``LLMConfig`` objects that use the ``"openai"``
    endpoint type.
    """

    provider_type: Literal[ProviderType.sglang] = Field(ProviderType.sglang, description="The type of the provider.")
    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
    base_url: str = Field(..., description="Base URL for the SGLang API (e.g., http://localhost:30000).")
    api_key: str | None = Field(None, description="API key for the SGLang API (optional for local instances).")
    default_prompt_formatter: str | None = Field(default=None, description="Default prompt formatter (aka model wrapper).")
    handle_base: str | None = Field(None, description="Custom handle base name for model handles.")

    async def list_llm_models_async(self) -> list[LLMConfig]:
        """Query the SGLang server and build one ``LLMConfig`` per listed model.

        The configured ``base_url`` is normalized so that it ends with ``/v1``
        before the model list is requested, and the same normalized URL is
        stored as each config's ``model_endpoint``.

        Returns:
            A list of ``LLMConfig`` objects, one per entry in the model list.
            The context window is taken from the entry's ``max_model_len``
            and falls back to 32768 when that value is missing or null.

        Raises:
            KeyError: If a model entry has no ``"id"`` field.
        """
        from letta.llm_api.openai import openai_get_model_list_async

        # Ensure base_url ends with /v1 (SGLang uses same convention as vLLM)
        base_url = self.base_url.rstrip("/")
        if not base_url.endswith("/v1"):
            base_url = base_url + "/v1"

        # Decrypt API key before using (may be None for local instances)
        api_key = await self.api_key_enc.get_plaintext_async() if self.api_key_enc else None

        response = await openai_get_model_list_async(base_url, api_key=api_key)

        # The model list is normally wrapped as {"data": [...]}. A bare list is
        # accepted as well; calling .get() on it directly would raise AttributeError.
        data = response.get("data", response) if isinstance(response, dict) else response

        # Used when a model entry does not report a usable max_model_len.
        fallback_context_window = 32768

        configs = []
        for model in data:
            model_name = model["id"]

            # dict.get's default only covers a *missing* key; `or` also covers an
            # explicit null so context_window is never passed as None.
            # NOTE(review): SGLang appears to declare max_model_len as optional — confirm.
            context_window = model.get("max_model_len") or fallback_context_window

            configs.append(
                LLMConfig(
                    model=model_name,
                    model_endpoint_type="openai",  # SGLang is OpenAI-compatible
                    model_endpoint=base_url,
                    model_wrapper=self.default_prompt_formatter,
                    context_window=context_window,
                    handle=self.get_handle(model_name, base_name=self.handle_base) if self.handle_base else self.get_handle(model_name),
                    max_tokens=self.get_default_max_output_tokens(model_name),
                    provider_name=self.name,
                    provider_category=self.provider_category,
                )
            )
        return configs

    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
        """Return an empty list; this provider does not expose embedding models."""
        # SGLang embedding support not common for training use cases
        return []