Files
letta-server/memgpt/server/rest_api/models/index.py
2024-01-23 14:41:33 -08:00

49 lines
1.7 KiB
Python

import uuid
from typing import List
from fastapi import APIRouter, Query
from pydantic import BaseModel, Field
from memgpt.server.server import SyncServer
from memgpt.server.rest_api.interface import QueuingInterface
router = APIRouter()
class Model(BaseModel):
name: str = Field(..., description="The name of the model.")
endpoint: str = Field(..., description="Endpoint URL for the model.")
endpoint_type: str = Field(..., description="Type of the model endpoint.")
wrapper: str = Field(None, description="Wrapper used for the model.")
context_window: int = Field(..., description="Context window size for the model.")
class ListModelsResponse(BaseModel):
models: List[Model] = Field(..., description="List of model configurations.")
def setup_models_index_router(server: SyncServer, interface: QueuingInterface):
@router.get("/models", tags=["models"], response_model=ListModelsResponse)
async def list_models(user_id: str = Query(..., description="Unique identifier of the user.")):
# Validate and parse the user ID
user_id = None if user_id == "null" else user_id
user_id = uuid.UUID(user_id) if user_id else None
# Clear the interface
interface.clear()
# TODO: Replace with actual data fetching logic once available
models_data = [
Model(
name="ehartford/dolphin-2.5-mixtral-8x7b",
endpoint="https://api.memgpt.ai",
endpoint_type="vllm",
wrapper="chatml",
context_window=16384,
),
Model(name="gpt-4", endpoint="https://api.openai.com/v1", endpoint_type="openai", context_window=8192),
]
return ListModelsResponse(models=models_data)
return router