feat: endpoint for current size in GB of the organization's embeddings (#1880)

docs: reorganize embedding models
This commit is contained in:
Andy Li
2025-04-25 17:08:04 -07:00
committed by GitHub
parent 56e0a0eaee
commit af87e11d3e
4 changed files with 38 additions and 2 deletions

View File

@@ -1,5 +1,6 @@
from letta.server.rest_api.routers.v1.agents import router as agents_router
from letta.server.rest_api.routers.v1.blocks import router as blocks_router
from letta.server.rest_api.routers.v1.embeddings import router as embeddings_router
from letta.server.rest_api.routers.v1.groups import router as groups_router
from letta.server.rest_api.routers.v1.health import router as health_router
from letta.server.rest_api.routers.v1.identities import router as identities_router
@@ -32,4 +33,5 @@ ROUTERS = [
tags_router,
messages_router,
voice_router,
embeddings_router,
]

View File

@@ -0,0 +1,20 @@
from typing import Optional
from fastapi import APIRouter, Depends, Header
from letta.server.rest_api.utils import get_letta_server
from letta.server.server import SyncServer
# Router for the embeddings endpoints; every route declared on it is served under the "/embeddings" prefix.
router = APIRouter(prefix="/embeddings", tags=["embeddings"])
@router.get("/total_storage_size", response_model=float, operation_id="get_total_storage_size")
def get_embeddings_storage_size(
    server: SyncServer = Depends(get_letta_server),
    actor_id: Optional[str] = Header(None, alias="user_id"),  # Read from the "user_id" request header; None when the header is absent
):
    """
    Get the total size of all embeddings in the database for a user in GB.
    """
    # Resolve the acting user first; when no header was sent, actor_id is None
    # and the choice of user is delegated to get_user_or_default.
    acting_user = server.user_manager.get_user_or_default(user_id=actor_id)

    # The size computation itself lives in the passage manager; this handler
    # only forwards the resolved user and returns the resulting float.
    passages = server.passage_manager
    size_in_gb = passages.estimate_embeddings_size_GB(actor=acting_user)
    return size_in_gb

View File

@@ -13,7 +13,7 @@ router = APIRouter(prefix="/models", tags=["models", "llms"])
@router.get("/", response_model=List[LLMConfig], operation_id="list_models")
def list_llm_backends(
def list_llm_models(
server: "SyncServer" = Depends(get_letta_server),
):
@@ -23,7 +23,7 @@ def list_llm_backends(
@router.get("/embedding", response_model=List[EmbeddingConfig], operation_id="list_embedding_models")
def list_embedding_backends(
def list_embedding_models(
server: "SyncServer" = Depends(get_letta_server),
):

View File

@@ -3,6 +3,7 @@ from typing import List, Optional
from openai import OpenAI
from letta.constants import MAX_EMBEDDING_DIM
from letta.embeddings import embedding_model, parse_and_chunk_text
from letta.orm.errors import NoResultFound
from letta.orm.passage import AgentPassage, SourcePassage
@@ -218,3 +219,16 @@ class PassageManager:
"""
with self.session_maker() as session:
return AgentPassage.size(db_session=session, actor=actor, agent_id=agent_id)
def estimate_embeddings_size_GB(
    self,
    actor: PydanticUser,
    agent_id: Optional[str] = None,
) -> float:
    """
    Estimate the storage used by embeddings, expressed in GB.

    The estimate is the value returned by ``self.size`` for the given actor
    (and optional agent) multiplied by a fixed per-embedding footprint. That
    footprint assumes every embedding has ``MAX_EMBEDDING_DIM`` dimensions at
    4 bytes per dimension, with 1 GB taken as 1024**3 bytes.

    Args:
        actor: User on whose behalf the size is computed; forwarded to ``self.size``.
        agent_id: Optional agent filter; forwarded unchanged to ``self.size``.

    Returns:
        The estimated size in GB as a float.
    """
    bytes_per_dim = 4  # presumably float32 components -- TODO confirm against the storage column type
    bytes_per_gb = 1 << 30  # 1024 * 1024 * 1024
    # Same evaluation order as before: (bytes per dim / bytes per GB) * dims.
    gb_per_embedding = (bytes_per_dim / bytes_per_gb) * MAX_EMBEDDING_DIM
    embedding_count = self.size(actor=actor, agent_id=agent_id)
    return embedding_count * gb_per_embedding