diff --git a/letta/server/rest_api/routers/v1/__init__.py b/letta/server/rest_api/routers/v1/__init__.py index d08a1d82..666aeedc 100644 --- a/letta/server/rest_api/routers/v1/__init__.py +++ b/letta/server/rest_api/routers/v1/__init__.py @@ -1,5 +1,6 @@ from letta.server.rest_api.routers.v1.agents import router as agents_router from letta.server.rest_api.routers.v1.blocks import router as blocks_router +from letta.server.rest_api.routers.v1.embeddings import router as embeddings_router from letta.server.rest_api.routers.v1.groups import router as groups_router from letta.server.rest_api.routers.v1.health import router as health_router from letta.server.rest_api.routers.v1.identities import router as identities_router @@ -32,4 +33,5 @@ ROUTERS = [ tags_router, messages_router, voice_router, + embeddings_router, ] diff --git a/letta/server/rest_api/routers/v1/embeddings.py b/letta/server/rest_api/routers/v1/embeddings.py new file mode 100644 index 00000000..5b5d51cd --- /dev/null +++ b/letta/server/rest_api/routers/v1/embeddings.py @@ -0,0 +1,20 @@ +from typing import Optional + +from fastapi import APIRouter, Depends, Header + +from letta.server.rest_api.utils import get_letta_server +from letta.server.server import SyncServer + +router = APIRouter(prefix="/embeddings", tags=["embeddings"]) + + +@router.get("/total_storage_size", response_model=float, operation_id="get_total_storage_size") +def get_embeddings_storage_size( + server: SyncServer = Depends(get_letta_server), + actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present +): + """ + Get the total size of all embeddings in the database for a user in GB. + """ + actor = server.user_manager.get_user_or_default(user_id=actor_id) + return server.passage_manager.estimate_embeddings_size_GB(actor=actor) diff --git a/letta/server/rest_api/routers/v1/llms.py b/letta/server/rest_api/routers/v1/llms.py index 2c05aa48..173b1a57 100644 --- a/letta/server/rest_api/routers/v1/llms.py +++ b/letta/server/rest_api/routers/v1/llms.py @@ -13,7 +13,7 @@ router = APIRouter(prefix="/models", tags=["models", "llms"]) @router.get("/", response_model=List[LLMConfig], operation_id="list_models") -def list_llm_backends( +def list_llm_models( server: "SyncServer" = Depends(get_letta_server), ): @@ -23,7 +23,7 @@ def list_llm_backends( @router.get("/embedding", response_model=List[EmbeddingConfig], operation_id="list_embedding_models") -def list_embedding_backends( +def list_embedding_models( server: "SyncServer" = Depends(get_letta_server), ): diff --git a/letta/services/passage_manager.py b/letta/services/passage_manager.py index 6dc25e1a..6cc3b3a6 100644 --- a/letta/services/passage_manager.py +++ b/letta/services/passage_manager.py @@ -3,6 +3,7 @@ from typing import List, Optional from openai import OpenAI +from letta.constants import MAX_EMBEDDING_DIM from letta.embeddings import embedding_model, parse_and_chunk_text from letta.orm.errors import NoResultFound from letta.orm.passage import AgentPassage, SourcePassage @@ -218,3 +219,16 @@ class PassageManager: """ with self.session_maker() as session: return AgentPassage.size(db_session=session, actor=actor, agent_id=agent_id) + + def estimate_embeddings_size_GB( + self, + actor: PydanticUser, + agent_id: Optional[str] = None, + ) -> float: + """ + Estimate the size of the embeddings in GB. + """ + BYTES_PER_EMBEDDING_DIM = 4 + BYTES_PER_GB = 1024 * 1024 * 1024 + GB_PER_EMBEDDING = BYTES_PER_EMBEDDING_DIM / BYTES_PER_GB * MAX_EMBEDDING_DIM + return self.size(actor=actor, agent_id=agent_id) * GB_PER_EMBEDDING