feat: endpoint for current size in GB of the organization's embeddings (#1880)
docs: reorganize embedding models
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from letta.server.rest_api.routers.v1.agents import router as agents_router
|
||||
from letta.server.rest_api.routers.v1.blocks import router as blocks_router
|
||||
from letta.server.rest_api.routers.v1.embeddings import router as embeddings_router
|
||||
from letta.server.rest_api.routers.v1.groups import router as groups_router
|
||||
from letta.server.rest_api.routers.v1.health import router as health_router
|
||||
from letta.server.rest_api.routers.v1.identities import router as identities_router
|
||||
@@ -32,4 +33,5 @@ ROUTERS = [
|
||||
tags_router,
|
||||
messages_router,
|
||||
voice_router,
|
||||
embeddings_router,
|
||||
]
|
||||
|
||||
20
letta/server/rest_api/routers/v1/embeddings.py
Normal file
20
letta/server/rest_api/routers/v1/embeddings.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, Header
|
||||
|
||||
from letta.server.rest_api.utils import get_letta_server
|
||||
from letta.server.server import SyncServer
|
||||
|
||||
router = APIRouter(prefix="/embeddings", tags=["embeddings"])
|
||||
|
||||
|
||||
@router.get("/total_storage_size", response_model=float, operation_id="get_total_storage_size")
def get_embeddings_storage_size(
    server: SyncServer = Depends(get_letta_server),
    actor_id: Optional[str] = Header(None, alias="user_id"),  # taken from the "user_id" request header; None when the header is absent
):
    """
    Get the total size of all embeddings in the database for a user in GB.
    """
    # NOTE: the docstring above is kept verbatim because FastAPI surfaces the
    # handler docstring as the operation description in the generated schema.

    # Resolve the acting user; get_user_or_default receives None when no
    # header was supplied (fallback behaviour lives in the user manager).
    resolved_actor = server.user_manager.get_user_or_default(user_id=actor_id)

    # Delegate the size estimate (a float, in GB) to the passage manager.
    passage_manager = server.passage_manager
    return passage_manager.estimate_embeddings_size_GB(actor=resolved_actor)
|
||||
@@ -13,7 +13,7 @@ router = APIRouter(prefix="/models", tags=["models", "llms"])
|
||||
|
||||
|
||||
@router.get("/", response_model=List[LLMConfig], operation_id="list_models")
|
||||
def list_llm_backends(
|
||||
def list_llm_models(
|
||||
server: "SyncServer" = Depends(get_letta_server),
|
||||
):
|
||||
|
||||
@@ -23,7 +23,7 @@ def list_llm_backends(
|
||||
|
||||
|
||||
@router.get("/embedding", response_model=List[EmbeddingConfig], operation_id="list_embedding_models")
|
||||
def list_embedding_backends(
|
||||
def list_embedding_models(
|
||||
server: "SyncServer" = Depends(get_letta_server),
|
||||
):
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import List, Optional
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
from letta.constants import MAX_EMBEDDING_DIM
|
||||
from letta.embeddings import embedding_model, parse_and_chunk_text
|
||||
from letta.orm.errors import NoResultFound
|
||||
from letta.orm.passage import AgentPassage, SourcePassage
|
||||
@@ -218,3 +219,16 @@ class PassageManager:
|
||||
"""
|
||||
with self.session_maker() as session:
|
||||
return AgentPassage.size(db_session=session, actor=actor, agent_id=agent_id)
|
||||
|
||||
def estimate_embeddings_size_GB(
    self,
    actor: PydanticUser,
    agent_id: Optional[str] = None,
) -> float:
    """
    Estimate the size of the embeddings in GB.

    The estimate is ``self.size(...)`` multiplied by a fixed per-embedding
    footprint of ``MAX_EMBEDDING_DIM`` values at 4 bytes each, converted to
    GB using 1024**3 bytes per GB.

    Args:
        actor: User on whose behalf the count is taken; forwarded to ``self.size``.
        agent_id: Optional agent filter; forwarded unchanged to ``self.size``.

    Returns:
        The estimated storage size in GB as a float.
    """
    # 4 bytes per dimension -- presumably float32 storage; TODO confirm
    # against the column type used for embeddings.
    bytes_per_dim = 4
    bytes_per_gb = 1024 * 1024 * 1024

    # Every embedding is costed at MAX_EMBEDDING_DIM dimensions, regardless of
    # the model's native dimensionality.
    gb_per_embedding = bytes_per_dim / bytes_per_gb * MAX_EMBEDDING_DIM

    # presumably the number of stored passages for this actor/agent -- verify
    # against the definition of self.size.
    embedding_count = self.size(actor=actor, agent_id=agent_id)
    return embedding_count * gb_per_embedding
|
||||
|
||||
Reference in New Issue
Block a user