feat: add usage columns to steps table (#9270)
* feat: add usage columns to steps table

  Adds denormalized usage fields to the steps table for easier querying:
  - model_handle: The model handle (e.g., "openai/gpt-4o-mini")
  - cached_input_tokens: Tokens served from cache
  - cache_write_tokens: Tokens written to cache (Anthropic)
  - reasoning_tokens: Reasoning/thinking tokens

  These fields mirror LettaUsageStatistics and are extracted from the existing
  prompt_tokens_details and completion_tokens_details JSON columns.

  🤖 Generated with [Letta Code](https://letta.com)

  Co-Authored-By: Letta <noreply@letta.com>

* chore: regenerate OpenAPI specs and SDK for usage columns

  🤖 Generated with [Letta Code](https://letta.com)

  Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

---------

Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>
This commit is contained in:
committed by
Caren Thomas
parent
f957beaa37
commit
e0a23f7039
33
alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py
Normal file
33
alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""add_usage_columns_to_steps
|
||||
|
||||
Revision ID: 3e54e2fa2f7e
|
||||
Revises: a1b2c3d4e5f8
|
||||
Create Date: 2026-02-03 16:35:51.327031
|
||||
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
|
||||
# Alembic revision identifiers: this migration's id, the migration it
# follows, and the (unused here) branch/dependency markers.
revision: str = "3e54e2fa2f7e"
down_revision: Union[str, None] = "a1b2c3d4e5f8"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Add denormalized usage columns to the ``steps`` table.

    All four columns are nullable so existing rows need no backfill:
    - model_handle: the model handle string (e.g. 'openai/gpt-4o-mini')
    - cached_input_tokens / cache_write_tokens / reasoning_tokens:
      per-step token counts, None when not reported by the provider.
    """
    usage_columns = (
        ("model_handle", sa.String()),
        ("cached_input_tokens", sa.Integer()),
        ("cache_write_tokens", sa.Integer()),
        ("reasoning_tokens", sa.Integer()),
    )
    for column_name, column_type in usage_columns:
        op.add_column("steps", sa.Column(column_name, column_type, nullable=True))
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the usage columns added by this revision.

    Columns are removed in the reverse of the order they were created.
    """
    for column_name in (
        "reasoning_tokens",
        "cache_write_tokens",
        "cached_input_tokens",
        "model_handle",
    ):
        op.drop_column("steps", column_name)
|
||||
@@ -42460,6 +42460,18 @@
|
||||
"title": "Model",
|
||||
"description": "The name of the model used for this step."
|
||||
},
|
||||
"model_handle": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Model Handle",
|
||||
"description": "The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
|
||||
},
|
||||
"model_endpoint": {
|
||||
"anyOf": [
|
||||
{
|
||||
@@ -42520,6 +42532,42 @@
|
||||
"title": "Total Tokens",
|
||||
"description": "The total number of tokens processed by the agent during this step."
|
||||
},
|
||||
"cached_input_tokens": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Cached Input Tokens",
|
||||
"description": "The number of input tokens served from cache. None if not reported by provider."
|
||||
},
|
||||
"cache_write_tokens": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Cache Write Tokens",
|
||||
"description": "The number of input tokens written to cache (Anthropic only). None if not reported by provider."
|
||||
},
|
||||
"reasoning_tokens": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Reasoning Tokens",
|
||||
"description": "The number of reasoning/thinking tokens generated. None if not reported by provider."
|
||||
},
|
||||
"completion_tokens_details": {
|
||||
"anyOf": [
|
||||
{
|
||||
|
||||
@@ -311,6 +311,7 @@ class LettaAgent(BaseAgent):
|
||||
step_id=step_id,
|
||||
project_id=agent_state.project_id,
|
||||
status=StepStatus.PENDING,
|
||||
model_handle=agent_state.llm_config.handle,
|
||||
)
|
||||
# Only use step_id in messages if step was actually created
|
||||
effective_step_id = step_id if logged_step else None
|
||||
@@ -645,6 +646,7 @@ class LettaAgent(BaseAgent):
|
||||
step_id=step_id,
|
||||
project_id=agent_state.project_id,
|
||||
status=StepStatus.PENDING,
|
||||
model_handle=agent_state.llm_config.handle,
|
||||
)
|
||||
# Only use step_id in messages if step was actually created
|
||||
effective_step_id = step_id if logged_step else None
|
||||
@@ -981,6 +983,7 @@ class LettaAgent(BaseAgent):
|
||||
step_id=step_id,
|
||||
project_id=agent_state.project_id,
|
||||
status=StepStatus.PENDING,
|
||||
model_handle=agent_state.llm_config.handle,
|
||||
)
|
||||
# Only use step_id in messages if step was actually created
|
||||
effective_step_id = step_id if logged_step else None
|
||||
|
||||
@@ -873,6 +873,7 @@ class LettaAgentV2(BaseAgentV2):
|
||||
step_id=step_id,
|
||||
project_id=self.agent_state.project_id,
|
||||
status=StepStatus.PENDING,
|
||||
model_handle=self.agent_state.llm_config.handle,
|
||||
)
|
||||
|
||||
# Also create step metrics early and update at the end of the step
|
||||
|
||||
@@ -43,6 +43,9 @@ class Step(SqlalchemyBase, ProjectMixin):
|
||||
provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
|
||||
provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.")
|
||||
model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
|
||||
model_handle: Mapped[Optional[str]] = mapped_column(
|
||||
None, nullable=True, doc="The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
|
||||
)
|
||||
model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
|
||||
context_window_limit: Mapped[Optional[int]] = mapped_column(
|
||||
None, nullable=True, doc="The context window limit configured for this step."
|
||||
@@ -50,6 +53,15 @@ class Step(SqlalchemyBase, ProjectMixin):
|
||||
completion_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens generated by the agent")
|
||||
prompt_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens in the prompt")
|
||||
total_tokens: Mapped[int] = mapped_column(default=0, doc="Total number of tokens processed by the agent")
|
||||
cached_input_tokens: Mapped[Optional[int]] = mapped_column(
|
||||
None, nullable=True, doc="Number of input tokens served from cache. None if not reported by provider."
|
||||
)
|
||||
cache_write_tokens: Mapped[Optional[int]] = mapped_column(
|
||||
None, nullable=True, doc="Number of input tokens written to cache (Anthropic only). None if not reported by provider."
|
||||
)
|
||||
reasoning_tokens: Mapped[Optional[int]] = mapped_column(
|
||||
None, nullable=True, doc="Number of reasoning/thinking tokens generated. None if not reported by provider."
|
||||
)
|
||||
completion_tokens_details: Mapped[Optional[Dict]] = mapped_column(
|
||||
JSON, nullable=True, doc="Detailed completion token breakdown (e.g., reasoning_tokens)."
|
||||
)
|
||||
|
||||
@@ -25,11 +25,21 @@ class Step(StepBase):
|
||||
provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
|
||||
provider_category: Optional[str] = Field(None, description="The category of the provider used for this step.")
|
||||
model: Optional[str] = Field(None, description="The name of the model used for this step.")
|
||||
model_handle: Optional[str] = Field(None, description="The model handle (e.g., 'openai/gpt-4o-mini') used for this step.")
|
||||
model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
|
||||
context_window_limit: Optional[int] = Field(None, description="The context window limit configured for this step.")
|
||||
completion_tokens: Optional[int] = Field(None, description="The number of tokens generated by the agent during this step.")
|
||||
prompt_tokens: Optional[int] = Field(None, description="The number of tokens in the prompt during this step.")
|
||||
total_tokens: Optional[int] = Field(None, description="The total number of tokens processed by the agent during this step.")
|
||||
cached_input_tokens: Optional[int] = Field(
|
||||
None, description="The number of input tokens served from cache. None if not reported by provider."
|
||||
)
|
||||
cache_write_tokens: Optional[int] = Field(
|
||||
None, description="The number of input tokens written to cache (Anthropic only). None if not reported by provider."
|
||||
)
|
||||
reasoning_tokens: Optional[int] = Field(
|
||||
None, description="The number of reasoning/thinking tokens generated. None if not reported by provider."
|
||||
)
|
||||
completion_tokens_details: Optional[Dict] = Field(None, description="Detailed completion token breakdown (e.g., reasoning_tokens).")
|
||||
prompt_tokens_details: Optional[Dict] = Field(
|
||||
None, description="Detailed prompt token breakdown (e.g., cached_tokens, cache_read_tokens, cache_creation_tokens)."
|
||||
|
||||
@@ -19,6 +19,7 @@ from letta.schemas.message import Message as PydanticMessage
|
||||
from letta.schemas.openai.chat_completion_response import UsageStatistics
|
||||
from letta.schemas.step import Step as PydanticStep
|
||||
from letta.schemas.step_metrics import StepMetrics as PydanticStepMetrics
|
||||
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
|
||||
from letta.schemas.user import User as PydanticUser
|
||||
from letta.server.db import db_registry
|
||||
from letta.server.rest_api.middleware.request_id import get_request_id
|
||||
@@ -107,6 +108,26 @@ class StepManager:
|
||||
error_type: Optional[str] = None,
|
||||
error_data: Optional[Dict] = None,
|
||||
) -> PydanticStep:
|
||||
# Extract normalized usage fields
|
||||
cached_input_tokens = None
|
||||
cache_write_tokens = None
|
||||
reasoning_tokens = None
|
||||
prompt_tokens_details = None
|
||||
completion_tokens_details = None
|
||||
|
||||
if usage.prompt_tokens_details:
|
||||
prompt_tokens_details = usage.prompt_tokens_details.model_dump()
|
||||
cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
|
||||
if cached_input > 0:
|
||||
cached_input_tokens = cached_input
|
||||
if cache_write > 0:
|
||||
cache_write_tokens = cache_write
|
||||
if usage.completion_tokens_details:
|
||||
completion_tokens_details = usage.completion_tokens_details.model_dump()
|
||||
reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
|
||||
if reasoning > 0:
|
||||
reasoning_tokens = reasoning
|
||||
|
||||
step_data = {
|
||||
"origin": None,
|
||||
"organization_id": actor.organization_id,
|
||||
@@ -115,11 +136,17 @@ class StepManager:
|
||||
"provider_name": provider_name,
|
||||
"provider_category": provider_category,
|
||||
"model": model,
|
||||
"model_handle": model_handle,
|
||||
"model_endpoint": model_endpoint,
|
||||
"context_window_limit": context_window_limit,
|
||||
"completion_tokens": usage.completion_tokens,
|
||||
"prompt_tokens": usage.prompt_tokens,
|
||||
"total_tokens": usage.total_tokens,
|
||||
"cached_input_tokens": cached_input_tokens,
|
||||
"cache_write_tokens": cache_write_tokens,
|
||||
"reasoning_tokens": reasoning_tokens,
|
||||
"prompt_tokens_details": prompt_tokens_details,
|
||||
"completion_tokens_details": completion_tokens_details,
|
||||
"run_id": run_id,
|
||||
"tags": [],
|
||||
"tid": None,
|
||||
@@ -166,6 +193,7 @@ class StepManager:
|
||||
error_type: Optional[str] = None,
|
||||
error_data: Optional[Dict] = None,
|
||||
allow_partial: Optional[bool] = False,
|
||||
model_handle: Optional[str] = None,
|
||||
) -> PydanticStep:
|
||||
step_data = {
|
||||
"origin": None,
|
||||
@@ -416,8 +444,18 @@ class StepManager:
|
||||
# Persist detailed token breakdowns if available
|
||||
if usage.prompt_tokens_details:
|
||||
step.prompt_tokens_details = usage.prompt_tokens_details.model_dump()
|
||||
# Extract normalized cache tokens
|
||||
cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
|
||||
if cached_input > 0:
|
||||
step.cached_input_tokens = cached_input
|
||||
if cache_write > 0:
|
||||
step.cache_write_tokens = cache_write
|
||||
if usage.completion_tokens_details:
|
||||
step.completion_tokens_details = usage.completion_tokens_details.model_dump()
|
||||
# Extract normalized reasoning tokens
|
||||
reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
|
||||
if reasoning > 0:
|
||||
step.reasoning_tokens = reasoning
|
||||
|
||||
# context manager now handles commits
|
||||
# await session.commit()
|
||||
|
||||
Reference in New Issue
Block a user