feat: add usage columns to steps table (#9270)

* feat: add usage columns to steps table

Adds denormalized usage fields to the steps table for easier querying:
- model_handle: The model handle (e.g., "openai/gpt-4o-mini")
- cached_input_tokens: Tokens served from cache
- cache_write_tokens: Tokens written to cache (Anthropic)
- reasoning_tokens: Reasoning/thinking tokens

These fields mirror LettaUsageStatistics and are extracted from the
existing prompt_tokens_details and completion_tokens_details JSON columns.

🤖 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* chore: regenerate OpenAPI specs and SDK for usage columns

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

---------

Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>
This commit is contained in:
Sarah Wooders
2026-02-04 12:24:52 -08:00
committed by Caren Thomas
parent f957beaa37
commit e0a23f7039
7 changed files with 145 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
"""add_usage_columns_to_steps
Revision ID: 3e54e2fa2f7e
Revises: a1b2c3d4e5f8
Create Date: 2026-02-03 16:35:51.327031
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "3e54e2fa2f7e"
down_revision: Union[str, None] = "a1b2c3d4e5f8"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add denormalized usage columns to the ``steps`` table.

    Adds four nullable columns mirroring LettaUsageStatistics:
    ``model_handle`` (string) plus three integer token counters
    (``cached_input_tokens``, ``cache_write_tokens``, ``reasoning_tokens``).
    All columns are nullable so existing rows need no backfill.
    """
    # (column name, column type) pairs, in the order the columns are added.
    usage_columns = (
        ("model_handle", sa.String()),
        ("cached_input_tokens", sa.Integer()),
        ("cache_write_tokens", sa.Integer()),
        ("reasoning_tokens", sa.Integer()),
    )
    for column_name, column_type in usage_columns:
        op.add_column("steps", sa.Column(column_name, column_type, nullable=True))
def downgrade() -> None:
    """Drop the usage columns added by :func:`upgrade`.

    Columns are removed in the reverse of the order they were created.
    """
    for column_name in (
        "reasoning_tokens",
        "cache_write_tokens",
        "cached_input_tokens",
        "model_handle",
    ):
        op.drop_column("steps", column_name)

View File

@@ -42460,6 +42460,18 @@
"title": "Model",
"description": "The name of the model used for this step."
},
"model_handle": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model Handle",
"description": "The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
},
"model_endpoint": {
"anyOf": [
{
@@ -42520,6 +42532,42 @@
"title": "Total Tokens",
"description": "The total number of tokens processed by the agent during this step."
},
"cached_input_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Cached Input Tokens",
"description": "The number of input tokens served from cache. None if not reported by provider."
},
"cache_write_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Cache Write Tokens",
"description": "The number of input tokens written to cache (Anthropic only). None if not reported by provider."
},
"reasoning_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Reasoning Tokens",
"description": "The number of reasoning/thinking tokens generated. None if not reported by provider."
},
"completion_tokens_details": {
"anyOf": [
{

View File

@@ -311,6 +311,7 @@ class LettaAgent(BaseAgent):
step_id=step_id,
project_id=agent_state.project_id,
status=StepStatus.PENDING,
model_handle=agent_state.llm_config.handle,
)
# Only use step_id in messages if step was actually created
effective_step_id = step_id if logged_step else None
@@ -645,6 +646,7 @@ class LettaAgent(BaseAgent):
step_id=step_id,
project_id=agent_state.project_id,
status=StepStatus.PENDING,
model_handle=agent_state.llm_config.handle,
)
# Only use step_id in messages if step was actually created
effective_step_id = step_id if logged_step else None
@@ -981,6 +983,7 @@ class LettaAgent(BaseAgent):
step_id=step_id,
project_id=agent_state.project_id,
status=StepStatus.PENDING,
model_handle=agent_state.llm_config.handle,
)
# Only use step_id in messages if step was actually created
effective_step_id = step_id if logged_step else None

View File

@@ -873,6 +873,7 @@ class LettaAgentV2(BaseAgentV2):
step_id=step_id,
project_id=self.agent_state.project_id,
status=StepStatus.PENDING,
model_handle=self.agent_state.llm_config.handle,
)
# Also create step metrics early and update at the end of the step

View File

@@ -43,6 +43,9 @@ class Step(SqlalchemyBase, ProjectMixin):
provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.")
model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
model_handle: Mapped[Optional[str]] = mapped_column(
None, nullable=True, doc="The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
)
model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
context_window_limit: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="The context window limit configured for this step."
@@ -50,6 +53,15 @@ class Step(SqlalchemyBase, ProjectMixin):
completion_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens generated by the agent")
prompt_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens in the prompt")
total_tokens: Mapped[int] = mapped_column(default=0, doc="Total number of tokens processed by the agent")
cached_input_tokens: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="Number of input tokens served from cache. None if not reported by provider."
)
cache_write_tokens: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="Number of input tokens written to cache (Anthropic only). None if not reported by provider."
)
reasoning_tokens: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="Number of reasoning/thinking tokens generated. None if not reported by provider."
)
completion_tokens_details: Mapped[Optional[Dict]] = mapped_column(
JSON, nullable=True, doc="Detailed completion token breakdown (e.g., reasoning_tokens)."
)

View File

@@ -25,11 +25,21 @@ class Step(StepBase):
provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
provider_category: Optional[str] = Field(None, description="The category of the provider used for this step.")
model: Optional[str] = Field(None, description="The name of the model used for this step.")
model_handle: Optional[str] = Field(None, description="The model handle (e.g., 'openai/gpt-4o-mini') used for this step.")
model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
context_window_limit: Optional[int] = Field(None, description="The context window limit configured for this step.")
completion_tokens: Optional[int] = Field(None, description="The number of tokens generated by the agent during this step.")
prompt_tokens: Optional[int] = Field(None, description="The number of tokens in the prompt during this step.")
total_tokens: Optional[int] = Field(None, description="The total number of tokens processed by the agent during this step.")
cached_input_tokens: Optional[int] = Field(
None, description="The number of input tokens served from cache. None if not reported by provider."
)
cache_write_tokens: Optional[int] = Field(
None, description="The number of input tokens written to cache (Anthropic only). None if not reported by provider."
)
reasoning_tokens: Optional[int] = Field(
None, description="The number of reasoning/thinking tokens generated. None if not reported by provider."
)
completion_tokens_details: Optional[Dict] = Field(None, description="Detailed completion token breakdown (e.g., reasoning_tokens).")
prompt_tokens_details: Optional[Dict] = Field(
None, description="Detailed prompt token breakdown (e.g., cached_tokens, cache_read_tokens, cache_creation_tokens)."

View File

@@ -19,6 +19,7 @@ from letta.schemas.message import Message as PydanticMessage
from letta.schemas.openai.chat_completion_response import UsageStatistics
from letta.schemas.step import Step as PydanticStep
from letta.schemas.step_metrics import StepMetrics as PydanticStepMetrics
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.server.rest_api.middleware.request_id import get_request_id
@@ -107,6 +108,26 @@ class StepManager:
error_type: Optional[str] = None,
error_data: Optional[Dict] = None,
) -> PydanticStep:
# Extract normalized usage fields
cached_input_tokens = None
cache_write_tokens = None
reasoning_tokens = None
prompt_tokens_details = None
completion_tokens_details = None
if usage.prompt_tokens_details:
prompt_tokens_details = usage.prompt_tokens_details.model_dump()
cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
if cached_input > 0:
cached_input_tokens = cached_input
if cache_write > 0:
cache_write_tokens = cache_write
if usage.completion_tokens_details:
completion_tokens_details = usage.completion_tokens_details.model_dump()
reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
if reasoning > 0:
reasoning_tokens = reasoning
step_data = {
"origin": None,
"organization_id": actor.organization_id,
@@ -115,11 +136,17 @@ class StepManager:
"provider_name": provider_name,
"provider_category": provider_category,
"model": model,
"model_handle": model_handle,
"model_endpoint": model_endpoint,
"context_window_limit": context_window_limit,
"completion_tokens": usage.completion_tokens,
"prompt_tokens": usage.prompt_tokens,
"total_tokens": usage.total_tokens,
"cached_input_tokens": cached_input_tokens,
"cache_write_tokens": cache_write_tokens,
"reasoning_tokens": reasoning_tokens,
"prompt_tokens_details": prompt_tokens_details,
"completion_tokens_details": completion_tokens_details,
"run_id": run_id,
"tags": [],
"tid": None,
@@ -166,6 +193,7 @@ class StepManager:
error_type: Optional[str] = None,
error_data: Optional[Dict] = None,
allow_partial: Optional[bool] = False,
model_handle: Optional[str] = None,
) -> PydanticStep:
step_data = {
"origin": None,
@@ -416,8 +444,18 @@ class StepManager:
# Persist detailed token breakdowns if available
if usage.prompt_tokens_details:
step.prompt_tokens_details = usage.prompt_tokens_details.model_dump()
# Extract normalized cache tokens
cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
if cached_input > 0:
step.cached_input_tokens = cached_input
if cache_write > 0:
step.cache_write_tokens = cache_write
if usage.completion_tokens_details:
step.completion_tokens_details = usage.completion_tokens_details.model_dump()
# Extract normalized reasoning tokens
reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
if reasoning > 0:
step.reasoning_tokens = reasoning
# context manager now handles commits
# await session.commit()