feat: add usage columns to steps table (#9270)

* feat: add usage columns to steps table

Adds denormalized usage fields to the steps table for easier querying:
- model_handle: The model handle (e.g., "openai/gpt-4o-mini")
- cached_input_tokens: Tokens served from cache
- cache_write_tokens: Tokens written to cache (Anthropic)
- reasoning_tokens: Reasoning/thinking tokens

These fields mirror LettaUsageStatistics and are extracted from the
existing prompt_tokens_details and completion_tokens_details JSON columns.

🤖 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* chore: regenerate OpenAPI specs and SDK for usage columns

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

---------

Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>
This commit is contained in:
Sarah Wooders
2026-02-04 12:24:52 -08:00
committed by Caren Thomas
parent f957beaa37
commit e0a23f7039
7 changed files with 145 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
"""add_usage_columns_to_steps
Revision ID: 3e54e2fa2f7e
Revises: a1b2c3d4e5f8
Create Date: 2026-02-03 16:35:51.327031
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# revision identifiers, used by Alembic.
revision: str = "3e54e2fa2f7e"
down_revision: Union[str, None] = "a1b2c3d4e5f8"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add denormalized usage columns to the ``steps`` table.

    Adds four nullable columns mirroring LettaUsageStatistics:
    ``model_handle`` (string) plus three integer token counters
    (``cached_input_tokens``, ``cache_write_tokens``, ``reasoning_tokens``).
    All columns are nullable so existing rows need no backfill.
    """
    # (column name, column type) pairs, in the order the columns are added.
    usage_columns = (
        ("model_handle", sa.String()),
        ("cached_input_tokens", sa.Integer()),
        ("cache_write_tokens", sa.Integer()),
        ("reasoning_tokens", sa.Integer()),
    )
    for column_name, column_type in usage_columns:
        op.add_column("steps", sa.Column(column_name, column_type, nullable=True))
def downgrade() -> None:
    """Drop the usage columns added by :func:`upgrade`.

    Columns are removed in the reverse of the order they were created.
    """
    for column_name in (
        "reasoning_tokens",
        "cache_write_tokens",
        "cached_input_tokens",
        "model_handle",
    ):
        op.drop_column("steps", column_name)

View File

@@ -42460,6 +42460,18 @@
"title": "Model",
"description": "The name of the model used for this step."
},
"model_handle": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"title": "Model Handle",
"description": "The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
},
"model_endpoint": {
"anyOf": [
{
@@ -42520,6 +42532,42 @@
"title": "Total Tokens",
"description": "The total number of tokens processed by the agent during this step."
},
"cached_input_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Cached Input Tokens",
"description": "The number of input tokens served from cache. None if not reported by provider."
},
"cache_write_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Cache Write Tokens",
"description": "The number of input tokens written to cache (Anthropic only). None if not reported by provider."
},
"reasoning_tokens": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Reasoning Tokens",
"description": "The number of reasoning/thinking tokens generated. None if not reported by provider."
},
"completion_tokens_details": {
"anyOf": [
{

View File

@@ -311,6 +311,7 @@ class LettaAgent(BaseAgent):
step_id=step_id,
project_id=agent_state.project_id,
status=StepStatus.PENDING,
model_handle=agent_state.llm_config.handle,
)
# Only use step_id in messages if step was actually created
effective_step_id = step_id if logged_step else None
@@ -645,6 +646,7 @@ class LettaAgent(BaseAgent):
step_id=step_id,
project_id=agent_state.project_id,
status=StepStatus.PENDING,
model_handle=agent_state.llm_config.handle,
)
# Only use step_id in messages if step was actually created
effective_step_id = step_id if logged_step else None
@@ -981,6 +983,7 @@ class LettaAgent(BaseAgent):
step_id=step_id,
project_id=agent_state.project_id,
status=StepStatus.PENDING,
model_handle=agent_state.llm_config.handle,
)
# Only use step_id in messages if step was actually created
effective_step_id = step_id if logged_step else None

View File

@@ -873,6 +873,7 @@ class LettaAgentV2(BaseAgentV2):
step_id=step_id,
project_id=self.agent_state.project_id,
status=StepStatus.PENDING,
model_handle=self.agent_state.llm_config.handle,
)
# Also create step metrics early and update at the end of the step

View File

@@ -43,6 +43,9 @@ class Step(SqlalchemyBase, ProjectMixin):
provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.")
model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
model_handle: Mapped[Optional[str]] = mapped_column(
None, nullable=True, doc="The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
)
model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
context_window_limit: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="The context window limit configured for this step."
@@ -50,6 +53,15 @@ class Step(SqlalchemyBase, ProjectMixin):
completion_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens generated by the agent")
prompt_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens in the prompt")
total_tokens: Mapped[int] = mapped_column(default=0, doc="Total number of tokens processed by the agent")
cached_input_tokens: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="Number of input tokens served from cache. None if not reported by provider."
)
cache_write_tokens: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="Number of input tokens written to cache (Anthropic only). None if not reported by provider."
)
reasoning_tokens: Mapped[Optional[int]] = mapped_column(
None, nullable=True, doc="Number of reasoning/thinking tokens generated. None if not reported by provider."
)
completion_tokens_details: Mapped[Optional[Dict]] = mapped_column(
JSON, nullable=True, doc="Detailed completion token breakdown (e.g., reasoning_tokens)."
)

View File

@@ -25,11 +25,21 @@ class Step(StepBase):
provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
provider_category: Optional[str] = Field(None, description="The category of the provider used for this step.")
model: Optional[str] = Field(None, description="The name of the model used for this step.")
model_handle: Optional[str] = Field(None, description="The model handle (e.g., 'openai/gpt-4o-mini') used for this step.")
model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
context_window_limit: Optional[int] = Field(None, description="The context window limit configured for this step.")
completion_tokens: Optional[int] = Field(None, description="The number of tokens generated by the agent during this step.")
prompt_tokens: Optional[int] = Field(None, description="The number of tokens in the prompt during this step.")
total_tokens: Optional[int] = Field(None, description="The total number of tokens processed by the agent during this step.")
cached_input_tokens: Optional[int] = Field(
None, description="The number of input tokens served from cache. None if not reported by provider."
)
cache_write_tokens: Optional[int] = Field(
None, description="The number of input tokens written to cache (Anthropic only). None if not reported by provider."
)
reasoning_tokens: Optional[int] = Field(
None, description="The number of reasoning/thinking tokens generated. None if not reported by provider."
)
completion_tokens_details: Optional[Dict] = Field(None, description="Detailed completion token breakdown (e.g., reasoning_tokens).")
prompt_tokens_details: Optional[Dict] = Field(
None, description="Detailed prompt token breakdown (e.g., cached_tokens, cache_read_tokens, cache_creation_tokens)."

View File

@@ -19,6 +19,7 @@ from letta.schemas.message import Message as PydanticMessage
from letta.schemas.openai.chat_completion_response import UsageStatistics
from letta.schemas.step import Step as PydanticStep
from letta.schemas.step_metrics import StepMetrics as PydanticStepMetrics
from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
from letta.schemas.user import User as PydanticUser
from letta.server.db import db_registry
from letta.server.rest_api.middleware.request_id import get_request_id
@@ -107,6 +108,26 @@ class StepManager:
error_type: Optional[str] = None,
error_data: Optional[Dict] = None,
) -> PydanticStep:
# Extract normalized usage fields
cached_input_tokens = None
cache_write_tokens = None
reasoning_tokens = None
prompt_tokens_details = None
completion_tokens_details = None
if usage.prompt_tokens_details:
prompt_tokens_details = usage.prompt_tokens_details.model_dump()
cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
if cached_input > 0:
cached_input_tokens = cached_input
if cache_write > 0:
cache_write_tokens = cache_write
if usage.completion_tokens_details:
completion_tokens_details = usage.completion_tokens_details.model_dump()
reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
if reasoning > 0:
reasoning_tokens = reasoning
step_data = {
"origin": None,
"organization_id": actor.organization_id,
@@ -115,11 +136,17 @@ class StepManager:
"provider_name": provider_name,
"provider_category": provider_category,
"model": model,
"model_handle": model_handle,
"model_endpoint": model_endpoint,
"context_window_limit": context_window_limit,
"completion_tokens": usage.completion_tokens,
"prompt_tokens": usage.prompt_tokens,
"total_tokens": usage.total_tokens,
"cached_input_tokens": cached_input_tokens,
"cache_write_tokens": cache_write_tokens,
"reasoning_tokens": reasoning_tokens,
"prompt_tokens_details": prompt_tokens_details,
"completion_tokens_details": completion_tokens_details,
"run_id": run_id,
"tags": [],
"tid": None,
@@ -166,6 +193,7 @@ class StepManager:
error_type: Optional[str] = None,
error_data: Optional[Dict] = None,
allow_partial: Optional[bool] = False,
model_handle: Optional[str] = None,
) -> PydanticStep:
step_data = {
"origin": None,
@@ -416,8 +444,18 @@ class StepManager:
# Persist detailed token breakdowns if available
if usage.prompt_tokens_details:
step.prompt_tokens_details = usage.prompt_tokens_details.model_dump()
# Extract normalized cache tokens
cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
if cached_input > 0:
step.cached_input_tokens = cached_input
if cache_write > 0:
step.cache_write_tokens = cache_write
if usage.completion_tokens_details:
step.completion_tokens_details = usage.completion_tokens_details.model_dump()
# Extract normalized reasoning tokens
reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
if reasoning > 0:
step.reasoning_tokens = reasoning
# context manager now handles commits
# await session.commit()