From e0a23f7039ec8de5bed20d3dfd4ca04cdb1a4d03 Mon Sep 17 00:00:00 2001
From: Sarah Wooders <sarahwooders@gmail.com>
Date: Wed, 4 Feb 2026 12:24:52 -0800
Subject: [PATCH] feat: add usage columns to steps table (#9270)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: add usage columns to steps table

Adds denormalized usage fields to the steps table for easier querying:
- model_handle: The model handle (e.g., "openai/gpt-4o-mini")
- cached_input_tokens: Tokens served from cache
- cache_write_tokens: Tokens written to cache (Anthropic)
- reasoning_tokens: Reasoning/thinking tokens

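The token-count fields mirror LettaUsageStatistics and are extracted from the
usage's prompt_tokens_details and completion_tokens_details, which are also
stored in the existing JSON columns; model_handle is taken from the agent's
llm_config.handle.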

🤖 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* chore: regenerate OpenAPI specs and SDK for usage columns

🤖 Generated with [Letta Code](https://letta.com)

Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

---------

Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>
---
 ...3e54e2fa2f7e_add_usage_columns_to_steps.py | 33 +++++++++++++
 fern/openapi.json                             | 48 +++++++++++++++++++
 letta/agents/letta_agent.py                   |  3 ++
 letta/agents/letta_agent_v2.py                |  1 +
 letta/orm/step.py                             | 12 +++++
 letta/schemas/step.py                         | 10 ++++
 letta/services/step_manager.py                | 38 +++++++++++++++
 7 files changed, 145 insertions(+)
 create mode 100644 alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py

diff --git a/alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py b/alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py
new file mode 100644
index 00000000..997d0d80
--- /dev/null
+++ b/alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py
@@ -0,0 +1,33 @@
+"""add_usage_columns_to_steps
+
+Revision ID: 3e54e2fa2f7e
+Revises: a1b2c3d4e5f8
+Create Date: 2026-02-03 16:35:51.327031
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "3e54e2fa2f7e"
+down_revision: Union[str, None] = "a1b2c3d4e5f8"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # Add four nullable usage columns to "steps"; no default is set and nothing is backfilled here.
+    op.add_column("steps", sa.Column("model_handle", sa.String(), nullable=True))  # model handle, e.g. "openai/gpt-4o-mini"
+    op.add_column("steps", sa.Column("cached_input_tokens", sa.Integer(), nullable=True))  # input tokens served from cache
+    op.add_column("steps", sa.Column("cache_write_tokens", sa.Integer(), nullable=True))  # input tokens written to cache
+    op.add_column("steps", sa.Column("reasoning_tokens", sa.Integer(), nullable=True))  # reasoning/thinking tokens
+
+
+def downgrade() -> None:  # Drop the columns added by upgrade(), in the reverse of the order they were added.
+    op.drop_column("steps", "reasoning_tokens")
+    op.drop_column("steps", "cache_write_tokens")
+    op.drop_column("steps", "cached_input_tokens")
+    op.drop_column("steps", "model_handle")
diff --git a/fern/openapi.json b/fern/openapi.json
index 18c2b474..3230ece0 100644
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -42460,6 +42460,18 @@
             "title": "Model",
             "description": "The name of the model used for this step."
           },
+          "model_handle": {
+            "anyOf": [
+              {
+                "type": "string"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Model Handle",
+            "description": "The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
+          },
           "model_endpoint": {
             "anyOf": [
               {
@@ -42520,6 +42532,42 @@
             "title": "Total Tokens",
             "description": "The total number of tokens processed by the agent during this step."
           },
+          "cached_input_tokens": {
+            "anyOf": [
+              {
+                "type": "integer"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cached Input Tokens",
+            "description": "The number of input tokens served from cache. None if not reported by provider."
+          },
+          "cache_write_tokens": {
+            "anyOf": [
+              {
+                "type": "integer"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Cache Write Tokens",
+            "description": "The number of input tokens written to cache (Anthropic only). None if not reported by provider."
+          },
+          "reasoning_tokens": {
+            "anyOf": [
+              {
+                "type": "integer"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Reasoning Tokens",
+            "description": "The number of reasoning/thinking tokens generated. None if not reported by provider."
+          },
           "completion_tokens_details": {
             "anyOf": [
               {
diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py
index 8460a46a..246f6d3f 100644
--- a/letta/agents/letta_agent.py
+++ b/letta/agents/letta_agent.py
@@ -311,6 +311,7 @@ class LettaAgent(BaseAgent):
                     step_id=step_id,
                     project_id=agent_state.project_id,
                     status=StepStatus.PENDING,
+                    model_handle=agent_state.llm_config.handle,
                 )
                 # Only use step_id in messages if step was actually created
                 effective_step_id = step_id if logged_step else None
@@ -645,6 +646,7 @@ class LettaAgent(BaseAgent):
                     step_id=step_id,
                     project_id=agent_state.project_id,
                     status=StepStatus.PENDING,
+                    model_handle=agent_state.llm_config.handle,
                 )
                 # Only use step_id in messages if step was actually created
                 effective_step_id = step_id if logged_step else None
@@ -981,6 +983,7 @@ class LettaAgent(BaseAgent):
                     step_id=step_id,
                     project_id=agent_state.project_id,
                     status=StepStatus.PENDING,
+                    model_handle=agent_state.llm_config.handle,
                 )
                 # Only use step_id in messages if step was actually created
                 effective_step_id = step_id if logged_step else None
diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py
index 85e6b86c..657ac79d 100644
--- a/letta/agents/letta_agent_v2.py
+++ b/letta/agents/letta_agent_v2.py
@@ -873,6 +873,7 @@ class LettaAgentV2(BaseAgentV2):
             step_id=step_id,
             project_id=self.agent_state.project_id,
             status=StepStatus.PENDING,
+            model_handle=self.agent_state.llm_config.handle,
         )
 
         # Also create step metrics early and update at the end of the step
diff --git a/letta/orm/step.py b/letta/orm/step.py
index 13d6d552..64f0353a 100644
--- a/letta/orm/step.py
+++ b/letta/orm/step.py
@@ -43,6 +43,9 @@ class Step(SqlalchemyBase, ProjectMixin):
     provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
     provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.")
     model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
+    model_handle: Mapped[Optional[str]] = mapped_column(
+        None, nullable=True, doc="The model handle (e.g., 'openai/gpt-4o-mini') used for this step."
+    )
     model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
     context_window_limit: Mapped[Optional[int]] = mapped_column(
         None, nullable=True, doc="The context window limit configured for this step."
@@ -50,6 +53,15 @@ class Step(SqlalchemyBase, ProjectMixin):
     completion_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens generated by the agent")
     prompt_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens in the prompt")
     total_tokens: Mapped[int] = mapped_column(default=0, doc="Total number of tokens processed by the agent")
+    cached_input_tokens: Mapped[Optional[int]] = mapped_column(
+        None, nullable=True, doc="Number of input tokens served from cache. None if not reported by provider."
+    )
+    cache_write_tokens: Mapped[Optional[int]] = mapped_column(
+        None, nullable=True, doc="Number of input tokens written to cache (Anthropic only). None if not reported by provider."
+    )
+    reasoning_tokens: Mapped[Optional[int]] = mapped_column(
+        None, nullable=True, doc="Number of reasoning/thinking tokens generated. None if not reported by provider."
+    )
     completion_tokens_details: Mapped[Optional[Dict]] = mapped_column(
         JSON, nullable=True, doc="Detailed completion token breakdown (e.g., reasoning_tokens)."
     )
diff --git a/letta/schemas/step.py b/letta/schemas/step.py
index 83126a96..0c525547 100644
--- a/letta/schemas/step.py
+++ b/letta/schemas/step.py
@@ -25,11 +25,21 @@ class Step(StepBase):
     provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
     provider_category: Optional[str] = Field(None, description="The category of the provider used for this step.")
     model: Optional[str] = Field(None, description="The name of the model used for this step.")
+    model_handle: Optional[str] = Field(None, description="The model handle (e.g., 'openai/gpt-4o-mini') used for this step.")
     model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
     context_window_limit: Optional[int] = Field(None, description="The context window limit configured for this step.")
     completion_tokens: Optional[int] = Field(None, description="The number of tokens generated by the agent during this step.")
     prompt_tokens: Optional[int] = Field(None, description="The number of tokens in the prompt during this step.")
     total_tokens: Optional[int] = Field(None, description="The total number of tokens processed by the agent during this step.")
+    cached_input_tokens: Optional[int] = Field(
+        None, description="The number of input tokens served from cache. None if not reported by provider."
+    )
+    cache_write_tokens: Optional[int] = Field(
+        None, description="The number of input tokens written to cache (Anthropic only). None if not reported by provider."
+    )
+    reasoning_tokens: Optional[int] = Field(
+        None, description="The number of reasoning/thinking tokens generated. None if not reported by provider."
+    )
     completion_tokens_details: Optional[Dict] = Field(None, description="Detailed completion token breakdown (e.g., reasoning_tokens).")
     prompt_tokens_details: Optional[Dict] = Field(
         None, description="Detailed prompt token breakdown (e.g., cached_tokens, cache_read_tokens, cache_creation_tokens)."
diff --git a/letta/services/step_manager.py b/letta/services/step_manager.py
index 92733e75..80db383c 100644
--- a/letta/services/step_manager.py
+++ b/letta/services/step_manager.py
@@ -19,6 +19,7 @@ from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_response import UsageStatistics
 from letta.schemas.step import Step as PydanticStep
 from letta.schemas.step_metrics import StepMetrics as PydanticStepMetrics
+from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
 from letta.server.rest_api.middleware.request_id import get_request_id
@@ -107,6 +108,26 @@ class StepManager:
         error_type: Optional[str] = None,
         error_data: Optional[Dict] = None,
     ) -> PydanticStep:
+        # Extract normalized usage fields
+        cached_input_tokens = None
+        cache_write_tokens = None
+        reasoning_tokens = None
+        prompt_tokens_details = None
+        completion_tokens_details = None
+
+        if usage.prompt_tokens_details:
+            prompt_tokens_details = usage.prompt_tokens_details.model_dump()
+            cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
+            if cached_input > 0:
+                cached_input_tokens = cached_input
+            if cache_write > 0:
+                cache_write_tokens = cache_write
+        if usage.completion_tokens_details:
+            completion_tokens_details = usage.completion_tokens_details.model_dump()
+            reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
+            if reasoning > 0:
+                reasoning_tokens = reasoning
+
         step_data = {
             "origin": None,
             "organization_id": actor.organization_id,
@@ -115,11 +136,17 @@ class StepManager:
             "provider_name": provider_name,
             "provider_category": provider_category,
             "model": model,
+            "model_handle": model_handle,
             "model_endpoint": model_endpoint,
             "context_window_limit": context_window_limit,
             "completion_tokens": usage.completion_tokens,
             "prompt_tokens": usage.prompt_tokens,
             "total_tokens": usage.total_tokens,
+            "cached_input_tokens": cached_input_tokens,
+            "cache_write_tokens": cache_write_tokens,
+            "reasoning_tokens": reasoning_tokens,
+            "prompt_tokens_details": prompt_tokens_details,
+            "completion_tokens_details": completion_tokens_details,
             "run_id": run_id,
             "tags": [],
             "tid": None,
@@ -166,6 +193,7 @@ class StepManager:
         error_type: Optional[str] = None,
         error_data: Optional[Dict] = None,
         allow_partial: Optional[bool] = False,
+        model_handle: Optional[str] = None,
     ) -> PydanticStep:
         step_data = {
             "origin": None,
@@ -416,8 +444,18 @@ class StepManager:
             # Persist detailed token breakdowns if available
             if usage.prompt_tokens_details:
                 step.prompt_tokens_details = usage.prompt_tokens_details.model_dump()
+                # Extract normalized cache tokens
+                cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details)
+                if cached_input > 0:
+                    step.cached_input_tokens = cached_input
+                if cache_write > 0:
+                    step.cache_write_tokens = cache_write
             if usage.completion_tokens_details:
                 step.completion_tokens_details = usage.completion_tokens_details.model_dump()
+                # Extract normalized reasoning tokens
+                reasoning = normalize_reasoning_tokens(usage.completion_tokens_details)
+                if reasoning > 0:
+                    step.reasoning_tokens = reasoning
 
             # context manager now handles commits
             # await session.commit()