From e0a23f7039ec8de5bed20d3dfd4ca04cdb1a4d03 Mon Sep 17 00:00:00 2001 From: Sarah Wooders Date: Wed, 4 Feb 2026 12:24:52 -0800 Subject: [PATCH] feat: add usage columns to steps table (#9270) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add usage columns to steps table Adds denormalized usage fields to the steps table for easier querying: - model_handle: The model handle (e.g., "openai/gpt-4o-mini") - cached_input_tokens: Tokens served from cache - cache_write_tokens: Tokens written to cache (Anthropic) - reasoning_tokens: Reasoning/thinking tokens These fields mirror LettaUsageStatistics and are extracted from the existing prompt_tokens_details and completion_tokens_details JSON columns. 🤖 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta * chore: regenerate OpenAPI specs and SDK for usage columns 🤖 Generated with [Letta Code](https://letta.com) Co-authored-by: Sarah Wooders --------- Co-authored-by: Letta Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> Co-authored-by: Sarah Wooders --- ...3e54e2fa2f7e_add_usage_columns_to_steps.py | 33 +++++++++++++ fern/openapi.json | 48 +++++++++++++++++++ letta/agents/letta_agent.py | 3 ++ letta/agents/letta_agent_v2.py | 1 + letta/orm/step.py | 12 +++++ letta/schemas/step.py | 10 ++++ letta/services/step_manager.py | 38 +++++++++++++++ 7 files changed, 145 insertions(+) create mode 100644 alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py diff --git a/alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py b/alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py new file mode 100644 index 00000000..997d0d80 --- /dev/null +++ b/alembic/versions/3e54e2fa2f7e_add_usage_columns_to_steps.py @@ -0,0 +1,33 @@ +"""add_usage_columns_to_steps + +Revision ID: 3e54e2fa2f7e +Revises: a1b2c3d4e5f8 +Create Date: 2026-02-03 16:35:51.327031 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "3e54e2fa2f7e" +down_revision: Union[str, None] = "a1b2c3d4e5f8" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column("steps", sa.Column("model_handle", sa.String(), nullable=True)) + op.add_column("steps", sa.Column("cached_input_tokens", sa.Integer(), nullable=True)) + op.add_column("steps", sa.Column("cache_write_tokens", sa.Integer(), nullable=True)) + op.add_column("steps", sa.Column("reasoning_tokens", sa.Integer(), nullable=True)) + + +def downgrade() -> None: + op.drop_column("steps", "reasoning_tokens") + op.drop_column("steps", "cache_write_tokens") + op.drop_column("steps", "cached_input_tokens") + op.drop_column("steps", "model_handle") diff --git a/fern/openapi.json b/fern/openapi.json index 18c2b474..3230ece0 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -42460,6 +42460,18 @@ "title": "Model", "description": "The name of the model used for this step." }, + "model_handle": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Model Handle", + "description": "The model handle (e.g., 'openai/gpt-4o-mini') used for this step." + }, "model_endpoint": { "anyOf": [ { @@ -42520,6 +42532,42 @@ "title": "Total Tokens", "description": "The total number of tokens processed by the agent during this step." }, + "cached_input_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cached Input Tokens", + "description": "The number of input tokens served from cache. None if not reported by provider." + }, + "cache_write_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Cache Write Tokens", + "description": "The number of input tokens written to cache (Anthropic only). None if not reported by provider." + }, + "reasoning_tokens": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Reasoning Tokens", + "description": "The number of reasoning/thinking tokens generated. None if not reported by provider." + }, "completion_tokens_details": { "anyOf": [ { diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py index 8460a46a..246f6d3f 100644 --- a/letta/agents/letta_agent.py +++ b/letta/agents/letta_agent.py @@ -311,6 +311,7 @@ class LettaAgent(BaseAgent): step_id=step_id, project_id=agent_state.project_id, status=StepStatus.PENDING, + model_handle=agent_state.llm_config.handle, ) # Only use step_id in messages if step was actually created effective_step_id = step_id if logged_step else None @@ -645,6 +646,7 @@ class LettaAgent(BaseAgent): step_id=step_id, project_id=agent_state.project_id, status=StepStatus.PENDING, + model_handle=agent_state.llm_config.handle, ) # Only use step_id in messages if step was actually created effective_step_id = step_id if logged_step else None @@ -981,6 +983,7 @@ class LettaAgent(BaseAgent): step_id=step_id, project_id=agent_state.project_id, status=StepStatus.PENDING, + model_handle=agent_state.llm_config.handle, ) # Only use step_id in messages if step was actually created effective_step_id = step_id if logged_step else None diff --git a/letta/agents/letta_agent_v2.py b/letta/agents/letta_agent_v2.py index 85e6b86c..657ac79d 100644 --- a/letta/agents/letta_agent_v2.py +++ b/letta/agents/letta_agent_v2.py @@ -873,6 +873,7 @@ class LettaAgentV2(BaseAgentV2): step_id=step_id, project_id=self.agent_state.project_id, status=StepStatus.PENDING, + model_handle=self.agent_state.llm_config.handle, ) # Also create step metrics early and update at the end of the step diff --git a/letta/orm/step.py b/letta/orm/step.py index 13d6d552..64f0353a 100644 --- a/letta/orm/step.py +++ b/letta/orm/step.py @@ -43,6 +43,9 @@ class Step(SqlalchemyBase, ProjectMixin): provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.") provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.") model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.") + model_handle: Mapped[Optional[str]] = mapped_column( + None, nullable=True, doc="The model handle (e.g., 'openai/gpt-4o-mini') used for this step." + ) model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.") context_window_limit: Mapped[Optional[int]] = mapped_column( None, nullable=True, doc="The context window limit configured for this step." @@ -50,6 +53,15 @@ class Step(SqlalchemyBase, ProjectMixin): completion_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens generated by the agent") prompt_tokens: Mapped[int] = mapped_column(default=0, doc="Number of tokens in the prompt") total_tokens: Mapped[int] = mapped_column(default=0, doc="Total number of tokens processed by the agent") + cached_input_tokens: Mapped[Optional[int]] = mapped_column( + None, nullable=True, doc="Number of input tokens served from cache. None if not reported by provider." + ) + cache_write_tokens: Mapped[Optional[int]] = mapped_column( + None, nullable=True, doc="Number of input tokens written to cache (Anthropic only). None if not reported by provider." + ) + reasoning_tokens: Mapped[Optional[int]] = mapped_column( + None, nullable=True, doc="Number of reasoning/thinking tokens generated. None if not reported by provider." + ) completion_tokens_details: Mapped[Optional[Dict]] = mapped_column( JSON, nullable=True, doc="Detailed completion token breakdown (e.g., reasoning_tokens)." ) diff --git a/letta/schemas/step.py b/letta/schemas/step.py index 83126a96..0c525547 100644 --- a/letta/schemas/step.py +++ b/letta/schemas/step.py @@ -25,11 +25,21 @@ class Step(StepBase): provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.") provider_category: Optional[str] = Field(None, description="The category of the provider used for this step.") model: Optional[str] = Field(None, description="The name of the model used for this step.") + model_handle: Optional[str] = Field(None, description="The model handle (e.g., 'openai/gpt-4o-mini') used for this step.") model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.") context_window_limit: Optional[int] = Field(None, description="The context window limit configured for this step.") completion_tokens: Optional[int] = Field(None, description="The number of tokens generated by the agent during this step.") prompt_tokens: Optional[int] = Field(None, description="The number of tokens in the prompt during this step.") total_tokens: Optional[int] = Field(None, description="The total number of tokens processed by the agent during this step.") + cached_input_tokens: Optional[int] = Field( + None, description="The number of input tokens served from cache. None if not reported by provider." + ) + cache_write_tokens: Optional[int] = Field( + None, description="The number of input tokens written to cache (Anthropic only). None if not reported by provider." + ) + reasoning_tokens: Optional[int] = Field( + None, description="The number of reasoning/thinking tokens generated. None if not reported by provider." + ) completion_tokens_details: Optional[Dict] = Field(None, description="Detailed completion token breakdown (e.g., reasoning_tokens).") prompt_tokens_details: Optional[Dict] = Field( None, description="Detailed prompt token breakdown (e.g., cached_tokens, cache_read_tokens, cache_creation_tokens)." diff --git a/letta/services/step_manager.py b/letta/services/step_manager.py index 92733e75..80db383c 100644 --- a/letta/services/step_manager.py +++ b/letta/services/step_manager.py @@ -19,6 +19,7 @@ from letta.schemas.message import Message as PydanticMessage from letta.schemas.openai.chat_completion_response import UsageStatistics from letta.schemas.step import Step as PydanticStep from letta.schemas.step_metrics import StepMetrics as PydanticStepMetrics +from letta.schemas.usage import normalize_cache_tokens, normalize_reasoning_tokens from letta.schemas.user import User as PydanticUser from letta.server.db import db_registry from letta.server.rest_api.middleware.request_id import get_request_id @@ -107,6 +108,26 @@ class StepManager: error_type: Optional[str] = None, error_data: Optional[Dict] = None, ) -> PydanticStep: + # Extract normalized usage fields + cached_input_tokens = None + cache_write_tokens = None + reasoning_tokens = None + prompt_tokens_details = None + completion_tokens_details = None + + if usage.prompt_tokens_details: + prompt_tokens_details = usage.prompt_tokens_details.model_dump() + cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details) + if cached_input > 0: + cached_input_tokens = cached_input + if cache_write > 0: + cache_write_tokens = cache_write + if usage.completion_tokens_details: + completion_tokens_details = usage.completion_tokens_details.model_dump() + reasoning = normalize_reasoning_tokens(usage.completion_tokens_details) + if reasoning > 0: + reasoning_tokens = reasoning + step_data = { "origin": None, "organization_id": actor.organization_id, @@ -115,11 +136,17 @@ class StepManager: "provider_name": provider_name, "provider_category": provider_category, "model": model, + "model_handle": model_handle, "model_endpoint": model_endpoint, "context_window_limit": context_window_limit, "completion_tokens": usage.completion_tokens, "prompt_tokens": usage.prompt_tokens, "total_tokens": usage.total_tokens, + "cached_input_tokens": cached_input_tokens, + "cache_write_tokens": cache_write_tokens, + "reasoning_tokens": reasoning_tokens, + "prompt_tokens_details": prompt_tokens_details, + "completion_tokens_details": completion_tokens_details, "run_id": run_id, "tags": [], "tid": None, @@ -166,6 +193,7 @@ class StepManager: error_type: Optional[str] = None, error_data: Optional[Dict] = None, allow_partial: Optional[bool] = False, + model_handle: Optional[str] = None, ) -> PydanticStep: step_data = { "origin": None, @@ -416,8 +444,18 @@ class StepManager: # Persist detailed token breakdowns if available if usage.prompt_tokens_details: step.prompt_tokens_details = usage.prompt_tokens_details.model_dump() + # Extract normalized cache tokens + cached_input, cache_write = normalize_cache_tokens(usage.prompt_tokens_details) + if cached_input > 0: + step.cached_input_tokens = cached_input + if cache_write > 0: + step.cache_write_tokens = cache_write if usage.completion_tokens_details: step.completion_tokens_details = usage.completion_tokens_details.model_dump() + # Extract normalized reasoning tokens + reasoning = normalize_reasoning_tokens(usage.completion_tokens_details) + if reasoning > 0: + step.reasoning_tokens = reasoning # context manager now handles commits # await session.commit()