From 94376a323395c7908bc6bcdf57d1ebdedec03edb Mon Sep 17 00:00:00 2001
From: Devansh Jain <31609257+devanshrj@users.noreply.github.com>
Date: Wed, 11 Feb 2026 12:53:05 -0800
Subject: [PATCH] chore: Track all token usage metrics (#916)

---
 src/agent/stats.ts                      | 21 +++++--
 src/cli/App.tsx                         |  5 ++
 src/cli/components/SessionStats.tsx     |  6 ++
 src/cli/helpers/accumulator.ts          | 41 ++++++++++++--
 src/headless.ts                         | 29 +++++-----
 src/telemetry/index.ts                  | 20 +++++--
 src/tests/cli/accumulator-usage.test.ts | 75 +++++++++++++++++++++++++
 7 files changed, 171 insertions(+), 26 deletions(-)
 create mode 100644 src/tests/cli/accumulator-usage.test.ts

diff --git a/src/agent/stats.ts b/src/agent/stats.ts
index 968e9e1..38f81ac 100644
--- a/src/agent/stats.ts
+++ b/src/agent/stats.ts
@@ -4,8 +4,10 @@ export interface UsageStats {
   promptTokens: number;
   completionTokens: number;
   totalTokens: number;
-  cachedTokens: number;
+  cachedInputTokens: number;
+  cacheWriteTokens: number;
   reasoningTokens: number;
+  contextTokens?: number;
   stepCount: number;
 }
 
@@ -47,8 +49,10 @@
       promptTokens: 0,
       completionTokens: 0,
       totalTokens: 0,
-      cachedTokens: 0,
+      cachedInputTokens: 0,
+      cacheWriteTokens: 0,
       reasoningTokens: 0,
+      contextTokens: undefined,
       stepCount: 0,
     };
     this.lastUsageSnapshot = { ...this.usage };
@@ -78,14 +82,19 @@
         nextUsage.completionTokens - prevUsage.completionTokens,
       ),
       totalTokens: Math.max(0, nextUsage.totalTokens - prevUsage.totalTokens),
-      cachedTokens: Math.max(
+      cachedInputTokens: Math.max(
         0,
-        nextUsage.cachedTokens - prevUsage.cachedTokens,
+        nextUsage.cachedInputTokens - prevUsage.cachedInputTokens,
+      ),
+      cacheWriteTokens: Math.max(
+        0,
+        nextUsage.cacheWriteTokens - prevUsage.cacheWriteTokens,
       ),
       reasoningTokens: Math.max(
         0,
         nextUsage.reasoningTokens - prevUsage.reasoningTokens,
       ),
+      contextTokens: nextUsage.contextTokens,
       stepCount: Math.max(0, nextUsage.stepCount - prevUsage.stepCount),
     };
 
@@ -172,8 +181,10 @@
       promptTokens: 0,
       completionTokens: 0,
       totalTokens: 0,
-      cachedTokens: 0,
+      cachedInputTokens: 0,
+      cacheWriteTokens: 0,
       reasoningTokens: 0,
+      contextTokens: undefined,
       stepCount: 0,
     };
     this.lastUsageSnapshot = { ...this.usage };
diff --git a/src/cli/App.tsx b/src/cli/App.tsx
index a4ed98f..c449872 100644
--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -9353,6 +9353,11 @@ ${SYSTEM_REMINDER_CLOSE}
             step_count: stats.usage.stepCount,
             prompt_tokens: stats.usage.promptTokens,
             completion_tokens: stats.usage.completionTokens,
+            total_tokens: stats.usage.totalTokens,
+            cached_input_tokens: stats.usage.cachedInputTokens,
+            cache_write_tokens: stats.usage.cacheWriteTokens,
+            reasoning_tokens: stats.usage.reasoningTokens,
+            context_tokens: stats.usage.contextTokens,
           };
         })(),
         agent_info: {
diff --git a/src/cli/components/SessionStats.tsx b/src/cli/components/SessionStats.tsx
index 7a4857c..6c28324 100644
--- a/src/cli/components/SessionStats.tsx
+++ b/src/cli/components/SessionStats.tsx
@@ -47,6 +47,12 @@ export function formatUsageStats({
     `Total duration (API): ${formatDuration(stats.totalApiMs)}`,
     `Total duration (wall): ${formatDuration(stats.totalWallMs)}`,
     `Session usage: ${stats.usage.stepCount} steps, ${formatCompact(stats.usage.promptTokens)} input, ${formatCompact(stats.usage.completionTokens)} output`,
+    `Token details: ${formatCompact(stats.usage.totalTokens)} total, ${formatCompact(stats.usage.cachedInputTokens)} cached_input, ${formatCompact(stats.usage.cacheWriteTokens)} cache_write, ${formatCompact(stats.usage.reasoningTokens)} reasoning`,
+    ...(stats.usage.contextTokens !== undefined
+      ? [
+          `Latest context: ${formatCompact(stats.usage.contextTokens)} tokens`,
+        ]
+      : []),
     "",
   ];
 
diff --git a/src/cli/helpers/accumulator.ts b/src/cli/helpers/accumulator.ts
index 11ac715..340dec8 100644
--- a/src/cli/helpers/accumulator.ts
+++ b/src/cli/helpers/accumulator.ts
@@ -220,8 +220,10 @@ export type Buffers = {
     promptTokens: number;
     completionTokens: number;
     totalTokens: number;
-    cachedTokens: number;
+    cachedInputTokens: number;
+    cacheWriteTokens: number;
     reasoningTokens: number;
+    contextTokens?: number;
     stepCount: number;
   };
   // Aggressive static promotion: split streaming content at paragraph boundaries
@@ -249,7 +251,8 @@ export function createBuffers(agentId?: string): Buffers {
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
-     cachedTokens: 0,
+     cachedInputTokens: 0,
+     cacheWriteTokens: 0,
      reasoningTokens: 0,
      stepCount: 0,
    },
@@ -807,10 +810,40 @@ export function onChunk(
     if (chunk.total_tokens !== undefined) {
       b.usage.totalTokens += chunk.total_tokens;
     }
+    if (
+      chunk.cached_input_tokens !== undefined &&
+      chunk.cached_input_tokens !== null
+    ) {
+      b.usage.cachedInputTokens += chunk.cached_input_tokens;
+    }
+    if (
+      chunk.cache_write_tokens !== undefined &&
+      chunk.cache_write_tokens !== null
+    ) {
+      b.usage.cacheWriteTokens += chunk.cache_write_tokens;
+    }
+    if (
+      chunk.reasoning_tokens !== undefined &&
+      chunk.reasoning_tokens !== null
+    ) {
+      b.usage.reasoningTokens += chunk.reasoning_tokens;
+    }
+    const usageChunk = chunk as typeof chunk & {
+      context_tokens?: number | null;
+    };
+    if (
+      usageChunk.context_tokens !== undefined &&
+      usageChunk.context_tokens !== null
+    ) {
+      // context_tokens is a snapshot metric, not additive.
+      b.usage.contextTokens = usageChunk.context_tokens;
+    }
     // Use context_tokens from SDK (estimate of tokens in context window)
     if (ctx) {
-      const usageChunk = chunk as typeof chunk & { context_tokens?: number };
-      if (usageChunk.context_tokens !== undefined) {
+      if (
+        usageChunk.context_tokens !== undefined &&
+        usageChunk.context_tokens !== null
+      ) {
         ctx.lastContextTokens = usageChunk.context_tokens;
         // Track history for time-series display
         const compacted = ctx.pendingCompaction;
diff --git a/src/headless.ts b/src/headless.ts
index 4799399..f1c60ff 100644
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -1860,9 +1860,22 @@ ${SYSTEM_REMINDER_CLOSE}
       lastToolResult?.resultText ||
       "No assistant response found";
 
+    const stats = sessionStats.getSnapshot();
+    const usage = {
+      prompt_tokens: stats.usage.promptTokens,
+      completion_tokens: stats.usage.completionTokens,
+      total_tokens: stats.usage.totalTokens,
+      step_count: stats.usage.stepCount,
+      cached_input_tokens: stats.usage.cachedInputTokens,
+      cache_write_tokens: stats.usage.cacheWriteTokens,
+      reasoning_tokens: stats.usage.reasoningTokens,
+      ...(stats.usage.contextTokens !== undefined && {
+        context_tokens: stats.usage.contextTokens,
+      }),
+    };
+
     // Output based on format
     if (outputFormat === "json") {
-      const stats = sessionStats.getSnapshot();
       const output = {
         type: "result",
         subtype: "success",
@@ -1873,17 +1886,11 @@
         result: resultText,
         agent_id: agent.id,
         conversation_id: conversationId,
-        usage: {
-          prompt_tokens: stats.usage.promptTokens,
-          completion_tokens: stats.usage.completionTokens,
-          total_tokens: stats.usage.totalTokens,
-        },
+        usage,
       };
       console.log(JSON.stringify(output, null, 2));
     } else if (outputFormat === "stream-json") {
       // Output final result event
-      const stats = sessionStats.getSnapshot();
-
       // Collect all run_ids from buffers
       const allRunIds = new Set();
       for (const line of toLines(buffers)) {
@@ -1910,11 +1917,7 @@
         agent_id: agent.id,
         conversation_id: conversationId,
         run_ids: Array.from(allRunIds),
-        usage: {
-          prompt_tokens: stats.usage.promptTokens,
-          completion_tokens: stats.usage.completionTokens,
-          total_tokens: stats.usage.totalTokens,
-        },
+        usage,
         uuid: resultUuid,
       };
       console.log(JSON.stringify(resultEvent));
diff --git a/src/telemetry/index.ts b/src/telemetry/index.ts
index 3f0075a..0b75fcf 100644
--- a/src/telemetry/index.ts
+++ b/src/telemetry/index.ts
@@ -24,8 +24,11 @@ export interface SessionEndData {
   prompt_tokens?: number;
   completion_tokens?: number;
   total_tokens?: number;
+  cached_input_tokens?: number;
   cached_tokens?: number;
+  cache_write_tokens?: number;
   reasoning_tokens?: number;
+  context_tokens?: number;
   step_count?: number;
 }
 
@@ -74,8 +77,10 @@ class TelemetryManager {
      promptTokens: number;
      completionTokens: number;
      totalTokens: number;
-     cachedTokens: number;
+     cachedInputTokens: number;
+     cacheWriteTokens: number;
      reasoningTokens: number;
+     contextTokens?: number;
      stepCount: number;
    };
  };
@@ -213,8 +218,10 @@ class TelemetryManager {
        promptTokens: number;
        completionTokens: number;
        totalTokens: number;
-       cachedTokens: number;
+       cachedInputTokens: number;
+       cacheWriteTokens: number;
        reasoningTokens: number;
+       contextTokens?: number;
        stepCount: number;
      };
    },
@@ -267,8 +274,10 @@ class TelemetryManager {
        promptTokens: number;
        completionTokens: number;
        totalTokens: number;
-       cachedTokens: number;
+       cachedInputTokens: number;
+       cacheWriteTokens: number;
        reasoningTokens: number;
+       contextTokens?: number;
        stepCount: number;
      };
    },
@@ -302,8 +311,11 @@ class TelemetryManager {
       prompt_tokens: sessionStats?.usage.promptTokens,
       completion_tokens: sessionStats?.usage.completionTokens,
       total_tokens: sessionStats?.usage.totalTokens,
-      cached_tokens: sessionStats?.usage.cachedTokens,
+      cached_input_tokens: sessionStats?.usage.cachedInputTokens,
+      cached_tokens: sessionStats?.usage.cachedInputTokens,
+      cache_write_tokens: sessionStats?.usage.cacheWriteTokens,
       reasoning_tokens: sessionStats?.usage.reasoningTokens,
+      context_tokens: sessionStats?.usage.contextTokens,
       step_count: sessionStats?.usage.stepCount,
     };
     this.track("session_end", data);
diff --git a/src/tests/cli/accumulator-usage.test.ts b/src/tests/cli/accumulator-usage.test.ts
new file mode 100644
index 0000000..45da7d9
--- /dev/null
+++ b/src/tests/cli/accumulator-usage.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, test } from "bun:test";
+import type { LettaStreamingResponse } from "@letta-ai/letta-client/resources/agents/messages";
+import { createBuffers, onChunk } from "../../cli/helpers/accumulator";
+
+function usageChunk(
+  fields: Record<string, unknown>,
+): LettaStreamingResponse {
+  return {
+    message_type: "usage_statistics",
+    ...fields,
+  } as LettaStreamingResponse;
+}
+
+describe("accumulator usage statistics", () => {
+  test("captures all LettaUsageStatistics token metrics", () => {
+    const buffers = createBuffers();
+
+    onChunk(
+      buffers,
+      usageChunk({
+        prompt_tokens: 100,
+        completion_tokens: 20,
+        total_tokens: 120,
+        step_count: 1,
+        cached_input_tokens: 60,
+        cache_write_tokens: 11,
+        reasoning_tokens: 7,
+        context_tokens: 512,
+      }),
+    );
+
+    onChunk(
+      buffers,
+      usageChunk({
+        prompt_tokens: 40,
+        completion_tokens: 8,
+        total_tokens: 48,
+        step_count: 2,
+        cached_input_tokens: 5,
+        cache_write_tokens: 3,
+        reasoning_tokens: 2,
+        context_tokens: 640,
+      }),
+    );
+
+    expect(buffers.usage.promptTokens).toBe(140);
+    expect(buffers.usage.completionTokens).toBe(28);
+    expect(buffers.usage.totalTokens).toBe(168);
+    expect(buffers.usage.stepCount).toBe(3);
+    expect(buffers.usage.cachedInputTokens).toBe(65);
+    expect(buffers.usage.cacheWriteTokens).toBe(14);
+    expect(buffers.usage.reasoningTokens).toBe(9);
+    // context_tokens is a snapshot value, so we keep the latest one.
+    expect(buffers.usage.contextTokens).toBe(640);
+  });
+
+  test("ignores null optional token metrics", () => {
+    const buffers = createBuffers();
+
+    onChunk(
+      buffers,
+      usageChunk({
+        cached_input_tokens: null,
+        cache_write_tokens: null,
+        reasoning_tokens: null,
+        context_tokens: null,
+      }),
+    );
+
+    expect(buffers.usage.cachedInputTokens).toBe(0);
+    expect(buffers.usage.cacheWriteTokens).toBe(0);
+    expect(buffers.usage.reasoningTokens).toBe(0);
+    expect(buffers.usage.contextTokens).toBeUndefined();
+  });
+});
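
Note for reviewers: the patch applies two different accounting rules. The token counters (prompt, completion, total, cached_input, cache_write, reasoning) are additive, so onChunk accumulates every non-null value across usage chunks; context_tokens is a point-in-time snapshot of the context window, so the accumulator overwrites it instead of summing, and SessionStats carries the latest value through rather than computing a delta. The per-turn deltas for the additive counters are clamped with Math.max(0, next - prev) so a mid-session counter reset can never produce a negative number. A minimal standalone TypeScript sketch of both rules (the Usage type and helper names below are illustrative, not the patched module itself):

type Usage = { totalTokens: number; contextTokens?: number };

function applyChunk(
  usage: Usage,
  chunk: { total_tokens?: number | null; context_tokens?: number | null },
): void {
  // Additive metric: accumulate every non-null report.
  if (chunk.total_tokens !== undefined && chunk.total_tokens !== null) {
    usage.totalTokens += chunk.total_tokens;
  }
  // Snapshot metric: keep only the most recent non-null report.
  if (chunk.context_tokens !== undefined && chunk.context_tokens !== null) {
    usage.contextTokens = chunk.context_tokens;
  }
}

function turnDelta(prev: Usage, next: Usage): Usage {
  return {
    // Clamp so a counter reset never yields a negative delta.
    totalTokens: Math.max(0, next.totalTokens - prev.totalTokens),
    // Deltas are meaningless for snapshots; pass the latest value through.
    contextTokens: next.contextTokens,
  };
}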
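For consumers of the headless "json" output, the result object's usage field now carries the full breakdown. A sketch of reading it, assuming you already have the CLI's stdout as a string; the RunUsage/RunResult types and the cacheHitRate helper are hypothetical, only the field names and shape come from this patch:

interface RunUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
  step_count: number;
  cached_input_tokens: number;
  cache_write_tokens: number;
  reasoning_tokens: number;
  context_tokens?: number; // omitted when no snapshot was observed
}

interface RunResult {
  type: "result";
  subtype: "success";
  result: string;
  agent_id: string;
  conversation_id: string;
  usage: RunUsage;
}

function cacheHitRate(stdout: string): number {
  const { usage } = JSON.parse(stdout) as RunResult;
  // Fraction of input tokens served from the provider's prompt cache.
  return usage.prompt_tokens > 0
    ? usage.cached_input_tokens / usage.prompt_tokens
    : 0;
}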