chore: Track all token usage metrics (#916)
@@ -4,8 +4,10 @@ export interface UsageStats {
   promptTokens: number;
   completionTokens: number;
   totalTokens: number;
-  cachedTokens: number;
+  cachedInputTokens: number;
+  cacheWriteTokens: number;
   reasoningTokens: number;
+  contextTokens?: number;
   stepCount: number;
 }

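Reviewer note: for orientation, a sketch of the updated interface with interpretive comments on what each counter tracks; the comments are my reading of the hunks below, not part of the change:

```ts
export interface UsageStats {
  promptTokens: number; // input tokens sent to the model (additive across steps)
  completionTokens: number; // output tokens generated (additive)
  totalTokens: number; // provider-reported total (additive)
  cachedInputTokens: number; // prompt tokens served from cache; replaces cachedTokens
  cacheWriteTokens: number; // tokens written into the provider's prompt cache
  reasoningTokens: number; // hidden reasoning tokens (tracked before this change)
  contextTokens?: number; // latest context-window estimate; a snapshot, not a running sum
  stepCount: number; // number of agent steps observed
}
```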
@@ -47,8 +49,10 @@ export class SessionStats {
       promptTokens: 0,
       completionTokens: 0,
       totalTokens: 0,
-      cachedTokens: 0,
+      cachedInputTokens: 0,
+      cacheWriteTokens: 0,
       reasoningTokens: 0,
+      contextTokens: undefined,
       stepCount: 0,
     };
     this.lastUsageSnapshot = { ...this.usage };

@@ -78,14 +82,19 @@ export class SessionStats {
         nextUsage.completionTokens - prevUsage.completionTokens,
       ),
       totalTokens: Math.max(0, nextUsage.totalTokens - prevUsage.totalTokens),
-      cachedTokens: Math.max(
+      cachedInputTokens: Math.max(
         0,
-        nextUsage.cachedTokens - prevUsage.cachedTokens,
+        nextUsage.cachedInputTokens - prevUsage.cachedInputTokens,
       ),
+      cacheWriteTokens: Math.max(
+        0,
+        nextUsage.cacheWriteTokens - prevUsage.cacheWriteTokens,
+      ),
       reasoningTokens: Math.max(
         0,
         nextUsage.reasoningTokens - prevUsage.reasoningTokens,
       ),
+      contextTokens: nextUsage.contextTokens,
       stepCount: Math.max(0, nextUsage.stepCount - prevUsage.stepCount),
     };

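The delta logic above clamps each difference at zero so a counter reset on the server can never produce a negative per-turn delta, while contextTokens is carried through unchanged because it is a point-in-time reading. A minimal standalone sketch of the same pattern, assuming the UsageStats shape from the first hunk (the function name `usageDelta` is illustrative, not from the source):

```ts
function usageDelta(prev: UsageStats, next: UsageStats): UsageStats {
  // Clamp at zero so a reset or out-of-order snapshot never yields a negative delta.
  const clamp = (n: number) => Math.max(0, n);
  return {
    promptTokens: clamp(next.promptTokens - prev.promptTokens),
    completionTokens: clamp(next.completionTokens - prev.completionTokens),
    totalTokens: clamp(next.totalTokens - prev.totalTokens),
    cachedInputTokens: clamp(next.cachedInputTokens - prev.cachedInputTokens),
    cacheWriteTokens: clamp(next.cacheWriteTokens - prev.cacheWriteTokens),
    reasoningTokens: clamp(next.reasoningTokens - prev.reasoningTokens),
    contextTokens: next.contextTokens, // snapshot: pass through, do not difference
    stepCount: clamp(next.stepCount - prev.stepCount),
  };
}
```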
@@ -172,8 +181,10 @@ export class SessionStats {
       promptTokens: 0,
       completionTokens: 0,
       totalTokens: 0,
-      cachedTokens: 0,
+      cachedInputTokens: 0,
+      cacheWriteTokens: 0,
       reasoningTokens: 0,
+      contextTokens: undefined,
       stepCount: 0,
     };
     this.lastUsageSnapshot = { ...this.usage };

@@ -9353,6 +9353,11 @@ ${SYSTEM_REMINDER_CLOSE}
+            step_count: stats.usage.stepCount,
             prompt_tokens: stats.usage.promptTokens,
             completion_tokens: stats.usage.completionTokens,
             total_tokens: stats.usage.totalTokens,
+            cached_input_tokens: stats.usage.cachedInputTokens,
+            cache_write_tokens: stats.usage.cacheWriteTokens,
+            reasoning_tokens: stats.usage.reasoningTokens,
+            context_tokens: stats.usage.contextTokens,
           };
         })(),
         agent_info: {

@@ -47,6 +47,12 @@ export function formatUsageStats({
     `Total duration (API): ${formatDuration(stats.totalApiMs)}`,
     `Total duration (wall): ${formatDuration(stats.totalWallMs)}`,
     `Session usage: ${stats.usage.stepCount} steps, ${formatCompact(stats.usage.promptTokens)} input, ${formatCompact(stats.usage.completionTokens)} output`,
+    `Token details: ${formatCompact(stats.usage.totalTokens)} total, ${formatCompact(stats.usage.cachedInputTokens)} cached_input, ${formatCompact(stats.usage.cacheWriteTokens)} cache_write, ${formatCompact(stats.usage.reasoningTokens)} reasoning`,
+    ...(stats.usage.contextTokens !== undefined
+      ? [
+          `Latest context: ${formatCompact(stats.usage.contextTokens)} tokens`,
+        ]
+      : []),
     "",
   ];

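For a sense of the resulting stats block, and assuming formatCompact abbreviates counts (e.g. 12300 rendered as 12.3k) and formatDuration renders seconds, the new lines would come out roughly like this (illustrative values only, not from the source):

```
Total duration (API): 12.4s
Total duration (wall): 18.2s
Session usage: 3 steps, 12.3k input, 1.8k output
Token details: 14.1k total, 9.2k cached_input, 1.1k cache_write, 640 reasoning
Latest context: 8.5k tokens
```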
@@ -220,8 +220,10 @@ export type Buffers = {
     promptTokens: number;
     completionTokens: number;
     totalTokens: number;
-    cachedTokens: number;
+    cachedInputTokens: number;
+    cacheWriteTokens: number;
     reasoningTokens: number;
+    contextTokens?: number;
     stepCount: number;
   };
   // Aggressive static promotion: split streaming content at paragraph boundaries

@@ -249,7 +251,8 @@ export function createBuffers(agentId?: string): Buffers {
       promptTokens: 0,
       completionTokens: 0,
       totalTokens: 0,
-      cachedTokens: 0,
+      cachedInputTokens: 0,
+      cacheWriteTokens: 0,
       reasoningTokens: 0,
       stepCount: 0,
     },

@@ -807,10 +810,40 @@ export function onChunk(
   if (chunk.total_tokens !== undefined) {
     b.usage.totalTokens += chunk.total_tokens;
   }
+  if (
+    chunk.cached_input_tokens !== undefined &&
+    chunk.cached_input_tokens !== null
+  ) {
+    b.usage.cachedInputTokens += chunk.cached_input_tokens;
+  }
+  if (
+    chunk.cache_write_tokens !== undefined &&
+    chunk.cache_write_tokens !== null
+  ) {
+    b.usage.cacheWriteTokens += chunk.cache_write_tokens;
+  }
+  if (
+    chunk.reasoning_tokens !== undefined &&
+    chunk.reasoning_tokens !== null
+  ) {
+    b.usage.reasoningTokens += chunk.reasoning_tokens;
+  }
+  const usageChunk = chunk as typeof chunk & {
+    context_tokens?: number | null;
+  };
+  if (
+    usageChunk.context_tokens !== undefined &&
+    usageChunk.context_tokens !== null
+  ) {
+    // context_tokens is a snapshot metric, not additive.
+    b.usage.contextTokens = usageChunk.context_tokens;
+  }
   // Use context_tokens from SDK (estimate of tokens in context window)
   if (ctx) {
-    const usageChunk = chunk as typeof chunk & { context_tokens?: number };
-    if (usageChunk.context_tokens !== undefined) {
+    if (
+      usageChunk.context_tokens !== undefined &&
+      usageChunk.context_tokens !== null
+    ) {
       ctx.lastContextTokens = usageChunk.context_tokens;
       // Track history for time-series display
       const compacted = ctx.pendingCompaction;

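The accumulator distinguishes two kinds of metrics: additive counters, which are summed whenever a usage chunk carries a non-null value, and the context_tokens snapshot, which is last-write-wins. A self-contained sketch of that split (the chunk shape and the name `accumulateUsage` are assumptions for illustration):

```ts
type UsageChunkFields = {
  prompt_tokens?: number | null;
  cached_input_tokens?: number | null;
  context_tokens?: number | null;
};

function accumulateUsage(
  usage: { promptTokens: number; cachedInputTokens: number; contextTokens?: number },
  chunk: UsageChunkFields,
): void {
  // Additive counters: add every non-null reading. `!= null` rejects both
  // null and undefined, a compact equivalent of the explicit checks above.
  if (chunk.prompt_tokens != null) usage.promptTokens += chunk.prompt_tokens;
  if (chunk.cached_input_tokens != null) usage.cachedInputTokens += chunk.cached_input_tokens;
  // Snapshot metric: overwrite with the latest reading instead of summing.
  if (chunk.context_tokens != null) usage.contextTokens = chunk.context_tokens;
}
```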
@@ -1860,9 +1860,22 @@ ${SYSTEM_REMINDER_CLOSE}
     lastToolResult?.resultText ||
     "No assistant response found";
 
+  const stats = sessionStats.getSnapshot();
+  const usage = {
+    prompt_tokens: stats.usage.promptTokens,
+    completion_tokens: stats.usage.completionTokens,
+    total_tokens: stats.usage.totalTokens,
+    step_count: stats.usage.stepCount,
+    cached_input_tokens: stats.usage.cachedInputTokens,
+    cache_write_tokens: stats.usage.cacheWriteTokens,
+    reasoning_tokens: stats.usage.reasoningTokens,
+    ...(stats.usage.contextTokens !== undefined && {
+      context_tokens: stats.usage.contextTokens,
+    }),
+  };
+
   // Output based on format
   if (outputFormat === "json") {
-    const stats = sessionStats.getSnapshot();
     const output = {
       type: "result",
       subtype: "success",

@@ -1873,17 +1886,11 @@ ${SYSTEM_REMINDER_CLOSE}
       result: resultText,
       agent_id: agent.id,
       conversation_id: conversationId,
-      usage: {
-        prompt_tokens: stats.usage.promptTokens,
-        completion_tokens: stats.usage.completionTokens,
-        total_tokens: stats.usage.totalTokens,
-      },
+      usage,
     };
     console.log(JSON.stringify(output, null, 2));
   } else if (outputFormat === "stream-json") {
     // Output final result event
-    const stats = sessionStats.getSnapshot();
 
     // Collect all run_ids from buffers
     const allRunIds = new Set<string>();
     for (const line of toLines(buffers)) {

@@ -1910,11 +1917,7 @@ ${SYSTEM_REMINDER_CLOSE}
       agent_id: agent.id,
       conversation_id: conversationId,
       run_ids: Array.from(allRunIds),
-      usage: {
-        prompt_tokens: stats.usage.promptTokens,
-        completion_tokens: stats.usage.completionTokens,
-        total_tokens: stats.usage.totalTokens,
-      },
+      usage,
       uuid: resultUuid,
     };
     console.log(JSON.stringify(resultEvent));

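Both output formats now share the hoisted usage object, so the json and stream-json results can no longer drift apart. With values matching the test fixtures at the bottom of this commit, the emitted block would look something like the following; note that context_tokens is omitted entirely when no snapshot was seen, thanks to the conditional spread:

```json
{
  "type": "result",
  "subtype": "success",
  "usage": {
    "prompt_tokens": 140,
    "completion_tokens": 28,
    "total_tokens": 168,
    "step_count": 3,
    "cached_input_tokens": 65,
    "cache_write_tokens": 14,
    "reasoning_tokens": 9,
    "context_tokens": 640
  }
}
```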
@@ -24,8 +24,11 @@ export interface SessionEndData {
   prompt_tokens?: number;
   completion_tokens?: number;
   total_tokens?: number;
+  cached_input_tokens?: number;
   cached_tokens?: number;
+  cache_write_tokens?: number;
   reasoning_tokens?: number;
+  context_tokens?: number;
   step_count?: number;
 }

@@ -74,8 +77,10 @@ class TelemetryManager {
       promptTokens: number;
       completionTokens: number;
       totalTokens: number;
-      cachedTokens: number;
+      cachedInputTokens: number;
+      cacheWriteTokens: number;
       reasoningTokens: number;
+      contextTokens?: number;
       stepCount: number;
     };
   };

@@ -213,8 +218,10 @@ class TelemetryManager {
       promptTokens: number;
       completionTokens: number;
       totalTokens: number;
-      cachedTokens: number;
+      cachedInputTokens: number;
+      cacheWriteTokens: number;
       reasoningTokens: number;
+      contextTokens?: number;
       stepCount: number;
     };
   },

@@ -267,8 +274,10 @@ class TelemetryManager {
       promptTokens: number;
      completionTokens: number;
       totalTokens: number;
-      cachedTokens: number;
+      cachedInputTokens: number;
+      cacheWriteTokens: number;
       reasoningTokens: number;
+      contextTokens?: number;
       stepCount: number;
     };
   },

@@ -302,8 +311,11 @@ class TelemetryManager {
       prompt_tokens: sessionStats?.usage.promptTokens,
       completion_tokens: sessionStats?.usage.completionTokens,
       total_tokens: sessionStats?.usage.totalTokens,
-      cached_tokens: sessionStats?.usage.cachedTokens,
+      cached_input_tokens: sessionStats?.usage.cachedInputTokens,
+      cached_tokens: sessionStats?.usage.cachedInputTokens,
+      cache_write_tokens: sessionStats?.usage.cacheWriteTokens,
       reasoning_tokens: sessionStats?.usage.reasoningTokens,
+      context_tokens: sessionStats?.usage.contextTokens,
       step_count: sessionStats?.usage.stepCount,
     };
     this.track("session_end", data);

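Note on the telemetry payload: session_end now reports the new canonical cached_input_tokens while continuing to populate the legacy cached_tokens key with the same value, so existing consumers of the old field keep receiving data across the rename; presumably the duplicate key can be dropped once downstream consumers migrate.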
src/tests/cli/accumulator-usage.test.ts (new file, 75 lines)
@@ -0,0 +1,75 @@
+import { describe, expect, test } from "bun:test";
+import type { LettaStreamingResponse } from "@letta-ai/letta-client/resources/agents/messages";
+import { createBuffers, onChunk } from "../../cli/helpers/accumulator";
+
+function usageChunk(
+  fields: Record<string, number | null | undefined>,
+): LettaStreamingResponse {
+  return {
+    message_type: "usage_statistics",
+    ...fields,
+  } as LettaStreamingResponse;
+}
+
+describe("accumulator usage statistics", () => {
+  test("captures all LettaUsageStatistics token metrics", () => {
+    const buffers = createBuffers();
+
+    onChunk(
+      buffers,
+      usageChunk({
+        prompt_tokens: 100,
+        completion_tokens: 20,
+        total_tokens: 120,
+        step_count: 1,
+        cached_input_tokens: 60,
+        cache_write_tokens: 11,
+        reasoning_tokens: 7,
+        context_tokens: 512,
+      }),
+    );
+
+    onChunk(
+      buffers,
+      usageChunk({
+        prompt_tokens: 40,
+        completion_tokens: 8,
+        total_tokens: 48,
+        step_count: 2,
+        cached_input_tokens: 5,
+        cache_write_tokens: 3,
+        reasoning_tokens: 2,
+        context_tokens: 640,
+      }),
+    );
+
+    expect(buffers.usage.promptTokens).toBe(140);
+    expect(buffers.usage.completionTokens).toBe(28);
+    expect(buffers.usage.totalTokens).toBe(168);
+    expect(buffers.usage.stepCount).toBe(3);
+    expect(buffers.usage.cachedInputTokens).toBe(65);
+    expect(buffers.usage.cacheWriteTokens).toBe(14);
+    expect(buffers.usage.reasoningTokens).toBe(9);
+    // context_tokens is a snapshot value, so we keep the latest one.
+    expect(buffers.usage.contextTokens).toBe(640);
+  });
+
+  test("ignores null optional token metrics", () => {
+    const buffers = createBuffers();
+
+    onChunk(
+      buffers,
+      usageChunk({
+        cached_input_tokens: null,
+        cache_write_tokens: null,
+        reasoning_tokens: null,
+        context_tokens: null,
+      }),
+    );
+
+    expect(buffers.usage.cachedInputTokens).toBe(0);
+    expect(buffers.usage.cacheWriteTokens).toBe(0);
+    expect(buffers.usage.reasoningTokens).toBe(0);
+    expect(buffers.usage.contextTokens).toBeUndefined();
+  });
+});