feat: add usage tracking, output mode, and cli exit mode (#7)
This commit is contained in:
67
src/agent/stats.ts
Normal file
67
src/agent/stats.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
import type { Buffers } from "../cli/helpers/accumulator";
|
||||
|
||||
/**
 * Cumulative token usage for a session, accumulated from the stream's
 * `usage_statistics` chunks (see the accumulator's `onChunk` handler).
 */
export interface UsageStats {
  // Tokens consumed by the prompt/context sent to the model.
  promptTokens: number;
  // Tokens generated by the model.
  completionTokens: number;
  // Total tokens reported by the API (prompt + completion).
  totalTokens: number;
  // Prompt tokens served from cache, if the provider reports them.
  cachedTokens: number;
  // Tokens spent on reasoning/thinking, if the provider reports them.
  reasoningTokens: number;
  // Number of agent steps (turns of the inner loop) taken.
  stepCount: number;
}
|
||||
|
||||
/**
 * Point-in-time view of session statistics, safe to hand to UI components
 * (contains a copy of the usage record, not a live reference).
 */
export interface SessionStatsSnapshot {
  // performance.now() timestamp when the session started (or was last reset).
  sessionStartMs: number;
  // Wall-clock milliseconds elapsed since session start, as of the snapshot.
  totalWallMs: number;
  // Cumulative milliseconds spent inside API streaming calls.
  totalApiMs: number;
  // Copy of the accumulated token usage at snapshot time.
  usage: UsageStats;
}
|
||||
|
||||
export class SessionStats {
|
||||
private sessionStartMs: number;
|
||||
private totalApiMs: number;
|
||||
private usage: UsageStats;
|
||||
|
||||
constructor() {
|
||||
this.sessionStartMs = performance.now();
|
||||
this.totalApiMs = 0;
|
||||
this.usage = {
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
cachedTokens: 0,
|
||||
reasoningTokens: 0,
|
||||
stepCount: 0,
|
||||
};
|
||||
}
|
||||
|
||||
endTurn(apiDurationMs: number): void {
|
||||
this.totalApiMs += apiDurationMs;
|
||||
}
|
||||
|
||||
updateUsageFromBuffers(buffers: Buffers): void {
|
||||
this.usage = { ...buffers.usage };
|
||||
}
|
||||
|
||||
getSnapshot(): SessionStatsSnapshot {
|
||||
const now = performance.now();
|
||||
return {
|
||||
sessionStartMs: this.sessionStartMs,
|
||||
totalWallMs: now - this.sessionStartMs,
|
||||
totalApiMs: this.totalApiMs,
|
||||
usage: { ...this.usage },
|
||||
};
|
||||
}
|
||||
|
||||
reset(): void {
|
||||
this.sessionStartMs = performance.now();
|
||||
this.totalApiMs = 0;
|
||||
this.usage = {
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
cachedTokens: 0,
|
||||
reasoningTokens: 0,
|
||||
stepCount: 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import { Letta } from "@letta-ai/letta-client";
|
||||
import { Box, Static } from "ink";
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
import { sendMessageStream } from "../agent/message";
|
||||
import { SessionStats } from "../agent/stats";
|
||||
import type { ApprovalContext } from "../permissions/analyzer";
|
||||
import { permissionMode } from "../permissions/mode";
|
||||
import {
|
||||
@@ -24,6 +25,7 @@ import { ModelSelector } from "./components/ModelSelector";
|
||||
import { PlanModeDialog } from "./components/PlanModeDialog";
|
||||
// import { ReasoningMessage } from "./components/ReasoningMessage";
|
||||
import { ReasoningMessage } from "./components/ReasoningMessageRich";
|
||||
import { SessionStats as SessionStatsComponent } from "./components/SessionStats";
|
||||
// import { ToolCallMessage } from "./components/ToolCallMessage";
|
||||
import { ToolCallMessage } from "./components/ToolCallMessageRich";
|
||||
// import { UserMessage } from "./components/UserMessage";
|
||||
@@ -128,6 +130,12 @@ export default function App({
|
||||
getRandomThinkingMessage(),
|
||||
);
|
||||
|
||||
// Session stats tracking
|
||||
const sessionStatsRef = useRef(new SessionStats());
|
||||
|
||||
// Show exit stats on exit
|
||||
const [showExitStats, setShowExitStats] = useState(false);
|
||||
|
||||
// Static items (things that are done rendering and can be frozen)
|
||||
const [staticItems, setStaticItems] = useState<StaticItem[]>([]);
|
||||
|
||||
@@ -353,11 +361,16 @@ export default function App({
|
||||
while (true) {
|
||||
// Stream one turn
|
||||
const stream = await sendMessageStream(agentId, currentInput);
|
||||
const { stopReason, approval } = await drainStream(
|
||||
const { stopReason, approval, apiDurationMs } = await drainStream(
|
||||
stream,
|
||||
buffersRef.current,
|
||||
refreshDerivedThrottled,
|
||||
);
|
||||
|
||||
// Track API duration
|
||||
sessionStatsRef.current.endTurn(apiDurationMs);
|
||||
sessionStatsRef.current.updateUsageFromBuffers(buffersRef.current);
|
||||
|
||||
// Immediate refresh after stream completes to show final state
|
||||
refreshDerived();
|
||||
|
||||
@@ -479,6 +492,14 @@ export default function App({
|
||||
[agentId, appendError, refreshDerived, refreshDerivedThrottled],
|
||||
);
|
||||
|
||||
const handleExit = useCallback(() => {
|
||||
setShowExitStats(true);
|
||||
// Give React time to render the stats, then exit
|
||||
setTimeout(() => {
|
||||
process.exit(0);
|
||||
}, 100);
|
||||
}, []);
|
||||
|
||||
const onSubmit = useCallback(
|
||||
async (message?: string) => {
|
||||
const msg = message?.trim() ?? "";
|
||||
@@ -509,6 +530,12 @@ export default function App({
|
||||
return;
|
||||
}
|
||||
|
||||
// Special handling for /exit command - show stats and exit
|
||||
if (msg.trim() === "/exit") {
|
||||
handleExit();
|
||||
return;
|
||||
}
|
||||
|
||||
// Special handling for /stream command - toggle and save
|
||||
if (msg.trim() === "/stream") {
|
||||
const newValue = !tokenStreamingEnabled;
|
||||
@@ -658,6 +685,7 @@ export default function App({
|
||||
tokenStreamingEnabled,
|
||||
refreshDerived,
|
||||
agentId,
|
||||
handleExit,
|
||||
],
|
||||
);
|
||||
|
||||
@@ -1061,10 +1089,21 @@ export default function App({
|
||||
|
||||
{/* Ensure 1 blank line above input when there are no live items */}
|
||||
{liveItems.length === 0 && <Box height={1} />}
|
||||
|
||||
{/* Show exit stats when exiting */}
|
||||
{showExitStats && (
|
||||
<SessionStatsComponent
|
||||
stats={sessionStatsRef.current.getSnapshot()}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Input row - always mounted to preserve state */}
|
||||
<Input
|
||||
visible={
|
||||
!pendingApproval && !modelSelectorOpen && !planApprovalPending
|
||||
!showExitStats &&
|
||||
!pendingApproval &&
|
||||
!modelSelectorOpen &&
|
||||
!planApprovalPending
|
||||
}
|
||||
streaming={streaming}
|
||||
commandRunning={commandRunning}
|
||||
@@ -1073,6 +1112,7 @@ export default function App({
|
||||
onSubmit={onSubmit}
|
||||
permissionMode={uiPermissionMode}
|
||||
onPermissionModeChange={setUiPermissionMode}
|
||||
onExit={handleExit}
|
||||
/>
|
||||
|
||||
{/* Model Selector - conditionally mounted as overlay */}
|
||||
|
||||
@@ -29,6 +29,13 @@ export const commands: Record<string, Command> = {
|
||||
return "Toggling token streaming...";
|
||||
},
|
||||
},
|
||||
"/exit": {
|
||||
desc: "Exit and show session stats",
|
||||
handler: () => {
|
||||
// Handled specially in App.tsx to show stats
|
||||
return "Exiting...";
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -26,6 +26,7 @@ export function Input({
|
||||
onSubmit,
|
||||
permissionMode: externalMode,
|
||||
onPermissionModeChange,
|
||||
onExit,
|
||||
}: {
|
||||
visible?: boolean;
|
||||
streaming: boolean;
|
||||
@@ -35,6 +36,7 @@ export function Input({
|
||||
onSubmit: (message?: string) => void;
|
||||
permissionMode?: PermissionMode;
|
||||
onPermissionModeChange?: (mode: PermissionMode) => void;
|
||||
onExit?: () => void;
|
||||
}) {
|
||||
const [value, setValue] = useState("");
|
||||
const [escapePressed, setEscapePressed] = useState(false);
|
||||
@@ -84,7 +86,8 @@ export function Input({
|
||||
useInput((input, key) => {
|
||||
if (input === "c" && key.ctrl) {
|
||||
if (ctrlCPressed) {
|
||||
// Second CTRL-C - exit application
|
||||
// Second CTRL-C - call onExit callback then exit application
|
||||
if (onExit) onExit();
|
||||
process.exit(0);
|
||||
} else {
|
||||
// First CTRL-C - start 1-second timer
|
||||
@@ -209,7 +212,7 @@ export function Input({
|
||||
message={thinkingMessage}
|
||||
shimmerOffset={shimmerOffset}
|
||||
/>
|
||||
{shouldShowTokenCount && <Text dimColor> ({tokenCount}↑)</Text>}
|
||||
{shouldShowTokenCount && <Text dimColor> ({tokenCount} ↑)</Text>}
|
||||
</Box>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
34
src/cli/components/SessionStats.tsx
Normal file
34
src/cli/components/SessionStats.tsx
Normal file
@@ -0,0 +1,34 @@
|
||||
import { Box, Text } from "ink";
|
||||
import type { SessionStatsSnapshot } from "../../agent/stats";
|
||||
|
||||
/** Props for the exit-time session summary component. */
interface SessionStatsProps {
  // Point-in-time stats snapshot to display (taken by the caller).
  stats: SessionStatsSnapshot;
}
|
||||
|
||||
function formatDuration(ms: number): string {
|
||||
if (ms < 1000) {
|
||||
return `${Math.round(ms)}ms`;
|
||||
}
|
||||
return `${(ms / 1000).toFixed(1)}s`;
|
||||
}
|
||||
|
||||
function formatNumber(n: number): string {
|
||||
return n.toLocaleString();
|
||||
}
|
||||
|
||||
/**
 * One-shot session summary rendered on exit: API vs wall-clock duration and
 * the step/token totals from the provided snapshot. Purely presentational —
 * reads only the snapshot prop, no state or effects.
 */
export function SessionStats({ stats }: SessionStatsProps) {
  const wallDuration = formatDuration(stats.totalWallMs);
  const apiDuration = formatDuration(stats.totalApiMs);

  return (
    <Box flexDirection="column" paddingTop={1}>
      <Text dimColor>Total duration (API): {apiDuration}</Text>
      <Text dimColor>Total duration (wall): {wallDuration}</Text>
      <Text dimColor>
        Usage: {stats.usage.stepCount} steps,{" "}
        {formatNumber(stats.usage.promptTokens)} input,{" "}
        {formatNumber(stats.usage.completionTokens)} output
      </Text>
    </Box>
  );
}
|
||||
@@ -55,6 +55,14 @@ export type Buffers = {
|
||||
toolCallIdToLineId: Map<string, string>;
|
||||
lastOtid: string | null; // Track the last otid to detect transitions
|
||||
pendingRefresh?: boolean; // Track throttled refresh state
|
||||
usage: {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
totalTokens: number;
|
||||
cachedTokens: number;
|
||||
reasoningTokens: number;
|
||||
stepCount: number;
|
||||
};
|
||||
};
|
||||
|
||||
export function createBuffers(): Buffers {
|
||||
@@ -65,6 +73,14 @@ export function createBuffers(): Buffers {
|
||||
pendingToolByRun: new Map(),
|
||||
toolCallIdToLineId: new Map(),
|
||||
lastOtid: null,
|
||||
usage: {
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
cachedTokens: 0,
|
||||
reasoningTokens: 0,
|
||||
stepCount: 0,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -339,8 +355,26 @@ export function onChunk(
|
||||
break;
|
||||
}
|
||||
|
||||
case "usage_statistics": {
|
||||
// Accumulate usage statistics from the stream
|
||||
// These messages arrive after stop_reason in the stream
|
||||
if (chunk.promptTokens !== undefined) {
|
||||
b.usage.promptTokens += chunk.promptTokens;
|
||||
}
|
||||
if (chunk.completionTokens !== undefined) {
|
||||
b.usage.completionTokens += chunk.completionTokens;
|
||||
}
|
||||
if (chunk.totalTokens !== undefined) {
|
||||
b.usage.totalTokens += chunk.totalTokens;
|
||||
}
|
||||
if (chunk.stepCount !== undefined) {
|
||||
b.usage.stepCount += chunk.stepCount;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break; // ignore ping/usage/etc
|
||||
break; // ignore ping/etc
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ type DrainResult = {
|
||||
lastRunId?: string | null;
|
||||
lastSeqId?: number | null;
|
||||
approval?: ApprovalRequest | null; // present only if we ended due to approval
|
||||
apiDurationMs: number; // time spent in API call
|
||||
};
|
||||
|
||||
export async function drainStream(
|
||||
@@ -23,6 +24,8 @@ export async function drainStream(
|
||||
buffers: ReturnType<typeof createBuffers>,
|
||||
refresh: () => void,
|
||||
): Promise<DrainResult> {
|
||||
const startTime = performance.now();
|
||||
|
||||
let approvalRequestId: string | null = null;
|
||||
let toolCallId: string | null = null;
|
||||
let toolName: string | null = null;
|
||||
@@ -78,10 +81,15 @@ export async function drainStream(
|
||||
|
||||
if (chunk.messageType === "stop_reason") {
|
||||
stopReason = chunk.stopReason;
|
||||
break; // end of turn
|
||||
// Continue reading stream to get usage_statistics that may come after
|
||||
}
|
||||
}
|
||||
|
||||
// Stream has ended, check if we captured a stop reason
|
||||
if (!stopReason) {
|
||||
stopReason = Letta.StopReasonType.Error;
|
||||
}
|
||||
|
||||
// Mark the final line as finished now that stream has ended
|
||||
markCurrentLineAsFinished(buffers);
|
||||
queueMicrotask(refresh);
|
||||
@@ -96,9 +104,7 @@ export async function drainStream(
|
||||
}
|
||||
: null;
|
||||
|
||||
if (!stopReason) {
|
||||
stopReason = Letta.StopReasonType.Error;
|
||||
}
|
||||
const apiDurationMs = performance.now() - startTime;
|
||||
|
||||
return { stopReason, approval, lastRunId, lastSeqId };
|
||||
return { stopReason, approval, lastRunId, lastSeqId, apiDurationMs };
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ import { Letta } from "@letta-ai/letta-client";
|
||||
import { getClient } from "./agent/client";
|
||||
import { createAgent } from "./agent/create";
|
||||
import { sendMessageStream } from "./agent/message";
|
||||
import { SessionStats } from "./agent/stats";
|
||||
import { createBuffers, toLines } from "./cli/helpers/accumulator";
|
||||
import { safeJsonParseOr } from "./cli/helpers/safeJsonParse";
|
||||
import { drainStream } from "./cli/helpers/stream";
|
||||
@@ -18,6 +19,7 @@ export async function handleHeadlessCommand(argv: string[]) {
|
||||
options: {
|
||||
continue: { type: "boolean", short: "c" },
|
||||
agent: { type: "string", short: "a" },
|
||||
"output-format": { type: "string" },
|
||||
},
|
||||
strict: false,
|
||||
allowPositionals: true,
|
||||
@@ -73,9 +75,22 @@ export async function handleHeadlessCommand(argv: string[]) {
|
||||
await updateSettings({ lastAgent: agent.id });
|
||||
}
|
||||
|
||||
// Validate output format
|
||||
const outputFormat =
|
||||
(values["output-format"] as string | undefined) || "text";
|
||||
if (!["text", "json", "stream-json"].includes(outputFormat)) {
|
||||
console.error(
|
||||
`Error: Invalid output format "${outputFormat}". Valid formats: text, json, stream-json`,
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Create buffers to accumulate stream
|
||||
const buffers = createBuffers();
|
||||
|
||||
// Initialize session stats
|
||||
const sessionStats = new SessionStats();
|
||||
|
||||
// Send message and process stream loop
|
||||
let currentInput: Array<Letta.MessageCreate | Letta.ApprovalCreate> = [
|
||||
{
|
||||
@@ -89,12 +104,15 @@ export async function handleHeadlessCommand(argv: string[]) {
|
||||
const stream = await sendMessageStream(agent.id, currentInput);
|
||||
|
||||
// Drain stream and collect approval requests
|
||||
const { stopReason, approval } = await drainStream(
|
||||
const { stopReason, approval, apiDurationMs } = await drainStream(
|
||||
stream,
|
||||
buffers,
|
||||
() => {}, // No UI refresh needed in headless mode
|
||||
);
|
||||
|
||||
// Track API duration for this stream
|
||||
sessionStats.endTurn(apiDurationMs);
|
||||
|
||||
// Case 1: Turn ended normally
|
||||
if (stopReason === Letta.StopReasonType.EndTurn) {
|
||||
break;
|
||||
@@ -173,16 +191,47 @@ export async function handleHeadlessCommand(argv: string[]) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Update stats with final usage data from buffers
|
||||
sessionStats.updateUsageFromBuffers(buffers);
|
||||
|
||||
// Extract final assistant message
|
||||
const lines = toLines(buffers);
|
||||
const lastAssistant = [...lines]
|
||||
.reverse()
|
||||
.find((line) => line.kind === "assistant");
|
||||
|
||||
if (lastAssistant && "text" in lastAssistant) {
|
||||
console.log(lastAssistant.text);
|
||||
} else {
|
||||
console.error("No assistant response found");
|
||||
const resultText =
|
||||
lastAssistant && "text" in lastAssistant
|
||||
? lastAssistant.text
|
||||
: "No assistant response found";
|
||||
|
||||
// Output based on format
|
||||
if (outputFormat === "json") {
|
||||
const stats = sessionStats.getSnapshot();
|
||||
const output = {
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
is_error: false,
|
||||
duration_ms: Math.round(stats.totalWallMs),
|
||||
duration_api_ms: Math.round(stats.totalApiMs),
|
||||
num_turns: stats.usage.stepCount,
|
||||
result: resultText,
|
||||
session_id: agent.id,
|
||||
usage: {
|
||||
input_tokens: stats.usage.promptTokens,
|
||||
output_tokens: stats.usage.completionTokens,
|
||||
},
|
||||
};
|
||||
console.log(JSON.stringify(output, null, 2));
|
||||
} else if (outputFormat === "stream-json") {
|
||||
console.error("stream-json format not yet implemented");
|
||||
process.exit(1);
|
||||
} else {
|
||||
// text format (default)
|
||||
if (!lastAssistant || !("text" in lastAssistant)) {
|
||||
console.error("No assistant response found");
|
||||
process.exit(1);
|
||||
}
|
||||
console.log(lastAssistant.text);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,12 +26,17 @@ OPTIONS
|
||||
-c, --continue Resume previous session (uses settings.lastAgent)
|
||||
-a, --agent <id> Use a specific agent ID
|
||||
-p, --prompt Headless prompt mode
|
||||
--output-format <fmt> Output format for headless mode (text, json, stream-json)
|
||||
Default: text
|
||||
|
||||
EXAMPLES
|
||||
# when installed as an executable
|
||||
letta --help
|
||||
letta --continue
|
||||
letta --agent agent_123
|
||||
|
||||
# headless with JSON output (includes stats)
|
||||
letta -p "hello" --output-format json
|
||||
|
||||
`.trim();
|
||||
|
||||
@@ -58,6 +63,7 @@ async function main() {
|
||||
disallowedTools: { type: "string" },
|
||||
"permission-mode": { type: "string" },
|
||||
yolo: { type: "boolean" },
|
||||
"output-format": { type: "string" },
|
||||
},
|
||||
strict: true,
|
||||
allowPositionals: true,
|
||||
|
||||
Reference in New Issue
Block a user