feat: reduce time-to-boot, remove default eager approval checks on inputs, auto-cancel stale approvals (#579)

Co-authored-by: Letta <noreply@letta.com>
2026-01-17 16:19:30 -08:00
parent f4eb921af7
commit 5f5c0df18e
13 changed files with 1376 additions and 93 deletions
--- a/scripts/latency-benchmark.ts
+++ b/scripts/latency-benchmark.ts
@@ -0,0 +1,341 @@
+#!/usr/bin/env bun
+/**
+ * Latency Benchmark Script for Letta Code CLI
+ *
+ * Runs headless mode with LETTA_DEBUG_TIMINGS=1 and parses the output
+ * to measure latency breakdown at different stages.
+ *
+ * Usage:
+ *   bun scripts/latency-benchmark.ts
+ *   bun scripts/latency-benchmark.ts --scenario fresh-agent
+ *   bun scripts/latency-benchmark.ts --iterations 5
+ *
+ * Requires: LETTA_API_KEY environment variable
+ */
+
+import { spawn } from "node:child_process";
+
+interface ApiCall {
+  method: string;
+  path: string;
+  durationMs: number;
+  status?: number;
+}
+
+interface Milestone {
+  name: string;
+  offsetMs: number;
+}
+
+interface BenchmarkResult {
+  scenario: string;
+  totalMs: number;
+  milestones: Milestone[];
+  apiCalls: ApiCall[];
+  exitCode: number;
+}
+
+interface ScenarioConfig {
+  name: string;
+  description: string;
+  args: string[];
+}
+
+// Define benchmark scenarios
+const SCENARIOS: ScenarioConfig[] = [
+  {
+    name: "fresh-agent",
+    description: "Create new agent and send simple prompt",
+    args: [
+      "-p",
+      "What is 2+2? Reply with just the number.",
+      "--new-agent",
+      "--yolo",
+      "--output-format",
+      "json",
+    ],
+  },
+  {
+    name: "resume-agent",
+    description: "Resume last agent and send simple prompt",
+    args: [
+      "-p",
+      "What is 3+3? Reply with just the number.",
+      "--continue",
+      "--yolo",
+      "--output-format",
+      "json",
+    ],
+  },
+  {
+    name: "minimal-math",
+    description: "Simple math question (no tool calls)",
+    args: [
+      "-p",
+      "What is 5+5? Reply with just the number.",
+      "--continue",
+      "--yolo",
+      "--output-format",
+      "json",
+    ],
+  },
+];
+
+/**
+ * Parse timing logs from stderr output
+ */
+function parseTimingLogs(stderr: string): {
+  milestones: Milestone[];
+  apiCalls: ApiCall[];
+} {
+  const milestones: Milestone[] = [];
+  const apiCalls: ApiCall[] = [];
+
+  const lines = stderr.split("\n");
+
+  for (const line of lines) {
+    // Parse milestones: [timing] MILESTONE CLI_START at +0ms (12:34:56.789)
+    const milestoneMatch = line.match(
+      /\[timing\] MILESTONE (\S+) at \+(\d+(?:\.\d+)?)(ms|s)/,
+    );
+    if (milestoneMatch) {
+      const name = milestoneMatch[1]!;
+      let offsetMs = parseFloat(milestoneMatch[2]!);
+      if (milestoneMatch[3] === "s") {
+        offsetMs *= 1000;
+      }
+      milestones.push({ name, offsetMs });
+      continue;
+    }
+
+    // Parse API calls: [timing] GET /v1/agents/... -> 245ms (status: 200)
+    const apiMatch = line.match(
+      /\[timing\] (GET|POST|PUT|DELETE|PATCH) (\S+) -> (\d+(?:\.\d+)?)(ms|s)(?: \(status: (\d+)\))?/,
+    );
+    if (apiMatch) {
+      const method = apiMatch[1]!;
+      const path = apiMatch[2]!;
+      let durationMs = parseFloat(apiMatch[3]!);
+      if (apiMatch[4] === "s") {
+        durationMs *= 1000;
+      }
+      const status = apiMatch[5] ? parseInt(apiMatch[5], 10) : undefined;
+      apiCalls.push({ method, path, durationMs, status });
+    }
+  }
+
+  return { milestones, apiCalls };
+}
+
+/**
+ * Run a single benchmark scenario
+ */
+async function runBenchmark(scenario: ScenarioConfig): Promise<BenchmarkResult> {
+  const start = performance.now();
+
+  return new Promise((resolve) => {
+    const proc = spawn("bun", ["run", "dev", ...scenario.args], {
+      env: { ...process.env, LETTA_DEBUG_TIMINGS: "1" },
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+
+    let stdout = "";
+    let stderr = "";
+
+    proc.stdout.on("data", (data) => {
+      stdout += data.toString();
+    });
+
+    proc.stderr.on("data", (data) => {
+      stderr += data.toString();
+    });
+
+    proc.on("close", (code) => {
+      const totalMs = performance.now() - start;
+      const { milestones, apiCalls } = parseTimingLogs(stderr);
+
+      resolve({
+        scenario: scenario.name,
+        totalMs,
+        milestones,
+        apiCalls,
+        exitCode: code ?? 1,
+      });
+    });
+
+    // Timeout after 2 minutes
+    setTimeout(() => {
+      proc.kill("SIGTERM");
+    }, 120000);
+  });
+}
+
+/**
+ * Format duration for display
+ */
+function formatMs(ms: number): string {
+  if (ms < 1000) return `${Math.round(ms)}ms`;
+  return `${(ms / 1000).toFixed(2)}s`;
+}
+
+/**
+ * Print benchmark results
+ */
+function printResults(results: BenchmarkResult[]): void {
+  console.log("\n" + "=".repeat(70));
+  console.log("LATENCY BENCHMARK RESULTS");
+  console.log("=".repeat(70) + "\n");
+
+  for (const result of results) {
+    const scenario = SCENARIOS.find((s) => s.name === result.scenario);
+    console.log(`Scenario: ${result.scenario}`);
+    console.log(`  ${scenario?.description || ""}`);
+    console.log(`  Exit code: ${result.exitCode}`);
+    console.log(`  Total wall time: ${formatMs(result.totalMs)}`);
+    console.log("");
+
+    // Print milestones
+    if (result.milestones.length > 0) {
+      console.log("  Milestones:");
+      let prevMs = 0;
+      for (const milestone of result.milestones) {
+        const delta = milestone.offsetMs - prevMs;
+        const deltaStr = prevMs === 0 ? "" : ` (+${formatMs(delta)})`;
+        console.log(
+          `    +${formatMs(milestone.offsetMs).padStart(8)} ${milestone.name}${deltaStr}`,
+        );
+        prevMs = milestone.offsetMs;
+      }
+      console.log("");
+    }
+
+    // Print API calls summary
+    if (result.apiCalls.length > 0) {
+      console.log("  API Calls:");
+      const totalApiMs = result.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);
+
+      // Group by path pattern
+      const grouped: Record<string, { count: number; totalMs: number }> = {};
+      for (const call of result.apiCalls) {
+        // Normalize paths (remove UUIDs)
+        const normalizedPath = call.path.replace(
+          /[a-f0-9-]{36}/g,
+          "{id}",
+        );
+        const key = `${call.method} ${normalizedPath}`;
+        if (!grouped[key]) {
+          grouped[key] = { count: 0, totalMs: 0 };
+        }
+        grouped[key].count++;
+        grouped[key].totalMs += call.durationMs;
+      }
+
+      // Sort by total time
+      const sorted = Object.entries(grouped).sort(
+        (a, b) => b[1].totalMs - a[1].totalMs,
+      );
+
+      for (const [endpoint, stats] of sorted) {
+        const countStr = stats.count > 1 ? ` (x${stats.count})` : "";
+        console.log(
+          `    ${formatMs(stats.totalMs).padStart(8)} ${endpoint}${countStr}`,
+        );
+      }
+
+      console.log(`    ${"─".repeat(50)}`);
+      console.log(`    ${formatMs(totalApiMs).padStart(8)} Total API time`);
+      console.log(
+        `    ${formatMs(result.totalMs - totalApiMs).padStart(8)} CLI overhead (non-API)`,
+      );
+    }
+
+    console.log("\n" + "-".repeat(70) + "\n");
+  }
+
+  // Summary table
+  console.log("SUMMARY");
+  console.log("-".repeat(70));
+  console.log(
+    "Scenario".padEnd(20) +
+      "Total".padStart(12) +
+      "API Time".padStart(12) +
+      "CLI Overhead".padStart(14),
+  );
+  console.log("-".repeat(70));
+
+  for (const result of results) {
+    const totalApiMs = result.apiCalls.reduce((sum, c) => sum + c.durationMs, 0);
+    const cliOverhead = result.totalMs - totalApiMs;
+    console.log(
+      result.scenario.padEnd(20) +
+        formatMs(result.totalMs).padStart(12) +
+        formatMs(totalApiMs).padStart(12) +
+        formatMs(cliOverhead).padStart(14),
+    );
+  }
+  console.log("-".repeat(70));
+}
+
+async function main(): Promise<void> {
+  // Parse args
+  const args = process.argv.slice(2);
+  let scenarioFilter: string | null = null;
+  let iterations = 1;
+
+  for (let i = 0; i < args.length; i++) {
+    if (args[i] === "--scenario" && args[i + 1]) {
+      scenarioFilter = args[++i]!;
+    } else if (args[i] === "--iterations" && args[i + 1]) {
+      iterations = parseInt(args[++i]!, 10);
+    }
+  }
+
+  // Check prereqs
+  if (!process.env.LETTA_API_KEY) {
+    console.error("Error: LETTA_API_KEY environment variable is required");
+    process.exit(1);
+  }
+
+  // Filter scenarios
+  const scenariosToRun = scenarioFilter
+    ? SCENARIOS.filter((s) => s.name === scenarioFilter)
+    : SCENARIOS;
+
+  if (scenariosToRun.length === 0) {
+    console.error(`Error: Unknown scenario "${scenarioFilter}"`);
+    console.error(`Available scenarios: ${SCENARIOS.map((s) => s.name).join(", ")}`);
+    process.exit(1);
+  }
+
+  console.log("Running latency benchmarks...");
+  console.log(`Scenarios: ${scenariosToRun.map((s) => s.name).join(", ")}`);
+  console.log(`Iterations: ${iterations}`);
+  console.log("");
+
+  const allResults: BenchmarkResult[] = [];
+
+  for (let iter = 0; iter < iterations; iter++) {
+    if (iterations > 1) {
+      console.log(`\n--- Iteration ${iter + 1} of ${iterations} ---`);
+    }
+
+    for (const scenario of scenariosToRun) {
+      console.log(`Running: ${scenario.name}...`);
+      const result = await runBenchmark(scenario);
+      allResults.push(result);
+
+      if (result.exitCode !== 0) {
+        console.warn(`  Warning: ${scenario.name} exited with code ${result.exitCode}`);
+      } else {
+        console.log(`  Completed in ${formatMs(result.totalMs)}`);
+      }
+    }
+  }
+
+  printResults(allResults);
+}
+
+main().catch((err) => {
+  console.error(err);
+  process.exit(1);
+});