Files
letta-code/scripts/latency-benchmark.ts
2026-03-19 17:54:12 -07:00

340 lines
8.8 KiB
TypeScript

#!/usr/bin/env bun
/**
* Latency Benchmark Script for Letta Code CLI
*
* Runs headless mode with LETTA_DEBUG_TIMINGS=1 and parses the output
* to measure latency breakdown at different stages.
*
* Usage:
* bun scripts/latency-benchmark.ts
* bun scripts/latency-benchmark.ts --scenario fresh-agent
* bun scripts/latency-benchmark.ts --iterations 5
*
* Requires: LETTA_API_KEY environment variable
*/
import { spawn } from "node:child_process";
/** One HTTP request extracted from a `[timing] <METHOD> <path> -> <duration>` log line. */
interface ApiCall {
// HTTP verb as it appeared in the log line.
method: string;
// Request path as logged (not normalized).
path: string;
// Request duration in milliseconds (values logged in seconds are converted).
durationMs: number;
// HTTP status code; undefined when the log line carried no `(status: N)` suffix.
status?: number;
}
/** A named checkpoint extracted from a `[timing] MILESTONE <name> at +<offset>` log line. */
interface Milestone {
// Milestone identifier (a single whitespace-free token from the log line).
name: string;
// Offset reported in the log line, in milliseconds (values logged in seconds are converted).
offsetMs: number;
}
/** Outcome of one run of one scenario. */
interface BenchmarkResult {
// Name of the scenario that produced this result.
scenario: string;
// Wall-clock time for the whole child-process run, in milliseconds.
totalMs: number;
// Milestones parsed from the child's stderr, in log order.
milestones: Milestone[];
// API calls parsed from the child's stderr, in log order.
apiCalls: ApiCall[];
// Exit code of the child process.
exitCode: number;
}
/** Static description of a benchmark scenario. */
interface ScenarioConfig {
// Identifier used for `--scenario` filtering and in the report.
name: string;
// Human-readable summary printed under the scenario name.
description: string;
// CLI arguments appended after `bun run dev` when the scenario is executed.
args: string[];
}
// Benchmark scenario catalogue. Every scenario sends a one-shot prompt with
// `-p` and ends with the same `--yolo --output-format json` tail; only the
// prompt text and any flags placed before that tail differ.
const SCENARIOS: ScenarioConfig[] = [
  {
    name: "fresh-agent",
    description: "Create new agent and send simple prompt",
    prompt: "What is 2+2? Reply with just the number.",
    extraFlags: ["--new-agent"],
  },
  {
    name: "resume-agent",
    description: "Resume last agent and send simple prompt",
    prompt: "What is 3+3? Reply with just the number.",
    extraFlags: [],
  },
  {
    name: "minimal-math",
    description: "Simple math question (no tool calls)",
    prompt: "What is 5+5? Reply with just the number.",
    extraFlags: [],
  },
].map(({ name, description, prompt, extraFlags }) => ({
  name,
  description,
  args: ["-p", prompt, ...extraFlags, "--yolo", "--output-format", "json"],
}));
/**
 * Extract timing information from the CLI's stderr output.
 *
 * Two line shapes are recognized; everything else is ignored:
 *   [timing] MILESTONE CLI_START at +0ms (12:34:56.789)
 *   [timing] GET /v1/agents/... -> 245ms (status: 200)
 *
 * Durations may be logged in `ms` or `s`; both are returned in milliseconds.
 * A line is tested as a milestone first and only then as an API call.
 *
 * @param stderr Raw stderr text, newline separated.
 * @returns Milestones and API calls in the order they appear in the log.
 */
function parseTimingLogs(stderr: string): {
  milestones: Milestone[];
  apiCalls: ApiCall[];
} {
  const milestonePattern =
    /\[timing\] MILESTONE (\S+) at \+(\d+(?:\.\d+)?)(ms|s)/;
  const apiCallPattern =
    /\[timing\] (GET|POST|PUT|DELETE|PATCH) (\S+) -> (\d+(?:\.\d+)?)(ms|s)(?: \(status: (\d+)\))?/;

  // Convert a logged amount to milliseconds based on its unit suffix.
  const toMs = (amount: string, unit: string): number => {
    const value = parseFloat(amount);
    return unit === "s" ? value * 1000 : value;
  };

  const foundMilestones: Milestone[] = [];
  const foundCalls: ApiCall[] = [];

  stderr.split("\n").forEach((entry) => {
    const milestoneHit = milestonePattern.exec(entry);
    if (milestoneHit !== null) {
      foundMilestones.push({
        name: milestoneHit[1]!,
        offsetMs: toMs(milestoneHit[2]!, milestoneHit[3]!),
      });
      return;
    }

    const apiHit = apiCallPattern.exec(entry);
    if (apiHit === null) {
      return;
    }
    const rawStatus = apiHit[5];
    foundCalls.push({
      method: apiHit[1]!,
      path: apiHit[2]!,
      durationMs: toMs(apiHit[3]!, apiHit[4]!),
      // The status suffix is optional in the log format.
      status: rawStatus ? parseInt(rawStatus, 10) : undefined,
    });
  });

  return { milestones: foundMilestones, apiCalls: foundCalls };
}
/**
 * Run a single benchmark scenario.
 *
 * Spawns `bun run dev <scenario.args>` with LETTA_DEBUG_TIMINGS=1, collects
 * the child's stderr, and parses the timing lines out of it once the child
 * has finished.
 *
 * The returned promise never rejects: a child that cannot be started is
 * reported as a result with exit code 1 so the remaining scenarios still run.
 *
 * @param scenario Scenario whose args are passed to the CLI.
 * @returns Wall time, parsed milestones/API calls, and the child's exit code
 *          (1 when the child reported no exit code, e.g. killed by a signal).
 */
async function runBenchmark(scenario: ScenarioConfig): Promise<BenchmarkResult> {
  const start = performance.now();
  return new Promise((resolve) => {
    const proc = spawn("bun", ["run", "dev", ...scenario.args], {
      env: { ...process.env, LETTA_DEBUG_TIMINGS: "1" },
      stdio: ["pipe", "pipe", "pipe"],
    });
    let stderr = "";
    let settled = false;

    // Timeout after 2 minutes
    const killTimer = setTimeout(() => {
      proc.kill("SIGTERM");
    }, 120000);

    const finish = (exitCode: number): void => {
      // A failed spawn can emit both 'error' and 'close'; report only once.
      if (settled) return;
      settled = true;
      // Without this the pending timer keeps the event loop (and therefore
      // the whole benchmark script) alive for up to two minutes after the
      // child has exited, and would later signal an already-finished process.
      clearTimeout(killTimer);
      const totalMs = performance.now() - start;
      const { milestones, apiCalls } = parseTimingLogs(stderr);
      resolve({
        scenario: scenario.name,
        totalMs,
        milestones,
        apiCalls,
        exitCode,
      });
    };

    // stdout is not inspected, but it must still be drained so the child
    // cannot block on a full pipe.
    proc.stdout.resume();
    proc.stderr.on("data", (data) => {
      stderr += data.toString();
    });
    // Without an 'error' listener a spawn failure (e.g. `bun` not on PATH)
    // is thrown as an uncaught exception.
    proc.on("error", (err) => {
      console.error(` Failed to run ${scenario.name}: ${err.message}`);
      finish(1);
    });
    proc.on("close", (code) => {
      finish(code ?? 1);
    });
  });
}
/**
 * Format a duration for display.
 *
 * Durations that round to less than one second are shown as whole
 * milliseconds ("245ms"); anything else is shown in seconds with two decimals
 * ("1.23s").
 *
 * The unit is chosen from the rounded magnitude, so values just under one
 * second (e.g. 999.6) render as "1.00s" rather than "1000ms", and negative
 * durations (the "CLI overhead" figure can go negative when the summed API
 * time exceeds wall time) switch to seconds at the same threshold as
 * positive ones.
 *
 * @param ms Duration in milliseconds.
 * @returns Human-readable duration string.
 */
function formatMs(ms: number): string {
  const wholeMs = Math.round(ms);
  if (Math.abs(wholeMs) < 1000) return `${wholeMs}ms`;
  return `${(ms / 1000).toFixed(2)}s`;
}
/**
 * Write the benchmark report to stdout.
 *
 * For every result this prints the scenario header, the milestone timeline
 * (with the gap to the previous milestone once the previous offset is
 * non-zero), and the API calls grouped by method + path with 36-character
 * hex/dash ids collapsed to `{id}`, ordered by total time spent. A compact
 * summary table with one row per result follows.
 *
 * @param results Results in the order they were collected.
 */
function printResults(results: BenchmarkResult[]): void {
  const heavyRule = "=".repeat(70);
  const lightRule = "-".repeat(70);
  // Sum of all API call durations for one run.
  const sumApiMs = (run: BenchmarkResult): number =>
    run.apiCalls.reduce((acc, call) => acc + call.durationMs, 0);

  console.log("\n" + heavyRule);
  console.log("LATENCY BENCHMARK RESULTS");
  console.log(heavyRule + "\n");

  for (const run of results) {
    const config = SCENARIOS.find((candidate) => candidate.name === run.scenario);
    console.log(`Scenario: ${run.scenario}`);
    console.log(` ${config?.description || ""}`);
    console.log(` Exit code: ${run.exitCode}`);
    console.log(` Total wall time: ${formatMs(run.totalMs)}`);
    console.log("");

    // Milestone timeline
    if (run.milestones.length > 0) {
      console.log(" Milestones:");
      let previousOffset = 0;
      for (const { name, offsetMs } of run.milestones) {
        // While the previous offset is 0 the gap equals the offset itself,
        // so it is omitted.
        const gapSuffix =
          previousOffset === 0
            ? ""
            : ` (+${formatMs(offsetMs - previousOffset)})`;
        console.log(` +${formatMs(offsetMs).padStart(8)} ${name}${gapSuffix}`);
        previousOffset = offsetMs;
      }
      console.log("");
    }

    // API call breakdown
    if (run.apiCalls.length > 0) {
      console.log(" API Calls:");
      const apiTotal = sumApiMs(run);

      // Bucket calls by endpoint, collapsing ids so repeated calls group together.
      const byEndpoint = new Map<string, { count: number; totalMs: number }>();
      for (const { method, path, durationMs } of run.apiCalls) {
        const endpoint = `${method} ${path.replace(/[a-f0-9-]{36}/g, "{id}")}`;
        const bucket = byEndpoint.get(endpoint) ?? { count: 0, totalMs: 0 };
        bucket.count += 1;
        bucket.totalMs += durationMs;
        byEndpoint.set(endpoint, bucket);
      }

      // Most expensive endpoints first.
      const ranked = [...byEndpoint.entries()].sort(
        ([, left], [, right]) => right.totalMs - left.totalMs,
      );
      for (const [endpoint, { count, totalMs }] of ranked) {
        const repeatSuffix = count > 1 ? ` (x${count})` : "";
        console.log(` ${formatMs(totalMs).padStart(8)} ${endpoint}${repeatSuffix}`);
      }
      console.log(` ${"─".repeat(50)}`);
      console.log(` ${formatMs(apiTotal).padStart(8)} Total API time`);
      console.log(
        ` ${formatMs(run.totalMs - apiTotal).padStart(8)} CLI overhead (non-API)`,
      );
    }
    console.log("\n" + lightRule + "\n");
  }

  // Summary table
  const headerRow = [
    "Scenario".padEnd(20),
    "Total".padStart(12),
    "API Time".padStart(12),
    "CLI Overhead".padStart(14),
  ].join("");
  console.log("SUMMARY");
  console.log(lightRule);
  console.log(headerRow);
  console.log(lightRule);
  for (const run of results) {
    const apiTotal = sumApiMs(run);
    const row = [
      run.scenario.padEnd(20),
      formatMs(run.totalMs).padStart(12),
      formatMs(apiTotal).padStart(12),
      formatMs(run.totalMs - apiTotal).padStart(14),
    ].join("");
    console.log(row);
  }
  console.log(lightRule);
}
/**
 * CLI entry point.
 *
 * Recognized flags:
 *   --scenario <name>    run only the scenario with this name
 *   --iterations <n>     repeat the selected scenarios n times (default 1)
 *
 * Exits with code 1 when LETTA_API_KEY is missing, when the scenario name is
 * unknown, or when the iteration count is not a positive integer. Scenarios
 * are run sequentially, and the combined report is printed once all runs
 * have finished.
 */
async function main(): Promise<void> {
  // Parse args
  const args = process.argv.slice(2);
  let scenarioFilter: string | null = null;
  let iterations = 1;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === "--scenario" && args[i + 1]) {
      scenarioFilter = args[++i]!;
    } else if (args[i] === "--iterations" && args[i + 1]) {
      const rawIterations = args[++i]!;
      iterations = parseInt(rawIterations, 10);
      // A NaN, zero or negative count would skip the run loop entirely and
      // print an empty report, so reject it up front.
      if (!Number.isInteger(iterations) || iterations < 1) {
        console.error(
          `Error: --iterations must be a positive integer, got "${rawIterations}"`,
        );
        process.exit(1);
      }
    }
  }

  // Check prereqs
  if (!process.env.LETTA_API_KEY) {
    console.error("Error: LETTA_API_KEY environment variable is required");
    process.exit(1);
  }

  // Filter scenarios
  const scenariosToRun = scenarioFilter
    ? SCENARIOS.filter((s) => s.name === scenarioFilter)
    : SCENARIOS;
  if (scenariosToRun.length === 0) {
    console.error(`Error: Unknown scenario "${scenarioFilter}"`);
    console.error(`Available scenarios: ${SCENARIOS.map((s) => s.name).join(", ")}`);
    process.exit(1);
  }

  console.log("Running latency benchmarks...");
  console.log(`Scenarios: ${scenariosToRun.map((s) => s.name).join(", ")}`);
  console.log(`Iterations: ${iterations}`);
  console.log("");

  const allResults: BenchmarkResult[] = [];
  for (let iter = 0; iter < iterations; iter++) {
    if (iterations > 1) {
      console.log(`\n--- Iteration ${iter + 1} of ${iterations} ---`);
    }
    // Runs are deliberately sequential so they do not compete with each
    // other and skew the measured latencies.
    for (const scenario of scenariosToRun) {
      console.log(`Running: ${scenario.name}...`);
      const result = await runBenchmark(scenario);
      allResults.push(result);
      if (result.exitCode !== 0) {
        console.warn(` Warning: ${scenario.name} exited with code ${result.exitCode}`);
      } else {
        console.log(` Completed in ${formatMs(result.totalMs)}`);
      }
    }
  }
  printResults(allResults);
}
// Start the benchmark; any rejection from main() is logged and turned into a
// non-zero exit status.
main().then(undefined, (failure) => {
  console.error(failure);
  process.exit(1);
});