#!/usr/bin/env bun
/**
 * Latency Benchmark Script for Letta Code CLI
 *
 * Runs headless mode with LETTA_DEBUG_TIMINGS=1 and parses the timing
 * output to measure the latency breakdown at different stages.
 *
 * Usage:
 *   bun scripts/latency-benchmark.ts
 *   bun scripts/latency-benchmark.ts --scenario fresh-agent
 *   bun scripts/latency-benchmark.ts --iterations 5
 *
 * Requires: LETTA_API_KEY environment variable
 */
|
|
|
|
import { spawn } from "node:child_process";
|
|
|
|
/** One HTTP request parsed from a `[timing]` log line. */
interface ApiCall {
  /** HTTP verb as it appeared in the log line. */
  method: string;
  /** Request path as it appeared in the log line. */
  path: string;
  /** Logged request duration, in milliseconds. */
  durationMs: number;
  /** HTTP status code; absent when the log line carried no status. */
  status?: number;
}
|
|
|
|
/** A named point in time parsed from a `[timing] MILESTONE` log line. */
interface Milestone {
  /** Milestone identifier as logged. */
  name: string;
  /** Logged offset of the milestone, in milliseconds. */
  offsetMs: number;
}
|
|
|
|
/** Everything measured for one run of one scenario. */
interface BenchmarkResult {
  /** Name of the scenario that produced this result. */
  scenario: string;
  /** Wall-clock time of the whole run, in milliseconds. */
  totalMs: number;
  /** Milestones parsed from the run's timing logs, in log order. */
  milestones: Milestone[];
  /** API calls parsed from the run's timing logs, in log order. */
  apiCalls: ApiCall[];
  /** Exit code of the benchmarked process. */
  exitCode: number;
}
|
|
|
|
/** Static definition of one benchmark scenario. */
interface ScenarioConfig {
  /** Unique scenario name; this is what `--scenario` is matched against. */
  name: string;
  /** Human-readable summary shown in the results report. */
  description: string;
  /** Command-line arguments passed to the CLI for this scenario. */
  args: string[];
}
|
|
|
|
// Benchmark scenarios. Every scenario passes the same CLI flags
// (-p <prompt>, --yolo, --output-format json); they differ only in the prompt
// and in the agent-selection flag (--new-agent vs. --continue), so the table
// below lists just those parts as [name, description, prompt, agentFlag].
const SCENARIOS: ScenarioConfig[] = (
  [
    [
      "fresh-agent",
      "Create new agent and send simple prompt",
      "What is 2+2? Reply with just the number.",
      "--new-agent",
    ],
    [
      "resume-agent",
      "Resume last agent and send simple prompt",
      "What is 3+3? Reply with just the number.",
      "--continue",
    ],
    [
      "minimal-math",
      "Simple math question (no tool calls)",
      "What is 5+5? Reply with just the number.",
      "--continue",
    ],
  ] as const
).map(([name, description, prompt, agentFlag]) => ({
  name,
  description,
  args: ["-p", prompt, agentFlag, "--yolo", "--output-format", "json"],
}));
|
|
|
|
/**
 * Extract milestone and API-call timings from captured stderr text.
 *
 * The input is scanned line by line. A line is first tested against the
 * milestone format; only if that fails is it tested against the API-call
 * format. Lines matching neither are ignored. Values logged in seconds are
 * converted so that every returned number is in milliseconds.
 *
 * @param stderr - Raw stderr text containing `[timing]` lines.
 * @returns Parsed milestones and API calls, each in the order they were logged.
 */
function parseTimingLogs(stderr: string): {
  milestones: Milestone[];
  apiCalls: ApiCall[];
} {
  // e.g. "[timing] MILESTONE CLI_START at +0ms (12:34:56.789)"
  const milestonePattern =
    /\[timing\] MILESTONE (\S+) at \+(\d+(?:\.\d+)?)(ms|s)/;
  // e.g. "[timing] GET /v1/agents/... -> 245ms (status: 200)"
  const apiCallPattern =
    /\[timing\] (GET|POST|PUT|DELETE|PATCH) (\S+) -> (\d+(?:\.\d+)?)(ms|s)(?: \(status: (\d+)\))?/;

  // Values are logged with an "ms" or "s" suffix; normalize to milliseconds.
  const toMs = (amount: string, unit: string): number => {
    const value = parseFloat(amount);
    return unit === "s" ? value * 1000 : value;
  };

  const milestones: Milestone[] = [];
  const apiCalls: ApiCall[] = [];

  stderr.split("\n").forEach((entry) => {
    const milestoneHit = milestonePattern.exec(entry);
    if (milestoneHit !== null) {
      milestones.push({
        name: milestoneHit[1]!,
        offsetMs: toMs(milestoneHit[2]!, milestoneHit[3]!),
      });
      return;
    }

    const apiHit = apiCallPattern.exec(entry);
    if (apiHit === null) {
      return;
    }

    const rawStatus = apiHit[5];
    apiCalls.push({
      method: apiHit[1]!,
      path: apiHit[2]!,
      durationMs: toMs(apiHit[3]!, apiHit[4]!),
      // The status suffix is optional in the log format.
      status: rawStatus === undefined ? undefined : parseInt(rawStatus, 10),
    });
  });

  return { milestones, apiCalls };
}
|
|
|
|
/**
 * Run a single benchmark scenario in a child process and collect its timings.
 *
 * Spawns `bun run dev` with the scenario's arguments and
 * `LETTA_DEBUG_TIMINGS=1` added to the environment, captures stderr, and
 * parses it with `parseTimingLogs` once the process has closed.
 *
 * @param scenario - Scenario whose `args` are passed to the CLI.
 * @param timeoutMs - How long to wait before sending SIGTERM to the child.
 *   Defaults to 2 minutes.
 * @returns The measured result. A non-zero exit still resolves; `exitCode`
 *   is 1 when the process reported no exit code (e.g. it was killed by a
 *   signal).
 * @throws Rejects with the spawn error if the child process could not be
 *   started.
 */
async function runBenchmark(
  scenario: ScenarioConfig,
  timeoutMs = 120000,
): Promise<BenchmarkResult> {
  const start = performance.now();

  return new Promise((resolve, reject) => {
    const proc = spawn("bun", ["run", "dev", ...scenario.args], {
      env: { ...process.env, LETTA_DEBUG_TIMINGS: "1" },
      stdio: ["pipe", "pipe", "pipe"],
    });

    // Terminate a run that exceeds the timeout. The timer is cleared as soon
    // as the child finishes; otherwise it would keep the event loop alive
    // (and this script running) until it fired.
    const killTimer = setTimeout(() => {
      proc.kill("SIGTERM");
    }, timeoutMs);

    // stdout is not inspected, but it must still be drained so the child
    // cannot block on a full pipe.
    proc.stdout.resume();

    // Decode as UTF-8 at the stream level so multi-byte characters split
    // across chunks are reassembled correctly.
    let stderr = "";
    proc.stderr.setEncoding("utf8");
    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    // Without an 'error' listener a spawn failure (e.g. `bun` not found)
    // would be thrown as an unhandled event.
    proc.on("error", (err: Error) => {
      clearTimeout(killTimer);
      reject(err);
    });

    proc.on("close", (code) => {
      clearTimeout(killTimer);
      const totalMs = performance.now() - start;
      const { milestones, apiCalls } = parseTimingLogs(stderr);

      resolve({
        scenario: scenario.name,
        totalMs,
        milestones,
        apiCalls,
        exitCode: code ?? 1,
      });
    });
  });
}
|
|
|
|
/**
 * Format a duration for display.
 *
 * Durations that round to less than one second (in magnitude) are shown as
 * whole milliseconds, e.g. `245ms`; everything else is shown in seconds with
 * two decimals, e.g. `1.50s`.
 *
 * @param ms - Duration in milliseconds; may be negative.
 * @returns The formatted duration.
 */
function formatMs(ms: number): string {
  // Round before choosing the unit so 999.6 becomes "1.00s", not "1000ms".
  const rounded = Math.round(ms);
  // Compare magnitudes so large negative values also switch to seconds.
  if (Math.abs(rounded) < 1000) return `${rounded}ms`;
  return `${(ms / 1000).toFixed(2)}s`;
}
|
|
|
|
/** Matches a canonical 8-4-4-4-12 hexadecimal UUID, in either letter case. */
const UUID_PATTERN =
  /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/gi;

/** Replace every UUID in an API path with `{id}` so calls to one endpoint group together. */
function normalizeApiPath(path: string): string {
  return path.replace(UUID_PATTERN, "{id}");
}

/** Sum the logged durations of the given API calls, in milliseconds. */
function sumApiCallMs(calls: ApiCall[]): number {
  return calls.reduce((sum, call) => sum + call.durationMs, 0);
}

/**
 * Print benchmark results to stdout.
 *
 * For each result this prints the scenario header, the milestone timeline
 * (with the delta to the previous milestone), and the API calls grouped by
 * method and normalized path, sorted by total time descending. A summary
 * table with one row per result follows.
 *
 * "CLI overhead" is wall time minus the sum of API call durations.
 *
 * @param results - Results to print, in the order they were collected.
 */
function printResults(results: BenchmarkResult[]): void {
  const heavyRule = "=".repeat(70);
  const lightRule = "-".repeat(70);

  console.log("\n" + heavyRule);
  console.log("LATENCY BENCHMARK RESULTS");
  console.log(heavyRule + "\n");

  for (const result of results) {
    const scenario = SCENARIOS.find((s) => s.name === result.scenario);
    console.log(`Scenario: ${result.scenario}`);
    console.log(` ${scenario?.description ?? ""}`);
    console.log(` Exit code: ${result.exitCode}`);
    console.log(` Total wall time: ${formatMs(result.totalMs)}`);
    console.log("");

    // Print milestones
    if (result.milestones.length > 0) {
      console.log(" Milestones:");
      let prevMs = 0;
      for (const [index, milestone] of result.milestones.entries()) {
        // Only the first milestone has no predecessor. Decide by position,
        // not by `prevMs === 0`, so a first milestone at +0ms does not hide
        // the delta of the one after it.
        const deltaStr =
          index === 0 ? "" : ` (+${formatMs(milestone.offsetMs - prevMs)})`;
        console.log(
          ` +${formatMs(milestone.offsetMs).padStart(8)} ${milestone.name}${deltaStr}`,
        );
        prevMs = milestone.offsetMs;
      }
      console.log("");
    }

    // Print API calls summary
    if (result.apiCalls.length > 0) {
      console.log(" API Calls:");
      const totalApiMs = sumApiCallMs(result.apiCalls);

      // Group by "<METHOD> <normalized path>", keeping first-seen order.
      const grouped = new Map<string, { count: number; totalMs: number }>();
      for (const call of result.apiCalls) {
        const key = `${call.method} ${normalizeApiPath(call.path)}`;
        const stats = grouped.get(key) ?? { count: 0, totalMs: 0 };
        stats.count++;
        stats.totalMs += call.durationMs;
        grouped.set(key, stats);
      }

      // Sort by total time, slowest endpoint first.
      const sorted = [...grouped.entries()].sort(
        (a, b) => b[1].totalMs - a[1].totalMs,
      );

      for (const [endpoint, stats] of sorted) {
        const countStr = stats.count > 1 ? ` (x${stats.count})` : "";
        console.log(
          ` ${formatMs(stats.totalMs).padStart(8)} ${endpoint}${countStr}`,
        );
      }

      console.log(` ${"─".repeat(50)}`);
      console.log(` ${formatMs(totalApiMs).padStart(8)} Total API time`);
      console.log(
        ` ${formatMs(result.totalMs - totalApiMs).padStart(8)} CLI overhead (non-API)`,
      );
    }

    console.log("\n" + lightRule + "\n");
  }

  // Summary table
  console.log("SUMMARY");
  console.log(lightRule);
  console.log(
    "Scenario".padEnd(20) +
      "Total".padStart(12) +
      "API Time".padStart(12) +
      "CLI Overhead".padStart(14),
  );
  console.log(lightRule);

  for (const result of results) {
    const totalApiMs = sumApiCallMs(result.apiCalls);
    const cliOverhead = result.totalMs - totalApiMs;
    console.log(
      result.scenario.padEnd(20) +
        formatMs(result.totalMs).padStart(12) +
        formatMs(totalApiMs).padStart(12) +
        formatMs(cliOverhead).padStart(14),
    );
  }
  console.log(lightRule);
}
|
|
|
|
/**
 * Script entry point: parse the command line, run the selected scenarios the
 * requested number of times, and print the collected results.
 *
 * Recognized flags:
 * - `--scenario <name>`: run only the scenario with that name.
 * - `--iterations <n>`: run the scenario list `n` times (default 1).
 *
 * Exits with code 1 when `--iterations` is not a positive integer, when
 * `LETTA_API_KEY` is not set, or when `--scenario` names an unknown scenario.
 * Scenarios run sequentially; a non-zero exit from one scenario only produces
 * a warning.
 */
async function main(): Promise<void> {
  // Parse args
  const args = process.argv.slice(2);
  let scenarioFilter: string | null = null;
  let iterations = 1;

  for (let i = 0; i < args.length; i++) {
    if (args[i] === "--scenario" && args[i + 1]) {
      scenarioFilter = args[++i]!;
    } else if (args[i] === "--iterations" && args[i + 1]) {
      const rawIterations = args[++i]!;
      iterations = Number(rawIterations);
      // A non-numeric, fractional, or non-positive count would make the run
      // loop execute zero times and print an empty report; fail instead.
      if (!Number.isInteger(iterations) || iterations < 1) {
        console.error(
          `Error: --iterations must be a positive integer (got "${rawIterations}")`,
        );
        process.exit(1);
      }
    }
  }

  // Check prereqs
  if (!process.env.LETTA_API_KEY) {
    console.error("Error: LETTA_API_KEY environment variable is required");
    process.exit(1);
  }

  // Filter scenarios
  const scenariosToRun = scenarioFilter
    ? SCENARIOS.filter((s) => s.name === scenarioFilter)
    : SCENARIOS;

  if (scenariosToRun.length === 0) {
    console.error(`Error: Unknown scenario "${scenarioFilter}"`);
    console.error(`Available scenarios: ${SCENARIOS.map((s) => s.name).join(", ")}`);
    process.exit(1);
  }

  console.log("Running latency benchmarks...");
  console.log(`Scenarios: ${scenariosToRun.map((s) => s.name).join(", ")}`);
  console.log(`Iterations: ${iterations}`);
  console.log("");

  const allResults: BenchmarkResult[] = [];

  for (let iter = 0; iter < iterations; iter++) {
    if (iterations > 1) {
      console.log(`\n--- Iteration ${iter + 1} of ${iterations} ---`);
    }

    // Run scenarios one at a time so they do not compete with each other.
    for (const scenario of scenariosToRun) {
      console.log(`Running: ${scenario.name}...`);
      const result = await runBenchmark(scenario);
      allResults.push(result);

      if (result.exitCode !== 0) {
        console.warn(` Warning: ${scenario.name} exited with code ${result.exitCode}`);
      } else {
        console.log(` Completed in ${formatMs(result.totalMs)}`);
      }
    }
  }

  printResults(allResults);
}
|
|
|
|
/** Log an unhandled failure from `main` and terminate with exit code 1. */
function reportFatalError(error: unknown): never {
  console.error(error);
  process.exit(1);
}

// Kick off the benchmark run.
main().catch(reportFatalError);
|