feat: add LETTA_DEBUG_TIMINGS env var for request timing diagnostics (#502)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-08 22:56:56 -08:00
committed by GitHub
parent 2610b4594f
commit 1275bce752
6 changed files with 160 additions and 31 deletions

View File

@@ -3,7 +3,9 @@ import type { Stream } from "@letta-ai/letta-client/core/streaming";
import type { LettaStreamingResponse } from "@letta-ai/letta-client/resources/agents/messages";
import type { StopReasonType } from "@letta-ai/letta-client/resources/runs/runs";
import { getClient } from "../../agent/client";
import { STREAM_REQUEST_START_TIME } from "../../agent/message";
import { debugWarn } from "../../utils/debug";
import { formatDuration, logTiming } from "../../utils/timing";
import {
type createBuffers,
@@ -37,6 +39,12 @@ export async function drainStream(
): Promise<DrainResult> {
const startTime = performance.now();
// Extract request start time for TTFT logging (attached by sendMessageStream)
const requestStartTime = (
stream as unknown as Record<symbol, number | undefined>
)[STREAM_REQUEST_START_TIME];
let hasLoggedTTFT = false;
let _approvalRequestId: string | null = null;
const pendingApprovals = new Map<
string,
@@ -129,6 +137,18 @@ export async function drainStream(
queueMicrotask(() => onFirstMessage());
}
// Log TTFT (time-to-first-token) when first content chunk arrives
if (
!hasLoggedTTFT &&
requestStartTime !== undefined &&
(chunk.message_type === "reasoning_message" ||
chunk.message_type === "assistant_message")
) {
hasLoggedTTFT = true;
const ttft = performance.now() - requestStartTime;
logTiming(`TTFT: ${formatDuration(ttft)} (from POST to first content)`);
}
// Remove tool from pending approvals when it completes (server-side execution finished)
// This means the tool was executed server-side and doesn't need approval
if (chunk.message_type === "tool_return_message") {