From 423215fd565c37c031f408f6547811438440bdff Mon Sep 17 00:00:00 2001 From: jnjpng Date: Tue, 24 Feb 2026 22:20:46 -0800 Subject: [PATCH] feat: add telemetry tracking for retry-inducing errors (#1131) Co-authored-by: Letta --- src/cli/App.tsx | 33 ++++++++++++++++++++++++++++++--- src/cli/helpers/stream.ts | 10 +++++----- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/cli/App.tsx b/src/cli/App.tsx index bbafdeb..7690c84 100644 --- a/src/cli/App.tsx +++ b/src/cli/App.tsx @@ -3491,6 +3491,20 @@ export default function App({ : null; const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1); + // Log the error that triggered the retry + telemetry.trackError( + "retry_pre_stream_transient", + errorDetail || "Pre-stream transient error", + "pre_stream_retry", + { + httpStatus: + preStreamError instanceof APIError + ? preStreamError.status + : undefined, + modelId: currentModelId || undefined, + }, + ); + const statusId = uid("status"); buffersRef.current.byId.set(statusId, { kind: "status", @@ -4674,6 +4688,19 @@ export default function App({ const attempt = llmApiErrorRetriesRef.current; const delayMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s + // Log the error that triggered the retry + telemetry.trackError( + "retry_post_stream_error", + detailFromRun || + fallbackError || + `Stream stopped: ${stopReasonToHandle}`, + "post_stream_retry", + { + modelId: currentModelId || undefined, + runId: lastRunId ?? undefined, + }, + ); + // Show subtle grey status message const statusId = uid("status"); const statusLines = [getRetryStatusMessage(detailFromRun)]; @@ -4740,9 +4767,9 @@ export default function App({ }, ); - // If we have a client-side stream error (e.g., JSON parse error), show it directly - // Fallback error: no run_id available, show whatever error message we have - if (fallbackError) { + // If we have a client-side stream error with no run_id, show it directly. + // When lastRunId is present, prefer the richer server-side error details below. + if (fallbackError && !lastRunId) { setNetworkPhase("error"); const errorMsg = lastRunId ? `Stream error: ${fallbackError}\n(run_id: ${lastRunId})` diff --git a/src/cli/helpers/stream.ts b/src/cli/helpers/stream.ts index 02cdb96..2dce382 100644 --- a/src/cli/helpers/stream.ts +++ b/src/cli/helpers/stream.ts @@ -223,11 +223,11 @@ export async function drainStream( } } - // Only set fallbackError if we don't have a run_id - if we have a run_id, - // App.tsx will fetch detailed error info from the server which is better - if (!streamProcessor.lastRunId) { - fallbackError = errorMessage; - } + // Always capture the client-side error message. Even when we have a run_id + // (and App.tsx can fetch server-side detail), the client-side exception is + // valuable for telemetry — e.g. stream disconnections where the server run + // is still in-progress and has no error metadata yet. + fallbackError = errorMessage; // Preserve a stop reason already parsed from stream chunks (e.g. llm_api_error) // and only fall back to generic "error" when none is available.