feat: add telemetry tracking for retry-inducing errors (#1131)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
jnjpng
2026-02-24 22:20:46 -08:00
committed by GitHub
parent 35920fbc91
commit 423215fd56
2 changed files with 35 additions and 8 deletions

View File

@@ -3491,6 +3491,20 @@ export default function App({
: null;
const delayMs = retryAfterMs ?? 1000 * 2 ** (attempt - 1);
// Log the error that triggered the retry
telemetry.trackError(
"retry_pre_stream_transient",
errorDetail || "Pre-stream transient error",
"pre_stream_retry",
{
httpStatus:
preStreamError instanceof APIError
? preStreamError.status
: undefined,
modelId: currentModelId || undefined,
},
);
const statusId = uid("status");
buffersRef.current.byId.set(statusId, {
kind: "status",
@@ -4674,6 +4688,19 @@ export default function App({
const attempt = llmApiErrorRetriesRef.current;
const delayMs = 1000 * 2 ** (attempt - 1); // 1s, 2s, 4s
// Log the error that triggered the retry
telemetry.trackError(
"retry_post_stream_error",
detailFromRun ||
fallbackError ||
`Stream stopped: ${stopReasonToHandle}`,
"post_stream_retry",
{
modelId: currentModelId || undefined,
runId: lastRunId ?? undefined,
},
);
// Show subtle grey status message
const statusId = uid("status");
const statusLines = [getRetryStatusMessage(detailFromRun)];
@@ -4740,9 +4767,9 @@ export default function App({
},
);
// If we have a client-side stream error (e.g., JSON parse error), show it directly
// Fallback error: no run_id available, show whatever error message we have
if (fallbackError) {
// If we have a client-side stream error with no run_id, show it directly.
// When lastRunId is present, prefer the richer server-side error details below.
if (fallbackError && !lastRunId) {
setNetworkPhase("error");
const errorMsg = lastRunId
? `Stream error: ${fallbackError}\n(run_id: ${lastRunId})`

View File

@@ -223,11 +223,11 @@ export async function drainStream(
}
}
// Only set fallbackError if we don't have a run_id - if we have a run_id,
// App.tsx will fetch detailed error info from the server which is better
if (!streamProcessor.lastRunId) {
fallbackError = errorMessage;
}
// Always capture the client-side error message. Even when we have a run_id
// (and App.tsx can fetch server-side detail), the client-side exception is
// valuable for telemetry — e.g. stream disconnections where the server run
// is still in-progress and has no error metadata yet.
fallbackError = errorMessage;
// Preserve a stop reason already parsed from stream chunks (e.g. llm_api_error)
// and only fall back to generic "error" when none is available.