From 5f5252e5a2b0c13d3ada7b7173d0f9eca42630d1 Mon Sep 17 00:00:00 2001
From: cthomas
Date: Wed, 28 Jan 2026 14:41:27 -0800
Subject: [PATCH] fix: don't retry llm_error for 4xx client errors (#725)

Co-authored-by: Letta
---
 src/cli/App.tsx | 14 +++++++++++---
 src/headless.ts | 10 +++++++++-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/cli/App.tsx b/src/cli/App.tsx
index 311d02d..1f6e622 100644
--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -375,12 +375,17 @@ async function isRetriableError(
 
   // Check for llm_error at top level or nested (handles error.error nesting)
   const errorType = metaError?.error_type ?? metaError?.error?.error_type;
-  if (errorType === "llm_error") return true;
+  const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
+
+  // Don't retry 4xx client errors (validation, auth, malformed requests)
+  // These are not transient and won't succeed on retry
+  const is4xxError = /Error code: 4\d{2}/.test(detail);
+
+  if (errorType === "llm_error" && !is4xxError) return true;
 
   // Fallback: detect LLM provider errors from detail even if misclassified
   // This handles edge cases where streaming errors weren't properly converted to LLMError
   // Patterns are derived from handle_llm_error() message formats in the backend
-  const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
   const llmProviderPatterns = [
     "Anthropic API error", // anthropic_client.py:759
     "OpenAI API error", // openai_client.py:1034
@@ -390,7 +395,10 @@ async function isRetriableError(
     "Network error", // Transient network failures during streaming
     "Connection error during Anthropic streaming", // Peer disconnections, incomplete chunked reads
   ];
-  if (llmProviderPatterns.some((pattern) => detail.includes(pattern))) {
+  if (
+    llmProviderPatterns.some((pattern) => detail.includes(pattern)) &&
+    !is4xxError
+  ) {
     return true;
   }
 
diff --git a/src/headless.ts b/src/headless.ts
index eb5bd83..3452196 100644
--- a/src/headless.ts
+++ b/src/headless.ts
@@ -1507,6 +1507,11 @@ export async function handleHeadlessCommand(
   // Fallback: detect LLM provider errors from detail even if misclassified
   // Patterns are derived from handle_llm_error() message formats in the backend
   const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
+
+  // Don't retry 4xx client errors (validation, auth, malformed requests)
+  // These are not transient and won't succeed on retry
+  const is4xxError = /Error code: 4\d{2}/.test(detail);
+
   const llmProviderPatterns = [
     "Anthropic API error", // anthropic_client.py:759
     "OpenAI API error", // openai_client.py:1034
@@ -1520,7 +1525,10 @@ export async function handleHeadlessCommand(
   const isLlmErrorFromDetail = llmProviderPatterns.some((pattern) =>
     detail.includes(pattern),
   );
-  if (errorType === "llm_error" || isLlmErrorFromDetail) {
+  if (
+    (errorType === "llm_error" || isLlmErrorFromDetail) &&
+    !is4xxError
+  ) {
     const attempt = llmApiErrorRetries + 1;
     const baseDelayMs = 1000;
     const delayMs = baseDelayMs * 2 ** (attempt - 1);
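
Reviewer note: the snippet below is a minimal, self-contained sketch of the retry gate these hunks introduce, for sanity-checking the regex against representative detail strings. The classifyRetry helper and the sample messages are hypothetical illustrations, not code from this repo; only the /Error code: 4\d{2}/ test and the pattern list mirror the diff above.

// Sketch of the classification added in this patch (names are hypothetical).
const llmProviderPatterns = [
  "Anthropic API error",
  "OpenAI API error",
  "Network error",
  "Connection error during Anthropic streaming",
];

function classifyRetry(errorType: string | undefined, detail: string): boolean {
  // 4xx client errors (validation, auth, malformed requests) are permanent;
  // the regex matches any "Error code: 4xx" substring, 429 included.
  const is4xxError = /Error code: 4\d{2}/.test(detail);

  if (errorType === "llm_error" && !is4xxError) return true;

  // Fallback: pattern-match the detail when the error was misclassified
  return llmProviderPatterns.some((p) => detail.includes(p)) && !is4xxError;
}

// Retried: transient provider failure surfaced as llm_error
console.log(classifyRetry("llm_error", "Anthropic API error: Overloaded")); // true

// Not retried: 4xx client error, retrying cannot succeed
console.log(classifyRetry("llm_error", "OpenAI API error: Error code: 400 - invalid request")); // false

// Retried via the detail fallback even though error_type was lost
console.log(classifyRetry(undefined, "Connection error during Anthropic streaming")); // true

For the headless path, the delay arithmetic in the last hunk is plain exponential backoff from the 1000 ms base: attempt 1 waits 1 s, attempt 2 waits 2 s, attempt 3 waits 4 s (delayMs = 1000 * 2 ** (attempt - 1)).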