fix: don't retry llm_error for 4xx client errors (#725)

Co-authored-by: Letta <noreply@letta.com>
2026-01-28 14:41:27 -08:00
parent 03db8545ad
commit 5f5252e5a2
2 changed files with 20 additions and 4 deletions
--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -375,12 +375,17 @@ async function isRetriableError(

      // Check for llm_error at top level or nested (handles error.error nesting)
      const errorType = metaError?.error_type ?? metaError?.error?.error_type;
-      if (errorType === "llm_error") return true;
+      const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
+
+      // Don't retry 4xx client errors (validation, auth, malformed requests); these are
+      // generally not transient. NOTE(review): this also matches 408/429 — confirm intended.
+      const is4xxError = /Error code: 4\d{2}/.test(detail);
+
+      if (errorType === "llm_error" && !is4xxError) return true;

      // Fallback: detect LLM provider errors from detail even if misclassified
      // This handles edge cases where streaming errors weren't properly converted to LLMError
      // Patterns are derived from handle_llm_error() message formats in the backend
-      const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
      const llmProviderPatterns = [
        "Anthropic API error", // anthropic_client.py:759
        "OpenAI API error", // openai_client.py:1034
@@ -390,7 +395,10 @@ async function isRetriableError(
        "Network error", // Transient network failures during streaming
        "Connection error during Anthropic streaming", // Peer disconnections, incomplete chunked reads
      ];
-      if (llmProviderPatterns.some((pattern) => detail.includes(pattern))) {
+      if (
+        llmProviderPatterns.some((pattern) => detail.includes(pattern)) &&
+        !is4xxError
+      ) {
        return true;
      }

--- a/src/headless.ts
+++ b/src/headless.ts
@@ -1507,6 +1507,11 @@ export async function handleHeadlessCommand(
          // Fallback: detect LLM provider errors from detail even if misclassified
          // Patterns are derived from handle_llm_error() message formats in the backend
          const detail = metaError?.detail ?? metaError?.error?.detail ?? "";
+
+          // Don't retry 4xx client errors (validation, auth, malformed requests); these are
+          // generally not transient. NOTE(review): this also matches 408/429 — confirm intended.
+          const is4xxError = /Error code: 4\d{2}/.test(detail);
+
          const llmProviderPatterns = [
            "Anthropic API error", // anthropic_client.py:759
            "OpenAI API error", // openai_client.py:1034
@@ -1520,7 +1525,10 @@ export async function handleHeadlessCommand(
            detail.includes(pattern),
          );

-          if (errorType === "llm_error" || isLlmErrorFromDetail) {
+          if (
+            (errorType === "llm_error" || isLlmErrorFromDetail) &&
+            !is4xxError
+          ) {
            const attempt = llmApiErrorRetries + 1;
            const baseDelayMs = 1000;
            const delayMs = baseDelayMs * 2 ** (attempt - 1);