fix: detect and recover from stuck 409 PENDING_APPROVAL errors (#478)

2026-03-03 15:37:36 -08:00
parent 94b7eea127
commit cb563d617b
6 changed files with 238 additions and 7 deletions
--- a/src/core/bot.ts
+++ b/src/core/bot.ts
@@ -977,7 +977,7 @@ export class LettaBot implements AgentSession {
      let sentAnyMessage = false;
      let receivedAnyData = false;
      let sawNonAssistantSinceLastUuid = false;
-      let lastErrorDetail: { message: string; stopReason: string; apiError?: Record<string, unknown> } | null = null;
+      let lastErrorDetail: { message: string; stopReason: string; apiError?: Record<string, unknown>; isApprovalError?: boolean } | null = null;
      let retryInfo: { attempt: number; maxAttempts: number; reason: string } | null = null;
      let reasoningBuffer = '';
      const msgTypeCounts: Record<string, number> = {};
@@ -1301,7 +1301,11 @@ export class LettaBot implements AgentSession {
                (!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
              const enriched = await getLatestRunError(this.store.agentId, retryConvId);
              if (enriched) {
-                lastErrorDetail = { message: enriched.message, stopReason: enriched.stopReason };
+                lastErrorDetail = {
+                  message: enriched.message,
+                  stopReason: enriched.stopReason,
+                  isApprovalError: enriched.isApprovalError,
+                };
              }
            }

@@ -1311,8 +1315,12 @@ export class LettaBot implements AgentSession {

            // For approval-specific conflicts, attempt recovery directly (don't
            // enter the generic retry path which would just get another CONFLICT).
-            const isApprovalConflict = isConflictError &&
-              lastErrorDetail?.message?.toLowerCase().includes('waiting for approval');
+            // Also accept isApprovalError from run metadata on its own -- it does
+            // not require isConflictError or the expected message strings (e.g.
+            // when the type=error event was lost and enrichment found a stuck run).
+            const isApprovalConflict = (isConflictError &&
+              lastErrorDetail?.message?.toLowerCase().includes('waiting for approval')) ||
+              lastErrorDetail?.isApprovalError === true;
            if (isApprovalConflict && !retried && this.store.agentId) {
              if (retryConvId) {
                log.info('Approval conflict detected -- attempting targeted recovery...');
@@ -1327,6 +1335,8 @@ export class LettaBot implements AgentSession {
                  return this.processMessage(msg, adapter, true);
                }
                log.warn(`Approval recovery failed: ${convResult.details}`);
+                log.info('Retrying once with a fresh session after approval conflict...');
+                return this.processMessage(msg, adapter, true);
              }
            }

@@ -1573,8 +1583,9 @@ export class LettaBot implements AgentSession {
        try {
          let response = '';
          let sawStaleDuplicateResult = false;
+          let approvalRetryPending = false;
          let usedMessageCli = false;
-          let lastErrorDetail: { message: string; stopReason: string; apiError?: Record<string, unknown> } | undefined;
+          let lastErrorDetail: { message: string; stopReason: string; apiError?: Record<string, unknown>; isApprovalError?: boolean } | undefined;
          for await (const msg of stream()) {
            if (msg.type === 'tool_call') {
              this.sessionManager.syncTodoToolCall(msg);
@@ -1608,9 +1619,23 @@ export class LettaBot implements AgentSession {
                    (!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
                  const enriched = await getLatestRunError(this.store.agentId, convId);
                  if (enriched) {
-                    lastErrorDetail = { message: enriched.message, stopReason: enriched.stopReason };
+                    lastErrorDetail = {
+                      message: enriched.message,
+                      stopReason: enriched.stopReason,
+                      isApprovalError: enriched.isApprovalError,
+                    };
                  }
                }
+                const isApprovalIssue = lastErrorDetail?.isApprovalError === true
+                  || ((lastErrorDetail?.message?.toLowerCase().includes('conflict') || false)
+                  && (lastErrorDetail?.message?.toLowerCase().includes('waiting for approval') || false));
+                if (isApprovalIssue && !retried) {
+                  log.info('sendToAgent: approval issue detected -- retrying once with fresh session...');
+                  this.sessionManager.invalidateSession(convKey);
+                  retried = true;
+                  approvalRetryPending = true;
+                  break;
+                }
                const errMsg = lastErrorDetail?.message || msg.error || 'error';
                const errReason = lastErrorDetail?.stopReason || msg.error || 'error';
                const detail = typeof msg.result === 'string' ? msg.result.trim() : '';
@@ -1620,6 +1645,10 @@ export class LettaBot implements AgentSession {
            }
          }

+          if (approvalRetryPending) {
+            continue;
+          }
+
          if (sawStaleDuplicateResult) {
            this.sessionManager.invalidateSession(convKey);
            if (retried) {
--- a/src/core/errors.test.ts
+++ b/src/core/errors.test.ts
@@ -80,6 +80,34 @@ describe('formatApiErrorForUser', () => {
      .toContain('server error');
  });

+  it('maps approval-specific 409 conflict to stuck-approval guidance', () => {
+    const msg = formatApiErrorForUser({
+      message: 'CONFLICT: Cannot send a new message: The agent is waiting for approval on a tool call.',
+      stopReason: 'error',
+    });
+    expect(msg).toContain('stuck tool approval');
+    expect(msg).toContain('reset-conversation');
+    // Should NOT match the generic conflict message
+    expect(msg).not.toContain('Another request is still processing');
+  });
+
+  it('maps pending_approval variant to stuck-approval guidance', () => {
+    const msg = formatApiErrorForUser({
+      message: '409 pending_approval: run is waiting for approval',
+      stopReason: 'error',
+    });
+    expect(msg).toContain('stuck tool approval');
+  });
+
+  it('maps requires_approval stop_reason enrichment message to stuck-approval guidance', () => {
+    const msg = formatApiErrorForUser({
+      message: 'Run run-stuck stuck waiting for tool approval (status=created)',
+      stopReason: 'requires_approval',
+    });
+    expect(msg).toContain('stuck tool approval');
+    expect(msg).toContain('reset-conversation');
+  });
+
  it('falls back to sanitized original message when no mapping matches', () => {
    const msg = formatApiErrorForUser({
      message: `${'x'.repeat(205)}.   `,
--- a/src/core/errors.ts
+++ b/src/core/errors.ts
@@ -65,6 +65,7 @@ export function isAgentMissingFromInitError(error: unknown): boolean {
 */
 export function formatApiErrorForUser(error: { message: string; stopReason: string; apiError?: Record<string, unknown> }): string {
  const msg = error.message.toLowerCase();
+  const stopReason = error.stopReason.toLowerCase();
  const apiError = error.apiError || {};
  const apiMsg = (typeof apiError.message === 'string' ? apiError.message : '').toLowerCase();
  const reasons: string[] = Array.isArray(apiError.reasons) ? apiError.reasons : [];
@@ -84,6 +85,22 @@ export function formatApiErrorForUser(error: { message: string; stopReason: stri
    return `(Rate limited${reasonStr}. Try again in a moment.)`;
  }

+  // 409 CONFLICT -- approval-specific (stuck tool approval blocking messages)
+  const hasApprovalSignal = stopReason === 'requires_approval'
+    || msg.includes('waiting for approval')
+    || msg.includes('pending_approval')
+    || msg.includes('stuck waiting for tool approval')
+    || apiMsg.includes('waiting for approval')
+    || apiMsg.includes('pending_approval');
+  const hasConflictSignal = msg.includes('conflict')
+    || msg.includes('409')
+    || apiMsg.includes('conflict')
+    || apiMsg.includes('409')
+    || stopReason === 'requires_approval';
+  if (hasApprovalSignal && hasConflictSignal) {
+    return '(A stuck tool approval is blocking this conversation. Run `lettabot reset-conversation` to clear it, or approve/deny the pending request at app.letta.com.)';
+  }
+
  // 409 CONFLICT (concurrent request on same conversation)
  if (msg.includes('conflict') || msg.includes('409') || msg.includes('another request is currently being processed')) {
    return '(Another request is still processing on this conversation. Wait a moment and try again.)';
--- a/src/core/sdk-session-contract.test.ts
+++ b/src/core/sdk-session-contract.test.ts
@@ -36,7 +36,7 @@ vi.mock('./system-prompt.js', () => ({
 }));

 import { createAgent, createSession, resumeSession } from '@letta-ai/letta-code-sdk';
-import { getLatestRunError } from '../tools/letta-api.js';
+import { getLatestRunError, recoverOrphanedConversationApproval } from '../tools/letta-api.js';
 import { LettaBot } from './bot.js';

 function deferred<T>() {
@@ -649,6 +649,111 @@ describe('SDK session contract', () => {
    );
  });

+  it('retries sendToAgent once after approval-stuck result error and succeeds', async () => {
+    let streamCall = 0;
+    const mockSession = {
+      initialize: vi.fn(async () => undefined),
+      send: vi.fn(async (_message: unknown) => undefined),
+      stream: vi.fn(() => {
+        const call = streamCall++;
+        return (async function* () {
+          if (call === 0) {
+            yield { type: 'result', success: false, error: 'error', conversationId: 'conv-approval' };
+            return;
+          }
+          yield { type: 'assistant', content: 'recovered response' };
+          yield { type: 'result', success: true, result: 'done', conversationId: 'conv-approval' };
+        })();
+      }),
+      close: vi.fn(() => undefined),
+      agentId: 'agent-contract-test',
+      conversationId: 'conv-approval',
+    };
+
+    vi.mocked(resumeSession).mockReturnValue(mockSession as never);
+    vi.mocked(getLatestRunError).mockResolvedValueOnce({
+      message: 'Run run-stuck stuck waiting for tool approval (status=created)',
+      stopReason: 'requires_approval',
+      isApprovalError: true,
+    });
+
+    const bot = new LettaBot({
+      workingDir: join(dataDir, 'working'),
+      allowedTools: [],
+    });
+
+    const response = await bot.sendToAgent('trigger approval retry');
+    expect(response).toBe('recovered response');
+    expect(mockSession.send).toHaveBeenCalledTimes(2);
+    expect(getLatestRunError).toHaveBeenCalledWith('agent-contract-test', 'conv-approval');
+  });
+
+  it('retries processMessage once after approval conflict even when orphan scan finds nothing', async () => {
+    const bot = new LettaBot({
+      workingDir: join(dataDir, 'working'),
+      allowedTools: [],
+    });
+
+    let runCall = 0;
+    (bot as any).sessionManager.runSession = vi.fn(async () => ({
+      session: { abort: vi.fn(async () => undefined) },
+      stream: async function* () {
+        if (runCall++ === 0) {
+          yield { type: 'result', success: false, error: 'error', conversationId: 'conv-approval' };
+          return;
+        }
+        yield { type: 'assistant', content: 'after retry' };
+        yield { type: 'result', success: true, result: 'after retry', conversationId: 'conv-approval' };
+      },
+    }));
+
+    vi.mocked(getLatestRunError).mockResolvedValueOnce({
+      message: 'Run run-stuck stuck waiting for tool approval (status=created)',
+      stopReason: 'requires_approval',
+      isApprovalError: true,
+    });
+    vi.mocked(recoverOrphanedConversationApproval).mockResolvedValueOnce({
+      recovered: false,
+      details: 'No unresolved approval requests found',
+    });
+
+    const adapter = {
+      id: 'mock',
+      name: 'Mock',
+      start: vi.fn(async () => {}),
+      stop: vi.fn(async () => {}),
+      isRunning: vi.fn(() => true),
+      sendMessage: vi.fn(async (_payload: unknown) => ({ messageId: 'msg-1' })),
+      editMessage: vi.fn(async () => {}),
+      sendTypingIndicator: vi.fn(async () => {}),
+      stopTypingIndicator: vi.fn(async () => {}),
+      supportsEditing: vi.fn(() => false),
+      sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
+    };
+
+    const msg = {
+      channel: 'discord',
+      chatId: 'chat-1',
+      userId: 'user-1',
+      text: 'hello',
+      timestamp: new Date(),
+    };
+
+    await (bot as any).processMessage(msg, adapter);
+
+    expect((bot as any).sessionManager.runSession).toHaveBeenCalledTimes(2);
+    expect(recoverOrphanedConversationApproval).toHaveBeenCalledWith(
+      'agent-contract-test',
+      'conv-approval',
+      true
+    );
+    const sentTexts = adapter.sendMessage.mock.calls.map((call) => {
+      const payload = call[0] as { text?: string };
+      return payload.text;
+    });
+    expect(sentTexts).toContain('after retry');
+  });
+
  it('passes tags: [origin:lettabot] to createAgent when creating a new agent', async () => {
    delete process.env.LETTA_AGENT_ID;

--- a/src/tools/letta-api.test.ts
+++ b/src/tools/letta-api.test.ts
@@ -307,4 +307,41 @@ describe('getLatestRunError', () => {

    expect(result).toBeNull();
  });
+
+  it('detects approval-stuck run via stop_reason when no metadata error', async () => {
+    mockRunsList.mockReturnValue(mockPageIterator([
+      {
+        id: 'run-stuck',
+        conversation_id: 'conv-1',
+        status: 'created',
+        stop_reason: 'requires_approval',
+        metadata: {},
+      },
+    ]));
+
+    const result = await getLatestRunError('agent-1', 'conv-1');
+
+    expect(result).not.toBeNull();
+    expect(result?.isApprovalError).toBe(true);
+    expect(result?.message).toContain('stuck waiting for tool approval');
+    expect(result?.stopReason).toBe('requires_approval');
+  });
+
+  it('returns null for created run with no stop_reason (not an approval issue)', async () => {
+    mockRunsList.mockReturnValue(mockPageIterator([
+      {
+        id: 'run-limbo',
+        conversation_id: 'conv-1',
+        status: 'created',
+        stop_reason: undefined,
+        metadata: {},
+      },
+    ]));
+
+    const result = await getLatestRunError('agent-1', 'conv-1');
+
+    // A created run with no stop_reason could be legitimately new,
+    // so we don't treat it as an approval issue.
+    expect(result).toBeNull();
+  });
 });
--- a/src/tools/letta-api.ts
+++ b/src/tools/letta-api.ts
@@ -525,6 +525,21 @@ export async function getLatestRunError(
    const detail = typeof err?.detail === 'string' ? err.detail : '';
    const stopReason = typeof run.stop_reason === 'string' ? run.stop_reason : 'error';

+    // Run has no metadata error but is stuck waiting for approval.
+    // This happens when the 409 prevents a new run from starting --
+    // the latest run is the one blocking, and it has no error, just a
+    // stop_reason indicating it needs approval.
+    const status = typeof run.status === 'string' ? run.status : '';
+    if (!detail && stopReason === 'requires_approval') {
+      const runId = typeof run.id === 'string' ? run.id : 'unknown';
+      log.info(`Latest run stuck on approval: run=${runId} status=${status} stop_reason=${stopReason}`);
+      return {
+        message: `Run ${runId} stuck waiting for tool approval (status=${status})`,
+        stopReason,
+        isApprovalError: true,
+      };
+    }
+
    if (!detail) return null;

    const isApprovalError = detail.toLowerCase().includes('waiting for approval')