From b94d4908e117cf070f76c6153a9ee7a981869345 Mon Sep 17 00:00:00 2001 From: cpacker Date: Wed, 11 Feb 2026 15:52:13 -0800 Subject: [PATCH] fix: retry prompt in lazy approval recovery test when model skips tool call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test flaked on Linux x64 when the model responded with text instead of calling the bash tool. Without a tool call, no approval is generated and the test fails. Now retries the prompt up to 3 times (same pattern as the prestream approval recovery test). 👾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta --- .../lazy-approval-recovery.test.ts | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/integration-tests/lazy-approval-recovery.test.ts b/src/integration-tests/lazy-approval-recovery.test.ts index 8247831..3820820 100644 --- a/src/integration-tests/lazy-approval-recovery.test.ts +++ b/src/integration-tests/lazy-approval-recovery.test.ts @@ -80,6 +80,17 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{ let resultCount = 0; let closing = false; let pendingToolCallId: string | undefined; + let promptAttempts = 0; + + const sendPrompt = () => { + if (promptAttempts >= 3) return; + promptAttempts++; + const userMsg = JSON.stringify({ + type: "user", + message: { role: "user", content: BASH_TRIGGER_PROMPT }, + }); + proc.stdin?.write(`${userMsg}\n`); + }; const timeout = setTimeout(() => { if (!closing) { @@ -153,11 +164,7 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{ // Step 1: Wait for init, then send bash trigger prompt if (msg.type === "system" && msg.subtype === "init" && !initReceived) { initReceived = true; - const userMsg = JSON.stringify({ - type: "user", - message: { role: "user", content: BASH_TRIGGER_PROMPT }, - }); - proc.stdin?.write(`${userMsg}\n`); + sendPrompt(); return; } @@ -217,6 +224,11 @@ async function runLazyRecoveryTest(timeoutMs = 180000): Promise<{ // Track results and complete once we prove the pending-approval flow unblocks. if (msg.type === "result") { resultCount++; + // If model responded without calling a tool, retry prompt (up to 3 attempts) + if (!approvalSeen && promptAttempts < 3) { + sendPrompt(); + return; + } if (resultCount >= 1 && !approvalSeen) { cleanup(); resolve({ messages, success: false, errorSeen });