From bcbe6dff9b3a7457e57cc6076d8d68f888d3e246 Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 6 Mar 2026 10:14:16 -0800 Subject: [PATCH] fix: prevent reasoning text from leaking as response on LLM API errors (#509) --- package-lock.json | 79 ++++++++++------------------------- package.json | 2 +- src/core/bot.ts | 7 +++- src/core/result-guard.test.ts | 54 ++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 59 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0b20734..43c8c1c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,7 @@ "@clack/prompts": "^0.11.0", "@hapi/boom": "^10.0.1", "@letta-ai/letta-client": "^1.7.11", - "@letta-ai/letta-code-sdk": "^0.1.6", + "@letta-ai/letta-code-sdk": "^0.1.9", "@types/express": "^5.0.6", "@types/node": "^25.0.10", "@types/node-schedule": "^2.1.8", @@ -1266,13 +1266,13 @@ "license": "Apache-2.0" }, "node_modules/@letta-ai/letta-code": { - "version": "0.16.9", - "resolved": "https://registry.npmjs.org/@letta-ai/letta-code/-/letta-code-0.16.9.tgz", - "integrity": "sha512-Rsw0guXuMYxBESrLfz0ZgtP/vRlGsjIOkbThCy9LDPGYxgqhtVK4nDwrYsSbvSVXV9LWMpVvjLSRuVHg2/xDhQ==", + "version": "0.17.1", + "resolved": "https://registry.npmjs.org/@letta-ai/letta-code/-/letta-code-0.17.1.tgz", + "integrity": "sha512-iLOWfh1ccmkdrx8j4y/Aop4H5D5PAfjxNVGM28TukcS0FZNPbnmDFGA0tcNudi6wslH6BT5X53/gkAIabuIujg==", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { - "@letta-ai/letta-client": "^1.7.8", + "@letta-ai/letta-client": "^1.7.11", "glob": "^13.0.0", "ink-link": "^5.0.0", "open": "^10.2.0", @@ -1290,12 +1290,12 @@ } }, "node_modules/@letta-ai/letta-code-sdk": { - "version": "0.1.8", - "resolved": "https://registry.npmjs.org/@letta-ai/letta-code-sdk/-/letta-code-sdk-0.1.8.tgz", - "integrity": "sha512-/y6yFEmwdW3MC303LK4rIQu7ZPgMhkijUmshRZ2ZhLUtyBxqEw0G6EY60Gcf66wEzU6y5YfL+yCukIieKJgekQ==", + "version": "0.1.9", + "resolved": "https://registry.npmjs.org/@letta-ai/letta-code-sdk/-/letta-code-sdk-0.1.9.tgz", + "integrity": "sha512-bk/Q9g9ob9RqQDge4aObPbWbmufaz71XhhApgORwkNh+OaMgbhHLJ5mye+ocHEGG4b/a6odRvWqNzIEX94aX+A==", "license": "Apache-2.0", "dependencies": { - "@letta-ai/letta-code": "0.16.9" + "@letta-ai/letta-code": "0.17.1" } }, "node_modules/@letta-ai/letta-code/node_modules/balanced-match": { @@ -1308,9 +1308,9 @@ } }, "node_modules/@letta-ai/letta-code/node_modules/brace-expansion": { - "version": "5.0.3", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz", - "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==", + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz", + "integrity": "sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==", "license": "MIT", "dependencies": { "balanced-match": "^4.0.2" @@ -1346,9 +1346,9 @@ } }, "node_modules/@letta-ai/letta-code/node_modules/minimatch": { - "version": "10.2.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.2.tgz", - "integrity": "sha512-+G4CpNBxa5MprY+04MbgOw1v7So6n5JY166pFi9KfYwT78fxScCeSNQSNzp6dpPSW2rONOps6Ocam1wFhCgoVw==", + "version": "10.2.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", + "integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==", "license": "BlueOak-1.0.0", "dependencies": { "brace-expansion": "^5.0.2" @@ -3183,14 +3183,14 @@ } }, "node_modules/cli-truncate": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-5.1.1.tgz", - "integrity": "sha512-SroPvNHxUnk+vIW/dOSfNqdy1sPEFkrTk6TUtqLCnBlo3N7TNYYkzzN7uSD6+jVjrdO4+p8nH7JzH6cIvUem6A==", + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-5.2.0.tgz", + "integrity": "sha512-xRwvIOMGrfOAnM1JYtqQImuaNtDEv9v6oIYAs4LIHwTiKee8uwvIi363igssOC0O5U04i4AlENs79LQLu9tEMw==", "license": "MIT", "peer": true, "dependencies": { - "slice-ansi": "^7.1.0", - "string-width": "^8.0.0" + "slice-ansi": "^8.0.0", + "string-width": "^8.2.0" }, "engines": { "node": ">=20" @@ -3199,39 +3199,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/cli-truncate/node_modules/is-fullwidth-code-point": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.1.0.tgz", - "integrity": "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==", - "license": "MIT", - "peer": true, - "dependencies": { - "get-east-asian-width": "^1.3.1" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/cli-truncate/node_modules/slice-ansi": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-7.1.2.tgz", - "integrity": "sha512-iOBWFgUX7caIZiuutICxVgX1SdxwAVFFKwt1EvMYYec/NWO5meOJ6K5uQxhrYBdQJne4KxiqZc+KptFOWFSI9w==", - "license": "MIT", - "peer": true, - "dependencies": { - "ansi-styles": "^6.2.1", - "is-fullwidth-code-point": "^5.0.0" - }, - "engines": { - "node": ">=18" - }, - "funding": { - "url": "https://github.com/chalk/slice-ansi?sponsor=1" - } - }, "node_modules/cli-truncate/node_modules/string-width": { "version": "8.2.0", "resolved": "https://registry.npmjs.org/string-width/-/string-width-8.2.0.tgz", @@ -3759,9 +3726,9 @@ } }, "node_modules/es-toolkit": { - "version": "1.44.0", - "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.44.0.tgz", - "integrity": "sha512-6penXeZalaV88MM3cGkFZZfOoLGWshWWfdy0tWw/RlVVyhvMaWSBTOvXNeiW3e5FwdS5ePW0LGEu17zT139ktg==", + "version": "1.45.1", + "resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.45.1.tgz", + "integrity": "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw==", "license": "MIT", "peer": true, "workspaces": [ diff --git a/package.json b/package.json index ec0a66a..47d8ec1 100644 --- a/package.json +++ b/package.json @@ -67,7 +67,7 @@ "@clack/prompts": "^0.11.0", "@hapi/boom": "^10.0.1", "@letta-ai/letta-client": "^1.7.11", - "@letta-ai/letta-code-sdk": "^0.1.6", + "@letta-ai/letta-code-sdk": "^0.1.9", "@types/express": "^5.0.6", "@types/node": "^25.0.10", "@types/node-schedule": "^2.1.8", diff --git a/src/core/bot.ts b/src/core/bot.ts index dae0a18..196e09c 100644 --- a/src/core/bot.ts +++ b/src/core/bot.ts @@ -1253,12 +1253,15 @@ export class LettaBot implements AgentSession { `(resultLen=${resultText.length}, streamLen=${streamedAssistantText.length}). ` + `Preferring streamed content to avoid n-1 desync.` ); - } else if (streamedTextTrimmed.length === 0) { + } else if (streamedTextTrimmed.length === 0 && streamMsg.success !== false && !streamMsg.error) { // Fallback for models/providers that only populate result text. + // Skip on error results -- the result field may contain reasoning + // text or other non-deliverable content (e.g. llm_api_error). response = resultText; - } else if (!sentAnyMessage && response.trim().length === 0) { + } else if (!sentAnyMessage && response.trim().length === 0 && streamMsg.success !== false && !streamMsg.error) { // Safety fallback: if we streamed text but nothing was // delivered yet, allow a single result-based resend. + // Skip on error results for the same reason as above. response = resultText; } } diff --git a/src/core/result-guard.test.ts b/src/core/result-guard.test.ts index 8d4df1e..2bfd5ec 100644 --- a/src/core/result-guard.test.ts +++ b/src/core/result-guard.test.ts @@ -104,4 +104,58 @@ describe('result divergence guard', () => { const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text); expect(sentTexts).toEqual(['streamed-segment']); }); + + it('does not deliver reasoning text from error results as the response', async () => { + const bot = new LettaBot({ + workingDir: workDir, + allowedTools: [], + }); + + const adapter = { + id: 'mock', + name: 'Mock', + start: vi.fn(async () => {}), + stop: vi.fn(async () => {}), + isRunning: vi.fn(() => true), + sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })), + editMessage: vi.fn(async () => {}), + sendTypingIndicator: vi.fn(async () => {}), + stopTypingIndicator: vi.fn(async () => {}), + supportsEditing: vi.fn(() => false), + sendFile: vi.fn(async () => ({ messageId: 'file-1' })), + }; + + (bot as any).sessionManager.runSession = vi.fn(async () => ({ + session: { abort: vi.fn(async () => {}) }, + stream: async function* () { + // Reproduce the exact bug path: reasoning tokens only, then an error + // result whose result field contains the leaked reasoning text. + yield { type: 'reasoning', content: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...' }; + yield { + type: 'result', + success: false, + error: 'error', + stopReason: 'llm_api_error', + result: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...', + }; + }, + })); + + const msg: InboundMessage = { + channel: 'discord', + chatId: 'chat-1', + userId: 'user-1', + text: 'hello', + timestamp: new Date(), + }; + + await (bot as any).processMessage(msg, adapter); + + const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text); + // Must show a formatted error message, never the raw reasoning text. + expect(sentTexts.length).toBeGreaterThanOrEqual(1); + const lastSent = sentTexts[sentTexts.length - 1]; + expect(lastSent).not.toContain('Evaluating response protocol'); + expect(lastSent).toMatch(/\(.*\)/); // Parenthesized system message + }); });