fix: prevent reasoning text from leaking as response on LLM API errors (#509)
This commit is contained in:
79
package-lock.json
generated
79
package-lock.json
generated
@@ -12,7 +12,7 @@
|
||||
"@clack/prompts": "^0.11.0",
|
||||
"@hapi/boom": "^10.0.1",
|
||||
"@letta-ai/letta-client": "^1.7.11",
|
||||
"@letta-ai/letta-code-sdk": "^0.1.6",
|
||||
"@letta-ai/letta-code-sdk": "^0.1.9",
|
||||
"@types/express": "^5.0.6",
|
||||
"@types/node": "^25.0.10",
|
||||
"@types/node-schedule": "^2.1.8",
|
||||
@@ -1266,13 +1266,13 @@
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@letta-ai/letta-code": {
|
||||
"version": "0.16.9",
|
||||
"resolved": "https://registry.npmjs.org/@letta-ai/letta-code/-/letta-code-0.16.9.tgz",
|
||||
"integrity": "sha512-Rsw0guXuMYxBESrLfz0ZgtP/vRlGsjIOkbThCy9LDPGYxgqhtVK4nDwrYsSbvSVXV9LWMpVvjLSRuVHg2/xDhQ==",
|
||||
"version": "0.17.1",
|
||||
"resolved": "https://registry.npmjs.org/@letta-ai/letta-code/-/letta-code-0.17.1.tgz",
|
||||
"integrity": "sha512-iLOWfh1ccmkdrx8j4y/Aop4H5D5PAfjxNVGM28TukcS0FZNPbnmDFGA0tcNudi6wslH6BT5X53/gkAIabuIujg==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@letta-ai/letta-client": "^1.7.8",
|
||||
"@letta-ai/letta-client": "^1.7.11",
|
||||
"glob": "^13.0.0",
|
||||
"ink-link": "^5.0.0",
|
||||
"open": "^10.2.0",
|
||||
@@ -1290,12 +1290,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@letta-ai/letta-code-sdk": {
|
||||
"version": "0.1.8",
|
||||
"resolved": "https://registry.npmjs.org/@letta-ai/letta-code-sdk/-/letta-code-sdk-0.1.8.tgz",
|
||||
"integrity": "sha512-/y6yFEmwdW3MC303LK4rIQu7ZPgMhkijUmshRZ2ZhLUtyBxqEw0G6EY60Gcf66wEzU6y5YfL+yCukIieKJgekQ==",
|
||||
"version": "0.1.9",
|
||||
"resolved": "https://registry.npmjs.org/@letta-ai/letta-code-sdk/-/letta-code-sdk-0.1.9.tgz",
|
||||
"integrity": "sha512-bk/Q9g9ob9RqQDge4aObPbWbmufaz71XhhApgORwkNh+OaMgbhHLJ5mye+ocHEGG4b/a6odRvWqNzIEX94aX+A==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@letta-ai/letta-code": "0.16.9"
|
||||
"@letta-ai/letta-code": "0.17.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@letta-ai/letta-code/node_modules/balanced-match": {
|
||||
@@ -1308,9 +1308,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@letta-ai/letta-code/node_modules/brace-expansion": {
|
||||
"version": "5.0.3",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz",
|
||||
"integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==",
|
||||
"version": "5.0.4",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz",
|
||||
"integrity": "sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^4.0.2"
|
||||
@@ -1346,9 +1346,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@letta-ai/letta-code/node_modules/minimatch": {
|
||||
"version": "10.2.2",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.2.tgz",
|
||||
"integrity": "sha512-+G4CpNBxa5MprY+04MbgOw1v7So6n5JY166pFi9KfYwT78fxScCeSNQSNzp6dpPSW2rONOps6Ocam1wFhCgoVw==",
|
||||
"version": "10.2.4",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz",
|
||||
"integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==",
|
||||
"license": "BlueOak-1.0.0",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^5.0.2"
|
||||
@@ -3183,14 +3183,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/cli-truncate": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-5.1.1.tgz",
|
||||
"integrity": "sha512-SroPvNHxUnk+vIW/dOSfNqdy1sPEFkrTk6TUtqLCnBlo3N7TNYYkzzN7uSD6+jVjrdO4+p8nH7JzH6cIvUem6A==",
|
||||
"version": "5.2.0",
|
||||
"resolved": "https://registry.npmjs.org/cli-truncate/-/cli-truncate-5.2.0.tgz",
|
||||
"integrity": "sha512-xRwvIOMGrfOAnM1JYtqQImuaNtDEv9v6oIYAs4LIHwTiKee8uwvIi363igssOC0O5U04i4AlENs79LQLu9tEMw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"slice-ansi": "^7.1.0",
|
||||
"string-width": "^8.0.0"
|
||||
"slice-ansi": "^8.0.0",
|
||||
"string-width": "^8.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
@@ -3199,39 +3199,6 @@
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/cli-truncate/node_modules/is-fullwidth-code-point": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-5.1.0.tgz",
|
||||
"integrity": "sha512-5XHYaSyiqADb4RnZ1Bdad6cPp8Toise4TzEjcOYDHZkTCbKgiUl7WTUCpNWHuxmDt91wnsZBc9xinNzopv3JMQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"get-east-asian-width": "^1.3.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/cli-truncate/node_modules/slice-ansi": {
|
||||
"version": "7.1.2",
|
||||
"resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-7.1.2.tgz",
|
||||
"integrity": "sha512-iOBWFgUX7caIZiuutICxVgX1SdxwAVFFKwt1EvMYYec/NWO5meOJ6K5uQxhrYBdQJne4KxiqZc+KptFOWFSI9w==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"ansi-styles": "^6.2.1",
|
||||
"is-fullwidth-code-point": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/chalk/slice-ansi?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/cli-truncate/node_modules/string-width": {
|
||||
"version": "8.2.0",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-8.2.0.tgz",
|
||||
@@ -3759,9 +3726,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/es-toolkit": {
|
||||
"version": "1.44.0",
|
||||
"resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.44.0.tgz",
|
||||
"integrity": "sha512-6penXeZalaV88MM3cGkFZZfOoLGWshWWfdy0tWw/RlVVyhvMaWSBTOvXNeiW3e5FwdS5ePW0LGEu17zT139ktg==",
|
||||
"version": "1.45.1",
|
||||
"resolved": "https://registry.npmjs.org/es-toolkit/-/es-toolkit-1.45.1.tgz",
|
||||
"integrity": "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"workspaces": [
|
||||
|
||||
@@ -67,7 +67,7 @@
|
||||
"@clack/prompts": "^0.11.0",
|
||||
"@hapi/boom": "^10.0.1",
|
||||
"@letta-ai/letta-client": "^1.7.11",
|
||||
"@letta-ai/letta-code-sdk": "^0.1.6",
|
||||
"@letta-ai/letta-code-sdk": "^0.1.9",
|
||||
"@types/express": "^5.0.6",
|
||||
"@types/node": "^25.0.10",
|
||||
"@types/node-schedule": "^2.1.8",
|
||||
|
||||
@@ -1253,12 +1253,15 @@ export class LettaBot implements AgentSession {
|
||||
`(resultLen=${resultText.length}, streamLen=${streamedAssistantText.length}). ` +
|
||||
`Preferring streamed content to avoid n-1 desync.`
|
||||
);
|
||||
} else if (streamedTextTrimmed.length === 0) {
|
||||
} else if (streamedTextTrimmed.length === 0 && streamMsg.success !== false && !streamMsg.error) {
|
||||
// Fallback for models/providers that only populate result text.
|
||||
// Skip on error results -- the result field may contain reasoning
|
||||
// text or other non-deliverable content (e.g. llm_api_error).
|
||||
response = resultText;
|
||||
} else if (!sentAnyMessage && response.trim().length === 0) {
|
||||
} else if (!sentAnyMessage && response.trim().length === 0 && streamMsg.success !== false && !streamMsg.error) {
|
||||
// Safety fallback: if we streamed text but nothing was
|
||||
// delivered yet, allow a single result-based resend.
|
||||
// Skip on error results for the same reason as above.
|
||||
response = resultText;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,4 +104,58 @@ describe('result divergence guard', () => {
|
||||
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
||||
expect(sentTexts).toEqual(['streamed-segment']);
|
||||
});
|
||||
|
||||
it('does not deliver reasoning text from error results as the response', async () => {
|
||||
const bot = new LettaBot({
|
||||
workingDir: workDir,
|
||||
allowedTools: [],
|
||||
});
|
||||
|
||||
const adapter = {
|
||||
id: 'mock',
|
||||
name: 'Mock',
|
||||
start: vi.fn(async () => {}),
|
||||
stop: vi.fn(async () => {}),
|
||||
isRunning: vi.fn(() => true),
|
||||
sendMessage: vi.fn(async (_msg: OutboundMessage) => ({ messageId: 'msg-1' })),
|
||||
editMessage: vi.fn(async () => {}),
|
||||
sendTypingIndicator: vi.fn(async () => {}),
|
||||
stopTypingIndicator: vi.fn(async () => {}),
|
||||
supportsEditing: vi.fn(() => false),
|
||||
sendFile: vi.fn(async () => ({ messageId: 'file-1' })),
|
||||
};
|
||||
|
||||
(bot as any).sessionManager.runSession = vi.fn(async () => ({
|
||||
session: { abort: vi.fn(async () => {}) },
|
||||
stream: async function* () {
|
||||
// Reproduce the exact bug path: reasoning tokens only, then an error
|
||||
// result whose result field contains the leaked reasoning text.
|
||||
yield { type: 'reasoning', content: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...' };
|
||||
yield {
|
||||
type: 'result',
|
||||
success: false,
|
||||
error: 'error',
|
||||
stopReason: 'llm_api_error',
|
||||
result: '**Evaluating response protocol**\n\nI\'m trying to figure out how to respond...',
|
||||
};
|
||||
},
|
||||
}));
|
||||
|
||||
const msg: InboundMessage = {
|
||||
channel: 'discord',
|
||||
chatId: 'chat-1',
|
||||
userId: 'user-1',
|
||||
text: 'hello',
|
||||
timestamp: new Date(),
|
||||
};
|
||||
|
||||
await (bot as any).processMessage(msg, adapter);
|
||||
|
||||
const sentTexts = adapter.sendMessage.mock.calls.map(([payload]) => payload.text);
|
||||
// Must show a formatted error message, never the raw reasoning text.
|
||||
expect(sentTexts.length).toBeGreaterThanOrEqual(1);
|
||||
const lastSent = sentTexts[sentTexts.length - 1];
|
||||
expect(lastSent).not.toContain('Evaluating response protocol');
|
||||
expect(lastSent).toMatch(/\(.*\)/); // Parenthesized system message
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user