fix(core): resize images before sending to LLM to prevent llm_api_error (#593)

This commit is contained in:
Cameron
2026-03-13 14:10:05 -07:00
committed by GitHub
parent 68056ed21b
commit f5005c33a0
6 changed files with 87 additions and 11 deletions

1
package-lock.json generated
View File

@@ -28,6 +28,7 @@
"openai": "^6.17.0",
"pino": "^10.3.1",
"qrcode-terminal": "^0.12.0",
"sharp": "^0.34.1",
"telegramify-markdown": "^1.0.0",
"tsx": "^4.21.0",
"typescript": "^5.9.3",

View File

@@ -85,6 +85,7 @@
"openai": "^6.17.0",
"pino": "^10.3.1",
"qrcode-terminal": "^0.12.0",
"sharp": "^0.34.1",
"telegramify-markdown": "^1.0.0",
"tsx": "^4.21.0",
"typescript": "^5.9.3",

View File

@@ -4,9 +4,10 @@
* Single agent, single conversation - chat continues across all channels.
*/
import { imageFromFile, imageFromURL, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
import { imageFromBase64, type ImageContent, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
import { mkdirSync, existsSync } from 'node:fs';
import { access, unlink, realpath, stat, constants } from 'node:fs/promises';
import { readFile, access, unlink, realpath, stat, constants } from 'node:fs/promises';
import sharp from 'sharp';
import { execFile } from 'node:child_process';
import { extname, resolve, join } from 'node:path';
import type { ChannelAdapter } from '../channels/types.js';
@@ -49,6 +50,68 @@ const AUDIO_FILE_EXTENSIONS = new Set([
'.ogg', '.opus', '.mp3', '.m4a', '.wav', '.aac', '.flac',
]);
/**
 * Upper bound, in pixels, for the longest side of an image sent to the LLM.
 * Anthropic recommends max 1568px on longest side; larger images waste bandwidth for no benefit.
 */
const MAX_IMAGE_DIMENSION = 1568;

/** Lookup from a lowercase, dot-prefixed file extension to the image media type it denotes. */
const MIME_FROM_EXT: Record<string, ImageContent['source']['media_type']> = {
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.png': 'image/png',
  '.gif': 'image/gif',
  '.webp': 'image/webp',
};
/**
 * Read, resize (if needed), and base64-encode an image for the LLM.
 *
 * The bytes are loaded from `source.localPath` when it is set, otherwise
 * fetched from `source.url`. The media type reported to the LLM is the format
 * sharp detects in the bytes themselves; attachment metadata, the HTTP
 * `Content-Type` header and the file extension are only a fallback, because a
 * label that disagrees with the payload makes the request fail downstream.
 * Images in a format outside JPEG/PNG/GIF/WebP are converted to PNG.
 *
 * @param source - Attachment descriptor; `name` is used for log messages only.
 * @returns The encoded image, or `null` when neither a path nor a URL is given
 *   or the HTTP fetch returns a non-OK status.
 * @throws Propagates file-read, network and image-decode errors so the caller
 *   can log them and skip the attachment.
 */
async function prepareImage(
  source: { localPath?: string; url?: string; mimeType?: string; name?: string },
): Promise<ImageContent | null> {
  type MediaType = ImageContent['source']['media_type'];

  // sharp format identifiers that can be sent to the LLM unchanged.
  const mimeFromFormat = new Map<string, MediaType>([
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
  ]);

  // Best-effort guess from metadata: the first usable hint wins, then the file
  // extension, then JPEG. Only used when the bytes do not reveal their format.
  const resolveMime = (hints: ReadonlyArray<string | null | undefined>, path?: string): MediaType => {
    for (const hint of hints) {
      // Content-Type values may carry parameters ("image/png; charset=binary")
      // and arbitrary casing; compare the bare, lowercased type only.
      const essence = hint?.split(';')[0]?.trim().toLowerCase();
      if (essence && SUPPORTED_IMAGE_MIMES.has(essence)) return essence as MediaType;
    }
    if (path) {
      const fromExt = MIME_FROM_EXT[extname(path).toLowerCase()];
      if (fromExt) return fromExt;
    }
    return 'image/jpeg'; // safe default
  };

  let buffer: Buffer;
  let hintedType: MediaType;

  if (source.localPath) {
    buffer = await readFile(source.localPath);
    hintedType = resolveMime([source.mimeType], source.localPath);
  } else if (source.url) {
    const response = await fetch(source.url);
    if (!response.ok) {
      log.warn(`Failed to fetch image from ${source.url}: HTTP ${response.status}`);
      return null;
    }
    buffer = Buffer.from(await response.arrayBuffer());
    // Drop the query string / fragment so "photo.png?sig=abc" still yields ".png".
    const urlPath = source.url.split(/[?#]/, 1)[0];
    hintedType = resolveMime([response.headers.get('content-type'), source.mimeType], urlPath);
  } else {
    return null;
  }

  const metadata = await sharp(buffer).metadata();
  const detectedType = metadata.format ? mimeFromFormat.get(metadata.format) : undefined;
  // A recognised but unsupported format (TIFF, HEIF, SVG, ...) must be re-encoded.
  const mustTranscode = metadata.format !== undefined && detectedType === undefined;
  const longest = Math.max(metadata.width ?? 0, metadata.height ?? 0);
  const mustResize = longest > MAX_IMAGE_DIMENSION;

  if (mustResize || mustTranscode) {
    // Re-encoding drops EXIF metadata, so bake the EXIF orientation into the
    // pixels first; otherwise rotated photos would reach the LLM sideways.
    let pipeline = sharp(buffer).rotate();
    if (mustResize) {
      log.info(`Resizing image ${source.name || 'unknown'} from ${metadata.width}x${metadata.height} (max side → ${MAX_IMAGE_DIMENSION}px)`);
      pipeline = pipeline.resize({
        width: MAX_IMAGE_DIMENSION,
        height: MAX_IMAGE_DIMENSION,
        fit: 'inside',
        withoutEnlargement: true,
      });
    }
    if (mustTranscode) {
      log.info(`Converting image ${source.name || 'unknown'} from ${metadata.format} to PNG`);
      pipeline = pipeline.png();
    }
    // Without an explicit output format sharp keeps the input format, so the
    // detected media type stays valid after a plain resize.
    buffer = await pipeline.toBuffer();
  }

  const mediaType: MediaType = mustTranscode ? 'image/png' : (detectedType ?? hintedType);
  return imageFromBase64(buffer.toString('base64'), mediaType);
}
type StreamErrorDetail = {
message: string;
stopReason: string;
@@ -125,11 +188,8 @@ async function buildMultimodalMessage(
for (const attachment of imageAttachments) {
try {
if (attachment.localPath) {
content.push(imageFromFile(attachment.localPath));
} else if (attachment.url) {
content.push(await imageFromURL(attachment.url));
}
const item = await prepareImage(attachment);
if (item) content.push(item);
} catch (err) {
log.warn(`Failed to load image ${attachment.name || 'unknown'}: ${err instanceof Error ? err.message : err}`);
}
@@ -1544,6 +1604,7 @@ export class LettaBot implements AgentSession {
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
const enriched = await getLatestRunError(this.store.agentId, retryConvId);
if (enriched) {
log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
lastErrorDetail = {
message: enriched.message,
stopReason: enriched.stopReason,
@@ -1875,6 +1936,7 @@ export class LettaBot implements AgentSession {
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
const enriched = await getLatestRunError(this.store.agentId, convId);
if (enriched) {
log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
lastErrorDetail = {
message: enriched.message,
stopReason: enriched.stopReason,

View File

@@ -101,7 +101,7 @@ describe('formatApiErrorForUser', () => {
stopReason: 'error',
});
expect(msg).toContain('stuck tool approval');
expect(msg).toContain('reset-conversation');
expect(msg).toContain('/reset');
// Should NOT match the generic conflict message
expect(msg).not.toContain('Another request is still processing');
});
@@ -120,7 +120,7 @@ describe('formatApiErrorForUser', () => {
stopReason: 'requires_approval',
});
expect(msg).toContain('stuck tool approval');
expect(msg).toContain('reset-conversation');
expect(msg).toContain('/reset');
});
it('falls back to sanitized original message when no mapping matches', () => {

View File

@@ -5,6 +5,19 @@ import { tmpdir } from 'node:os';
import { LettaBot } from './bot.js';
import type { InboundMessage, OutboundMessage } from './types.js';
// Replace the Letta API helper module with inert stubs. Only the helpers whose
// results are configured here get fixed values: orphaned-approval recovery
// resolves to "nothing recovered", no conversation id is reported as
// recoverable, and the latest-run-error lookup resolves to null.
vi.mock('../tools/letta-api.js', () => {
  const lettaApiStubs = {
    getPendingApprovals: vi.fn(),
    rejectApproval: vi.fn(),
    cancelRuns: vi.fn(),
    cancelConversation: vi.fn(),
    recoverOrphanedConversationApproval: vi.fn().mockResolvedValue({ recovered: false }),
    recoverPendingApprovalsForAgent: vi.fn(),
    isRecoverableConversationId: vi.fn(() => false),
    getLatestRunError: vi.fn().mockResolvedValue(null),
    getAgentModel: vi.fn(),
    updateAgentModel: vi.fn(),
  };
  return lettaApiStubs;
});
describe('result divergence guard', () => {
let workDir: string;

View File

@@ -7,8 +7,7 @@ vi.mock('@letta-ai/letta-code-sdk', () => ({
createAgent: vi.fn(),
createSession: vi.fn(),
resumeSession: vi.fn(),
imageFromFile: vi.fn(),
imageFromURL: vi.fn(),
imageFromBase64: vi.fn((_data: string, _type: string) => ({ type: 'image', source: { type: 'base64', media_type: _type, data: _data } })),
}));
vi.mock('../tools/letta-api.js', () => ({