fix(core): resize images before sending to LLM to prevent llm_api_error (#593)
This commit is contained in:
1
package-lock.json
generated
1
package-lock.json
generated
@@ -28,6 +28,7 @@
|
||||
"openai": "^6.17.0",
|
||||
"pino": "^10.3.1",
|
||||
"qrcode-terminal": "^0.12.0",
|
||||
"sharp": "^0.34.1",
|
||||
"telegramify-markdown": "^1.0.0",
|
||||
"tsx": "^4.21.0",
|
||||
"typescript": "^5.9.3",
|
||||
|
||||
@@ -85,6 +85,7 @@
|
||||
"openai": "^6.17.0",
|
||||
"pino": "^10.3.1",
|
||||
"qrcode-terminal": "^0.12.0",
|
||||
"sharp": "^0.34.1",
|
||||
"telegramify-markdown": "^1.0.0",
|
||||
"tsx": "^4.21.0",
|
||||
"typescript": "^5.9.3",
|
||||
|
||||
@@ -4,9 +4,10 @@
|
||||
* Single agent, single conversation - chat continues across all channels.
|
||||
*/
|
||||
|
||||
import { imageFromFile, imageFromURL, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
|
||||
import { imageFromBase64, type ImageContent, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
|
||||
import { mkdirSync, existsSync } from 'node:fs';
|
||||
import { access, unlink, realpath, stat, constants } from 'node:fs/promises';
|
||||
import { readFile, access, unlink, realpath, stat, constants } from 'node:fs/promises';
|
||||
import sharp from 'sharp';
|
||||
import { execFile } from 'node:child_process';
|
||||
import { extname, resolve, join } from 'node:path';
|
||||
import type { ChannelAdapter } from '../channels/types.js';
|
||||
@@ -49,6 +50,68 @@ const AUDIO_FILE_EXTENSIONS = new Set([
|
||||
'.ogg', '.opus', '.mp3', '.m4a', '.wav', '.aac', '.flac',
|
||||
]);
|
||||
|
||||
/**
 * Upper bound (in pixels) for the longest side of an image sent to the LLM.
 * Anthropic recommends max 1568px on the longest side; larger images waste
 * bandwidth for no benefit.
 */
const MAX_IMAGE_DIMENSION = 1568;

/**
 * Lookup table from a lowercase file extension (leading dot included) to the
 * image media type reported to the LLM. Both JPEG spellings map to the same type.
 */
const MIME_FROM_EXT: Record<string, ImageContent['source']['media_type']> = {
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
  '.png': 'image/png',
  '.webp': 'image/webp',
  '.gif': 'image/gif',
};
|
||||
|
||||
/**
 * Read, resize (if needed), and base64-encode an image for the LLM.
 *
 * The image bytes come from `source.localPath` when set, otherwise from
 * `source.url`. The media type is resolved, in order of preference, from:
 *   1. the format sharp actually decodes from the bytes,
 *   2. the HTTP `Content-Type` header (URL sources only),
 *   3. the attachment's `mimeType` hint,
 *   4. the file / URL path extension,
 *   5. `image/jpeg` as a last-resort default.
 *
 * @param source - Attachment descriptor; `name` is only used for logging.
 * @returns The encoded image, or `null` when there is neither a path nor a URL,
 *          or when the URL responds with a non-OK HTTP status.
 * @throws Propagates file-read, network, and sharp decode errors. The caller is
 *         expected to catch them and skip the attachment.
 */
async function prepareImage(
  source: { localPath?: string; url?: string; mimeType?: string; name?: string },
): Promise<ImageContent | null> {
  type MediaType = ImageContent['source']['media_type'];

  // sharp format names that correspond one-to-one to a media type in MIME_FROM_EXT.
  const mimeFromFormat: Record<string, MediaType> = {
    jpeg: 'image/jpeg',
    png: 'image/png',
    gif: 'image/gif',
    webp: 'image/webp',
  };

  // Accept a MIME hint only if it is a supported image type. Parameters such as
  // "; charset=binary" are stripped first so they cannot defeat the lookup.
  const normalizeMime = (hint?: string | null): MediaType | undefined => {
    const bare = hint?.split(';')[0]?.trim().toLowerCase();
    return bare && SUPPORTED_IMAGE_MIMES.has(bare) ? (bare as MediaType) : undefined;
  };

  // Map a filesystem path (or URL pathname) to a media type via its extension.
  const mimeFromPath = (path: string): MediaType | undefined =>
    MIME_FROM_EXT[extname(path).toLowerCase()];

  // Drop query string / fragment so "photo.png?sig=abc" still yields ".png".
  const pathnameOf = (url: string): string => {
    try {
      return new URL(url).pathname;
    } catch {
      return url; // not an absolute URL; fall back to the raw string
    }
  };

  let buffer: Buffer;
  let mediaType: MediaType;

  if (source.localPath) {
    buffer = await readFile(source.localPath);
    mediaType = normalizeMime(source.mimeType) ?? mimeFromPath(source.localPath) ?? 'image/jpeg';
  } else if (source.url) {
    const response = await fetch(source.url);
    if (!response.ok) {
      log.warn(`Failed to fetch image from ${source.url}: HTTP ${response.status}`);
      return null;
    }
    buffer = Buffer.from(await response.arrayBuffer());
    // An unusable Content-Type (e.g. application/octet-stream) must not shadow
    // the attachment's own hint, so each candidate is validated before falling through.
    mediaType =
      normalizeMime(response.headers.get('content-type')) ??
      normalizeMime(source.mimeType) ??
      mimeFromPath(pathnameOf(source.url)) ??
      'image/jpeg';
  } else {
    return null;
  }

  const metadata = await sharp(buffer).metadata();

  // Trust the decoded container format over headers/extensions: the declared
  // media type has to describe the bytes that are actually sent.
  const detected = metadata.format ? mimeFromFormat[metadata.format] : undefined;
  if (detected) mediaType = detected;

  // Resize if the longest side exceeds the threshold
  const longest = Math.max(metadata.width ?? 0, metadata.height ?? 0);

  if (longest > MAX_IMAGE_DIMENSION) {
    log.info(`Resizing image ${source.name || 'unknown'} from ${metadata.width}x${metadata.height} (max side → ${MAX_IMAGE_DIMENSION}px)`);
    buffer = await sharp(buffer)
      // Re-encoding drops EXIF metadata, so bake the EXIF orientation into the
      // pixels first; otherwise rotated photos would come out sideways.
      .rotate()
      .resize({ width: MAX_IMAGE_DIMENSION, height: MAX_IMAGE_DIMENSION, fit: 'inside', withoutEnlargement: true })
      .toBuffer();
  }

  const data = buffer.toString('base64');
  return imageFromBase64(data, mediaType);
}
|
||||
|
||||
type StreamErrorDetail = {
|
||||
message: string;
|
||||
stopReason: string;
|
||||
@@ -125,11 +188,8 @@ async function buildMultimodalMessage(
|
||||
|
||||
for (const attachment of imageAttachments) {
|
||||
try {
|
||||
if (attachment.localPath) {
|
||||
content.push(imageFromFile(attachment.localPath));
|
||||
} else if (attachment.url) {
|
||||
content.push(await imageFromURL(attachment.url));
|
||||
}
|
||||
const item = await prepareImage(attachment);
|
||||
if (item) content.push(item);
|
||||
} catch (err) {
|
||||
log.warn(`Failed to load image ${attachment.name || 'unknown'}: ${err instanceof Error ? err.message : err}`);
|
||||
}
|
||||
@@ -1544,6 +1604,7 @@ export class LettaBot implements AgentSession {
|
||||
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
|
||||
const enriched = await getLatestRunError(this.store.agentId, retryConvId);
|
||||
if (enriched) {
|
||||
log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
|
||||
lastErrorDetail = {
|
||||
message: enriched.message,
|
||||
stopReason: enriched.stopReason,
|
||||
@@ -1875,6 +1936,7 @@ export class LettaBot implements AgentSession {
|
||||
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
|
||||
const enriched = await getLatestRunError(this.store.agentId, convId);
|
||||
if (enriched) {
|
||||
log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
|
||||
lastErrorDetail = {
|
||||
message: enriched.message,
|
||||
stopReason: enriched.stopReason,
|
||||
|
||||
@@ -101,7 +101,7 @@ describe('formatApiErrorForUser', () => {
|
||||
stopReason: 'error',
|
||||
});
|
||||
expect(msg).toContain('stuck tool approval');
|
||||
expect(msg).toContain('reset-conversation');
|
||||
expect(msg).toContain('/reset');
|
||||
// Should NOT match the generic conflict message
|
||||
expect(msg).not.toContain('Another request is still processing');
|
||||
});
|
||||
@@ -120,7 +120,7 @@ describe('formatApiErrorForUser', () => {
|
||||
stopReason: 'requires_approval',
|
||||
});
|
||||
expect(msg).toContain('stuck tool approval');
|
||||
expect(msg).toContain('reset-conversation');
|
||||
expect(msg).toContain('/reset');
|
||||
});
|
||||
|
||||
it('falls back to sanitized original message when no mapping matches', () => {
|
||||
|
||||
@@ -5,6 +5,19 @@ import { tmpdir } from 'node:os';
|
||||
import { LettaBot } from './bot.js';
|
||||
import type { InboundMessage, OutboundMessage } from './types.js';
|
||||
|
||||
// Replace the Letta API helper module with inert stubs for this test file.
// The factory is kept self-contained (no outer references) because it is
// passed directly to vi.mock.
vi.mock('../tools/letta-api.js', () => {
  return {
    // Approval / run management
    getPendingApprovals: vi.fn(),
    rejectApproval: vi.fn(),
    cancelRuns: vi.fn(),
    cancelConversation: vi.fn(),

    // Recovery helpers: stubbed to report that nothing was recovered / recoverable
    recoverOrphanedConversationApproval: vi.fn().mockResolvedValue({ recovered: false }),
    recoverPendingApprovalsForAgent: vi.fn(),
    isRecoverableConversationId: vi.fn(() => false),

    // Error enrichment: resolves to null
    getLatestRunError: vi.fn().mockResolvedValue(null),

    // Model accessors
    getAgentModel: vi.fn(),
    updateAgentModel: vi.fn(),
  };
});
|
||||
|
||||
describe('result divergence guard', () => {
|
||||
let workDir: string;
|
||||
|
||||
|
||||
@@ -7,8 +7,7 @@ vi.mock('@letta-ai/letta-code-sdk', () => ({
|
||||
createAgent: vi.fn(),
|
||||
createSession: vi.fn(),
|
||||
resumeSession: vi.fn(),
|
||||
imageFromFile: vi.fn(),
|
||||
imageFromURL: vi.fn(),
|
||||
imageFromBase64: vi.fn((_data: string, _type: string) => ({ type: 'image', source: { type: 'base64', media_type: _type, data: _data } })),
|
||||
}));
|
||||
|
||||
vi.mock('../tools/letta-api.js', () => ({
|
||||
|
||||
Reference in New Issue
Block a user