fix(core): resize images before sending to LLM to prevent llm_api_error (#593)
This commit is contained in:
1
package-lock.json
generated
1
package-lock.json
generated
@@ -28,6 +28,7 @@
|
|||||||
"openai": "^6.17.0",
|
"openai": "^6.17.0",
|
||||||
"pino": "^10.3.1",
|
"pino": "^10.3.1",
|
||||||
"qrcode-terminal": "^0.12.0",
|
"qrcode-terminal": "^0.12.0",
|
||||||
|
"sharp": "^0.34.1",
|
||||||
"telegramify-markdown": "^1.0.0",
|
"telegramify-markdown": "^1.0.0",
|
||||||
"tsx": "^4.21.0",
|
"tsx": "^4.21.0",
|
||||||
"typescript": "^5.9.3",
|
"typescript": "^5.9.3",
|
||||||
|
|||||||
@@ -85,6 +85,7 @@
|
|||||||
"openai": "^6.17.0",
|
"openai": "^6.17.0",
|
||||||
"pino": "^10.3.1",
|
"pino": "^10.3.1",
|
||||||
"qrcode-terminal": "^0.12.0",
|
"qrcode-terminal": "^0.12.0",
|
||||||
|
"sharp": "^0.34.1",
|
||||||
"telegramify-markdown": "^1.0.0",
|
"telegramify-markdown": "^1.0.0",
|
||||||
"tsx": "^4.21.0",
|
"tsx": "^4.21.0",
|
||||||
"typescript": "^5.9.3",
|
"typescript": "^5.9.3",
|
||||||
|
|||||||
@@ -4,9 +4,10 @@
|
|||||||
* Single agent, single conversation - chat continues across all channels.
|
* Single agent, single conversation - chat continues across all channels.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { imageFromFile, imageFromURL, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
|
import { imageFromBase64, type ImageContent, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
|
||||||
import { mkdirSync, existsSync } from 'node:fs';
|
import { mkdirSync, existsSync } from 'node:fs';
|
||||||
import { access, unlink, realpath, stat, constants } from 'node:fs/promises';
|
import { readFile, access, unlink, realpath, stat, constants } from 'node:fs/promises';
|
||||||
|
import sharp from 'sharp';
|
||||||
import { execFile } from 'node:child_process';
|
import { execFile } from 'node:child_process';
|
||||||
import { extname, resolve, join } from 'node:path';
|
import { extname, resolve, join } from 'node:path';
|
||||||
import type { ChannelAdapter } from '../channels/types.js';
|
import type { ChannelAdapter } from '../channels/types.js';
|
||||||
@@ -49,6 +50,68 @@ const AUDIO_FILE_EXTENSIONS = new Set([
|
|||||||
'.ogg', '.opus', '.mp3', '.m4a', '.wav', '.aac', '.flac',
|
'.ogg', '.opus', '.mp3', '.m4a', '.wav', '.aac', '.flac',
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
/** Anthropic recommends max 1568px on longest side; larger images waste bandwidth for no benefit. */
|
||||||
|
const MAX_IMAGE_DIMENSION = 1568;
|
||||||
|
|
||||||
|
/**
 * Fallback lookup from a lower-cased file extension (leading dot included) to an image media type.
 * Consulted only when the attachment's MIME hint is missing or not in SUPPORTED_IMAGE_MIMES.
 */
const MIME_FROM_EXT: Record<string, ImageContent['source']['media_type']> = {
|
||||||
|
'.png': 'image/png',
|
||||||
|
'.gif': 'image/gif',
|
||||||
|
'.webp': 'image/webp',
|
||||||
|
'.jpg': 'image/jpeg',
|
||||||
|
'.jpeg': 'image/jpeg',
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Media-type union accepted by the SDK's base64 image content. */
type ImageMediaType = ImageContent['source']['media_type'];

/**
 * Formats reported by sharp's `metadata().format` that map onto a supported media type.
 * The decoded format describes the actual bytes, so it outranks any declared MIME hint.
 */
const MIME_FROM_SHARP_FORMAT: Record<string, ImageMediaType> = {
  jpeg: 'image/jpeg',
  png: 'image/png',
  gif: 'image/gif',
  webp: 'image/webp',
};

/**
 * Read, resize (if needed), and base64-encode an image for the LLM.
 *
 * The media type is resolved in this order:
 *   1. the format sharp detects from the bytes (when it is a supported one),
 *   2. the HTTP Content-Type header (URL sources only) or the attachment's MIME hint,
 *      compared without parameters and case-insensitively,
 *   3. the file extension of the local path / URL *pathname* (query string ignored),
 *   4. `image/jpeg` as a last resort.
 *
 * @param source Attachment descriptor; `localPath` takes precedence over `url`.
 * @returns The image content item, or `null` when the source has neither a path
 *          nor a URL, or when the URL responds with a non-OK HTTP status.
 * @throws Propagates file-read, network, and image-decoding errors; the caller is
 *         expected to catch them and skip the attachment.
 */
async function prepareImage(
  source: { localPath?: string; url?: string; mimeType?: string; name?: string },
): Promise<ImageContent | null> {
  // Pick the first usable hint, then fall back to the extension, then to JPEG.
  const resolveMime = (
    hints: ReadonlyArray<string | null | undefined>,
    path?: string,
  ): ImageMediaType => {
    for (const hint of hints) {
      // "image/png; charset=binary" -> "image/png"
      const essence = hint?.split(';')[0]?.trim().toLowerCase();
      if (essence && SUPPORTED_IMAGE_MIMES.has(essence)) return essence as ImageMediaType;
    }
    if (path) {
      const byExtension = MIME_FROM_EXT[extname(path).toLowerCase()];
      if (byExtension) return byExtension;
    }
    return 'image/jpeg'; // safe default
  };

  // extname() on a raw URL would include "?query"/"#hash"; use the pathname only.
  const pathnameOf = (rawUrl: string): string => {
    try {
      return new URL(rawUrl).pathname;
    } catch {
      return rawUrl;
    }
  };

  let buffer: Buffer;
  let mediaType: ImageMediaType;

  if (source.localPath) {
    buffer = await readFile(source.localPath);
    mediaType = resolveMime([source.mimeType], source.localPath);
  } else if (source.url) {
    const response = await fetch(source.url);
    if (!response.ok) {
      log.warn(`Failed to fetch image from ${source.url}: HTTP ${response.status}`);
      return null;
    }
    buffer = Buffer.from(await response.arrayBuffer());
    // Try the server's header first, then the attachment's own hint, so an
    // unhelpful header (e.g. application/octet-stream) does not discard the hint.
    mediaType = resolveMime(
      [response.headers.get('content-type'), source.mimeType],
      pathnameOf(source.url),
    );
  } else {
    return null;
  }

  const metadata = await sharp(buffer).metadata();

  // Trust the decoded format over declared metadata: a label that does not match
  // the bytes is rejected by the LLM API.
  const detected = metadata.format ? MIME_FROM_SHARP_FORMAT[metadata.format] : undefined;
  if (detected) mediaType = detected;

  // Resize if the longest side exceeds the threshold
  const longest = Math.max(metadata.width ?? 0, metadata.height ?? 0);

  if (longest > MAX_IMAGE_DIMENSION) {
    log.info(`Resizing image ${source.name || 'unknown'} from ${metadata.width}x${metadata.height} (max side → ${MAX_IMAGE_DIMENSION}px)`);
    buffer = await sharp(buffer)
      // Bake in EXIF orientation first: sharp drops metadata on output, so a
      // re-encoded photo would otherwise lose its rotation.
      .rotate()
      .resize({ width: MAX_IMAGE_DIMENSION, height: MAX_IMAGE_DIMENSION, fit: 'inside', withoutEnlargement: true })
      // No explicit output format: sharp keeps the input format, so mediaType stays valid.
      .toBuffer();
  }

  return imageFromBase64(buffer.toString('base64'), mediaType);
}
|
||||||
|
|
||||||
type StreamErrorDetail = {
|
type StreamErrorDetail = {
|
||||||
message: string;
|
message: string;
|
||||||
stopReason: string;
|
stopReason: string;
|
||||||
@@ -125,11 +188,8 @@ async function buildMultimodalMessage(
|
|||||||
|
|
||||||
for (const attachment of imageAttachments) {
|
for (const attachment of imageAttachments) {
|
||||||
try {
|
try {
|
||||||
if (attachment.localPath) {
|
const item = await prepareImage(attachment);
|
||||||
content.push(imageFromFile(attachment.localPath));
|
if (item) content.push(item);
|
||||||
} else if (attachment.url) {
|
|
||||||
content.push(await imageFromURL(attachment.url));
|
|
||||||
}
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log.warn(`Failed to load image ${attachment.name || 'unknown'}: ${err instanceof Error ? err.message : err}`);
|
log.warn(`Failed to load image ${attachment.name || 'unknown'}: ${err instanceof Error ? err.message : err}`);
|
||||||
}
|
}
|
||||||
@@ -1544,6 +1604,7 @@ export class LettaBot implements AgentSession {
|
|||||||
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
|
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
|
||||||
const enriched = await getLatestRunError(this.store.agentId, retryConvId);
|
const enriched = await getLatestRunError(this.store.agentId, retryConvId);
|
||||||
if (enriched) {
|
if (enriched) {
|
||||||
|
log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
|
||||||
lastErrorDetail = {
|
lastErrorDetail = {
|
||||||
message: enriched.message,
|
message: enriched.message,
|
||||||
stopReason: enriched.stopReason,
|
stopReason: enriched.stopReason,
|
||||||
@@ -1875,6 +1936,7 @@ export class LettaBot implements AgentSession {
|
|||||||
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
|
(!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
|
||||||
const enriched = await getLatestRunError(this.store.agentId, convId);
|
const enriched = await getLatestRunError(this.store.agentId, convId);
|
||||||
if (enriched) {
|
if (enriched) {
|
||||||
|
log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
|
||||||
lastErrorDetail = {
|
lastErrorDetail = {
|
||||||
message: enriched.message,
|
message: enriched.message,
|
||||||
stopReason: enriched.stopReason,
|
stopReason: enriched.stopReason,
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ describe('formatApiErrorForUser', () => {
|
|||||||
stopReason: 'error',
|
stopReason: 'error',
|
||||||
});
|
});
|
||||||
expect(msg).toContain('stuck tool approval');
|
expect(msg).toContain('stuck tool approval');
|
||||||
expect(msg).toContain('reset-conversation');
|
expect(msg).toContain('/reset');
|
||||||
// Should NOT match the generic conflict message
|
// Should NOT match the generic conflict message
|
||||||
expect(msg).not.toContain('Another request is still processing');
|
expect(msg).not.toContain('Another request is still processing');
|
||||||
});
|
});
|
||||||
@@ -120,7 +120,7 @@ describe('formatApiErrorForUser', () => {
|
|||||||
stopReason: 'requires_approval',
|
stopReason: 'requires_approval',
|
||||||
});
|
});
|
||||||
expect(msg).toContain('stuck tool approval');
|
expect(msg).toContain('stuck tool approval');
|
||||||
expect(msg).toContain('reset-conversation');
|
expect(msg).toContain('/reset');
|
||||||
});
|
});
|
||||||
|
|
||||||
it('falls back to sanitized original message when no mapping matches', () => {
|
it('falls back to sanitized original message when no mapping matches', () => {
|
||||||
|
|||||||
@@ -5,6 +5,19 @@ import { tmpdir } from 'node:os';
|
|||||||
import { LettaBot } from './bot.js';
|
import { LettaBot } from './bot.js';
|
||||||
import type { InboundMessage, OutboundMessage } from './types.js';
|
import type { InboundMessage, OutboundMessage } from './types.js';
|
||||||
|
|
||||||
|
// Replace the Letta API helper module with vitest stubs for this test file.
// Defaults configured here: orphaned-approval recovery resolves to { recovered: false },
// isRecoverableConversationId returns false, and getLatestRunError resolves to null;
// every other export is a bare vi.fn().
vi.mock('../tools/letta-api.js', () => ({
|
||||||
|
getPendingApprovals: vi.fn(),
|
||||||
|
rejectApproval: vi.fn(),
|
||||||
|
cancelRuns: vi.fn(),
|
||||||
|
cancelConversation: vi.fn(),
|
||||||
|
recoverOrphanedConversationApproval: vi.fn().mockResolvedValue({ recovered: false }),
|
||||||
|
recoverPendingApprovalsForAgent: vi.fn(),
|
||||||
|
isRecoverableConversationId: vi.fn(() => false),
|
||||||
|
getLatestRunError: vi.fn().mockResolvedValue(null),
|
||||||
|
getAgentModel: vi.fn(),
|
||||||
|
updateAgentModel: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
describe('result divergence guard', () => {
|
describe('result divergence guard', () => {
|
||||||
let workDir: string;
|
let workDir: string;
|
||||||
|
|
||||||
|
|||||||
@@ -7,8 +7,7 @@ vi.mock('@letta-ai/letta-code-sdk', () => ({
|
|||||||
createAgent: vi.fn(),
|
createAgent: vi.fn(),
|
||||||
createSession: vi.fn(),
|
createSession: vi.fn(),
|
||||||
resumeSession: vi.fn(),
|
resumeSession: vi.fn(),
|
||||||
imageFromFile: vi.fn(),
|
imageFromBase64: vi.fn((_data: string, _type: string) => ({ type: 'image', source: { type: 'base64', media_type: _type, data: _data } })),
|
||||||
imageFromURL: vi.fn(),
|
|
||||||
}));
|
}));
|
||||||
|
|
||||||
vi.mock('../tools/letta-api.js', () => ({
|
vi.mock('../tools/letta-api.js', () => ({
|
||||||
|
|||||||
Reference in New Issue
Block a user