fix(core): resize images before sending to LLM to prevent llm_api_error (#593)

2026-03-13 14:10:05 -07:00
parent 68056ed21b
commit f5005c33a0
6 changed files with 87 additions and 11 deletions
--- a/package-lock.json
+++ b/package-lock.json
@@ -28,6 +28,7 @@
        "openai": "^6.17.0",
        "pino": "^10.3.1",
        "qrcode-terminal": "^0.12.0",
        "sharp": "^0.34.1",
        "telegramify-markdown": "^1.0.0",
        "tsx": "^4.21.0",
        "typescript": "^5.9.3",
--- a/package.json
+++ b/package.json
@@ -85,6 +85,7 @@
    "openai": "^6.17.0",
    "pino": "^10.3.1",
    "qrcode-terminal": "^0.12.0",
    "sharp": "^0.34.1",
    "telegramify-markdown": "^1.0.0",
    "tsx": "^4.21.0",
    "typescript": "^5.9.3",
--- a/src/core/bot.ts
+++ b/src/core/bot.ts
@@ -4,9 +4,10 @@
 * Single agent, single conversation - chat continues across all channels.
 */
-import { imageFromFile, imageFromURL, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
+import { imageFromBase64, type ImageContent, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk';
 import { mkdirSync, existsSync } from 'node:fs';
-import { access, unlink, realpath, stat, constants } from 'node:fs/promises';
+import { readFile, access, unlink, realpath, stat, constants } from 'node:fs/promises';
 import sharp from 'sharp';
 import { execFile } from 'node:child_process';
 import { extname, resolve, join } from 'node:path';
 import type { ChannelAdapter } from '../channels/types.js';
@@ -49,6 +50,68 @@ const AUDIO_FILE_EXTENSIONS = new Set([
  '.ogg', '.opus', '.mp3', '.m4a', '.wav', '.aac', '.flac',
 ]);
 /** Anthropic recommends max 1568px on longest side; larger images waste bandwidth for no benefit. */
 const MAX_IMAGE_DIMENSION = 1568;
 /**
 * Lookup from a lowercase file extension (leading dot included) to the image
 * media type used when building an ImageContent payload. Extensions that are
 * not listed here have no entry, so callers must supply their own fallback.
 */
 const MIME_FROM_EXT: Record<string, ImageContent['source']['media_type']> = {
  '.png': 'image/png',
  '.gif': 'image/gif',
  '.webp': 'image/webp',
  '.jpg': 'image/jpeg',
  '.jpeg': 'image/jpeg',
 };
 /**
 * Read, resize (if needed), and base64-encode an image for the LLM.
 *
 * The image is loaded from `source.localPath` when present, otherwise fetched
 * from `source.url`. Images whose longest side exceeds MAX_IMAGE_DIMENSION are
 * scaled down (aspect ratio preserved); smaller images are passed through
 * untouched.
 *
 * The media type is taken from the format sharp actually decodes when that
 * format is one we can label; otherwise it is resolved from, in order: the HTTP
 * Content-Type header (URL sources only), the attachment's `mimeType`, the file
 * extension, and finally 'image/jpeg'.
 *
 * @param source Attachment descriptor; `name` is only used for logging.
 * @returns The encoded image, or null when no source is given or the URL
 *          responds with a non-OK HTTP status.
 * @throws Propagates file-read, network, and image-decoding errors; callers
 *         that want to skip a bad image must catch them.
 */
 async function prepareImage(
  source: { localPath?: string; url?: string; mimeType?: string; name?: string },
 ): Promise<ImageContent | null> {
  type MediaType = ImageContent['source']['media_type'];

  // Container formats reported by sharp that map onto a supported media type.
  const mimeFromDecodedFormat: Record<string, MediaType> = {
    jpeg: 'image/jpeg',
    png: 'image/png',
    webp: 'image/webp',
    gif: 'image/gif',
  };

  // Accept a MIME hint only if it is supported. Parameters and casing are
  // normalised first so "Image/PNG; charset=binary" still counts as image/png.
  const normalizeMime = (hint?: string | null): MediaType | undefined => {
    const bare = hint?.split(';')[0]?.trim().toLowerCase();
    return bare && SUPPORTED_IMAGE_MIMES.has(bare) ? (bare as MediaType) : undefined;
  };

  // Map a plain path (no query string) to a media type via its extension.
  const mimeFromExtension = (path: string): MediaType | undefined =>
    MIME_FROM_EXT[extname(path).toLowerCase()];

  // Strip query string / fragment so "a.png?sig=1" still yields ".png".
  const pathnameOf = (url: string): string => {
    try {
      return new URL(url).pathname;
    } catch {
      return url.split(/[?#]/)[0] ?? url;
    }
  };

  let buffer: Buffer;
  let hintedType: MediaType | undefined;

  if (source.localPath) {
    buffer = await readFile(source.localPath);
    hintedType = normalizeMime(source.mimeType) ?? mimeFromExtension(source.localPath);
  } else if (source.url) {
    const response = await fetch(source.url);
    if (!response.ok) {
      log.warn(`Failed to fetch image from ${source.url}: HTTP ${response.status}`);
      return null;
    }
    buffer = Buffer.from(await response.arrayBuffer());
    // Try each hint in turn: an unsupported header (e.g. application/octet-stream)
    // must not prevent the attachment metadata or extension from being used.
    hintedType =
      normalizeMime(response.headers.get('content-type')) ??
      normalizeMime(source.mimeType) ??
      mimeFromExtension(pathnameOf(source.url));
  } else {
    return null;
  }

  const metadata = await sharp(buffer).metadata();

  // Prefer what the bytes actually are over what the sender claimed; a wrong
  // media_type label is itself a cause of API rejections.
  const detectedType = metadata.format ? mimeFromDecodedFormat[metadata.format] : undefined;
  const mediaType: MediaType = detectedType ?? hintedType ?? 'image/jpeg'; // safe default

  // Resize if the longest side exceeds the threshold
  const longest = Math.max(metadata.width ?? 0, metadata.height ?? 0);
  if (longest > MAX_IMAGE_DIMENSION) {
    log.info(`Resizing image ${source.name || 'unknown'} from ${metadata.width}x${metadata.height} (max side → ${MAX_IMAGE_DIMENSION}px)`);
    buffer = await sharp(buffer)
      // Re-encoding drops EXIF metadata, so bake the orientation into the
      // pixels first; otherwise rotated photos come out sideways.
      .rotate()
      .resize({ width: MAX_IMAGE_DIMENSION, height: MAX_IMAGE_DIMENSION, fit: 'inside', withoutEnlargement: true })
      .toBuffer();
  }

  return imageFromBase64(buffer.toString('base64'), mediaType);
 }
 type StreamErrorDetail = {
  message: string;
  stopReason: string;
@@ -125,11 +188,8 @@ async function buildMultimodalMessage(
  for (const attachment of imageAttachments) {
    try {
-      if (attachment.localPath) {
+      const item = await prepareImage(attachment);
-        content.push(imageFromFile(attachment.localPath));
+      if (item) content.push(item);
      } else if (attachment.url) {
        content.push(await imageFromURL(attachment.url));
      }
    } catch (err) {
      log.warn(`Failed to load image ${attachment.name || 'unknown'}: ${err instanceof Error ? err.message : err}`);
    }
@@ -1544,6 +1604,7 @@ export class LettaBot implements AgentSession {
                  (!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
                const enriched = await getLatestRunError(this.store.agentId, retryConvId);
                if (enriched) {
                  log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
                  lastErrorDetail = {
                    message: enriched.message,
                    stopReason: enriched.stopReason,
@@ -1875,6 +1936,7 @@ export class LettaBot implements AgentSession {
                    (!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) {
                  const enriched = await getLatestRunError(this.store.agentId, convId);
                  if (enriched) {
                    log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`);
                    lastErrorDetail = {
                      message: enriched.message,
                      stopReason: enriched.stopReason,
--- a/src/core/errors.test.ts
+++ b/src/core/errors.test.ts
@@ -101,7 +101,7 @@ describe('formatApiErrorForUser', () => {
      stopReason: 'error',
    });
    expect(msg).toContain('stuck tool approval');
-    expect(msg).toContain('reset-conversation');
+    expect(msg).toContain('/reset');
    // Should NOT match the generic conflict message
    expect(msg).not.toContain('Another request is still processing');
  });
@@ -120,7 +120,7 @@ describe('formatApiErrorForUser', () => {
      stopReason: 'requires_approval',
    });
    expect(msg).toContain('stuck tool approval');
-    expect(msg).toContain('reset-conversation');
+    expect(msg).toContain('/reset');
  });
  it('falls back to sanitized original message when no mapping matches', () => {
--- a/src/core/result-guard.test.ts
+++ b/src/core/result-guard.test.ts
@@ -5,6 +5,19 @@ import { tmpdir } from 'node:os';
 import { LettaBot } from './bot.js';
 import type { InboundMessage, OutboundMessage } from './types.js';
 // Replace every export of the Letta API helper module with a vitest stub.
 // Most stubs are bare vi.fn() mocks; three carry fixed results:
 // recoverOrphanedConversationApproval resolves to { recovered: false },
 // isRecoverableConversationId returns false, and getLatestRunError resolves to null.
 vi.mock('../tools/letta-api.js', () => ({
  getPendingApprovals: vi.fn(),
  rejectApproval: vi.fn(),
  cancelRuns: vi.fn(),
  cancelConversation: vi.fn(),
  recoverOrphanedConversationApproval: vi.fn().mockResolvedValue({ recovered: false }),
  recoverPendingApprovalsForAgent: vi.fn(),
  isRecoverableConversationId: vi.fn(() => false),
  getLatestRunError: vi.fn().mockResolvedValue(null),
  getAgentModel: vi.fn(),
  updateAgentModel: vi.fn(),
 }));
 describe('result divergence guard', () => {
  let workDir: string;
--- a/src/core/sdk-session-contract.test.ts
+++ b/src/core/sdk-session-contract.test.ts
@@ -7,8 +7,7 @@ vi.mock('@letta-ai/letta-code-sdk', () => ({
  createAgent: vi.fn(),
  createSession: vi.fn(),
  resumeSession: vi.fn(),
-  imageFromFile: vi.fn(),
+  imageFromBase64: vi.fn((_data: string, _type: string) => ({ type: 'image', source: { type: 'base64', media_type: _type, data: _data } })),
  imageFromURL: vi.fn(),
 }));
 vi.mock('../tools/letta-api.js', () => ({