diff --git a/package-lock.json b/package-lock.json index d2000f3..9b63160 100644 --- a/package-lock.json +++ b/package-lock.json @@ -28,6 +28,7 @@ "openai": "^6.17.0", "pino": "^10.3.1", "qrcode-terminal": "^0.12.0", + "sharp": "^0.34.1", "telegramify-markdown": "^1.0.0", "tsx": "^4.21.0", "typescript": "^5.9.3", diff --git a/package.json b/package.json index bd12d4b..b9e4fa2 100644 --- a/package.json +++ b/package.json @@ -85,6 +85,7 @@ "openai": "^6.17.0", "pino": "^10.3.1", "qrcode-terminal": "^0.12.0", + "sharp": "^0.34.1", "telegramify-markdown": "^1.0.0", "tsx": "^4.21.0", "typescript": "^5.9.3", diff --git a/src/core/bot.ts b/src/core/bot.ts index 9c94ac9..7509eab 100644 --- a/src/core/bot.ts +++ b/src/core/bot.ts @@ -4,9 +4,10 @@ * Single agent, single conversation - chat continues across all channels. */ -import { imageFromFile, imageFromURL, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk'; +import { imageFromBase64, type ImageContent, type Session, type MessageContentItem, type SendMessage, type CanUseToolCallback } from '@letta-ai/letta-code-sdk'; import { mkdirSync, existsSync } from 'node:fs'; -import { access, unlink, realpath, stat, constants } from 'node:fs/promises'; +import { readFile, access, unlink, realpath, stat, constants } from 'node:fs/promises'; +import sharp from 'sharp'; import { execFile } from 'node:child_process'; import { extname, resolve, join } from 'node:path'; import type { ChannelAdapter } from '../channels/types.js'; @@ -49,6 +50,68 @@ const AUDIO_FILE_EXTENSIONS = new Set([ '.ogg', '.opus', '.mp3', '.m4a', '.wav', '.aac', '.flac', ]); +/** Anthropic recommends max 1568px on longest side; larger images waste bandwidth for no benefit. */ +const MAX_IMAGE_DIMENSION = 1568; + +const MIME_FROM_EXT: Record = { + '.png': 'image/png', + '.gif': 'image/gif', + '.webp': 'image/webp', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', +}; + +/** + * Read, resize (if needed), and base64-encode an image for the LLM. + * Returns null on any failure so the caller can skip gracefully. + */ +async function prepareImage( + source: { localPath?: string; url?: string; mimeType?: string; name?: string }, +): Promise { + let buffer: Buffer; + let mediaType: ImageContent['source']['media_type']; + + // Resolve media type from attachment metadata or file extension + const resolveMime = (hint?: string, path?: string): ImageContent['source']['media_type'] => { + if (hint && SUPPORTED_IMAGE_MIMES.has(hint)) return hint as ImageContent['source']['media_type']; + if (path) { + const ext = extname(path).toLowerCase(); + if (MIME_FROM_EXT[ext]) return MIME_FROM_EXT[ext]; + } + return 'image/jpeg'; // safe default + }; + + if (source.localPath) { + buffer = await readFile(source.localPath); + mediaType = resolveMime(source.mimeType, source.localPath); + } else if (source.url) { + const response = await fetch(source.url); + if (!response.ok) { + log.warn(`Failed to fetch image from ${source.url}: HTTP ${response.status}`); + return null; + } + buffer = Buffer.from(await response.arrayBuffer()); + const ct = response.headers.get('content-type') ?? undefined; + mediaType = resolveMime(ct ?? source.mimeType, source.url); + } else { + return null; + } + + // Resize if the longest side exceeds the threshold + const metadata = await sharp(buffer).metadata(); + const longest = Math.max(metadata.width ?? 0, metadata.height ?? 0); + + if (longest > MAX_IMAGE_DIMENSION) { + log.info(`Resizing image ${source.name || 'unknown'} from ${metadata.width}x${metadata.height} (max side → ${MAX_IMAGE_DIMENSION}px)`); + buffer = await sharp(buffer) + .resize({ width: MAX_IMAGE_DIMENSION, height: MAX_IMAGE_DIMENSION, fit: 'inside', withoutEnlargement: true }) + .toBuffer(); + } + + const data = buffer.toString('base64'); + return imageFromBase64(data, mediaType); +} + type StreamErrorDetail = { message: string; stopReason: string; @@ -125,11 +188,8 @@ async function buildMultimodalMessage( for (const attachment of imageAttachments) { try { - if (attachment.localPath) { - content.push(imageFromFile(attachment.localPath)); - } else if (attachment.url) { - content.push(await imageFromURL(attachment.url)); - } + const item = await prepareImage(attachment); + if (item) content.push(item); } catch (err) { log.warn(`Failed to load image ${attachment.name || 'unknown'}: ${err instanceof Error ? err.message : err}`); } @@ -1544,6 +1604,7 @@ export class LettaBot implements AgentSession { (!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) { const enriched = await getLatestRunError(this.store.agentId, retryConvId); if (enriched) { + log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`); lastErrorDetail = { message: enriched.message, stopReason: enriched.stopReason, @@ -1875,6 +1936,7 @@ export class LettaBot implements AgentSession { (!lastErrorDetail || lastErrorDetail.message === 'Agent stopped: error')) { const enriched = await getLatestRunError(this.store.agentId, convId); if (enriched) { + log.info(`Enriched error detail: ${enriched.message} [${enriched.stopReason}]`); lastErrorDetail = { message: enriched.message, stopReason: enriched.stopReason, diff --git a/src/core/errors.test.ts b/src/core/errors.test.ts index 5108ff1..d7800dc 100644 --- a/src/core/errors.test.ts +++ b/src/core/errors.test.ts @@ -101,7 +101,7 @@ describe('formatApiErrorForUser', () => { stopReason: 'error', }); expect(msg).toContain('stuck tool approval'); - expect(msg).toContain('reset-conversation'); + expect(msg).toContain('/reset'); // Should NOT match the generic conflict message expect(msg).not.toContain('Another request is still processing'); }); @@ -120,7 +120,7 @@ describe('formatApiErrorForUser', () => { stopReason: 'requires_approval', }); expect(msg).toContain('stuck tool approval'); - expect(msg).toContain('reset-conversation'); + expect(msg).toContain('/reset'); }); it('falls back to sanitized original message when no mapping matches', () => { diff --git a/src/core/result-guard.test.ts b/src/core/result-guard.test.ts index ce9bb5e..f2f12ad 100644 --- a/src/core/result-guard.test.ts +++ b/src/core/result-guard.test.ts @@ -5,6 +5,19 @@ import { tmpdir } from 'node:os'; import { LettaBot } from './bot.js'; import type { InboundMessage, OutboundMessage } from './types.js'; +vi.mock('../tools/letta-api.js', () => ({ + getPendingApprovals: vi.fn(), + rejectApproval: vi.fn(), + cancelRuns: vi.fn(), + cancelConversation: vi.fn(), + recoverOrphanedConversationApproval: vi.fn().mockResolvedValue({ recovered: false }), + recoverPendingApprovalsForAgent: vi.fn(), + isRecoverableConversationId: vi.fn(() => false), + getLatestRunError: vi.fn().mockResolvedValue(null), + getAgentModel: vi.fn(), + updateAgentModel: vi.fn(), +})); + describe('result divergence guard', () => { let workDir: string; diff --git a/src/core/sdk-session-contract.test.ts b/src/core/sdk-session-contract.test.ts index 118fa01..e1c0f03 100644 --- a/src/core/sdk-session-contract.test.ts +++ b/src/core/sdk-session-contract.test.ts @@ -7,8 +7,7 @@ vi.mock('@letta-ai/letta-code-sdk', () => ({ createAgent: vi.fn(), createSession: vi.fn(), resumeSession: vi.fn(), - imageFromFile: vi.fn(), - imageFromURL: vi.fn(), + imageFromBase64: vi.fn((_data: string, _type: string) => ({ type: 'image', source: { type: 'base64', media_type: _type, data: _data } })), })); vi.mock('../tools/letta-api.js', () => ({