fix: improve image paste handling with resizing and error feedback (#601)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-19 21:57:39 -08:00
committed by GitHub
parent 86553db606
commit acc134027b
7 changed files with 372 additions and 80 deletions

View File

@@ -1,9 +1,22 @@
// Clipboard utilities for detecting and importing images from system clipboard
import { execFileSync } from "node:child_process";
import { existsSync, readFileSync, statSync } from "node:fs";
import { basename, extname, isAbsolute, resolve } from "node:path";
import { existsSync, readFileSync, statSync, unlinkSync } from "node:fs";
import { tmpdir } from "node:os";
import { basename, extname, isAbsolute, join, resolve } from "node:path";
import { resizeImageIfNeeded } from "./imageResize";
import { allocateImage } from "./pasteRegistry";
/**
* Result type for clipboard image import.
* - placeholder: Successfully imported, contains [Image #N]; `resized`,
*   `width` and `height` describe the image that was stored in the registry
* - error: Failed with an error message
* - null: No image in clipboard (also returned when not running on macOS)
*/
export type ClipboardImageResult =
// Success: placeholder text plus the stored image's dimensions in pixels
| { placeholder: string; resized: boolean; width: number; height: number }
// Failure: human-readable message describing what went wrong
| { error: string }
// Nothing to import
| null;
/**
* Copy text to system clipboard
* Returns true if successful, false otherwise
@@ -158,50 +171,116 @@ export function translatePasteForImages(paste: string): string {
return s;
}
// Attempt to import an image directly from OS clipboard on macOS via JXA (built-in)
export function tryImportClipboardImageMac(): string | null {
/**
* Read image from macOS clipboard to a temp file.
* Returns the temp file path and UTI, or null if no image in clipboard.
* Also returns null on non-macOS platforms and when running osascript fails.
* On success the temp file is left on disk; the caller is responsible for
* deleting it.
*/
function getClipboardImageToTempFile(): {
tempPath: string;
uti: string;
} | null {
if (process.platform !== "darwin") return null;
// NOTE(review): the name is unique only per millisecond (Date.now()); two
// imports within the same millisecond would target the same path.
const tempPath = join(tmpdir(), `letta-clipboard-${Date.now()}.bin`);
try {
// JXA script that writes clipboard image to temp file and returns UTI
// This avoids stdout buffer limits for large images
// NOTE(review): tempPath is interpolated into the script inside single
// quotes without escaping - confirm tmpdir() can never contain a quote
// or backslash.
const jxa = `
ObjC.import('AppKit');
ObjC.import('Foundation');
(function() {
var pb = $.NSPasteboard.generalPasteboard;
var types = ['public.png','public.jpeg','public.tiff','public.heic','public.heif','public.bmp','public.gif','public.svg-image'];
var types = ['public.png','public.jpeg','public.tiff','public.heic','public.heif','public.bmp','public.gif'];
for (var i = 0; i < types.length; i++) {
var t = types[i];
var d = pb.dataForType(t);
if (d) {
var b64 = d.base64EncodedStringWithOptions(0).js;
return t + '|' + b64;
if (d && d.length > 0) {
d.writeToFileAtomically($('${tempPath}'), true);
return t;
}
}
return '';
})();
`;
// stderr is discarded; stdout carries the script's return value, which is
// the UTI of the first pasteboard type that had data, or '' if none did.
const out = execFileSync("osascript", ["-l", "JavaScript", "-e", jxa], {
const uti = execFileSync("osascript", ["-l", "JavaScript", "-e", jxa], {
encoding: "utf8",
stdio: ["ignore", "pipe", "ignore"],
}).trim();
if (!out) return null;
const idx = out.indexOf("|");
if (idx <= 0) return null;
const uti = out.slice(0, idx);
const b64 = out.slice(idx + 1);
if (!b64) return null;
const map: Record<string, string> = {
"public.png": "image/png",
"public.jpeg": "image/jpeg",
"public.tiff": "image/tiff",
"public.heic": "image/heic",
"public.heif": "image/heif",
"public.bmp": "image/bmp",
"public.gif": "image/gif",
"public.svg-image": "image/svg+xml",
};
const mediaType = map[uti] || "image/png";
const id = allocateImage({ data: b64, mediaType });
return `[Image #${id}]`;
// An empty UTI means no image type matched; also require that the file
// actually exists before handing its path back.
if (!uti || !existsSync(tempPath)) return null;
return { tempPath, uti };
} catch {
// Clean up temp file on error
if (existsSync(tempPath)) {
try {
unlinkSync(tempPath);
} catch {}
}
return null;
}
}
// Maps macOS pasteboard UTIs to the MIME type used when storing the image.
// A lookup that misses this table falls back to "image/png" at the call site.
const UTI_TO_MEDIA_TYPE: Record<string, string> = {
"public.png": "image/png",
"public.jpeg": "image/jpeg",
"public.tiff": "image/tiff",
"public.heic": "image/heic",
"public.heif": "image/heif",
"public.bmp": "image/bmp",
"public.gif": "image/gif",
};
/**
 * Pull an image off the macOS clipboard, shrink it if necessary, and register
 * it so it can be referenced by an `[Image #N]` placeholder.
 *
 * The clipboard contents are staged in a temp file rather than piped through
 * stdout, which sidesteps stdout buffer limits for large images. Oversized
 * images are scaled down to fit within API limits (2048x2048).
 *
 * @returns `null` when not on macOS or when the clipboard holds no image, an
 *   `{ error }` object when reading/processing fails, otherwise the
 *   placeholder together with the stored image's dimensions.
 */
export async function tryImportClipboardImageMac(): Promise<ClipboardImageResult> {
  if (process.platform !== "darwin") return null;

  const staged = getClipboardImageToTempFile();
  if (!staged) return null;

  // Best-effort removal of the staging file; failures are deliberately ignored.
  const discardTempFile = (): void => {
    try {
      unlinkSync(staged.tempPath);
    } catch {}
  };

  try {
    // Load the staged bytes, then drop the file right away - it is no longer needed.
    const bytes = readFileSync(staged.tempPath);
    discardTempFile();

    const declaredType = UTI_TO_MEDIA_TYPE[staged.uti] || "image/png";

    // Handles large retina screenshots, HEIC conversion, etc.
    const processed = await resizeImageIfNeeded(bytes, declaredType);

    // Register the final payload and hand back its placeholder.
    const imageId = allocateImage({
      data: processed.data,
      mediaType: processed.mediaType,
    });
    const { resized, width, height } = processed;
    return { placeholder: `[Image #${imageId}]`, resized, width, height };
  } catch (err) {
    // The file may still be around if the read itself failed.
    if (existsSync(staged.tempPath)) discardTempFile();

    const message = err instanceof Error ? err.message : String(err);
    return { error: `Image paste failed: ${message}` };
  }
}

View File

@@ -0,0 +1,90 @@
// Image resizing utilities for clipboard paste
// Follows Codex CLI's approach (codex-rs/utils/image/src/lib.rs)
import sharp from "sharp";
// Conservative limits that work with Anthropic's API (max 8000x8000)
// Codex uses 2048x768, we use 2048x2048 for more flexibility with tall screenshots
// Both values are in pixels; an image exceeding either bound is scaled down
// to fit inside this box.
export const MAX_IMAGE_WIDTH = 2048;
export const MAX_IMAGE_HEIGHT = 2048;
// Outcome of normalizing an image for paste.
export interface ResizeResult {
data: string; // base64 encoded
// MIME type describing the bytes in `data`
mediaType: string;
// Pixel dimensions of the returned image
width: number;
height: number;
// True only when the image was scaled down; a format conversion alone
// leaves this false
resized: boolean;
}
/**
 * Normalize an image so that it fits within MAX_IMAGE_WIDTH x MAX_IMAGE_HEIGHT
 * and is encoded as PNG or JPEG.
 *
 * Three outcomes are possible:
 * - PNG/JPEG already within limits: the original bytes are returned untouched
 *   and labelled with `inputMediaType`.
 * - Image exceeds a limit: it is scaled down with 'inside' fit (aspect ratio
 *   preserved, like Codex's resize behavior). JPEG input stays JPEG
 *   (quality 85); everything else becomes PNG.
 * - Other format within limits (e.g. HEIC, TIFF): re-encoded to PNG at its
 *   original size.
 *
 * Whenever the image is re-encoded it is first auto-oriented from its EXIF
 * orientation tag. sharp strips metadata on output, so without this step a
 * rotated photo would come out sideways.
 *
 * @param buffer Encoded image bytes.
 * @param inputMediaType MIME type reported by the caller; only used as the
 *   label when the original bytes are passed through unchanged.
 * @returns Base64 payload with its media type and pixel dimensions. `resized`
 *   is true only when the image was scaled down.
 * @throws Any error raised by sharp while decoding or encoding propagates to
 *   the caller.
 */
export async function resizeImageIfNeeded(
  buffer: Buffer,
  inputMediaType: string,
): Promise<ResizeResult> {
  const image = sharp(buffer);
  const metadata = await image.metadata();
  const width = metadata.width ?? 0;
  const height = metadata.height ?? 0;
  const isJpeg = metadata.format === "jpeg";
  const isPassthroughFormat = isJpeg || metadata.format === "png";
  const needsResize = width > MAX_IMAGE_WIDTH || height > MAX_IMAGE_HEIGHT;

  if (!needsResize && isPassthroughFormat) {
    // No resize needed and format is supported - return original bytes
    return {
      data: buffer.toString("base64"),
      mediaType: inputMediaType,
      width,
      height,
      resized: false,
    };
  }

  // From here on the image is re-encoded. Bake the EXIF orientation into the
  // pixels first, because the orientation tag is dropped from the output.
  let pipeline = image.rotate();
  if (needsResize) {
    // The bounding box is square, so it does not matter that the limits were
    // checked against pre-rotation dimensions.
    pipeline = pipeline.resize(MAX_IMAGE_WIDTH, MAX_IMAGE_HEIGHT, {
      fit: "inside",
      withoutEnlargement: true,
    });
  }

  // JPEG can only reach this point via the resize path; keep it as JPEG with
  // good quality (Codex uses 85). Everything else is emitted as lossless PNG.
  const encoder = isJpeg ? pipeline.jpeg({ quality: 85 }) : pipeline.png();

  // resolveWithObject yields the output dimensions directly, avoiding a second
  // decode of the freshly encoded buffer just to read its size.
  const { data, info } = await encoder.toBuffer({ resolveWithObject: true });

  return {
    data: data.toString("base64"),
    mediaType: isJpeg ? "image/jpeg" : "image/png",
    width: info.width,
    height: info.height,
    resized: needsResize,
  };
}