fix: improve image paste handling with resizing and error feedback (#601)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -2957,6 +2957,21 @@ export default function App({
|
||||
setMessageQueue([]);
|
||||
}, []);
|
||||
|
||||
/**
 * Handle paste errors (e.g., image too large).
 *
 * Registers a new "status" entry in the transcript buffers whose single line
 * is the warning-prefixed message, appends its id to the display order, and
 * then calls `refreshDerived()`.
 *
 * @param message Error text reported by the paste handler.
 */
const handlePasteError = useCallback(
  (message: string) => {
    const statusId = uid("status");
    buffersRef.current.byId.set(statusId, {
      kind: "status",
      id: statusId,
      lines: [`⚠️ ${message}`],
    });
    buffersRef.current.order.push(statusId);
    refreshDerived();
  },
  [refreshDerived],
);
|
||||
|
||||
const handleInterrupt = useCallback(async () => {
|
||||
// If we're executing client-side tools, abort them AND the main stream
|
||||
const hasTrackedTools =
|
||||
@@ -8028,6 +8043,7 @@ Plan file path: ${planFilePath}`;
|
||||
ralphPendingYolo={pendingRalphConfig?.isYolo ?? false}
|
||||
onRalphExit={handleRalphExit}
|
||||
conversationId={conversationId}
|
||||
onPasteError={handlePasteError}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
|
||||
@@ -139,6 +139,7 @@ export function Input({
|
||||
ralphPendingYolo = false,
|
||||
onRalphExit,
|
||||
conversationId,
|
||||
onPasteError,
|
||||
}: {
|
||||
visible?: boolean;
|
||||
streaming: boolean;
|
||||
@@ -163,6 +164,7 @@ export function Input({
|
||||
ralphPendingYolo?: boolean;
|
||||
onRalphExit?: () => void;
|
||||
conversationId?: string;
|
||||
onPasteError?: (message: string) => void;
|
||||
}) {
|
||||
const [value, setValue] = useState("");
|
||||
const [escapePressed, setEscapePressed] = useState(false);
|
||||
@@ -815,6 +817,7 @@ export function Input({
|
||||
focus={!onEscapeCancel}
|
||||
onBangAtEmpty={handleBangAtEmpty}
|
||||
onBackspaceAtEmpty={handleBackspaceAtEmpty}
|
||||
onPasteError={onPasteError}
|
||||
/>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
@@ -41,6 +41,11 @@ interface PasteAwareTextInputProps {
|
||||
* Return true to consume the keystroke.
|
||||
*/
|
||||
onBackspaceAtEmpty?: () => boolean;
|
||||
|
||||
/**
|
||||
* Called when an image paste fails (e.g., image too large).
|
||||
*/
|
||||
onPasteError?: (message: string) => void;
|
||||
}
|
||||
|
||||
function countLines(text: string): number {
|
||||
@@ -122,6 +127,7 @@ export function PasteAwareTextInput({
|
||||
onCursorMove,
|
||||
onBangAtEmpty,
|
||||
onBackspaceAtEmpty,
|
||||
onPasteError,
|
||||
}: PasteAwareTextInputProps) {
|
||||
const { internal_eventEmitter } = useStdin();
|
||||
const [displayValue, setDisplayValue] = useState(value);
|
||||
@@ -209,26 +215,34 @@ export function PasteAwareTextInput({
|
||||
// Native terminals don't send image data via bracketed paste, so we need
|
||||
// to explicitly check the clipboard when Ctrl+V is pressed.
|
||||
if (key.ctrl && input === "v") {
|
||||
const clip = tryImportClipboardImageMac();
|
||||
if (clip) {
|
||||
const at = Math.max(
|
||||
0,
|
||||
Math.min(caretOffsetRef.current, displayValueRef.current.length),
|
||||
);
|
||||
const newDisplay =
|
||||
displayValueRef.current.slice(0, at) +
|
||||
clip +
|
||||
displayValueRef.current.slice(at);
|
||||
displayValueRef.current = newDisplay;
|
||||
setDisplayValue(newDisplay);
|
||||
setActualValue(newDisplay);
|
||||
onChangeRef.current(newDisplay);
|
||||
const nextCaret = at + clip.length;
|
||||
setNudgeCursorOffset(nextCaret);
|
||||
caretOffsetRef.current = nextCaret;
|
||||
}
|
||||
// Don't return - let it fall through to normal paste handling
|
||||
// in case there's also text in the clipboard
|
||||
// Fire async handler (can't await in useInput callback)
|
||||
(async () => {
|
||||
const result = await tryImportClipboardImageMac();
|
||||
if (result) {
|
||||
if ("error" in result) {
|
||||
// Report the error via callback
|
||||
onPasteErrorRef.current?.(result.error);
|
||||
return;
|
||||
}
|
||||
// Success - insert the placeholder
|
||||
const clip = result.placeholder;
|
||||
const at = Math.max(
|
||||
0,
|
||||
Math.min(caretOffsetRef.current, displayValueRef.current.length),
|
||||
);
|
||||
const newDisplay =
|
||||
displayValueRef.current.slice(0, at) +
|
||||
clip +
|
||||
displayValueRef.current.slice(at);
|
||||
displayValueRef.current = newDisplay;
|
||||
setDisplayValue(newDisplay);
|
||||
setActualValue(newDisplay);
|
||||
onChangeRef.current(newDisplay);
|
||||
const nextCaret = at + clip.length;
|
||||
setNudgeCursorOffset(nextCaret);
|
||||
caretOffsetRef.current = nextCaret;
|
||||
}
|
||||
})();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -239,27 +253,22 @@ export function PasteAwareTextInput({
|
||||
|
||||
const payload = typeof input === "string" ? input : "";
|
||||
// Translate any image payloads in the paste (OSC 1337, data URLs, file paths)
|
||||
let translated = translatePasteForImages(payload);
|
||||
// If paste event carried no text (common for image-only clipboard), try macOS import
|
||||
if ((!translated || translated.length === 0) && payload.length === 0) {
|
||||
const clip = tryImportClipboardImageMac();
|
||||
if (clip) translated = clip;
|
||||
}
|
||||
const translated = translatePasteForImages(payload);
|
||||
|
||||
if (translated && translated.length > 0) {
|
||||
// Insert at current caret position
|
||||
// Helper to insert translated content
|
||||
const insertTranslated = (text: string) => {
|
||||
const at = Math.max(
|
||||
0,
|
||||
Math.min(caretOffsetRef.current, displayValue.length),
|
||||
);
|
||||
const isLarge = countLines(translated) > 5 || translated.length > 500;
|
||||
const isLarge = countLines(text) > 5 || text.length > 500;
|
||||
if (isLarge) {
|
||||
const pasteId = allocatePaste(translated);
|
||||
const placeholder = `[Pasted text #${pasteId} +${countLines(translated)} lines]`;
|
||||
const pasteId = allocatePaste(text);
|
||||
const placeholder = `[Pasted text #${pasteId} +${countLines(text)} lines]`;
|
||||
const newDisplay =
|
||||
displayValue.slice(0, at) + placeholder + displayValue.slice(at);
|
||||
const newActual =
|
||||
actualValue.slice(0, at) + translated + actualValue.slice(at);
|
||||
actualValue.slice(0, at) + text + actualValue.slice(at);
|
||||
setDisplayValue(newDisplay);
|
||||
setActualValue(newActual);
|
||||
onChange(newDisplay);
|
||||
@@ -267,11 +276,11 @@ export function PasteAwareTextInput({
|
||||
setNudgeCursorOffset(nextCaret);
|
||||
caretOffsetRef.current = nextCaret;
|
||||
} else {
|
||||
const displayText = sanitizeForDisplay(translated);
|
||||
const displayText = sanitizeForDisplay(text);
|
||||
const newDisplay =
|
||||
displayValue.slice(0, at) + displayText + displayValue.slice(at);
|
||||
const newActual =
|
||||
actualValue.slice(0, at) + translated + actualValue.slice(at);
|
||||
actualValue.slice(0, at) + text + actualValue.slice(at);
|
||||
setDisplayValue(newDisplay);
|
||||
setActualValue(newActual);
|
||||
onChange(newDisplay);
|
||||
@@ -279,6 +288,26 @@ export function PasteAwareTextInput({
|
||||
setNudgeCursorOffset(nextCaret);
|
||||
caretOffsetRef.current = nextCaret;
|
||||
}
|
||||
};
|
||||
|
||||
// If paste event carried no text (common for image-only clipboard), try macOS import
|
||||
if ((!translated || translated.length === 0) && payload.length === 0) {
|
||||
// Fire async handler
|
||||
(async () => {
|
||||
const clipResult = await tryImportClipboardImageMac();
|
||||
if (clipResult) {
|
||||
if ("error" in clipResult) {
|
||||
onPasteErrorRef.current?.(clipResult.error);
|
||||
return;
|
||||
}
|
||||
insertTranslated(clipResult.placeholder);
|
||||
}
|
||||
})();
|
||||
return;
|
||||
}
|
||||
|
||||
if (translated && translated.length > 0) {
|
||||
insertTranslated(translated);
|
||||
return;
|
||||
}
|
||||
// If nothing to insert, fall through
|
||||
@@ -288,23 +317,31 @@ export function PasteAwareTextInput({
|
||||
(key.meta && (input === "v" || input === "V")) ||
|
||||
(key.ctrl && key.shift && (input === "v" || input === "V"))
|
||||
) {
|
||||
const placeholder = tryImportClipboardImageMac();
|
||||
if (placeholder) {
|
||||
const at = Math.max(
|
||||
0,
|
||||
Math.min(caretOffsetRef.current, displayValue.length),
|
||||
);
|
||||
const newDisplay =
|
||||
displayValue.slice(0, at) + placeholder + displayValue.slice(at);
|
||||
const newActual =
|
||||
actualValue.slice(0, at) + placeholder + actualValue.slice(at);
|
||||
setDisplayValue(newDisplay);
|
||||
setActualValue(newActual);
|
||||
onChange(newDisplay);
|
||||
const nextCaret = at + placeholder.length;
|
||||
setNudgeCursorOffset(nextCaret);
|
||||
caretOffsetRef.current = nextCaret;
|
||||
}
|
||||
// Fire async handler
|
||||
(async () => {
|
||||
const result = await tryImportClipboardImageMac();
|
||||
if (result) {
|
||||
if ("error" in result) {
|
||||
onPasteErrorRef.current?.(result.error);
|
||||
return;
|
||||
}
|
||||
const placeholder = result.placeholder;
|
||||
const at = Math.max(
|
||||
0,
|
||||
Math.min(caretOffsetRef.current, displayValue.length),
|
||||
);
|
||||
const newDisplay =
|
||||
displayValue.slice(0, at) + placeholder + displayValue.slice(at);
|
||||
const newActual =
|
||||
actualValue.slice(0, at) + placeholder + actualValue.slice(at);
|
||||
setDisplayValue(newDisplay);
|
||||
setActualValue(newActual);
|
||||
onChange(newDisplay);
|
||||
const nextCaret = at + placeholder.length;
|
||||
setNudgeCursorOffset(nextCaret);
|
||||
caretOffsetRef.current = nextCaret;
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
// Backspace on empty input - handle here since handleChange won't fire
|
||||
@@ -330,6 +367,11 @@ export function PasteAwareTextInput({
|
||||
onBackspaceAtEmptyRef.current = onBackspaceAtEmpty;
|
||||
}, [onBackspaceAtEmpty]);
|
||||
|
||||
const onPasteErrorRef = useRef(onPasteError);
|
||||
useEffect(() => {
|
||||
onPasteErrorRef.current = onPasteError;
|
||||
}, [onPasteError]);
|
||||
|
||||
// Consolidated raw stdin handler for Option+Arrow navigation and Option+Delete
|
||||
// Uses internal_eventEmitter (Ink's private API) for escape sequences that useInput doesn't parse correctly.
|
||||
// Falls back gracefully if internal_eventEmitter is unavailable (useInput handler above still works for some cases).
|
||||
|
||||
@@ -1,9 +1,22 @@
|
||||
// Clipboard utilities for detecting and importing images from system clipboard
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { existsSync, readFileSync, statSync } from "node:fs";
|
||||
import { basename, extname, isAbsolute, resolve } from "node:path";
|
||||
import { existsSync, readFileSync, statSync, unlinkSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { basename, extname, isAbsolute, join, resolve } from "node:path";
|
||||
import { resizeImageIfNeeded } from "./imageResize";
|
||||
import { allocateImage } from "./pasteRegistry";
|
||||
|
||||
/**
 * Result type for clipboard image import.
 *
 * - `{ placeholder, resized, width, height }`: successfully imported;
 *   `placeholder` contains the `[Image #N]` token to insert into the input,
 *   and the remaining fields describe the stored image.
 * - `{ error }`: the import failed; `error` is a message to show the user.
 * - `null`: no image in the clipboard.
 *
 * Narrow with `"error" in result` after a null check.
 */
export type ClipboardImageResult =
  | { placeholder: string; resized: boolean; width: number; height: number }
  | { error: string }
  | null;
|
||||
|
||||
/**
|
||||
* Copy text to system clipboard
|
||||
* Returns true if successful, false otherwise
|
||||
@@ -158,50 +171,116 @@ export function translatePasteForImages(paste: string): string {
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
 * Read image from macOS clipboard to a temp file.
 *
 * Runs a JXA (JavaScript for Automation) script through `osascript` that
 * walks a fixed list of image UTIs, writes the first non-empty pasteboard
 * payload to a temp file, and prints the matching UTI. Writing to a file
 * (rather than printing base64) avoids stdout buffer limits for large images.
 *
 * The caller owns the returned temp file and is responsible for deleting it.
 *
 * @returns The temp file path and UTI, or null if not on macOS, if the
 *          clipboard holds no image, or if the script fails.
 */
function getClipboardImageToTempFile(): {
  tempPath: string;
  uti: string;
} | null {
  if (process.platform !== "darwin") return null;

  // A bare timestamp can collide (two pastes in the same millisecond, or two
  // CLI processes sharing tmpdir), so add the pid and a random suffix.
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random()
    .toString(36)
    .slice(2)}`;
  const tempPath = join(tmpdir(), `letta-clipboard-${uniqueSuffix}.bin`);

  // Embed the path as a JSON string literal so quotes or backslashes in the
  // temp directory cannot break out of (or inject into) the script source.
  const tempPathLiteral = JSON.stringify(tempPath);

  try {
    // JXA script that writes clipboard image to temp file and returns UTI
    const jxa = `
      ObjC.import('AppKit');
      ObjC.import('Foundation');
      (function() {
        var pb = $.NSPasteboard.generalPasteboard;
        var types = ['public.png','public.jpeg','public.tiff','public.heic','public.heif','public.bmp','public.gif'];
        for (var i = 0; i < types.length; i++) {
          var t = types[i];
          var d = pb.dataForType(t);
          if (d && d.length > 0) {
            d.writeToFileAtomically($(${tempPathLiteral}), true);
            return t;
          }
        }
        return '';
      })();
    `;

    const uti = execFileSync("osascript", ["-l", "JavaScript", "-e", jxa], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
    }).trim();

    // Empty UTI means no image type matched; a missing file means the write
    // did not succeed even though a type matched.
    if (!uti || !existsSync(tempPath)) return null;

    return { tempPath, uti };
  } catch {
    // Clean up temp file on error (best effort)
    if (existsSync(tempPath)) {
      try {
        unlinkSync(tempPath);
      } catch {
        // Nothing more can be done if the temp file cannot be removed.
      }
    }
    return null;
  }
}
|
||||
|
||||
/**
 * Maps macOS pasteboard UTIs (Uniform Type Identifiers) to the MIME media
 * type recorded for the pasted image. Lookups for a UTI not listed here
 * yield `undefined`.
 */
const UTI_TO_MEDIA_TYPE: Readonly<Record<string, string>> = {
  "public.png": "image/png",
  "public.jpeg": "image/jpeg",
  "public.tiff": "image/tiff",
  "public.heic": "image/heic",
  "public.heif": "image/heif",
  "public.bmp": "image/bmp",
  "public.gif": "image/gif",
};
|
||||
|
||||
/**
 * Import image from macOS clipboard, resize if needed, return placeholder.
 *
 * Uses the temp-file approach (`getClipboardImageToTempFile`) to avoid stdout
 * buffer limits, passes the bytes through `resizeImageIfNeeded`, and stores
 * the result in the paste registry via `allocateImage`.
 *
 * @returns `null` when not on macOS or when the clipboard holds no image;
 *          `{ placeholder, resized, width, height }` on success, where
 *          `placeholder` is the `[Image #N]` token for the stored image;
 *          `{ error }` with a user-facing message if reading, resizing, or
 *          storing the image throws. This function does not reject.
 */
export async function tryImportClipboardImageMac(): Promise<ClipboardImageResult> {
  if (process.platform !== "darwin") return null;

  const clipboardResult = getClipboardImageToTempFile();
  if (!clipboardResult) return null;

  const { tempPath, uti } = clipboardResult;

  try {
    // Read the temp file, then remove it whether or not the read succeeded.
    let buffer: Buffer;
    try {
      buffer = readFileSync(tempPath);
    } finally {
      try {
        unlinkSync(tempPath);
      } catch {
        // Best effort: a leftover temp file must not fail the paste.
      }
    }

    // Unknown UTIs fall back to PNG.
    const mediaType = UTI_TO_MEDIA_TYPE[uti] ?? "image/png";

    // Resize if needed (handles large retina screenshots, HEIC conversion, etc.)
    const resized = await resizeImageIfNeeded(buffer, mediaType);

    // Store in registry
    const id = allocateImage({
      data: resized.data,
      mediaType: resized.mediaType,
    });

    return {
      placeholder: `[Image #${id}]`,
      resized: resized.resized,
      width: resized.width,
      height: resized.height,
    };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return { error: `Image paste failed: ${message}` };
  }
}
|
||||
|
||||
90
src/cli/helpers/imageResize.ts
Normal file
90
src/cli/helpers/imageResize.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
// Image resizing utilities for clipboard paste
|
||||
// Follows Codex CLI's approach (codex-rs/utils/image/src/lib.rs)
|
||||
import sharp from "sharp";
|
||||
|
||||
// Conservative limits that work with Anthropic's API (max 8000x8000)
// Codex uses 2048x768, we use 2048x2048 for more flexibility with tall screenshots

/** Maximum width, in pixels, an image may have before it is downscaled. */
export const MAX_IMAGE_WIDTH = 2048;

/** Maximum height, in pixels, an image may have before it is downscaled. */
export const MAX_IMAGE_HEIGHT = 2048;

/** Outcome of `resizeImageIfNeeded`. */
export interface ResizeResult {
  /** Image bytes, base64 encoded. */
  data: string;
  /** MIME media type describing `data`. */
  mediaType: string;
  /** Width in pixels of the returned image. */
  width: number;
  /** Height in pixels of the returned image. */
  height: number;
  /** True when the image was downscaled to fit within the limits. */
  resized: boolean;
}
|
||||
|
||||
/**
 * Resize image if it exceeds MAX_IMAGE_WIDTH or MAX_IMAGE_HEIGHT.
 *
 * Behavior by case:
 * - Within limits and already PNG/JPEG: the original bytes are returned
 *   untouched with `inputMediaType`.
 * - Exceeds limits: downscaled with 'inside' fit (aspect ratio preserved,
 *   like Codex's resize behavior). JPEG input stays JPEG (quality 85);
 *   everything else becomes PNG.
 * - Within limits but another format (e.g., HEIC, TIFF): converted to PNG.
 *
 * @param buffer Raw encoded image bytes.
 * @param inputMediaType Media type to report when the bytes pass through
 *                       unchanged.
 * @returns Base64 data, media type, final dimensions, and whether the image
 *          was downscaled.
 * @throws Rejects if sharp cannot decode or encode the image.
 */
export async function resizeImageIfNeeded(
  buffer: Buffer,
  inputMediaType: string,
): Promise<ResizeResult> {
  const image = sharp(buffer);
  const metadata = await image.metadata();
  const width = metadata.width ?? 0;
  const height = metadata.height ?? 0;
  const format = metadata.format;

  const needsResize = width > MAX_IMAGE_WIDTH || height > MAX_IMAGE_HEIGHT;

  // Determine if we can pass through the original format
  const isPassthroughFormat = format === "png" || format === "jpeg";

  if (!needsResize && isPassthroughFormat) {
    // No resize needed and format is supported - return original bytes
    return {
      data: buffer.toString("base64"),
      mediaType: inputMediaType,
      width,
      height,
      resized: false,
    };
  }

  // Everything below re-encodes. sharp drops metadata (including the EXIF
  // orientation tag) on output, so bake the orientation into the pixels
  // first; otherwise rotated photos would come out sideways.
  let pipeline = image.rotate();

  if (needsResize) {
    // Resize preserving aspect ratio
    // Use 'inside' fit which is equivalent to Codex's resize behavior
    pipeline = pipeline.resize(MAX_IMAGE_WIDTH, MAX_IMAGE_HEIGHT, {
      fit: "inside",
      withoutEnlargement: true,
    });
  }

  // Preserve JPEG with good quality (Codex uses 85); default to lossless PNG
  // for everything else. A JPEG only reaches this point when it needs a
  // resize, so pure format conversions always produce PNG.
  const keepJpeg = format === "jpeg";
  const encoder = keepJpeg ? pipeline.jpeg({ quality: 85 }) : pipeline.png();

  // resolveWithObject yields the output dimensions directly, avoiding a
  // second decode of the encoded buffer just to read its metadata.
  const { data, info } = await encoder.toBuffer({ resolveWithObject: true });

  return {
    data: data.toString("base64"),
    mediaType: keepJpeg ? "image/jpeg" : "image/png",
    width: info.width,
    height: info.height,
    resized: needsResize,
  };
}
|
||||
Reference in New Issue
Block a user