feat: add image reading support to Read tool (#603)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Charles Packer
2026-01-20 13:37:18 -08:00
committed by GitHub
parent e6661e7699
commit d34a65323c
7 changed files with 303 additions and 5 deletions

View File

@@ -158,6 +158,10 @@ import {
subscribe as subscribeToSubagents,
} from "./helpers/subagentState";
import { getRandomThinkingVerb } from "./helpers/thinkingMessages";
import {
clearQueuedToolImages,
getAndClearQueuedToolImages,
} from "./helpers/toolImageRegistry";
import {
isFileEditTool,
isFileWriteTool,
@@ -3239,6 +3243,9 @@ export default function App({
// Lock input for async operation (set before any await to prevent queue processing)
setCommandRunning(true);
// Clear any queued tool images from the previous agent context
clearQueuedToolImages();
const inputCmd = "/agents";
const cmdId = uid("cmd");
@@ -3717,9 +3724,44 @@ export default function App({
// Send all results to server if any
if (allResults.length > 0) {
toolResultsInFlightRef.current = true;
await processConversation([
// Check for queued tool images (from Read tool reading image files)
const toolImages = getAndClearQueuedToolImages();
const input: Array<MessageCreate | ApprovalCreate> = [
{ type: "approval", approvals: allResults },
]);
];
// If there are queued images, add them as a user message
if (toolImages.length > 0) {
const imageContentParts: Array<
| { type: "text"; text: string }
| {
type: "image";
source: { type: "base64"; media_type: string; data: string };
}
> = [];
for (const img of toolImages) {
imageContentParts.push({
type: "text",
text: `<system-reminder>Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):</system-reminder>`,
});
imageContentParts.push({
type: "image",
source: {
type: "base64",
media_type: img.mediaType,
data: img.data,
},
});
}
input.push({
type: "message",
role: "user",
content: imageContentParts as unknown as MessageCreate["content"],
});
}
await processConversation(input);
toolResultsInFlightRef.current = false;
}
} finally {
@@ -4357,6 +4399,9 @@ export default function App({
setCommandRunning(true);
// Clear any queued tool images from the previous conversation
clearQueuedToolImages();
try {
const client = await getClient();
@@ -5577,7 +5622,37 @@ ${gitContext}
}
// Build message content from display value (handles placeholders for text/images)
const contentParts = buildMessageContentFromDisplay(msg);
let contentParts = buildMessageContentFromDisplay(msg);
// Prepend any queued tool images (from Read tool reading image files)
const queuedToolImages = getAndClearQueuedToolImages();
if (queuedToolImages.length > 0) {
const imageParts: Array<
| { type: "text"; text: string }
| {
type: "image";
source: { type: "base64"; media_type: string; data: string };
}
> = [];
for (const img of queuedToolImages) {
// Add system reminder text
imageParts.push({
type: "text",
text: `<system-reminder>Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):</system-reminder>`,
});
// Add image content
imageParts.push({
type: "image",
source: {
type: "base64",
media_type: img.mediaType,
data: img.data,
},
});
}
// Prepend to contentParts
contentParts = [...imageParts, ...contentParts];
}
// Prepend plan mode reminder if in plan mode
const planModeReminder = getPlanModeReminder();

View File

@@ -0,0 +1,47 @@
// Holding area for images that tools have read and that must be delivered in
// the next user message turn. Tool returns only support string content, so
// image data cannot be handed back directly in a tool result to the Letta API.

/** A single image captured by a tool call and waiting to be sent. */
export interface QueuedToolImage {
  /** Identifier of the tool call that produced the image. */
  toolCallId: string;
  /** Path of the file the image was read from. */
  filePath: string;
  /** Image payload, base64-encoded. */
  data: string;
  /** Media type of the payload. */
  mediaType: string;
  /** Image width (units are not established here; presumably pixels). */
  width: number;
  /** Image height (units are not established here; presumably pixels). */
  height: number;
}

// Module-private FIFO of images not yet attached to an outgoing message.
const pendingImages: QueuedToolImage[] = [];

/**
 * Append an image to the pending queue so it is sent with the next user
 * message. Called by the Read tool when it reads an image file.
 *
 * @param image - The image to enqueue; stored by reference, not copied.
 */
export function queueToolImage(image: QueuedToolImage): void {
  pendingImages.push(image);
}

/**
 * Drain the pending queue. Called when building the user message content.
 *
 * @returns A new array holding every queued image in insertion order; the
 *   internal queue is empty afterwards.
 */
export function getAndClearQueuedToolImages(): QueuedToolImage[] {
  // splice(0) removes every element and returns them in a fresh array, so the
  // caller's result is unaffected by later queueing.
  return pendingImages.splice(0);
}

/**
 * Discard every pending image without returning them. Called on
 * conversation/agent switch to prevent memory leaks.
 */
export function clearQueuedToolImages(): void {
  pendingImages.splice(0, pendingImages.length);
}

/**
 * Report whether any image is currently pending.
 *
 * @returns `true` when at least one image is queued, otherwise `false`.
 */
export function hasQueuedToolImages(): boolean {
  return pendingImages.length !== 0;
}