feat: add image reading support to Read tool (#603)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -158,6 +158,10 @@ import {
|
||||
subscribe as subscribeToSubagents,
|
||||
} from "./helpers/subagentState";
|
||||
import { getRandomThinkingVerb } from "./helpers/thinkingMessages";
|
||||
import {
|
||||
clearQueuedToolImages,
|
||||
getAndClearQueuedToolImages,
|
||||
} from "./helpers/toolImageRegistry";
|
||||
import {
|
||||
isFileEditTool,
|
||||
isFileWriteTool,
|
||||
@@ -3239,6 +3243,9 @@ export default function App({
|
||||
// Lock input for async operation (set before any await to prevent queue processing)
|
||||
setCommandRunning(true);
|
||||
|
||||
// Clear any queued tool images from the previous agent context
|
||||
clearQueuedToolImages();
|
||||
|
||||
const inputCmd = "/agents";
|
||||
const cmdId = uid("cmd");
|
||||
|
||||
@@ -3717,9 +3724,44 @@ export default function App({
|
||||
// Send all results to server if any
|
||||
if (allResults.length > 0) {
|
||||
toolResultsInFlightRef.current = true;
|
||||
await processConversation([
|
||||
|
||||
// Check for queued tool images (from Read tool reading image files)
|
||||
const toolImages = getAndClearQueuedToolImages();
|
||||
const input: Array<MessageCreate | ApprovalCreate> = [
|
||||
{ type: "approval", approvals: allResults },
|
||||
]);
|
||||
];
|
||||
|
||||
// If there are queued images, add them as a user message
|
||||
if (toolImages.length > 0) {
|
||||
const imageContentParts: Array<
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "image";
|
||||
source: { type: "base64"; media_type: string; data: string };
|
||||
}
|
||||
> = [];
|
||||
for (const img of toolImages) {
|
||||
imageContentParts.push({
|
||||
type: "text",
|
||||
text: `<system-reminder>Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):</system-reminder>`,
|
||||
});
|
||||
imageContentParts.push({
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: img.mediaType,
|
||||
data: img.data,
|
||||
},
|
||||
});
|
||||
}
|
||||
input.push({
|
||||
type: "message",
|
||||
role: "user",
|
||||
content: imageContentParts as unknown as MessageCreate["content"],
|
||||
});
|
||||
}
|
||||
|
||||
await processConversation(input);
|
||||
toolResultsInFlightRef.current = false;
|
||||
}
|
||||
} finally {
|
||||
@@ -4357,6 +4399,9 @@ export default function App({
|
||||
|
||||
setCommandRunning(true);
|
||||
|
||||
// Clear any queued tool images from the previous conversation
|
||||
clearQueuedToolImages();
|
||||
|
||||
try {
|
||||
const client = await getClient();
|
||||
|
||||
@@ -5577,7 +5622,37 @@ ${gitContext}
|
||||
}
|
||||
|
||||
// Build message content from display value (handles placeholders for text/images)
|
||||
const contentParts = buildMessageContentFromDisplay(msg);
|
||||
let contentParts = buildMessageContentFromDisplay(msg);
|
||||
|
||||
// Prepend any queued tool images (from Read tool reading image files)
|
||||
const queuedToolImages = getAndClearQueuedToolImages();
|
||||
if (queuedToolImages.length > 0) {
|
||||
const imageParts: Array<
|
||||
| { type: "text"; text: string }
|
||||
| {
|
||||
type: "image";
|
||||
source: { type: "base64"; media_type: string; data: string };
|
||||
}
|
||||
> = [];
|
||||
for (const img of queuedToolImages) {
|
||||
// Add system reminder text
|
||||
imageParts.push({
|
||||
type: "text",
|
||||
text: `<system-reminder>Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):</system-reminder>`,
|
||||
});
|
||||
// Add image content
|
||||
imageParts.push({
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: img.mediaType,
|
||||
data: img.data,
|
||||
},
|
||||
});
|
||||
}
|
||||
// Prepend to contentParts
|
||||
contentParts = [...imageParts, ...contentParts];
|
||||
}
|
||||
|
||||
// Prepend plan mode reminder if in plan mode
|
||||
const planModeReminder = getPlanModeReminder();
|
||||
|
||||
47
src/cli/helpers/toolImageRegistry.ts
Normal file
47
src/cli/helpers/toolImageRegistry.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
// Registry for images read by tools that need to be sent in the next user message turn.
|
||||
// This is needed because tool returns only support string content - we can't return
|
||||
// image data directly in tool results to the Letta API.
|
||||
|
||||
/**
 * One image captured by a tool call and held until it can be attached to an
 * outgoing message.
 */
export interface QueuedToolImage {
  /** Path of the file the image was read from. */
  filePath: string;
  /** Identifier of the tool call that produced this image. */
  toolCallId: string;
  /** Media type string forwarded with the payload (e.g. as `media_type`). */
  mediaType: string;
  /** Image payload, base64-encoded. */
  data: string;
  /** Image width (presumably in pixels; the unit is not stated here). */
  width: number;
  /** Image height (presumably in pixels; the unit is not stated here). */
  height: number;
}
|
||||
|
||||
// Pending images, kept in the order they were queued. Not exported: other
// modules reach it only through the functions exported from this file.
const queuedImages: Array<QueuedToolImage> = [];
|
||||
|
||||
/**
 * Appends an image to the pending queue so it can be delivered with the next
 * user message.
 *
 * Intended to be invoked by the Read tool when the file it reads is an image.
 *
 * @param image - Image record to enqueue. It is stored by reference, not copied.
 */
export function queueToolImage(image: QueuedToolImage): void {
  queuedImages.push(image);
}
|
||||
|
||||
/**
 * Drains the pending queue.
 *
 * Used while assembling outgoing message content, so each queued image is
 * handed out exactly once.
 *
 * @returns A new array holding every image that was queued, in insertion
 *   order. The internal queue is empty afterwards.
 */
export function getAndClearQueuedToolImages(): QueuedToolImage[] {
  // splice removes the elements in place and returns them as a fresh array,
  // which covers both the "copy" and the "clear" step in one call.
  return queuedImages.splice(0, queuedImages.length);
}
|
||||
|
||||
/**
 * Discards every pending image without handing it to anyone.
 *
 * Invoked when switching conversation or agent so that images queued in the
 * previous context are dropped rather than kept in memory.
 */
export function clearQueuedToolImages(): void {
  // Empty the array in place; the removed elements are intentionally ignored.
  queuedImages.splice(0);
}
|
||||
|
||||
/**
 * Reports whether at least one image is waiting in the queue.
 *
 * @returns `true` when the queue is non-empty, otherwise `false`.
 */
export function hasQueuedToolImages(): boolean {
  return queuedImages.length !== 0;
}
|
||||
Reference in New Issue
Block a user