From 269cbd8fe22415858ce17951bde6aed432d1793c Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Tue, 20 Jan 2026 20:10:02 -0800 Subject: [PATCH] revert: revert "feat: add image reading support to Read tool" (#605) --- src/cli/App.tsx | 81 ++-------------------------- src/cli/helpers/toolImageRegistry.ts | 47 ---------------- src/headless.ts | 67 +---------------------- src/tools/descriptions/Read.md | 2 - src/tools/impl/Read.ts | 75 -------------------------- src/tools/manager.ts | 10 +--- src/tools/toolContext.ts | 26 --------- 7 files changed, 5 insertions(+), 303 deletions(-) delete mode 100644 src/cli/helpers/toolImageRegistry.ts delete mode 100644 src/tools/toolContext.ts diff --git a/src/cli/App.tsx b/src/cli/App.tsx index eec2231..f5e728e 100644 --- a/src/cli/App.tsx +++ b/src/cli/App.tsx @@ -158,10 +158,6 @@ import { subscribe as subscribeToSubagents, } from "./helpers/subagentState"; import { getRandomThinkingVerb } from "./helpers/thinkingMessages"; -import { - clearQueuedToolImages, - getAndClearQueuedToolImages, -} from "./helpers/toolImageRegistry"; import { isFileEditTool, isFileWriteTool, @@ -3250,9 +3246,6 @@ export default function App({ // Lock input for async operation (set before any await to prevent queue processing) setCommandRunning(true); - // Clear any queued tool images from the previous agent context - clearQueuedToolImages(); - const inputCmd = "/agents"; const cmdId = uid("cmd"); @@ -3740,44 +3733,9 @@ export default function App({ // Send all results to server if any if (allResults.length > 0) { toolResultsInFlightRef.current = true; - - // Check for queued tool images (from Read tool reading image files) - const toolImages = getAndClearQueuedToolImages(); - const input: Array = [ + await processConversation([ { type: "approval", approvals: allResults }, - ]; - - // If there are queued images, add them as a user message - if (toolImages.length > 0) { - const imageContentParts: Array< - | { type: "text"; text: string } - | { - type: "image"; - source: { type: "base64"; media_type: string; data: string }; - } - > = []; - for (const img of toolImages) { - imageContentParts.push({ - type: "text", - text: `Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):`, - }); - imageContentParts.push({ - type: "image", - source: { - type: "base64", - media_type: img.mediaType, - data: img.data, - }, - }); - } - input.push({ - type: "message", - role: "user", - content: imageContentParts as unknown as MessageCreate["content"], - }); - } - - await processConversation(input); + ]); toolResultsInFlightRef.current = false; } } finally { @@ -4415,9 +4373,6 @@ export default function App({ setCommandRunning(true); - // Clear any queued tool images from the previous conversation - clearQueuedToolImages(); - try { const client = await getClient(); @@ -5643,37 +5598,7 @@ ${gitContext} } // Build message content from display value (handles placeholders for text/images) - let contentParts = buildMessageContentFromDisplay(msg); - - // Prepend any queued tool images (from Read tool reading image files) - const queuedToolImages = getAndClearQueuedToolImages(); - if (queuedToolImages.length > 0) { - const imageParts: Array< - | { type: "text"; text: string } - | { - type: "image"; - source: { type: "base64"; media_type: string; data: string }; - } - > = []; - for (const img of queuedToolImages) { - // Add system reminder text - imageParts.push({ - type: "text", - text: `Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):`, - }); - // Add image content - imageParts.push({ - type: "image", - source: { - type: "base64", - media_type: img.mediaType, - data: img.data, - }, - }); - } - // Prepend to contentParts - contentParts = [...imageParts, ...contentParts]; - } + const contentParts = buildMessageContentFromDisplay(msg); // Prepend plan mode reminder if in plan mode const planModeReminder = getPlanModeReminder(); diff --git a/src/cli/helpers/toolImageRegistry.ts b/src/cli/helpers/toolImageRegistry.ts deleted file mode 100644 index fee46de..0000000 --- a/src/cli/helpers/toolImageRegistry.ts +++ /dev/null @@ -1,47 +0,0 @@ -// Registry for images read by tools that need to be sent in the next user message turn. -// This is needed because tool returns only support string content - we can't return -// image data directly in tool results to the Letta API. - -export interface QueuedToolImage { - toolCallId: string; - filePath: string; - data: string; // base64 - mediaType: string; - width: number; - height: number; -} - -const queuedImages: QueuedToolImage[] = []; - -/** - * Queue an image to be sent in the next user message. - * Called by the Read tool when reading an image file. - */ -export function queueToolImage(image: QueuedToolImage): void { - queuedImages.push(image); -} - -/** - * Get and clear all queued images. - * Called when building the user message content. - */ -export function getAndClearQueuedToolImages(): QueuedToolImage[] { - const images = [...queuedImages]; - queuedImages.length = 0; - return images; -} - -/** - * Clear all queued images without returning them. - * Called on conversation/agent switch to prevent memory leaks. - */ -export function clearQueuedToolImages(): void { - queuedImages.length = 0; -} - -/** - * Check if there are any queued images. - */ -export function hasQueuedToolImages(): boolean { - return queuedImages.length > 0; -} diff --git a/src/headless.ts b/src/headless.ts index c5df126..5961076 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -31,7 +31,6 @@ import { formatErrorDetails } from "./cli/helpers/errorFormatter"; import { safeJsonParseOr } from "./cli/helpers/safeJsonParse"; import { drainStreamWithResume } from "./cli/helpers/stream"; import { StreamProcessor } from "./cli/helpers/streamProcessor"; -import { getAndClearQueuedToolImages } from "./cli/helpers/toolImageRegistry"; import { settingsManager } from "./settings-manager"; import { checkToolPermission } from "./tools/manager"; import type { @@ -935,42 +934,11 @@ export async function handleHeadlessCommand( // Add user prompt messageContent += prompt; - // Build content parts (text + any queued tool images from Read tool) - type ContentPart = - | { type: "text"; text: string } - | { - type: "image"; - source: { type: "base64"; media_type: string; data: string }; - }; - const contentParts: ContentPart[] = []; - - // Check for queued tool images (from Read tool reading image files) - const queuedToolImages = getAndClearQueuedToolImages(); - if (queuedToolImages.length > 0) { - for (const img of queuedToolImages) { - contentParts.push({ - type: "text", - text: `Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):`, - }); - contentParts.push({ - type: "image", - source: { - type: "base64", - media_type: img.mediaType, - data: img.data, - }, - }); - } - } - - // Add the text message content - contentParts.push({ type: "text", text: messageContent }); - // Start with the user message let currentInput: Array = [ { role: "user", - content: contentParts as unknown as MessageCreate["content"], + content: [{ type: "text", text: messageContent }], }, ]; @@ -1273,9 +1241,6 @@ export async function handleHeadlessCommand( ); const executedResults = await executeApprovalBatch(decisions); - // Check for queued tool images (from Read tool reading image files) - const toolImages = getAndClearQueuedToolImages(); - // Send all results in one batch currentInput = [ { @@ -1283,36 +1248,6 @@ export async function handleHeadlessCommand( approvals: executedResults as ApprovalResult[], }, ]; - - // If there are queued images, add them as a user message - if (toolImages.length > 0) { - const imageContentParts: Array< - | { type: "text"; text: string } - | { - type: "image"; - source: { type: "base64"; media_type: string; data: string }; - } - > = []; - for (const img of toolImages) { - imageContentParts.push({ - type: "text", - text: `Image read from ${img.filePath} (Read tool call: ${img.toolCallId}):`, - }); - imageContentParts.push({ - type: "image", - source: { - type: "base64", - media_type: img.mediaType, - data: img.data, - }, - }); - } - currentInput.push({ - role: "user", - content: imageContentParts as unknown as MessageCreate["content"], - }); - } - continue; } diff --git a/src/tools/descriptions/Read.md b/src/tools/descriptions/Read.md index 17e1d7f..d436428 100644 --- a/src/tools/descriptions/Read.md +++ b/src/tools/descriptions/Read.md @@ -9,8 +9,6 @@ Usage: - You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters - Any lines longer than 2000 characters will be truncated - Results are returned using cat -n format, with line numbers starting at 1 -- This tool allows Letta Code to read images (PNG, JPG, JPEG, GIF, WEBP, BMP). When reading an image file the contents are presented visually as Letta Code is a multimodal LLM. Large images are automatically resized to fit within API limits. -- You will regularly be asked to read screenshots. If the user provides a path to a screenshot, ALWAYS use this tool to view the file at the path. This tool will work with all temporary file paths. - This tool can only read files, not directories. To read a directory, use the ls command via Bash. - You can call multiple tools in a single response. It is always better to speculatively read multiple potentially useful files in parallel. - If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents. diff --git a/src/tools/impl/Read.ts b/src/tools/impl/Read.ts index 8c0e9cd..72ffcf1 100644 --- a/src/tools/impl/Read.ts +++ b/src/tools/impl/Read.ts @@ -1,78 +1,9 @@ import { promises as fs } from "node:fs"; import * as path from "node:path"; -import { resizeImageIfNeeded } from "../../cli/helpers/imageResize.js"; -import { queueToolImage } from "../../cli/helpers/toolImageRegistry.js"; -import { getToolExecutionContext } from "../toolContext.js"; import { OVERFLOW_CONFIG, writeOverflowFile } from "./overflow.js"; import { LIMITS } from "./truncation.js"; import { validateRequiredParams } from "./validation.js"; -// Supported image extensions (lowercase) -const IMAGE_EXTENSIONS = new Set([ - ".png", - ".jpg", - ".jpeg", - ".gif", - ".webp", - ".bmp", -]); - -/** - * Check if a file path is an image based on extension. - */ -function isImageFile(filePath: string): boolean { - const ext = path.extname(filePath).toLowerCase(); - return IMAGE_EXTENSIONS.has(ext); -} - -/** - * Get MIME type from file extension. - */ -function getMimeType(filePath: string): string { - const ext = path.extname(filePath).toLowerCase(); - const mimeTypes: Record = { - ".png": "image/png", - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".gif": "image/gif", - ".webp": "image/webp", - ".bmp": "image/bmp", - }; - return mimeTypes[ext] || "image/png"; -} - -/** - * Read an image file, resize if needed, and queue for display. - * Returns a placeholder message - actual image is sent in the next user message. - */ -async function readImageFile(filePath: string): Promise { - const buffer = await fs.readFile(filePath); - const inputMimeType = getMimeType(filePath); - const resized = await resizeImageIfNeeded(buffer, inputMimeType); - - // Get tool call ID from execution context - const context = getToolExecutionContext(); - const toolCallId = context?.toolCallId || "unknown"; - - // Queue for next turn - queueToolImage({ - toolCallId, - filePath, - data: resized.data, - mediaType: resized.mediaType, - width: resized.width, - height: resized.height, - }); - - const resizeNote = resized.resized - ? ` (resized to ${resized.width}x${resized.height})` - : ` (${resized.width}x${resized.height})`; - - return { - content: `[Image: ${filePath}${resizeNote} - queued for display]`, - }; -} - interface ReadArgs { file_path: string; offset?: number; @@ -214,12 +145,6 @@ export async function read(args: ReadArgs): Promise { throw new Error( `File too large: ${stats.size} bytes (max ${maxSize} bytes)`, ); - - // Handle image files specially - read, resize, and queue for display - if (isImageFile(resolvedPath)) { - return await readImageFile(resolvedPath); - } - if (await isBinaryFile(resolvedPath)) throw new Error(`Cannot read binary file: ${resolvedPath}`); const content = await fs.readFile(resolvedPath, "utf-8"); diff --git a/src/tools/manager.ts b/src/tools/manager.ts index 6b6a5b0..1229e0d 100644 --- a/src/tools/manager.ts +++ b/src/tools/manager.ts @@ -2,7 +2,6 @@ import { getModelInfo } from "../agent/model"; import { getAllSubagentConfigs } from "../agent/subagents"; import { INTERRUPTED_BY_USER } from "../constants"; import { telemetry } from "../telemetry"; -import { setToolExecutionContext } from "./toolContext"; import { TOOL_DEFINITIONS, type ToolName } from "./toolDefinitions"; export const TOOL_NAMES = Object.keys(TOOL_DEFINITIONS) as ToolName[]; @@ -755,14 +754,7 @@ export async function executeTool( } } - // Set execution context for tools that need it (e.g., Read for image queuing) - setToolExecutionContext({ toolCallId: options?.toolCallId }); - let result: unknown; - try { - result = await tool.fn(enhancedArgs); - } finally { - setToolExecutionContext(null); - } + const result = await tool.fn(enhancedArgs); const duration = Date.now() - startTime; // Extract stdout/stderr if present (for bash tools) diff --git a/src/tools/toolContext.ts b/src/tools/toolContext.ts deleted file mode 100644 index f0729c4..0000000 --- a/src/tools/toolContext.ts +++ /dev/null @@ -1,26 +0,0 @@ -// Tool execution context - allows tools to access execution metadata -// Separate file to avoid circular dependencies with manager.ts - -interface ToolExecutionContext { - toolCallId?: string; -} - -let currentToolContext: ToolExecutionContext | null = null; - -/** - * Get the current tool execution context. - * Called by tools that need access to execution metadata (e.g., Read for image queuing). - */ -export function getToolExecutionContext(): ToolExecutionContext | null { - return currentToolContext; -} - -/** - * Set the current tool execution context. - * Called by manager.ts before executing a tool. - */ -export function setToolExecutionContext( - context: ToolExecutionContext | null, -): void { - currentToolContext = context; -}