// Patch summary: introduces a `view_image` tool and wires it through the CLI and tool registry.
//
//   src/agent/approval-execution.ts    - adds "view_image" to the PARALLEL_SAFE_TOOLS set.
//   src/cli/helpers/formatArgsDisplay.ts - the read-tool branch now takes the displayed path from
//                                        `parsed.file_path || parsed.path` and skips both keys when
//                                        collecting the remaining args.
//   src/cli/helpers/toolNameMapping.ts - getDisplayToolName maps "view_image" / "ViewImage" to
//                                        "View Image"; isFileReadTool returns true for both names.
//   src/tools/descriptions/ViewImage.md - new tool description text.
//   src/tools/impl/ViewImage.ts        - new `view_image(args)`: checks that `path` is present via
//                                        validateRequiredParams, resolves a non-absolute path against
//                                        `process.env.USER_CWD || process.cwd()`, throws
//                                        "Unsupported image file type: <path>" when the lower-cased
//                                        extension is not one of .png/.jpg/.jpeg/.gif/.webp/.bmp,
//                                        then calls `read({ file_path })` and returns its `content`.
//   src/tools/manager.ts               - adds "view_image" to OPENAI_DEFAULT_TOOLS and
//                                        OPENAI_PASCAL_TOOLS, and a TOOL_PERMISSIONS entry with
//                                        `requiresApproval: false`.
//   src/tools/schemas/ViewImage.json   - new schema: object with one required string `path`,
//                                        `additionalProperties: false`.
//   src/tools/toolDefinitions.ts       - imports the description, impl and schema and registers a
//                                        `view_image` entry in `toolDefinitions`.
//
// NOTE(review): ViewImage.md and the JSON schema both say `path` must be absolute, but view_image
//   also accepts a relative path and resolves it against USER_CWD / cwd - confirm which contract
//   is intended and align the text or the code.
// NOTE(review): ViewImage.md states large images are automatically resized; nothing in this patch
//   does that, so it presumably relies on `read` - verify before shipping the description.
// NOTE(review): the display helpers accept a "ViewImage" name, but this patch only registers the
//   snake_case `view_image` (including inside OPENAI_PASCAL_TOOLS) - confirm whether a PascalCase
//   alias is planned.
// NOTE(review): ViewImage.ts imports "./Read" without an extension and "./validation.js" with one -
//   confirm this matches the convention used by the sibling impl files.
// NOTE(review): the line breaks of this patch appear to have been collapsed in this copy; hunk
//   line counts cannot be checked against the content as shown.
diff --git a/src/agent/approval-execution.ts b/src/agent/approval-execution.ts index 907e70c..905b6c3 100644 --- a/src/agent/approval-execution.ts +++ b/src/agent/approval-execution.ts @@ -40,6 +40,7 @@ export function getDisplayableToolReturn(content: ToolReturnContent): string { const PARALLEL_SAFE_TOOLS = new Set([ // === Anthropic toolset (default) === "Read", + "view_image", "Grep", "Glob", diff --git a/src/cli/helpers/formatArgsDisplay.ts b/src/cli/helpers/formatArgsDisplay.ts index 39c00d4..a112425 100644 --- a/src/cli/helpers/formatArgsDisplay.ts +++ b/src/cli/helpers/formatArgsDisplay.ts @@ -248,14 +248,14 @@ export function formatArgsDisplay( } // Read tools: show file path + any other useful args (limit, offset) - if (isFileReadTool(toolName) && parsed.file_path) { - const filePath = String(parsed.file_path); + if (isFileReadTool(toolName) && (parsed.file_path || parsed.path)) { + const filePath = String(parsed.file_path || parsed.path); const relativePath = formatDisplayPath(filePath); // Collect other non-hidden args const otherArgs: string[] = []; for (const [k, v] of Object.entries(parsed)) { - if (k === "file_path") continue; + if (k === "file_path" || k === "path") continue; if (v === undefined || v === null) continue; if (typeof v === "boolean" || typeof v === "number") { otherArgs.push(`${k}: ${v}`); diff --git a/src/cli/helpers/toolNameMapping.ts b/src/cli/helpers/toolNameMapping.ts index 4b9d043..c755598 100644 --- a/src/cli/helpers/toolNameMapping.ts +++ b/src/cli/helpers/toolNameMapping.ts @@ -15,6 +15,7 @@ export function getDisplayToolName(rawName: string): string { if (rawName === "write") return "Write"; if (rawName === "edit" || rawName === "multi_edit") return "Update"; if (rawName === "read") return "Read"; + if (rawName === "view_image" || rawName === "ViewImage") return "View Image"; if (rawName === "bash") return "Bash"; if (rawName === "grep" || rawName === "Grep") return "Search"; if (rawName === "glob" || rawName === "Glob") 
return "Glob"; @@ -180,6 +181,8 @@ export function isFileReadTool(name: string): boolean { return ( name === "read" || name === "Read" || + name === "view_image" || + name === "ViewImage" || name === "ReadFile" || name === "read_file" || name === "read_file_gemini" || diff --git a/src/tools/descriptions/ViewImage.md b/src/tools/descriptions/ViewImage.md new file mode 100644 index 0000000..21bbec3 --- /dev/null +++ b/src/tools/descriptions/ViewImage.md @@ -0,0 +1,8 @@ +# ViewImage + +Attach a local image file to the conversation context for this turn. + +Usage: +- The `path` parameter must be an absolute path to a local image file +- Supported formats: PNG, JPG, JPEG, GIF, WEBP, BMP +- Large images are automatically resized to fit API limits diff --git a/src/tools/impl/ViewImage.ts b/src/tools/impl/ViewImage.ts new file mode 100644 index 0000000..6ea75cf --- /dev/null +++ b/src/tools/impl/ViewImage.ts @@ -0,0 +1,39 @@ +import * as path from "node:path"; +import { read, type ToolReturnContent } from "./Read"; +import { validateRequiredParams } from "./validation.js"; + +interface ViewImageArgs { + path: string; +} + +const IMAGE_EXTENSIONS = new Set([ + ".png", + ".jpg", + ".jpeg", + ".gif", + ".webp", + ".bmp", +]); + +function isImageFile(filePath: string): boolean { + const ext = path.extname(filePath).toLowerCase(); + return IMAGE_EXTENSIONS.has(ext); +} + +export async function view_image( + args: ViewImageArgs, +): Promise<{ content: ToolReturnContent }> { + validateRequiredParams(args, ["path"], "view_image"); + + const userCwd = process.env.USER_CWD || process.cwd(); + const resolvedPath = path.isAbsolute(args.path) + ? 
args.path + : path.resolve(userCwd, args.path); + + if (!isImageFile(resolvedPath)) { + throw new Error(`Unsupported image file type: ${resolvedPath}`); + } + + const result = await read({ file_path: resolvedPath }); + return { content: result.content }; +} diff --git a/src/tools/manager.ts b/src/tools/manager.ts index 0c34986..ae72d84 100644 --- a/src/tools/manager.ts +++ b/src/tools/manager.ts @@ -82,6 +82,7 @@ export const OPENAI_DEFAULT_TOOLS: ToolName[] = [ "grep_files", "apply_patch", "update_plan", + "view_image", "Skill", "Task", ]; @@ -112,6 +113,7 @@ export const OPENAI_PASCAL_TOOLS: ToolName[] = [ "ShellCommand", "Shell", "ReadFile", + "view_image", "ListDir", "GrepFiles", "ApplyPatch", @@ -151,6 +153,7 @@ const TOOL_PERMISSIONS: Record = { LS: { requiresApproval: false }, MultiEdit: { requiresApproval: true }, Read: { requiresApproval: false }, + view_image: { requiresApproval: false }, ReadLSP: { requiresApproval: false }, Skill: { requiresApproval: false }, Task: { requiresApproval: true }, diff --git a/src/tools/schemas/ViewImage.json b/src/tools/schemas/ViewImage.json new file mode 100644 index 0000000..ac9be3e --- /dev/null +++ b/src/tools/schemas/ViewImage.json @@ -0,0 +1,12 @@ +{ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "The absolute path to the image file" + } + }, + "required": ["path"], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" +} diff --git a/src/tools/toolDefinitions.ts b/src/tools/toolDefinitions.ts index 11c2d2e..2df31c3 100644 --- a/src/tools/toolDefinitions.ts +++ b/src/tools/toolDefinitions.ts @@ -29,6 +29,7 @@ import SkillDescription from "./descriptions/Skill.md"; import TaskDescription from "./descriptions/Task.md"; import TodoWriteDescription from "./descriptions/TodoWrite.md"; import UpdatePlanDescription from "./descriptions/UpdatePlan.md"; +import ViewImageDescription from "./descriptions/ViewImage.md"; import WriteDescription from 
"./descriptions/Write.md"; import WriteFileGeminiDescription from "./descriptions/WriteFileGemini.md"; import WriteTodosGeminiDescription from "./descriptions/WriteTodosGemini.md"; @@ -63,6 +64,7 @@ import { skill } from "./impl/Skill"; import { task } from "./impl/Task"; import { todo_write } from "./impl/TodoWrite"; import { update_plan } from "./impl/UpdatePlan"; +import { view_image } from "./impl/ViewImage"; import { write } from "./impl/Write"; import { write_file_gemini } from "./impl/WriteFileGemini"; import { write_todos } from "./impl/WriteTodosGemini"; @@ -97,6 +99,7 @@ import SkillSchema from "./schemas/Skill.json"; import TaskSchema from "./schemas/Task.json"; import TodoWriteSchema from "./schemas/TodoWrite.json"; import UpdatePlanSchema from "./schemas/UpdatePlan.json"; +import ViewImageSchema from "./schemas/ViewImage.json"; import WriteSchema from "./schemas/Write.json"; import WriteFileGeminiSchema from "./schemas/WriteFileGemini.json"; import WriteTodosGeminiSchema from "./schemas/WriteTodosGemini.json"; @@ -170,6 +173,11 @@ const toolDefinitions = { description: ReadDescription.trim(), impl: read as unknown as ToolImplementation, }, + view_image: { + schema: ViewImageSchema, + description: ViewImageDescription.trim(), + impl: view_image as unknown as ToolImplementation, + }, // LSP-enhanced Read - used when LETTA_ENABLE_LSP is set ReadLSP: { schema: ReadLSPSchema,