import { afterAll, describe, expect, mock, test } from "bun:test";
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { imageFromBase64, imageFromFile, imageFromURL } from "./index.js";

/** Base64 of the small PNG fixture shared by the file and base64 tests. */
const PNG_BASE64 =
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==";

/**
 * Write `contents` to `fileName` inside a fresh temporary directory, pass the
 * resulting path to `run`, and always remove the directory afterwards.
 *
 * Fixtures live under the OS temp dir rather than next to the sources so an
 * interrupted run cannot leave stray image files in the repository.
 */
function withTempFile<T>(
  fileName: string,
  contents: Buffer,
  run: (filePath: string) => T
): T {
  const dir = mkdtempSync(join(tmpdir(), "image-test-"));
  try {
    const filePath = join(dir, fileName);
    writeFileSync(filePath, contents);
    return run(filePath);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}

/**
 * Replace `globalThis.fetch` with a mock that resolves to a real `Response`.
 *
 * A real `Response` copies exactly the bytes of the view it is given, so the
 * test never exposes a `Buffer`'s (possibly larger, pooled) backing
 * `ArrayBuffer`, and it carries `ok`/`status` like a genuine fetch result.
 * When `contentType` is omitted the response has no `content-type` header.
 */
function stubFetch(body: Uint8Array, contentType?: string): void {
  const init =
    contentType === undefined
      ? undefined
      : { headers: { "content-type": contentType } };
  // The mock only implements the call signature, not Bun's extra fetch members.
  globalThis.fetch = mock(
    async () => new Response(body, init)
  ) as unknown as typeof fetch;
}

describe("Image helpers", () => {
  describe("imageFromFile", () => {
    test("reads PNG file and returns correct structure", () => {
      withTempFile("test-image.png", Buffer.from(PNG_BASE64, "base64"), (filePath) => {
        const result = imageFromFile(filePath);

        expect(result.type).toBe("image");
        expect(result.source.type).toBe("base64");
        expect(result.source.media_type).toBe("image/png");
        expect(typeof result.source.data).toBe("string");
        expect(result.source.data.length).toBeGreaterThan(0);
      });
    });

    const extensionCases = [
      {
        label: "detects JPEG from extension",
        fileName: "test-image.jpg",
        contents: Buffer.from("/9j/4AAQSkZJRg==", "base64"),
        expected: "image/jpeg",
      },
      {
        label: "detects GIF from extension",
        fileName: "test-image.gif",
        contents: Buffer.from(
          "R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7",
          "base64"
        ),
        expected: "image/gif",
      },
      {
        label: "detects WebP from extension",
        fileName: "test-image.webp",
        contents: Buffer.from(
          "UklGRh4AAABXRUJQVlA4TBEAAAAvAAAAAAfQ//73v/+BiOh/AAA=",
          "base64"
        ),
        expected: "image/webp",
      },
      {
        label: "defaults to JPEG for unknown extensions",
        fileName: "test-image.unknown",
        contents: Buffer.from("test data"),
        expected: "image/jpeg",
      },
    ] as const;

    for (const { label, fileName, contents, expected } of extensionCases) {
      test(label, () => {
        withTempFile(fileName, contents, (filePath) => {
          expect(imageFromFile(filePath).source.media_type).toBe(expected);
        });
      });
    }
  });

  describe("imageFromBase64", () => {
    test("wraps base64 data with default PNG type", () => {
      const result = imageFromBase64(PNG_BASE64);

      expect(result.type).toBe("image");
      expect(result.source.type).toBe("base64");
      expect(result.source.media_type).toBe("image/png");
      expect(result.source.data).toBe(PNG_BASE64);
    });

    test("uses specified media type", () => {
      const result = imageFromBase64("somebase64data", "image/jpeg");

      expect(result.source.media_type).toBe("image/jpeg");
    });

    test("accepts all valid media types", () => {
      const types = ["image/png", "image/jpeg", "image/gif", "image/webp"] as const;

      for (const mediaType of types) {
        const result = imageFromBase64("data", mediaType);
        expect(result.source.media_type).toBe(mediaType);
      }
    });
  });

  describe("imageFromFile edge cases", () => {
    test("throws on missing file", () => {
      expect(() => imageFromFile("/nonexistent/path/image.png")).toThrow();
    });

    test("handles .jpeg extension", () => {
      withTempFile("test-image.jpeg", Buffer.from("/9j/4AAQSkZJRg==", "base64"), (filePath) => {
        expect(imageFromFile(filePath).source.media_type).toBe("image/jpeg");
      });
    });

    test("handles uppercase .PNG extension", () => {
      withTempFile("test-IMAGE.PNG", Buffer.from(PNG_BASE64, "base64"), (filePath) => {
        expect(imageFromFile(filePath).source.media_type).toBe("image/png");
      });
    });
  });

  describe("imageFromURL", () => {
    // Every test below swaps out the global fetch; put the real one back at the end.
    const originalFetch = globalThis.fetch;
    const noBody = new Uint8Array(0);

    afterAll(() => {
      globalThis.fetch = originalFetch;
    });

    test("fetches image and converts to base64", async () => {
      const mockImageData = Buffer.from("fake image data");
      stubFetch(mockImageData, "image/png");

      const result = await imageFromURL("https://example.com/image.png");

      expect(result.type).toBe("image");
      expect(result.source.type).toBe("base64");
      expect(result.source.data).toBe(mockImageData.toString("base64"));
    });

    test("detects PNG from content-type header", async () => {
      stubFetch(noBody, "image/png");

      const result = await imageFromURL("https://example.com/file");
      expect(result.source.media_type).toBe("image/png");
    });

    test("detects JPEG from content-type header", async () => {
      stubFetch(noBody, "image/jpeg");

      const result = await imageFromURL("https://example.com/file");
      expect(result.source.media_type).toBe("image/jpeg");
    });

    test("detects GIF from URL extension when no content-type", async () => {
      stubFetch(noBody);

      const result = await imageFromURL("https://example.com/animation.gif");
      expect(result.source.media_type).toBe("image/gif");
    });

    test("detects WebP from URL extension", async () => {
      stubFetch(noBody);

      const result = await imageFromURL("https://example.com/photo.webp");
      expect(result.source.media_type).toBe("image/webp");
    });

    test("defaults to PNG when no type info available", async () => {
      stubFetch(noBody);

      const result = await imageFromURL("https://example.com/unknown");
      expect(result.source.media_type).toBe("image/png");
    });
  });
});
/**
 * Identify a supported image format from the leading signature bytes of a file.
 *
 * @param bytes - Raw file contents (only the first 12 bytes are inspected).
 * @returns The matching media type, or `undefined` when the bytes match none
 *   of the supported formats.
 */
function sniffImageMediaType(
  bytes: Uint8Array
): ImageContent["source"]["media_type"] | undefined {
  const hasSignature = (signature: readonly number[], offset = 0): boolean =>
    bytes.length >= offset + signature.length &&
    signature.every((byte, i) => bytes[offset + i] === byte);

  if (hasSignature([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a])) {
    return "image/png";
  }
  if (hasSignature([0xff, 0xd8, 0xff])) {
    return "image/jpeg";
  }
  // "GIF8" is the common prefix of the GIF87a and GIF89a headers.
  if (hasSignature([0x47, 0x49, 0x46, 0x38])) {
    return "image/gif";
  }
  // WebP is a RIFF container: "RIFF", a 4-byte size, then "WEBP".
  if (
    hasSignature([0x52, 0x49, 0x46, 0x46]) &&
    hasSignature([0x57, 0x45, 0x42, 0x50], 8)
  ) {
    return "image/webp";
  }
  return undefined;
}

/**
 * Create image content from a file path.
 *
 * The media type is taken from the file extension (case-insensitive) when it
 * is one of `.png`, `.gif`, `.webp`, `.jpg` or `.jpeg`. For any other
 * extension the file's leading bytes are inspected, and `image/jpeg` is used
 * only when the content matches no supported format either.
 *
 * @param filePath - Path of the image file; it is read synchronously.
 * @returns Base64-encoded image content for the file.
 * @throws Whatever `readFileSync` throws when the file cannot be read
 *   (for example, when it does not exist).
 *
 * @example
 * ```typescript
 * await session.send([
 *   { type: "text", text: "What's in this image?" },
 *   imageFromFile("./screenshot.png")
 * ]);
 * ```
 */
export function imageFromFile(filePath: string): ImageContent {
  const bytes = readFileSync(filePath);
  const lowerPath = filePath.toLowerCase();

  let mediaType: ImageContent["source"]["media_type"];
  if (lowerPath.endsWith(".png")) {
    mediaType = "image/png";
  } else if (lowerPath.endsWith(".gif")) {
    mediaType = "image/gif";
  } else if (lowerPath.endsWith(".webp")) {
    mediaType = "image/webp";
  } else if (lowerPath.endsWith(".jpg") || lowerPath.endsWith(".jpeg")) {
    mediaType = "image/jpeg";
  } else {
    // Unknown or missing extension: trust the content before guessing JPEG.
    mediaType = sniffImageMediaType(bytes) ?? "image/jpeg";
  }

  return {
    type: "image",
    source: { type: "base64", media_type: mediaType, data: bytes.toString("base64") },
  };
}

/**
 * Create image content from base64 data.
 *
 * The data is passed through unchanged; it is not decoded or validated here.
 *
 * @param data - Base64-encoded image bytes.
 * @param media_type - Media type to declare for the data; defaults to `image/png`.
 * @returns Image content wrapping `data` with the given media type.
 *
 * @example
 * ```typescript
 * const base64 = fs.readFileSync("image.png").toString("base64");
 * await session.send([
 *   { type: "text", text: "Describe this" },
 *   imageFromBase64(base64, "image/png")
 * ]);
 * ```
 */
export function imageFromBase64(
  data: string,
  media_type: ImageContent["source"]["media_type"] = "image/png"
): ImageContent {
  return {
    type: "image",
    source: { type: "base64", media_type, data },
  };
}
/**
 * Map a `Content-Type` header value to a supported image media type.
 *
 * Matching is by case-insensitive substring, so parameters such as
 * `; charset=...` do not get in the way.
 *
 * @returns The media type named by the header, or `undefined` when the header
 *   is absent or names no supported image format.
 */
function mediaTypeFromContentType(
  contentType: string | null
): ImageContent["source"]["media_type"] | undefined {
  if (contentType === null) {
    return undefined;
  }
  const value = contentType.toLowerCase();
  if (value.includes("png")) return "image/png";
  if (value.includes("jpeg") || value.includes("jpg")) return "image/jpeg";
  if (value.includes("gif")) return "image/gif";
  if (value.includes("webp")) return "image/webp";
  return undefined;
}

/**
 * Guess a supported image media type from the extension at the end of a URL,
 * ignoring any query string or fragment and the extension's letter case.
 *
 * @returns The media type implied by the extension, or `undefined` when the
 *   URL does not end in a supported image extension.
 */
function mediaTypeFromUrlExtension(
  url: string
): ImageContent["source"]["media_type"] | undefined {
  // Cut at the first "?" or "#" so "a.gif?size=large" still ends in ".gif".
  const cut = url.search(/[?#]/);
  const path = (cut === -1 ? url : url.slice(0, cut)).toLowerCase();
  if (path.endsWith(".png")) return "image/png";
  if (path.endsWith(".jpg") || path.endsWith(".jpeg")) return "image/jpeg";
  if (path.endsWith(".gif")) return "image/gif";
  if (path.endsWith(".webp")) return "image/webp";
  return undefined;
}

/**
 * Create image content from a URL.
 * Fetches the image and converts to base64.
 *
 * The media type comes from the response's `content-type` header when it
 * names a supported format; otherwise from the URL's file extension; and
 * falls back to `image/png` when neither gives an answer.
 *
 * @param url - Location of the image to download.
 * @returns Base64-encoded image content for the downloaded bytes.
 * @throws Error when the response reports an unsuccessful HTTP status, in
 *   addition to anything `fetch` itself rejects with.
 *
 * @example
 * ```typescript
 * const img = await imageFromURL("https://example.com/image.png");
 * await session.send([
 *   { type: "text", text: "What's this?" },
 *   img
 * ]);
 * ```
 */
export async function imageFromURL(url: string): Promise<ImageContent> {
  const response = await fetch(url);

  // Without this an HTML error page would be encoded and labelled as an image.
  // The comparison is strict so minimal response stubs that omit `ok` still pass.
  if (response.ok === false) {
    throw new Error(
      `Failed to fetch image from ${url}: HTTP ${response.status} ${response.statusText}`
    );
  }

  const data = Buffer.from(await response.arrayBuffer()).toString("base64");

  // The header describes the bytes actually served, so it outranks the URL.
  const mediaType =
    mediaTypeFromContentType(response.headers.get("content-type")) ??
    mediaTypeFromUrlExtension(url) ??
    "image/png";

  return {
    type: "image",
    source: { type: "base64", media_type: mediaType, data },
  };
}
// ═══════════════════════════════════════════════════════════════
// MESSAGE CONTENT TYPES (for multimodal support)
// ═══════════════════════════════════════════════════════════════

/**
 * Media types that an {@link ImageContent} source may declare.
 */
export type ImageMediaType =
  | "image/png"
  | "image/jpeg"
  | "image/gif"
  | "image/webp";

/**
 * Text content in a message
 */
export interface TextContent {
  /** Discriminant identifying this item as text. */
  type: "text";
  /** The text itself. */
  text: string;
}

/**
 * Image content in a message (base64 encoded)
 */
export interface ImageContent {
  /** Discriminant identifying this item as an image. */
  type: "image";
  source: {
    /** Encoding of `data`; only base64 is modelled. */
    type: "base64";
    /**
     * Declared format of the image. Note the snake_case key: the property is
     * `media_type`, not `mediaType`.
     */
    media_type: ImageMediaType;
    /** The image bytes, base64 encoded. */
    data: string;
  };
}

/**
 * A single content item (text or image), discriminated by its `type` field
 */
export type MessageContentItem = TextContent | ImageContent;

/**
 * What send() accepts - either a simple string or multimodal content array
 */
export type SendMessage = string | MessageContentItem[];