feat: add image support to send() (#14)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Cameron
2026-02-02 17:59:40 -08:00
committed by GitHub
parent de14506f49
commit fa4a6340e7
4 changed files with 366 additions and 1 deletions

221
src/image.test.ts Normal file
View File

@@ -0,0 +1,221 @@
import { describe, expect, test, mock, beforeAll, afterAll } from "bun:test";
import { writeFileSync, unlinkSync } from "node:fs";
import { join } from "node:path";
import { imageFromFile, imageFromBase64, imageFromURL } from "./index.js";
describe("Image helpers", () => {
describe("imageFromFile", () => {
  // Small base64-encoded sample payloads, one per image format under test.
  const PNG_SAMPLE =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==";
  const JPEG_SAMPLE = "/9j/4AAQSkZJRg==";
  const GIF_SAMPLE = "R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7";
  const WEBP_SAMPLE = "UklGRh4AAABXRUJQVlA4TBEAAAAvAAAAAAfQ//73v/+BiOh/AAA=";

  /**
   * Writes `bytes` to `fileName` in this test file's directory, hands the
   * resulting path to `check`, and removes the file again even if `check`
   * throws.
   */
  const withFixture = (
    fileName: string,
    bytes: Buffer,
    check: (fixturePath: string) => void
  ): void => {
    const fixturePath = join(import.meta.dir, fileName);
    writeFileSync(fixturePath, bytes);
    try {
      check(fixturePath);
    } finally {
      unlinkSync(fixturePath);
    }
  };

  test("reads PNG file and returns correct structure", () => {
    withFixture("test-image.png", Buffer.from(PNG_SAMPLE, "base64"), (fixturePath) => {
      const image = imageFromFile(fixturePath);
      expect(image.type).toBe("image");
      expect(image.source.type).toBe("base64");
      expect(image.source.media_type).toBe("image/png");
      expect(typeof image.source.data).toBe("string");
      expect(image.source.data.length).toBeGreaterThan(0);
    });
  });

  test("detects JPEG from extension", () => {
    withFixture("test-image.jpg", Buffer.from(JPEG_SAMPLE, "base64"), (fixturePath) => {
      expect(imageFromFile(fixturePath).source.media_type).toBe("image/jpeg");
    });
  });

  test("detects GIF from extension", () => {
    withFixture("test-image.gif", Buffer.from(GIF_SAMPLE, "base64"), (fixturePath) => {
      expect(imageFromFile(fixturePath).source.media_type).toBe("image/gif");
    });
  });

  test("detects WebP from extension", () => {
    withFixture("test-image.webp", Buffer.from(WEBP_SAMPLE, "base64"), (fixturePath) => {
      expect(imageFromFile(fixturePath).source.media_type).toBe("image/webp");
    });
  });

  test("defaults to JPEG for unknown extensions", () => {
    // The content is irrelevant here: only the file name drives detection.
    withFixture("test-image.unknown", Buffer.from("test data"), (fixturePath) => {
      expect(imageFromFile(fixturePath).source.media_type).toBe("image/jpeg");
    });
  });
});
describe("imageFromBase64", () => {
  test("wraps base64 data with default PNG type", () => {
    const encoded =
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==";
    const { type, source } = imageFromBase64(encoded);
    expect(type).toBe("image");
    expect(source.type).toBe("base64");
    // No media type was passed, so the helper's default applies.
    expect(source.media_type).toBe("image/png");
    // The payload must be passed through untouched.
    expect(source.data).toBe(encoded);
  });

  test("uses specified media type", () => {
    const { source } = imageFromBase64("somebase64data", "image/jpeg");
    expect(source.media_type).toBe("image/jpeg");
  });

  test("accepts all valid media types", () => {
    const supported = ["image/png", "image/jpeg", "image/gif", "image/webp"] as const;
    supported.forEach((mediaType) => {
      expect(imageFromBase64("data", mediaType).source.media_type).toBe(mediaType);
    });
  });
});
describe("imageFromFile edge cases", () => {
  /**
   * Creates `fileName` with `bytes` in this test file's directory, returns the
   * media type `imageFromFile` reports for it, and always deletes the file.
   */
  const detectMediaType = (fileName: string, bytes: Buffer): string => {
    const fixturePath = join(import.meta.dir, fileName);
    writeFileSync(fixturePath, bytes);
    try {
      return imageFromFile(fixturePath).source.media_type;
    } finally {
      unlinkSync(fixturePath);
    }
  };

  test("throws on missing file", () => {
    const readMissing = () => imageFromFile("/nonexistent/path/image.png");
    expect(readMissing).toThrow();
  });

  test("handles .jpeg extension", () => {
    const jpegBytes = Buffer.from("/9j/4AAQSkZJRg==", "base64");
    expect(detectMediaType("test-image.jpeg", jpegBytes)).toBe("image/jpeg");
  });

  test("handles uppercase .PNG extension", () => {
    const pngBytes = Buffer.from(
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==",
      "base64"
    );
    // Extension matching is expected to be case-insensitive.
    expect(detectMediaType("test-IMAGE.PNG", pngBytes)).toBe("image/png");
  });
});
describe("imageFromURL", () => {
  // These tests replace the global fetch; keep the real one so it can be
  // restored once the suite is done.
  const originalFetch = globalThis.fetch;
  afterAll(() => {
    globalThis.fetch = originalFetch;
  });

  /**
   * Installs a fetch stub that resolves to a minimal successful response.
   *
   * @param body - Bytes returned by the stubbed `arrayBuffer()`.
   * @param contentType - Value for the `content-type` header; omit to send no
   *   such header at all.
   */
  const stubFetch = (body: ArrayBufferLike, contentType?: string): void => {
    const headers = new Headers();
    if (contentType !== undefined) {
      headers.set("content-type", contentType);
    }
    // Only the members imageFromURL reads are provided, hence the cast.
    globalThis.fetch = mock(async () => ({
      ok: true,
      arrayBuffer: async () => body,
      headers,
    })) as any;
  };

  test("fetches image and converts to base64", async () => {
    const mockImageData = Buffer.from("fake image data");
    // A Buffer may be a view into a larger shared ArrayBuffer, so `.buffer`
    // alone can expose unrelated bytes. Slice out exactly this Buffer's range.
    const exactBytes = mockImageData.buffer.slice(
      mockImageData.byteOffset,
      mockImageData.byteOffset + mockImageData.byteLength
    );
    stubFetch(exactBytes, "image/png");
    const result = await imageFromURL("https://example.com/image.png");
    expect(result.type).toBe("image");
    expect(result.source.type).toBe("base64");
    expect(result.source.data).toBe(mockImageData.toString("base64"));
  });

  test("detects PNG from content-type header", async () => {
    stubFetch(new ArrayBuffer(0), "image/png");
    const result = await imageFromURL("https://example.com/file");
    expect(result.source.media_type).toBe("image/png");
  });

  test("detects JPEG from content-type header", async () => {
    stubFetch(new ArrayBuffer(0), "image/jpeg");
    const result = await imageFromURL("https://example.com/file");
    expect(result.source.media_type).toBe("image/jpeg");
  });

  test("detects GIF from URL extension when no content-type", async () => {
    stubFetch(new ArrayBuffer(0));
    const result = await imageFromURL("https://example.com/animation.gif");
    expect(result.source.media_type).toBe("image/gif");
  });

  test("detects WebP from URL extension", async () => {
    stubFetch(new ArrayBuffer(0));
    const result = await imageFromURL("https://example.com/photo.webp");
    expect(result.source.media_type).toBe("image/webp");
  });

  test("defaults to PNG when no type info available", async () => {
    stubFetch(new ArrayBuffer(0));
    const result = await imageFromURL("https://example.com/unknown");
    expect(result.source.media_type).toBe("image/png");
  });
});
});

View File

@@ -47,6 +47,11 @@ export type {
CanUseToolResponse,
CanUseToolResponseAllow,
CanUseToolResponseDeny,
// Multimodal content types
TextContent,
ImageContent,
MessageContentItem,
SendMessage,
} from "./types.js";
export { Session } from "./session.js";
@@ -169,3 +174,94 @@ export async function prompt(
session.close();
}
}
// ═══════════════════════════════════════════════════════════════
// IMAGE HELPERS
// ═══════════════════════════════════════════════════════════════
import { readFileSync } from "node:fs";
import type { ImageContent } from "./types.js";
/**
 * Build an image content item from a file on disk.
 *
 * The file is read synchronously and its bytes are base64-encoded. The media
 * type is chosen from the end of the path, compared case-insensitively:
 * `.png`, `.gif` and `.webp` map to their image types, and every other path
 * is reported as `image/jpeg`.
 *
 * @param filePath - Path of the image file to read.
 * @returns The image wrapped as base64 content.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 *
 * @example
 * ```typescript
 * await session.send([
 *   { type: "text", text: "What's in this image?" },
 *   imageFromFile("./screenshot.png")
 * ]);
 * ```
 */
export function imageFromFile(filePath: string): ImageContent {
  type MediaType = ImageContent["source"]["media_type"];

  // Suffix -> media type table; anything not listed here falls back to JPEG.
  const knownSuffixes: ReadonlyArray<readonly [string, MediaType]> = [
    [".png", "image/png"],
    [".gif", "image/gif"],
    [".webp", "image/webp"],
  ];

  const encoded = readFileSync(filePath).toString("base64");
  const loweredPath = filePath.toLowerCase();
  const matched = knownSuffixes.find(([suffix]) => loweredPath.endsWith(suffix));
  const media_type: MediaType = matched ? matched[1] : "image/jpeg";

  return {
    type: "image",
    source: { type: "base64", media_type, data: encoded },
  };
}
/**
 * Wrap already-encoded base64 data as an image content item.
 *
 * The data is passed through unchanged; nothing is decoded or validated.
 *
 * @param data - Base64-encoded image bytes.
 * @param media_type - Media type to report for the image. Defaults to
 *   `"image/png"`.
 * @returns The image content item carrying `data` and `media_type`.
 *
 * @example
 * ```typescript
 * const base64 = fs.readFileSync("image.png").toString("base64");
 * await session.send([
 *   { type: "text", text: "Describe this" },
 *   imageFromBase64(base64, "image/png")
 * ]);
 * ```
 */
export function imageFromBase64(
  data: string,
  media_type: ImageContent["source"]["media_type"] = "image/png"
): ImageContent {
  const source: ImageContent["source"] = { type: "base64", media_type, data };
  const image: ImageContent = { type: "image", source };
  return image;
}
/**
 * Create image content from a URL.
 * Fetches the image and converts it to base64.
 *
 * The media type is resolved in this order:
 * 1. the response's `content-type` header, when it names a supported format;
 * 2. the extension of the URL path (case-insensitive, ignoring any query
 *    string or fragment);
 * 3. `image/png` when neither gives an answer.
 *
 * @param url - Address of the image to download.
 * @returns The downloaded image wrapped as base64 content.
 * @throws Error when the server answers with a non-success HTTP status, and
 *   whatever `fetch` itself rejects with on network failure.
 *
 * @example
 * ```typescript
 * const img = await imageFromURL("https://example.com/image.png");
 * await session.send([
 *   { type: "text", text: "What's this?" },
 *   img
 * ]);
 * ```
 */
export async function imageFromURL(url: string): Promise<ImageContent> {
  type MediaType = ImageContent["source"]["media_type"];

  const response = await fetch(url);
  // Compare against `false` explicitly so minimal Response-like objects that
  // do not define `ok` are still accepted, while real HTTP error responses
  // are rejected instead of having their error page encoded as an image.
  if (response.ok === false) {
    throw new Error(
      `Failed to fetch image from ${url}: ${response.status} ${response.statusText}`
    );
  }

  const data = Buffer.from(await response.arrayBuffer()).toString("base64");

  const contentType = (response.headers.get("content-type") ?? "").toLowerCase();
  // Drop "?query" and "#fragment" so "photo.gif?size=large" still ends in ".gif".
  const path = (url.split(/[?#]/)[0] ?? url).toLowerCase();

  let media_type: MediaType;
  if (contentType.includes("png")) {
    // The header is authoritative: a PNG response stays PNG whatever the URL says.
    media_type = "image/png";
  } else if (contentType.includes("jpeg") || contentType.includes("jpg")) {
    media_type = "image/jpeg";
  } else if (contentType.includes("gif")) {
    media_type = "image/gif";
  } else if (contentType.includes("webp")) {
    media_type = "image/webp";
  } else if (/\.jpe?g$/.test(path)) {
    // No usable header: fall back to the URL path's extension.
    media_type = "image/jpeg";
  } else if (path.endsWith(".gif")) {
    media_type = "image/gif";
  } else if (path.endsWith(".webp")) {
    media_type = "image/webp";
  } else {
    media_type = "image/png";
  }

  return {
    type: "image",
    source: { type: "base64", media_type, data }
  };
}

View File

@@ -18,6 +18,7 @@ import type {
CanUseToolResponse,
CanUseToolResponseAllow,
CanUseToolResponseDeny,
SendMessage,
} from "./types.js";
import { validateSessionOptions } from "./validation.js";
@@ -84,8 +85,21 @@ export class Session implements AsyncDisposable {
/**
* Send a message to the agent
*
* @param message - Text string or multimodal content array
*
* @example
* // Simple text
* await session.send("Hello!");
*
* @example
* // With image
* await session.send([
* { type: "text", text: "What's in this image?" },
* { type: "image", source: { type: "base64", media_type: "image/png", data: "..." } }
* ]);
*/
async send(message: string): Promise<void> {
async send(message: SendMessage): Promise<void> {
if (!this.initialized) {
await this.initialize();
}

View File

@@ -27,6 +27,40 @@ export type {
// Import types for use in this file
import type { CreateBlock, CanUseToolResponse } from "@letta-ai/letta-code/protocol";
// ═══════════════════════════════════════════════════════════════
// MESSAGE CONTENT TYPES (for multimodal support)
// ═══════════════════════════════════════════════════════════════
/**
* Text content in a message.
*
* One of the two members of the `MessageContentItem` union; the `type`
* literal distinguishes it from `ImageContent`.
*/
export interface TextContent {
// Literal tag identifying this item as text.
type: "text";
// The text itself.
text: string;
}
/**
* Image content in a message (base64 encoded).
*
* One of the two members of the `MessageContentItem` union; the `type`
* literal distinguishes it from `TextContent`.
*/
export interface ImageContent {
// Literal tag identifying this item as an image.
type: "image";
// Where the image bytes come from; "base64" is the only source kind declared.
source: {
type: "base64";
// Image format; limited to these four media types.
media_type: "image/png" | "image/jpeg" | "image/gif" | "image/webp";
// The image bytes, base64-encoded.
data: string;
};
}
/**
* A single content item (text or image).
*
* Both members carry a literal `type` field ("text" or "image"), so the
* union can be narrowed by checking `type`.
*/
export type MessageContentItem = TextContent | ImageContent;
/**
* What send() accepts - either a simple string or multimodal content array.
*
* The array form is a list of `MessageContentItem` values, i.e. any mix of
* text and image items.
*/
export type SendMessage = string | MessageContentItem[];
// ═══════════════════════════════════════════════════════════════
// SYSTEM PROMPT TYPES
// ═══════════════════════════════════════════════════════════════