feat: add image support to send() (#14)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
221
src/image.test.ts
Normal file
221
src/image.test.ts
Normal file
@@ -0,0 +1,221 @@
|
||||
import { describe, expect, test, mock, beforeAll, afterAll } from "bun:test";
|
||||
import { writeFileSync, unlinkSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { imageFromFile, imageFromBase64, imageFromURL } from "./index.js";
|
||||
|
||||
describe("Image helpers", () => {
|
||||
describe("imageFromFile", () => {
  // Tiny PNG payload used as the on-disk fixture for the structural test.
  const PNG_FIXTURE_BASE64 =
    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==";

  /**
   * Writes `bytes` as `fileName` next to this test file, runs `imageFromFile`
   * on it, and removes the file again whether or not the call succeeded.
   */
  const loadFixture = (fileName: string, bytes: Buffer) => {
    const fixturePath = join(import.meta.dir, fileName);
    writeFileSync(fixturePath, bytes);
    try {
      return imageFromFile(fixturePath);
    } finally {
      unlinkSync(fixturePath);
    }
  };

  test("reads PNG file and returns correct structure", () => {
    const { type, source } = loadFixture(
      "test-image.png",
      Buffer.from(PNG_FIXTURE_BASE64, "base64")
    );

    expect(type).toBe("image");
    expect(source.type).toBe("base64");
    expect(source.media_type).toBe("image/png");
    expect(typeof source.data).toBe("string");
    expect(source.data.length).toBeGreaterThan(0);
  });

  // One row per extension check: [test title, file name, file bytes, expected media type].
  const extensionCases = [
    [
      "detects JPEG from extension",
      "test-image.jpg",
      Buffer.from("/9j/4AAQSkZJRg==", "base64"),
      "image/jpeg",
    ],
    [
      "detects GIF from extension",
      "test-image.gif",
      Buffer.from("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7", "base64"),
      "image/gif",
    ],
    [
      "detects WebP from extension",
      "test-image.webp",
      Buffer.from("UklGRh4AAABXRUJQVlA4TBEAAAAvAAAAAAfQ//73v/+BiOh/AAA=", "base64"),
      "image/webp",
    ],
    [
      "defaults to JPEG for unknown extensions",
      "test-image.unknown",
      Buffer.from("test data"),
      "image/jpeg",
    ],
  ] as const;

  for (const [title, fileName, bytes, expectedType] of extensionCases) {
    test(title, () => {
      expect(loadFixture(fileName, bytes).source.media_type).toBe(expectedType);
    });
  }
});
|
||||
|
||||
describe("imageFromBase64", () => {
  test("wraps base64 data with default PNG type", () => {
    const payload =
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==";

    const { type, source } = imageFromBase64(payload);

    expect(type).toBe("image");
    expect(source.type).toBe("base64");
    expect(source.media_type).toBe("image/png");
    // The payload must be passed through untouched.
    expect(source.data).toBe(payload);
  });

  test("uses specified media type", () => {
    const { source } = imageFromBase64("somebase64data", "image/jpeg");

    expect(source.media_type).toBe("image/jpeg");
  });

  test("accepts all valid media types", () => {
    const supported = ["image/png", "image/jpeg", "image/gif", "image/webp"] as const;

    supported.forEach((mediaType) => {
      expect(imageFromBase64("data", mediaType).source.media_type).toBe(mediaType);
    });
  });
});
|
||||
|
||||
describe("imageFromFile edge cases", () => {
  /**
   * Writes `bytes` as `fileName` beside this test file and reports the media
   * type `imageFromFile` assigns to it; the file is always deleted afterwards.
   */
  const mediaTypeOf = (fileName: string, bytes: Buffer) => {
    const location = join(import.meta.dir, fileName);
    writeFileSync(location, bytes);
    try {
      return imageFromFile(location).source.media_type;
    } finally {
      unlinkSync(location);
    }
  };

  test("throws on missing file", () => {
    const readMissing = () => imageFromFile("/nonexistent/path/image.png");

    expect(readMissing).toThrow();
  });

  test("handles .jpeg extension", () => {
    const jpegBytes = Buffer.from("/9j/4AAQSkZJRg==", "base64");

    expect(mediaTypeOf("test-image.jpeg", jpegBytes)).toBe("image/jpeg");
  });

  test("handles uppercase .PNG extension", () => {
    const pngBytes = Buffer.from(
      "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg==",
      "base64"
    );

    expect(mediaTypeOf("test-IMAGE.PNG", pngBytes)).toBe("image/png");
  });
});
|
||||
|
||||
describe("imageFromURL", () => {
  // These tests replace the global fetch; keep the real one so it can be put back.
  const originalFetch = globalThis.fetch;

  afterAll(() => {
    globalThis.fetch = originalFetch;
  });

  /**
   * Installs a fetch stand-in that resolves to a successful response with the
   * given body and, optionally, a `content-type` header.
   */
  const stubFetch = (body: ArrayBufferLike, contentType?: string): void => {
    const headers =
      contentType === undefined
        ? new Headers()
        : new Headers({ "content-type": contentType });

    // The stand-in only implements the parts of Response that imageFromURL reads.
    globalThis.fetch = mock(async () => ({
      ok: true,
      arrayBuffer: async () => body,
      headers,
    })) as any;
  };

  test("fetches image and converts to base64", async () => {
    const mockImageData = Buffer.from("fake image data");
    // `Buffer#buffer` is not guaranteed to cover exactly the Buffer's bytes
    // (small Buffers may be views into a larger shared allocation), so hand the
    // stub a copy whose ArrayBuffer holds precisely these bytes.
    const exactBody = Uint8Array.from(mockImageData).buffer;
    stubFetch(exactBody, "image/png");

    const result = await imageFromURL("https://example.com/image.png");

    expect(result.type).toBe("image");
    expect(result.source.type).toBe("base64");
    expect(result.source.data).toBe(mockImageData.toString("base64"));
  });

  test("detects PNG from content-type header", async () => {
    stubFetch(new ArrayBuffer(0), "image/png");

    const result = await imageFromURL("https://example.com/file");
    expect(result.source.media_type).toBe("image/png");
  });

  test("detects JPEG from content-type header", async () => {
    stubFetch(new ArrayBuffer(0), "image/jpeg");

    const result = await imageFromURL("https://example.com/file");
    expect(result.source.media_type).toBe("image/jpeg");
  });

  test("detects GIF from URL extension when no content-type", async () => {
    stubFetch(new ArrayBuffer(0));

    const result = await imageFromURL("https://example.com/animation.gif");
    expect(result.source.media_type).toBe("image/gif");
  });

  test("detects WebP from URL extension", async () => {
    stubFetch(new ArrayBuffer(0));

    const result = await imageFromURL("https://example.com/photo.webp");
    expect(result.source.media_type).toBe("image/webp");
  });

  test("defaults to PNG when no type info available", async () => {
    stubFetch(new ArrayBuffer(0));

    const result = await imageFromURL("https://example.com/unknown");
    expect(result.source.media_type).toBe("image/png");
  });
});
|
||||
});
|
||||
96
src/index.ts
96
src/index.ts
@@ -47,6 +47,11 @@ export type {
|
||||
CanUseToolResponse,
|
||||
CanUseToolResponseAllow,
|
||||
CanUseToolResponseDeny,
|
||||
// Multimodal content types
|
||||
TextContent,
|
||||
ImageContent,
|
||||
MessageContentItem,
|
||||
SendMessage,
|
||||
} from "./types.js";
|
||||
|
||||
export { Session } from "./session.js";
|
||||
@@ -169,3 +174,94 @@ export async function prompt(
|
||||
session.close();
|
||||
}
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
// IMAGE HELPERS
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
|
||||
import { readFileSync } from "node:fs";
|
||||
import type { ImageContent } from "./types.js";
|
||||
|
||||
/**
 * Build an image content item from a file on disk.
 *
 * The file is read synchronously and embedded as base64. The media type is
 * picked from the end of the path, ignoring case: `.png`, `.gif` and `.webp`
 * map to their own types, and every other path is reported as `image/jpeg`.
 *
 * @param filePath - Path of the file to read.
 * @returns An image item holding the base64 data and the chosen media type.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 *
 * @example
 * ```typescript
 * await session.send([
 *   { type: "text", text: "What's in this image?" },
 *   imageFromFile("./screenshot.png")
 * ]);
 * ```
 */
export function imageFromFile(filePath: string): ImageContent {
  const data = readFileSync(filePath).toString("base64");

  // Suffix → media type; paths matching none of these fall back to JPEG.
  const suffixTable = [
    [".png", "image/png"],
    [".gif", "image/gif"],
    [".webp", "image/webp"],
  ] as const;

  const loweredPath = filePath.toLowerCase();
  const hit = suffixTable.find(([suffix]) => loweredPath.endsWith(suffix));
  const media_type: ImageContent["source"]["media_type"] = hit ? hit[1] : "image/jpeg";

  return {
    type: "image",
    source: { type: "base64", media_type, data },
  };
}
|
||||
|
||||
/**
 * Wrap already-encoded base64 image data as an image content item.
 *
 * The data is passed through unchanged; nothing is decoded or validated here.
 *
 * @param data - Base64-encoded image bytes.
 * @param media_type - Media type to attach to the data. Defaults to `image/png`.
 * @returns An image item carrying `data` and `media_type` as given.
 *
 * @example
 * ```typescript
 * const base64 = fs.readFileSync("image.png").toString("base64");
 * await session.send([
 *   { type: "text", text: "Describe this" },
 *   imageFromBase64(base64, "image/png")
 * ]);
 * ```
 */
export function imageFromBase64(
  data: string,
  media_type: ImageContent["source"]["media_type"] = "image/png"
): ImageContent {
  const source: ImageContent["source"] = { type: "base64", media_type, data };
  return { type: "image", source };
}
|
||||
|
||||
/**
 * Map a `content-type` header value to a supported image media type.
 * Returns `undefined` when the header is absent or names no supported format.
 */
function mediaTypeFromContentType(
  header: string | null
): ImageContent["source"]["media_type"] | undefined {
  if (!header) return undefined;
  // Media type names are case-insensitive, so compare in lower case.
  const value = header.toLowerCase();
  if (value.includes("jpeg") || value.includes("jpg")) return "image/jpeg";
  if (value.includes("gif")) return "image/gif";
  if (value.includes("webp")) return "image/webp";
  if (value.includes("png")) return "image/png";
  return undefined;
}

/**
 * Guess a supported image media type from the file extension in a URL.
 * The query string and fragment are ignored and the match is case-insensitive.
 */
function mediaTypeFromUrl(
  url: string
): ImageContent["source"]["media_type"] | undefined {
  // Drop "?query" / "#fragment" so "photo.gif?size=large" still ends in ".gif".
  const path = url.split(/[?#]/, 1)[0] ?? url;
  const extension = /\.([a-z0-9]+)$/i.exec(path)?.[1]?.toLowerCase();
  switch (extension) {
    case "jpg":
    case "jpeg":
      return "image/jpeg";
    case "gif":
      return "image/gif";
    case "webp":
      return "image/webp";
    case "png":
      return "image/png";
    default:
      return undefined;
  }
}

/**
 * Create image content from a URL.
 * Fetches the image and converts the response body to base64.
 *
 * The media type is taken from the response's `content-type` header when it
 * names a supported format; otherwise from the URL's file extension; otherwise
 * it defaults to `image/png`.
 *
 * @param url - Location of the image to download.
 * @returns An image item holding the downloaded bytes as base64.
 * @throws Error when the response reports a failed HTTP status, and whatever
 *   `fetch` itself rejects with.
 *
 * @example
 * ```typescript
 * const img = await imageFromURL("https://example.com/image.png");
 * await session.send([
 *   { type: "text", text: "What's this?" },
 *   img
 * ]);
 * ```
 */
export async function imageFromURL(url: string): Promise<ImageContent> {
  const response = await fetch(url);

  // Do not encode an HTTP error page as if it were an image. The comparison is
  // strict so Response-like objects that do not define `ok` are still accepted.
  if (response.ok === false) {
    throw new Error(
      `Failed to fetch image from ${url}: ${response.status} ${response.statusText}`
    );
  }

  const data = Buffer.from(await response.arrayBuffer()).toString("base64");

  // An explicit header wins over the URL suffix; the suffix is only a fallback.
  const media_type: ImageContent["source"]["media_type"] =
    mediaTypeFromContentType(response.headers.get("content-type")) ??
    mediaTypeFromUrl(url) ??
    "image/png";

  return {
    type: "image",
    source: { type: "base64", media_type, data },
  };
}
|
||||
|
||||
@@ -18,6 +18,7 @@ import type {
|
||||
CanUseToolResponse,
|
||||
CanUseToolResponseAllow,
|
||||
CanUseToolResponseDeny,
|
||||
SendMessage,
|
||||
} from "./types.js";
|
||||
import { validateSessionOptions } from "./validation.js";
|
||||
|
||||
@@ -84,8 +85,21 @@ export class Session implements AsyncDisposable {
|
||||
|
||||
/**
|
||||
* Send a message to the agent
|
||||
*
|
||||
* @param message - Text string or multimodal content array
|
||||
*
|
||||
* @example
|
||||
* // Simple text
|
||||
* await session.send("Hello!");
|
||||
*
|
||||
* @example
|
||||
* // With image
|
||||
* await session.send([
|
||||
* { type: "text", text: "What's in this image?" },
|
||||
* { type: "image", source: { type: "base64", media_type: "image/png", data: "..." } }
|
||||
* ]);
|
||||
*/
|
||||
async send(message: string): Promise<void> {
|
||||
async send(message: SendMessage): Promise<void> {
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
}
|
||||
|
||||
34
src/types.ts
34
src/types.ts
@@ -27,6 +27,40 @@ export type {
|
||||
// Import types for use in this file
|
||||
import type { CreateBlock, CanUseToolResponse } from "@letta-ai/letta-code/protocol";
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
// MESSAGE CONTENT TYPES (for multimodal support)
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
|
||||
/**
 * A plain-text part of a message.
 */
export interface TextContent {
  /** Tag marking this item as text. */
  type: "text";
  /** The text to send. */
  text: string;
}
|
||||
|
||||
/**
 * An image part of a message, carried inline as base64 data.
 */
export interface ImageContent {
  /** Tag marking this item as an image. */
  type: "image";
  /** Where the image bytes come from; only inline base64 is declared. */
  source: {
    type: "base64";
    /** Format of the encoded image. */
    media_type:
      | "image/png"
      | "image/jpeg"
      | "image/gif"
      | "image/webp";
    /** The base64-encoded image bytes. */
    data: string;
  };
}
|
||||
|
||||
/**
 * One element of a multimodal message: either a text part or an image part.
 * The `type` field tells the two apart.
 */
export type MessageContentItem =
  | TextContent
  | ImageContent;
|
||||
|
||||
/**
 * Input accepted by `send()`: a bare string for text-only messages, or an
 * array of content items for multimodal messages.
 */
export type SendMessage =
  | string
  | MessageContentItem[];
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
// SYSTEM PROMPT TYPES
|
||||
// ═══════════════════════════════════════════════════════════════
|
||||
|
||||
Reference in New Issue
Block a user