From 5f7cdd347124c6d1522a3e7f1c331bdecca2a7a3 Mon Sep 17 00:00:00 2001 From: Cameron Date: Mon, 9 Feb 2026 15:53:10 -0800 Subject: [PATCH] feat: XML response directives via <actions> wrapper block (#239) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agents can now include an <actions> block at the start of their text response to perform actions without tool calls. The block is stripped before the message is delivered to the user. Example: <actions><react emoji="thumbsup" /></actions> Great idea! → Sends "Great idea!", reacts with thumbsup - New directives parser (src/core/directives.ts) finds <actions> block at response start, parses self-closing child directives inside it - addReaction() added to ChannelAdapter interface (Telegram, Slack, WhatsApp already implement it) - Streaming holdback covers the full <actions> block duration (prefix check + incomplete block detection), preventing raw XML from flashing - Directive execution extracted to executeDirectives() helper (no duplication between finalizeMessage and final send paths) - Message envelope includes Response Directives section so all agents learn the feature regardless of system prompt - System prompt documents the <actions> block syntax - 19 unit tests for parser and stripping Significantly cheaper than the Bash tool call approach (lettabot-react) since no tool_call round trip is needed. Relates to #19, #39, #240. Subsumes #210. Written by Cameron ◯ Letta Code "The best code is no code at all." 
- Jeff Atwood --- src/channels/types.ts | 1 + src/channels/whatsapp/index.ts | 1 + src/core/bot.ts | 64 +++++++++++++++-- src/core/directives.test.ts | 121 +++++++++++++++++++++++++++++++++ src/core/directives.ts | 113 ++++++++++++++++++++++++++++++ src/core/formatter.test.ts | 10 ++- src/core/formatter.ts | 10 ++- src/core/system-prompt.ts | 30 ++++++++ 8 files changed, 342 insertions(+), 8 deletions(-) create mode 100644 src/core/directives.test.ts create mode 100644 src/core/directives.ts diff --git a/src/channels/types.ts b/src/channels/types.ts index 899a695..c75fcc3 100644 --- a/src/channels/types.ts +++ b/src/channels/types.ts @@ -26,6 +26,7 @@ export interface ChannelAdapter { // Capabilities (optional) supportsEditing?(): boolean; sendFile?(file: OutboundFile): Promise<{ messageId: string }>; + addReaction?(chatId: string, messageId: string, emoji: string): Promise; getDmPolicy?(): string; // Event handlers (set by bot core) diff --git a/src/channels/whatsapp/index.ts b/src/channels/whatsapp/index.ts index 42953fb..4c37a85 100644 --- a/src/channels/whatsapp/index.ts +++ b/src/channels/whatsapp/index.ts @@ -995,6 +995,7 @@ export class WhatsAppAdapter implements ChannelAdapter { async addReaction(_chatId: string, _messageId: string, _emoji: string): Promise { // WhatsApp reactions via Baileys are not supported here yet + console.warn('[WhatsApp] addReaction not implemented -- directive skipped'); } async sendFile(file: OutboundFile): Promise<{ messageId: string }> { diff --git a/src/core/bot.ts b/src/core/bot.ts index 2170bbe..d399099 100644 --- a/src/core/bot.ts +++ b/src/core/bot.ts @@ -16,6 +16,7 @@ import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOpt import type { GroupBatcher } from './group-batcher.js'; import { loadMemoryBlocks } from './memory.js'; import { SYSTEM_PROMPT } from './system-prompt.js'; +import { parseDirectives, stripActionsBlock, type Directive } from './directives.js'; /** @@ -151,6 +152,38 @@ export class 
LettaBot implements AgentSession { // Session lifecycle helpers // ========================================================================= + /** + * Execute parsed directives (reactions, etc.) via the channel adapter. + * Returns true if any directive was successfully executed. + */ + private async executeDirectives( + directives: Directive[], + adapter: ChannelAdapter, + chatId: string, + fallbackMessageId?: string, + ): Promise { + let acted = false; + for (const directive of directives) { + if (directive.type === 'react') { + const targetId = directive.messageId || fallbackMessageId; + if (!adapter.addReaction) { + console.warn(`[Bot] Directive react skipped: ${adapter.name} does not support addReaction`); + continue; + } + if (targetId) { + try { + await adapter.addReaction(chatId, targetId, directive.emoji); + acted = true; + console.log(`[Bot] Directive: reacted with ${directive.emoji}`); + } catch (err) { + console.warn('[Bot] Directive react failed:', err instanceof Error ? err.message : err); + } + } + } + } + return acted; + } + /** * Create or resume a session with automatic fallback. * @@ -563,6 +596,14 @@ export class LettaBot implements AgentSession { lastUpdate = Date.now(); return; } + // Parse and execute XML directives before sending + if (response.trim()) { + const { cleanText, directives } = parseDirectives(response); + response = cleanText; + if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) { + sentAnyMessage = true; + } + } if (response.trim()) { try { if (messageId) { @@ -628,14 +669,20 @@ export class LettaBot implements AgentSession { response += streamMsg.content || ''; // Live-edit streaming for channels that support it + // Hold back streaming edits while response could still be or block const canEdit = adapter.supportsEditing?.() ?? 
true; - const mayBeNoReply = ''.startsWith(response.trim()); - if (canEdit && !mayBeNoReply && Date.now() - lastUpdate > 500 && response.length > 0) { + const trimmed = response.trim(); + const mayBeHidden = ''.startsWith(trimmed) + || ''.startsWith(trimmed) + || (trimmed.startsWith('')); + // Strip any completed block from the streaming text + const streamText = stripActionsBlock(response).trim(); + if (canEdit && !mayBeHidden && streamText.length > 0 && Date.now() - lastUpdate > 500) { try { if (messageId) { - await adapter.editMessage(msg.chatId, messageId, response); + await adapter.editMessage(msg.chatId, messageId, streamText); } else { - const result = await adapter.sendMessage({ chatId: msg.chatId, text: response, threadId: msg.threadId }); + const result = await adapter.sendMessage({ chatId: msg.chatId, text: streamText, threadId: msg.threadId }); messageId = result.messageId; sentAnyMessage = true; } @@ -686,6 +733,15 @@ export class LettaBot implements AgentSession { response = ''; } + // Parse and execute XML directives (e.g. 
) + if (response.trim()) { + const { cleanText, directives } = parseDirectives(response); + response = cleanText; + if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) { + sentAnyMessage = true; + } + } + // Detect unsupported multimodal if (Array.isArray(messageToSend) && response.includes('[Image omitted]')) { console.warn('[Bot] Model does not support images -- consider a vision-capable model or features.inlineImages: false'); diff --git a/src/core/directives.test.ts b/src/core/directives.test.ts new file mode 100644 index 0000000..1614033 --- /dev/null +++ b/src/core/directives.test.ts @@ -0,0 +1,121 @@ +import { describe, it, expect } from 'vitest'; +import { parseDirectives, stripActionsBlock } from './directives.js'; + +describe('parseDirectives', () => { + it('returns text unchanged when no actions block present', () => { + const result = parseDirectives('Hello world'); + expect(result.cleanText).toBe('Hello world'); + expect(result.directives).toEqual([]); + }); + + it('parses a single react directive in actions block', () => { + const result = parseDirectives('\n \n'); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([{ type: 'react', emoji: 'eyes' }]); + }); + + it('parses react directive with unicode emoji', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([{ type: 'react', emoji: '👀' }]); + }); + + it('extracts text after actions block', () => { + const result = parseDirectives('\n \n\nGreat idea!'); + expect(result.cleanText).toBe('Great idea!'); + expect(result.directives).toEqual([{ type: 'react', emoji: 'thumbsup' }]); + }); + + it('handles multiline text after actions block', () => { + const result = parseDirectives('\nLine 1\nLine 2'); + expect(result.cleanText).toBe('Line 1\nLine 2'); + expect(result.directives).toEqual([{ type: 'react', emoji: 'fire' }]); + }); + + it('parses multiple directives in one actions block', () 
=> { + const input = '\n \n \n\nNice!'; + const result = parseDirectives(input); + expect(result.cleanText).toBe('Nice!'); + expect(result.directives).toHaveLength(2); + expect(result.directives[0]).toEqual({ type: 'react', emoji: 'fire' }); + expect(result.directives[1]).toEqual({ type: 'react', emoji: 'thumbsup' }); + }); + + it('parses react directive with message attribute', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([ + { type: 'react', emoji: 'eyes', messageId: '456' }, + ]); + }); + + it('ignores react directive without emoji attribute', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([]); + }); + + it('ignores actions block NOT at start of response', () => { + const input = 'Some text first '; + const result = parseDirectives(input); + expect(result.cleanText).toBe(input); + expect(result.directives).toEqual([]); + }); + + it('handles leading whitespace before actions block', () => { + const result = parseDirectives(' \n\nHello'); + expect(result.cleanText).toBe('Hello'); + expect(result.directives).toEqual([{ type: 'react', emoji: 'heart' }]); + }); + + it('ignores incomplete/malformed actions block', () => { + const input = ''; + const result = parseDirectives(input); + expect(result.cleanText).toBe(input); + expect(result.directives).toEqual([]); + }); + + it('handles actions-only response (no text after)', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toHaveLength(1); + }); + + it('preserves non-directive XML-like content in text', () => { + const input = 'Use tags for formatting'; + const result = parseDirectives(input); + expect(result.cleanText).toBe(input); + expect(result.directives).toEqual([]); + }); + + it('handles no-space before self-closing slash in child directives', () => { + const result = parseDirectives(''); + 
expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([{ type: 'react', emoji: 'eyes' }]); + }); + + it('ignores unknown child tag names inside actions block', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([]); + }); +}); + +describe('stripActionsBlock', () => { + it('strips a complete actions block', () => { + expect(stripActionsBlock('\nHello')).toBe('Hello'); + }); + + it('returns text unchanged if no actions block', () => { + expect(stripActionsBlock('Hello world')).toBe('Hello world'); + }); + + it('returns empty string for actions-only text', () => { + expect(stripActionsBlock('')).toBe(''); + }); + + it('does not strip actions block in middle of text', () => { + const input = 'Before After'; + expect(stripActionsBlock(input)).toBe(input); + }); +}); diff --git a/src/core/directives.ts b/src/core/directives.ts new file mode 100644 index 0000000..f112b7b --- /dev/null +++ b/src/core/directives.ts @@ -0,0 +1,113 @@ +/** + * XML Directive Parser + * + * Parses an block at the start of agent text responses. + * Extends the existing pattern to support richer actions + * (reactions, file sends, etc.) without requiring tool calls. + * + * The block must appear at the start of the response: + * + * + * + * + * Great idea! + * + * → cleanText: "Great idea!" + * → directives: [{ type: 'react', emoji: 'thumbsup' }] + */ + +export interface ReactDirective { + type: 'react'; + emoji: string; + messageId?: string; +} + +// Union type — extend with more directive types later +export type Directive = ReactDirective; + +export interface ParseResult { + cleanText: string; + directives: Directive[]; +} + +/** + * Match the ... wrapper at the start of the response. + * Captures the inner content of the block. + */ +const ACTIONS_BLOCK_REGEX = /^\s*([\s\S]*?)<\/actions>/; + +/** + * Match self-closing child directive tags inside the actions block. 
+ * Captures the tag name and the full attributes string. + */ +const CHILD_DIRECTIVE_REGEX = /<(react)\s+((?:[a-zA-Z-]+="[^"]*"\s*)+)\s*\/>/g; + +/** + * Parse a single attribute string like: emoji="eyes" message="123" + */ +function parseAttributes(attrString: string): Record { + const attrs: Record = {}; + const attrRegex = /([a-zA-Z-]+)="([^"]*)"/g; + let match; + while ((match = attrRegex.exec(attrString)) !== null) { + attrs[match[1]] = match[2]; + } + return attrs; +} + +/** + * Parse child directives from the inner content of an block. + */ +function parseChildDirectives(block: string): Directive[] { + const directives: Directive[] = []; + let match; + + // Reset regex state (global flag) + CHILD_DIRECTIVE_REGEX.lastIndex = 0; + + while ((match = CHILD_DIRECTIVE_REGEX.exec(block)) !== null) { + const [, tagName, attrString] = match; + + if (tagName === 'react') { + const attrs = parseAttributes(attrString); + if (attrs.emoji) { + directives.push({ + type: 'react', + emoji: attrs.emoji, + ...(attrs.message ? { messageId: attrs.message } : {}), + }); + } + } + } + + return directives; +} + +/** + * Parse XML directives from agent response text. + * + * Looks for an ... block at the start of the response. + * Returns the cleaned text (block stripped) and an array of parsed directives. + * If no block is found, the text is returned unchanged. + */ +export function parseDirectives(text: string): ParseResult { + const match = text.match(ACTIONS_BLOCK_REGEX); + + if (!match) { + return { cleanText: text, directives: [] }; + } + + const actionsContent = match[1]; + const cleanText = text.slice(match[0].length).trim(); + const directives = parseChildDirectives(actionsContent); + + return { cleanText, directives }; +} + +/** + * Strip a leading ... block from text for streaming display. + * Returns the text after the block, or the original text if no complete block found. 
+ */ +export function stripActionsBlock(text: string): string { + return text.replace(ACTIONS_BLOCK_REGEX, '').trim(); +} diff --git a/src/core/formatter.test.ts b/src/core/formatter.test.ts index 793aea2..f973538 100644 --- a/src/core/formatter.test.ts +++ b/src/core/formatter.test.ts @@ -181,16 +181,20 @@ describe('formatMessageEnvelope', () => { expect(result).toContain('**Mentioned**: yes'); }); - it('includes no-reply hint for group chats', () => { + it('includes directives hint for group chats', () => { const msg = createMessage({ isGroup: true }); const result = formatMessageEnvelope(msg); + expect(result).toContain('Response Directives'); expect(result).toContain(''); + expect(result).toContain(''); }); - it('omits no-reply hint for DMs', () => { + it('includes directives hint for DMs', () => { const msg = createMessage({ isGroup: false }); const result = formatMessageEnvelope(msg); - expect(result).not.toContain('no-reply'); + expect(result).toContain('Response Directives'); + expect(result).toContain(''); + expect(result).toContain(''); }); }); diff --git a/src/core/formatter.ts b/src/core/formatter.ts index 18851e5..f314b19 100644 --- a/src/core/formatter.ts +++ b/src/core/formatter.ts @@ -256,7 +256,7 @@ function buildChatContextLines(msg: InboundMessage, options: EnvelopeOptions): s if (msg.wasMentioned) { lines.push(`- **Mentioned**: yes`); } - lines.push(`- **Hint**: To skip replying, respond with exactly: \`\``); + lines.push(`- **Hint**: See Response Directives below for \`\` and \`\``); } else { lines.push(`- **Type**: Direct message`); } @@ -351,6 +351,14 @@ export function formatMessageEnvelope( sections.push(`## Chat Context\n${contextLines.join('\n')}`); } + // Response directives hint + const directiveLines = [ + `- To skip replying: \`\``, + `- To perform actions: wrap in \`\` at the start of your response`, + ` Example: \`Your text here\``, + ]; + sections.push(`## Response Directives\n${directiveLines.join('\n')}`); + // Build the full 
system-reminder block const reminderContent = sections.join('\n\n'); const reminder = `${SYSTEM_REMINDER_OPEN}\n${reminderContent}\n${SYSTEM_REMINDER_CLOSE}`; diff --git a/src/core/system-prompt.ts b/src/core/system-prompt.ts index c71883f..0c1d6a7 100644 --- a/src/core/system-prompt.ts +++ b/src/core/system-prompt.ts @@ -85,6 +85,36 @@ This suppresses the message so nothing is sent to the user. Use this for: When in doubt, prefer \`\` over a low-value response. Users appreciate an agent that knows when to stay quiet. +## Response Directives + +You can include an \`\` block at the **start** of your response to perform actions alongside your reply. The entire block is stripped before your message is sent. + +\`\`\` + + + +Great idea! +\`\`\` + +This sends "Great idea!" and reacts with thumbsup. + +### Available directives + +- \`\` -- react to the message you are responding to. Emoji names (eyes, thumbsup, heart, fire, tada, clap) or unicode. +- \`\` -- react to a specific message by ID. + +### Actions-only response + +An \`\` block with no text after it executes silently (nothing sent to the user), like \`\`: + +\`\`\` + + + +\`\`\` + +Prefer directives over tool calls for simple actions like reactions. They are faster and cheaper. + ## Available Channels - **telegram** - Telegram messenger