feat: XML response directives via <actions> wrapper block (#239)

Agents can now include an <actions> block at the start of their text response to perform actions without tool calls. The block is stripped before the message is delivered to the user.

Example:

    <actions> <react emoji="thumbsup" /> </actions> Great idea!

→ Sends "Great idea!", reacts with thumbsup

- New directives parser (src/core/directives.ts) finds the <actions> block at the start of the response and parses the self-closing child directives inside it
- addReaction() added to the ChannelAdapter interface (Telegram, Slack, WhatsApp already implement it)
- Streaming holdback covers the full <actions> block duration (prefix check + incomplete block detection), preventing raw XML from flashing
- Directive execution extracted to an executeDirectives() helper (no duplication between the finalizeMessage and final send paths)
- Message envelope includes a Response Directives section so all agents learn the feature regardless of system prompt
- System prompt documents the <actions> block syntax
- 19 unit tests for parser and stripping

Significantly cheaper than the Bash tool call approach (lettabot-react) since no tool_call round trip is needed.

Relates to #19, #39, #240. Subsumes #210.

Written by Cameron ◯ Letta Code

"The best code is no code at all." - Jeff Atwood
2026-02-09 15:53:10 -08:00
parent 39fb657494
commit 5f7cdd3471
8 changed files with 342 additions and 8 deletions
--- a/src/channels/types.ts
+++ b/src/channels/types.ts
@@ -26,6 +26,7 @@ export interface ChannelAdapter {
  // Capabilities (optional)
  supportsEditing?(): boolean;
  sendFile?(file: OutboundFile): Promise<{ messageId: string }>;
+  addReaction?(chatId: string, messageId: string, emoji: string): Promise<void>;
  getDmPolicy?(): string;
  
  // Event handlers (set by bot core)
--- a/src/channels/whatsapp/index.ts
+++ b/src/channels/whatsapp/index.ts
@@ -995,6 +995,7 @@ export class WhatsAppAdapter implements ChannelAdapter {

  async addReaction(_chatId: string, _messageId: string, _emoji: string): Promise<void> {
    // WhatsApp reactions via Baileys are not supported here yet
+    // NOTE: this only logs and still resolves, so a caller awaiting it cannot
+    // distinguish a skipped reaction from one that was actually delivered.
+    console.warn('[WhatsApp] addReaction not implemented -- directive skipped');
  }

  async sendFile(file: OutboundFile): Promise<{ messageId: string }> {
--- a/src/core/bot.ts
+++ b/src/core/bot.ts
@@ -16,6 +16,7 @@ import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOpt
 import type { GroupBatcher } from './group-batcher.js';
 import { loadMemoryBlocks } from './memory.js';
 import { SYSTEM_PROMPT } from './system-prompt.js';
+import { parseDirectives, stripActionsBlock, type Directive } from './directives.js';


 /**
@@ -151,6 +152,38 @@ export class LettaBot implements AgentSession {
  // Session lifecycle helpers
  // =========================================================================

+  /**
+   * Execute parsed directives (reactions, etc.) via the channel adapter.
+   *
+   * For `react`, the target is the directive's own `messageId` when present,
+   * otherwise `fallbackMessageId`. A directive is skipped with a warning (never
+   * a throw) when the adapter has no `addReaction`, when no target message ID
+   * is available, or when the adapter call rejects.
+   *
+   * @param directives - directives to run, in order
+   * @param adapter - channel adapter that performs the action
+   * @param chatId - chat in which the target message lives
+   * @param fallbackMessageId - target used when a directive names no message
+   * @returns true if at least one adapter call completed without throwing
+   */
+  private async executeDirectives(
+    directives: Directive[],
+    adapter: ChannelAdapter,
+    chatId: string,
+    fallbackMessageId?: string,
+  ): Promise<boolean> {
+    let acted = false;
+    for (const directive of directives) {
+      if (directive.type !== 'react') continue;
+
+      if (!adapter.addReaction) {
+        console.warn(`[Bot] Directive react skipped: ${adapter.name} does not support addReaction`);
+        continue;
+      }
+
+      const targetId = directive.messageId || fallbackMessageId;
+      if (!targetId) {
+        // Previously dropped silently; surface it so a missing reaction can be diagnosed.
+        console.warn('[Bot] Directive react skipped: no target message ID');
+        continue;
+      }
+
+      try {
+        await adapter.addReaction(chatId, targetId, directive.emoji);
+        acted = true;
+        console.log(`[Bot] Directive: reacted with ${directive.emoji}`);
+      } catch (err) {
+        // Best-effort: a failed reaction must not block delivery of the reply text.
+        console.warn('[Bot] Directive react failed:', err instanceof Error ? err.message : err);
+      }
+    }
+    return acted;
+  }
+
  /**
   * Create or resume a session with automatic fallback.
   * 
@@ -563,6 +596,14 @@ export class LettaBot implements AgentSession {
          lastUpdate = Date.now();
          return;
        }
+        // Parse and execute XML directives before sending
+        if (response.trim()) {
+          const { cleanText, directives } = parseDirectives(response);
+          response = cleanText;
+          if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) {
+            sentAnyMessage = true;
+          }
+        }
        if (response.trim()) {
          try {
            if (messageId) {
@@ -628,14 +669,20 @@ export class LettaBot implements AgentSession {
            response += streamMsg.content || '';
            
            // Live-edit streaming for channels that support it
+            // Hold back streaming edits while response could still be <no-reply/> or <actions> block
            const canEdit = adapter.supportsEditing?.() ?? true;
-            const mayBeNoReply = '<no-reply/>'.startsWith(response.trim());
-            if (canEdit && !mayBeNoReply && Date.now() - lastUpdate > 500 && response.length > 0) {
+            const trimmed = response.trim();
+            const mayBeHidden = '<no-reply/>'.startsWith(trimmed)
+              || '<actions>'.startsWith(trimmed)
+              || (trimmed.startsWith('<actions') && !trimmed.includes('</actions>'));
+            // Strip any completed <actions> block from the streaming text
+            const streamText = stripActionsBlock(response).trim();
+            if (canEdit && !mayBeHidden && streamText.length > 0 && Date.now() - lastUpdate > 500) {
              try {
                if (messageId) {
-                  await adapter.editMessage(msg.chatId, messageId, response);
+                  await adapter.editMessage(msg.chatId, messageId, streamText);
                } else {
-                  const result = await adapter.sendMessage({ chatId: msg.chatId, text: response, threadId: msg.threadId });
+                  const result = await adapter.sendMessage({ chatId: msg.chatId, text: streamText, threadId: msg.threadId });
                  messageId = result.messageId;
                  sentAnyMessage = true;
                }
@@ -686,6 +733,15 @@ export class LettaBot implements AgentSession {
        response = '';
      }

+      // Parse and execute XML directives (e.g. <actions><react emoji="eyes" /></actions>)
+      if (response.trim()) {
+        const { cleanText, directives } = parseDirectives(response);
+        response = cleanText;
+        if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) {
+          sentAnyMessage = true;
+        }
+      }
+
      // Detect unsupported multimodal
      if (Array.isArray(messageToSend) && response.includes('[Image omitted]')) {
        console.warn('[Bot] Model does not support images -- consider a vision-capable model or features.inlineImages: false');
--- a/src/core/directives.test.ts
+++ b/src/core/directives.test.ts
@@ -0,0 +1,121 @@
+import { describe, it, expect } from 'vitest';
+import { parseDirectives, stripActionsBlock } from './directives.js';
+
+describe('parseDirectives', () => {
+  // Shared check: parse `input`, then compare the remaining text and the directive list.
+  const expectParsed = (input: string, cleanText: string, directives: unknown[]): void => {
+    const parsed = parseDirectives(input);
+    expect(parsed.cleanText).toBe(cleanText);
+    expect(parsed.directives).toEqual(directives);
+  };
+
+  it('returns text unchanged when no actions block present', () => {
+    expectParsed('Hello world', 'Hello world', []);
+  });
+
+  it('parses a single react directive in actions block', () => {
+    expectParsed('<actions>\n  <react emoji="eyes" />\n</actions>', '', [
+      { type: 'react', emoji: 'eyes' },
+    ]);
+  });
+
+  it('parses react directive with unicode emoji', () => {
+    expectParsed('<actions><react emoji="👀" /></actions>', '', [{ type: 'react', emoji: '👀' }]);
+  });
+
+  it('extracts text after actions block', () => {
+    expectParsed('<actions>\n  <react emoji="thumbsup" />\n</actions>\nGreat idea!', 'Great idea!', [
+      { type: 'react', emoji: 'thumbsup' },
+    ]);
+  });
+
+  it('handles multiline text after actions block', () => {
+    expectParsed('<actions><react emoji="fire" /></actions>\nLine 1\nLine 2', 'Line 1\nLine 2', [
+      { type: 'react', emoji: 'fire' },
+    ]);
+  });
+
+  it('parses multiple directives in one actions block', () => {
+    const parsed = parseDirectives(
+      '<actions>\n  <react emoji="fire" />\n  <react emoji="thumbsup" />\n</actions>\nNice!',
+    );
+    expect(parsed.cleanText).toBe('Nice!');
+    expect(parsed.directives).toHaveLength(2);
+    const [first, second] = parsed.directives;
+    expect(first).toEqual({ type: 'react', emoji: 'fire' });
+    expect(second).toEqual({ type: 'react', emoji: 'thumbsup' });
+  });
+
+  it('parses react directive with message attribute', () => {
+    expectParsed('<actions><react emoji="eyes" message="456" /></actions>', '', [
+      { type: 'react', emoji: 'eyes', messageId: '456' },
+    ]);
+  });
+
+  it('ignores react directive without emoji attribute', () => {
+    expectParsed('<actions><react message="123" /></actions>', '', []);
+  });
+
+  it('ignores actions block NOT at start of response', () => {
+    const untouched = 'Some text first <actions><react emoji="eyes" /></actions>';
+    expectParsed(untouched, untouched, []);
+  });
+
+  it('handles leading whitespace before actions block', () => {
+    expectParsed('  \n<actions><react emoji="heart" /></actions>\nHello', 'Hello', [
+      { type: 'react', emoji: 'heart' },
+    ]);
+  });
+
+  it('ignores incomplete/malformed actions block', () => {
+    const unterminated = '<actions><react emoji="eyes" />';
+    expectParsed(unterminated, unterminated, []);
+  });
+
+  it('handles actions-only response (no text after)', () => {
+    const parsed = parseDirectives('<actions><react emoji="thumbsup" /></actions>');
+    expect(parsed.cleanText).toBe('');
+    expect(parsed.directives).toHaveLength(1);
+  });
+
+  it('preserves non-directive XML-like content in text', () => {
+    const plain = 'Use <code> tags for formatting';
+    expectParsed(plain, plain, []);
+  });
+
+  it('handles no-space before self-closing slash in child directives', () => {
+    expectParsed('<actions><react emoji="eyes"/></actions>', '', [{ type: 'react', emoji: 'eyes' }]);
+  });
+
+  it('ignores unknown child tag names inside actions block', () => {
+    expectParsed('<actions><unknown emoji="test" /></actions>', '', []);
+  });
+});
+
+describe('stripActionsBlock', () => {
+  it('strips a complete actions block', () => {
+    const stripped = stripActionsBlock('<actions><react emoji="eyes" /></actions>\nHello');
+    expect(stripped).toBe('Hello');
+  });
+
+  it('returns text unchanged if no actions block', () => {
+    const stripped = stripActionsBlock('Hello world');
+    expect(stripped).toBe('Hello world');
+  });
+
+  it('returns empty string for actions-only text', () => {
+    const stripped = stripActionsBlock('<actions><react emoji="eyes" /></actions>');
+    expect(stripped).toBe('');
+  });
+
+  it('does not strip actions block in middle of text', () => {
+    // The block is only recognised at the very start, so this text must come back as-is.
+    const embedded = 'Before <actions><react emoji="eyes" /></actions> After';
+    const stripped = stripActionsBlock(embedded);
+    expect(stripped).toBe(embedded);
+  });
+});
--- a/src/core/directives.ts
+++ b/src/core/directives.ts
@@ -0,0 +1,113 @@
+/**
+ * XML Directive Parser
+ *
+ * Parses an <actions> block at the start of agent text responses.
+ * Extends the existing <no-reply/> pattern to support richer actions
+ * (reactions, file sends, etc.) without requiring tool calls.
+ *
+ * The <actions> block must appear at the start of the response:
+ *
+ *   <actions>
+ *     <react emoji="thumbsup" />
+ *   </actions>
+ *   Great idea!
+ *
+ *   → cleanText: "Great idea!"
+ *   → directives: [{ type: 'react', emoji: 'thumbsup' }]
+ */
+
+/** A request, parsed from a `<react ... />` tag, to add an emoji reaction to a message. */
+export interface ReactDirective {
+  /** Discriminant tag identifying this directive kind. */
+  type: 'react';
+  /** Raw value of the `emoji` attribute, passed through exactly as written. */
+  emoji: string;
+  /** Value of the `message` attribute; set only when the tag carried a non-empty one. */
+  messageId?: string;
+}
+
+// Union type — extend with more directive types later
+export type Directive = ReactDirective;
+
+/** Outcome of parsing a response: the remaining text plus the directives found. */
+export interface ParseResult {
+  /** Text after the leading `<actions>` block (trimmed), or the input unchanged if there was no block. */
+  cleanText: string;
+  /** Directives found inside the block, in the order they appear; empty if none. */
+  directives: Directive[];
+}
+
+/**
+ * Match the <actions>...</actions> wrapper at the start of the response.
+ * Captures the inner content of the block.
+ *
+ * - `^\s*` anchors to the start of the text while tolerating leading whitespace.
+ * - The opening tag must be exactly `<actions>` (no attributes, no inner spaces).
+ * - `[\s\S]*?` is lazy and spans newlines, so the block ends at the first `</actions>`.
+ * - There is no `g` flag, so the regex keeps no `lastIndex` state between calls.
+ */
+const ACTIONS_BLOCK_REGEX = /^\s*<actions>([\s\S]*?)<\/actions>/;
+
+/**
+ * Match self-closing child directive tags inside the actions block.
+ * Captures the tag name (group 1) and the full attributes string (group 2).
+ *
+ * Only `react` is recognised; any other tag name does not match and is ignored.
+ * At least one double-quoted `name="value"` attribute is required.
+ *
+ * Whitespace before `/>` is already consumed by the `\s*` that follows each
+ * attribute, so no second `\s*` sits in front of `\/>`: two adjacent `\s*`
+ * can split the same run of spaces in many ways and backtrack quadratically
+ * when the tag is never closed. The set of matches and captures is unchanged.
+ *
+ * Carries the global flag, so `lastIndex` must be reset before an `exec` scan.
+ */
+const CHILD_DIRECTIVE_REGEX = /<(react)\s+((?:[a-zA-Z-]+="[^"]*"\s*)+)\/>/g;
+
+/**
+ * Collect every `name="value"` pair from an attribute string into a plain object.
+ *
+ * Names are runs of ASCII letters and hyphens; values are whatever sits between
+ * the double quotes (possibly empty). When a name repeats, the last value wins.
+ *
+ * @param attrString - e.g. `emoji="eyes" message="123"`
+ * @returns map from attribute name to raw value; empty when nothing matches
+ */
+function parseAttributes(attrString: string): Record<string, string> {
+  const attrs: Record<string, string> = {};
+  for (const [, name, value] of attrString.matchAll(/([a-zA-Z-]+)="([^"]*)"/g)) {
+    attrs[name] = value;
+  }
+  return attrs;
+}
+
+/**
+ * Turn the inner content of an <actions> block into a list of directives.
+ *
+ * Every `<react ... />` tag with a non-empty `emoji` attribute yields one
+ * `ReactDirective`, in order of appearance. A non-empty `message` attribute
+ * becomes `messageId`. Tags without an `emoji` are dropped.
+ *
+ * @param block - text found between `<actions>` and `</actions>`
+ * @returns the directives found; empty when the block holds none
+ */
+function parseChildDirectives(block: string): Directive[] {
+  const found: Directive[] = [];
+
+  // The shared regex is global; start every scan from the beginning.
+  CHILD_DIRECTIVE_REGEX.lastIndex = 0;
+
+  for (const [, tagName, attrString] of block.matchAll(CHILD_DIRECTIVE_REGEX)) {
+    if (tagName !== 'react') continue;
+
+    const { emoji, message } = parseAttributes(attrString);
+    if (!emoji) continue;
+
+    found.push(message ? { type: 'react', emoji, messageId: message } : { type: 'react', emoji });
+  }
+
+  return found;
+}
+
+/**
+ * Split an agent response into its directives and the text meant for the user.
+ *
+ * Only an <actions>...</actions> block at the very start (leading whitespace
+ * allowed) is recognised. When one is present, the text after it is returned
+ * trimmed together with the directives parsed from the block. When none is
+ * present, the input is returned untouched with an empty directive list.
+ *
+ * @param text - raw response text from the agent
+ * @returns the user-facing text and the directives to execute
+ */
+export function parseDirectives(text: string): ParseResult {
+  const leadingBlock = ACTIONS_BLOCK_REGEX.exec(text);
+  if (leadingBlock === null) {
+    return { cleanText: text, directives: [] };
+  }
+
+  const [wholeBlock, innerContent] = leadingBlock;
+  return {
+    cleanText: text.slice(wholeBlock.length).trim(),
+    directives: parseChildDirectives(innerContent),
+  };
+}
+
+/**
+ * Remove a leading, complete <actions>...</actions> block for streaming display.
+ *
+ * The result is always trimmed: with a complete leading block it is the text
+ * that follows the block; otherwise it is the input with surrounding
+ * whitespace removed. Directives are not parsed or executed here.
+ *
+ * @param text - response text accumulated so far
+ * @returns the displayable text
+ */
+export function stripActionsBlock(text: string): string {
+  const withoutBlock = text.replace(ACTIONS_BLOCK_REGEX, '');
+  return withoutBlock.trim();
+}
--- a/src/core/formatter.test.ts
+++ b/src/core/formatter.test.ts
@@ -181,16 +181,20 @@ describe('formatMessageEnvelope', () => {
      expect(result).toContain('**Mentioned**: yes');
    });

-    it('includes no-reply hint for group chats', () => {
+    it('includes directives hint for group chats', () => {
      const msg = createMessage({ isGroup: true });
      const result = formatMessageEnvelope(msg);
+      expect(result).toContain('Response Directives');
      expect(result).toContain('<no-reply/>');
+      expect(result).toContain('<actions>');
    });

-    it('omits no-reply hint for DMs', () => {
+    it('includes directives hint for DMs', () => {
      const msg = createMessage({ isGroup: false });
      const result = formatMessageEnvelope(msg);
-      expect(result).not.toContain('no-reply');
+      expect(result).toContain('Response Directives');
+      expect(result).toContain('<no-reply/>');
+      expect(result).toContain('<actions>');
    });
  });

--- a/src/core/formatter.ts
+++ b/src/core/formatter.ts
@@ -256,7 +256,7 @@ function buildChatContextLines(msg: InboundMessage, options: EnvelopeOptions): s
    if (msg.wasMentioned) {
      lines.push(`- **Mentioned**: yes`);
    }
-    lines.push(`- **Hint**: To skip replying, respond with exactly: \`<no-reply/>\``);
+    lines.push(`- **Hint**: See Response Directives below for \`<no-reply/>\` and \`<actions>\``);
  } else {
    lines.push(`- **Type**: Direct message`);
  }
@@ -351,6 +351,14 @@ export function formatMessageEnvelope(
    sections.push(`## Chat Context\n${contextLines.join('\n')}`);
  }

+  // Response directives hint
+  const directiveLines = [
+    `- To skip replying: \`<no-reply/>\``,
+    `- To perform actions: wrap in \`<actions>\` at the start of your response`,
+    `  Example: \`<actions><react emoji="thumbsup" /></actions>Your text here\``,
+  ];
+  sections.push(`## Response Directives\n${directiveLines.join('\n')}`);
+
  // Build the full system-reminder block
  const reminderContent = sections.join('\n\n');
  const reminder = `${SYSTEM_REMINDER_OPEN}\n${reminderContent}\n${SYSTEM_REMINDER_CLOSE}`;
--- a/src/core/system-prompt.ts
+++ b/src/core/system-prompt.ts
@@ -85,6 +85,36 @@ This suppresses the message so nothing is sent to the user. Use this for:

 When in doubt, prefer \`<no-reply/>\` over a low-value response. Users appreciate an agent that knows when to stay quiet.

+## Response Directives
+
+You can include an \`<actions>\` block at the **start** of your response to perform actions alongside your reply. The entire block is stripped before your message is sent.
+
+\`\`\`
+<actions>
+  <react emoji="thumbsup" />
+</actions>
+Great idea!
+\`\`\`
+
+This sends "Great idea!" and reacts with thumbsup.
+
+### Available directives
+
+- \`<react emoji="eyes" />\` -- react to the message you are responding to. Emoji names (eyes, thumbsup, heart, fire, tada, clap) or unicode.
+- \`<react emoji="fire" message="123" />\` -- react to a specific message by ID.
+
+### Actions-only response
+
+An \`<actions>\` block with no text after it executes silently (nothing sent to the user), like \`<no-reply/>\`:
+
+\`\`\`
+<actions>
+  <react emoji="eyes" />
+</actions>
+\`\`\`
+
+Prefer directives over tool calls for simple actions like reactions. They are faster and cheaper.
+
 ## Available Channels

 - **telegram** - Telegram messenger