feat: XML response directives via <actions> wrapper block (#239)
Agents can now include an <actions> block at the start of their text
response to perform actions without tool calls. The block is stripped
before the message is delivered to the user.
Example:
<actions>
<react emoji="thumbsup" />
</actions>
Great idea!
→ Sends "Great idea!", reacts with thumbsup
- New directives parser (src/core/directives.ts) finds <actions> block
at response start, parses self-closing child directives inside it
- addReaction() added to ChannelAdapter interface (Telegram, Slack,
WhatsApp already implement it; the WhatsApp version is currently a stub
that only logs a warning)
- Streaming holdback covers the full <actions> block duration (prefix
check + incomplete block detection), preventing raw XML from flashing
- Directive execution extracted to executeDirectives() helper (no
duplication between finalizeMessage and final send paths)
- Message envelope includes Response Directives section so all agents
learn the feature regardless of system prompt
- System prompt documents the <actions> block syntax
- 19 unit tests for parser and stripping
Significantly cheaper than the Bash tool call approach (lettabot-react)
since no tool_call round trip is needed.
Relates to #19, #39, #240. Subsumes #210.
Written by Cameron ◯ Letta Code
"The best code is no code at all." - Jeff Atwood
This commit is contained in:
@@ -26,6 +26,7 @@ export interface ChannelAdapter {
|
||||
// Capabilities (optional)
|
||||
supportsEditing?(): boolean;
|
||||
sendFile?(file: OutboundFile): Promise<{ messageId: string }>;
|
||||
addReaction?(chatId: string, messageId: string, emoji: string): Promise<void>;
|
||||
getDmPolicy?(): string;
|
||||
|
||||
// Event handlers (set by bot core)
|
||||
|
||||
@@ -995,6 +995,7 @@ export class WhatsAppAdapter implements ChannelAdapter {
|
||||
|
||||
/**
 * Placeholder for the optional `addReaction` channel capability.
 *
 * All three arguments are ignored; the method only logs a warning so that a
 * skipped react directive is visible in the logs, then resolves.
 */
async addReaction(_chatId: string, _messageId: string, _emoji: string): Promise<void> {
  // WhatsApp reactions via Baileys are not supported here yet
  const notice = '[WhatsApp] addReaction not implemented -- directive skipped';
  console.warn(notice);
}
|
||||
|
||||
async sendFile(file: OutboundFile): Promise<{ messageId: string }> {
|
||||
|
||||
@@ -16,6 +16,7 @@ import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOpt
|
||||
import type { GroupBatcher } from './group-batcher.js';
|
||||
import { loadMemoryBlocks } from './memory.js';
|
||||
import { SYSTEM_PROMPT } from './system-prompt.js';
|
||||
import { parseDirectives, stripActionsBlock, type Directive } from './directives.js';
|
||||
|
||||
|
||||
/**
|
||||
@@ -151,6 +152,38 @@ export class LettaBot implements AgentSession {
|
||||
// Session lifecycle helpers
|
||||
// =========================================================================
|
||||
|
||||
/**
 * Execute parsed directives (reactions, etc.) via the channel adapter.
 *
 * Directives are processed sequentially and in order. A directive that cannot
 * be executed (adapter lacks `addReaction`, no target message id is available,
 * or the adapter call throws) is logged and skipped; it never aborts the
 * remaining directives and never throws to the caller.
 *
 * @param directives - Directives parsed from the agent response.
 * @param adapter - Channel adapter used to perform the actions.
 * @param chatId - Chat in which the actions are performed.
 * @param fallbackMessageId - Message id used as the reaction target when a
 *   directive does not carry its own `messageId`.
 * @returns true if any directive was successfully executed.
 */
private async executeDirectives(
  directives: Directive[],
  adapter: ChannelAdapter,
  chatId: string,
  fallbackMessageId?: string,
): Promise<boolean> {
  let acted = false;

  for (const directive of directives) {
    if (directive.type !== 'react') {
      continue;
    }

    if (!adapter.addReaction) {
      console.warn(`[Bot] Directive react skipped: ${adapter.name} does not support addReaction`);
      continue;
    }

    // An explicit target on the directive wins; otherwise use the fallback id.
    const targetId = directive.messageId || fallbackMessageId;
    if (!targetId) {
      // Previously this case was dropped without a trace, unlike every other skip path.
      console.warn('[Bot] Directive react skipped: no target message id available');
      continue;
    }

    try {
      await adapter.addReaction(chatId, targetId, directive.emoji);
      acted = true;
      console.log(`[Bot] Directive: reacted with ${directive.emoji}`);
    } catch (err) {
      // Best effort: a failed reaction must not block the text reply.
      console.warn('[Bot] Directive react failed:', err instanceof Error ? err.message : err);
    }
  }

  return acted;
}
|
||||
|
||||
/**
|
||||
* Create or resume a session with automatic fallback.
|
||||
*
|
||||
@@ -563,6 +596,14 @@ export class LettaBot implements AgentSession {
|
||||
lastUpdate = Date.now();
|
||||
return;
|
||||
}
|
||||
// Parse and execute XML directives before sending
|
||||
if (response.trim()) {
|
||||
const { cleanText, directives } = parseDirectives(response);
|
||||
response = cleanText;
|
||||
if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) {
|
||||
sentAnyMessage = true;
|
||||
}
|
||||
}
|
||||
if (response.trim()) {
|
||||
try {
|
||||
if (messageId) {
|
||||
@@ -628,14 +669,20 @@ export class LettaBot implements AgentSession {
|
||||
response += streamMsg.content || '';
|
||||
|
||||
// Live-edit streaming for channels that support it
|
||||
// Hold back streaming edits while response could still be <no-reply/> or <actions> block
|
||||
const canEdit = adapter.supportsEditing?.() ?? true;
|
||||
const mayBeNoReply = '<no-reply/>'.startsWith(response.trim());
|
||||
if (canEdit && !mayBeNoReply && Date.now() - lastUpdate > 500 && response.length > 0) {
|
||||
const trimmed = response.trim();
|
||||
const mayBeHidden = '<no-reply/>'.startsWith(trimmed)
|
||||
|| '<actions>'.startsWith(trimmed)
|
||||
|| (trimmed.startsWith('<actions') && !trimmed.includes('</actions>'));
|
||||
// Strip any completed <actions> block from the streaming text
|
||||
const streamText = stripActionsBlock(response).trim();
|
||||
if (canEdit && !mayBeHidden && streamText.length > 0 && Date.now() - lastUpdate > 500) {
|
||||
try {
|
||||
if (messageId) {
|
||||
await adapter.editMessage(msg.chatId, messageId, response);
|
||||
await adapter.editMessage(msg.chatId, messageId, streamText);
|
||||
} else {
|
||||
const result = await adapter.sendMessage({ chatId: msg.chatId, text: response, threadId: msg.threadId });
|
||||
const result = await adapter.sendMessage({ chatId: msg.chatId, text: streamText, threadId: msg.threadId });
|
||||
messageId = result.messageId;
|
||||
sentAnyMessage = true;
|
||||
}
|
||||
@@ -686,6 +733,15 @@ export class LettaBot implements AgentSession {
|
||||
response = '';
|
||||
}
|
||||
|
||||
// Parse and execute XML directives (e.g. <actions><react emoji="eyes" /></actions>)
|
||||
if (response.trim()) {
|
||||
const { cleanText, directives } = parseDirectives(response);
|
||||
response = cleanText;
|
||||
if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) {
|
||||
sentAnyMessage = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Detect unsupported multimodal
|
||||
if (Array.isArray(messageToSend) && response.includes('[Image omitted]')) {
|
||||
console.warn('[Bot] Model does not support images -- consider a vision-capable model or features.inlineImages: false');
|
||||
|
||||
121
src/core/directives.test.ts
Normal file
121
src/core/directives.test.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { parseDirectives, stripActionsBlock } from './directives.js';
|
||||
|
||||
// Parser behaviour: block detection at the start of the response, child
// directive extraction, and the text left over once the block is removed.
describe('parseDirectives', () => {
  it('returns text unchanged when no actions block present', () => {
    const { cleanText, directives } = parseDirectives('Hello world');
    expect(cleanText).toBe('Hello world');
    expect(directives).toEqual([]);
  });

  it('parses a single react directive in actions block', () => {
    const { cleanText, directives } = parseDirectives('<actions>\n <react emoji="eyes" />\n</actions>');
    expect(cleanText).toBe('');
    expect(directives).toEqual([{ type: 'react', emoji: 'eyes' }]);
  });

  it('parses react directive with unicode emoji', () => {
    const { cleanText, directives } = parseDirectives('<actions><react emoji="👀" /></actions>');
    expect(cleanText).toBe('');
    expect(directives).toEqual([{ type: 'react', emoji: '👀' }]);
  });

  it('extracts text after actions block', () => {
    const { cleanText, directives } = parseDirectives(
      '<actions>\n <react emoji="thumbsup" />\n</actions>\nGreat idea!',
    );
    expect(cleanText).toBe('Great idea!');
    expect(directives).toEqual([{ type: 'react', emoji: 'thumbsup' }]);
  });

  it('handles multiline text after actions block', () => {
    const { cleanText, directives } = parseDirectives(
      '<actions><react emoji="fire" /></actions>\nLine 1\nLine 2',
    );
    expect(cleanText).toBe('Line 1\nLine 2');
    expect(directives).toEqual([{ type: 'react', emoji: 'fire' }]);
  });

  it('parses multiple directives in one actions block', () => {
    const input = '<actions>\n <react emoji="fire" />\n <react emoji="thumbsup" />\n</actions>\nNice!';
    const { cleanText, directives } = parseDirectives(input);
    expect(cleanText).toBe('Nice!');
    expect(directives).toHaveLength(2);
    expect(directives[0]).toEqual({ type: 'react', emoji: 'fire' });
    expect(directives[1]).toEqual({ type: 'react', emoji: 'thumbsup' });
  });

  it('parses react directive with message attribute', () => {
    const { cleanText, directives } = parseDirectives(
      '<actions><react emoji="eyes" message="456" /></actions>',
    );
    expect(cleanText).toBe('');
    expect(directives).toEqual([
      { type: 'react', emoji: 'eyes', messageId: '456' },
    ]);
  });

  it('ignores react directive without emoji attribute', () => {
    const { cleanText, directives } = parseDirectives('<actions><react message="123" /></actions>');
    expect(cleanText).toBe('');
    expect(directives).toEqual([]);
  });

  it('ignores actions block NOT at start of response', () => {
    const input = 'Some text first <actions><react emoji="eyes" /></actions>';
    const { cleanText, directives } = parseDirectives(input);
    expect(cleanText).toBe(input);
    expect(directives).toEqual([]);
  });

  it('handles leading whitespace before actions block', () => {
    const { cleanText, directives } = parseDirectives(
      ' \n<actions><react emoji="heart" /></actions>\nHello',
    );
    expect(cleanText).toBe('Hello');
    expect(directives).toEqual([{ type: 'react', emoji: 'heart' }]);
  });

  it('ignores incomplete/malformed actions block', () => {
    const input = '<actions><react emoji="eyes" />';
    const { cleanText, directives } = parseDirectives(input);
    expect(cleanText).toBe(input);
    expect(directives).toEqual([]);
  });

  it('handles actions-only response (no text after)', () => {
    const { cleanText, directives } = parseDirectives('<actions><react emoji="thumbsup" /></actions>');
    expect(cleanText).toBe('');
    expect(directives).toHaveLength(1);
  });

  it('preserves non-directive XML-like content in text', () => {
    const input = 'Use <code> tags for formatting';
    const { cleanText, directives } = parseDirectives(input);
    expect(cleanText).toBe(input);
    expect(directives).toEqual([]);
  });

  it('handles no-space before self-closing slash in child directives', () => {
    const { cleanText, directives } = parseDirectives('<actions><react emoji="eyes"/></actions>');
    expect(cleanText).toBe('');
    expect(directives).toEqual([{ type: 'react', emoji: 'eyes' }]);
  });

  it('ignores unknown child tag names inside actions block', () => {
    const { cleanText, directives } = parseDirectives('<actions><unknown emoji="test" /></actions>');
    expect(cleanText).toBe('');
    expect(directives).toEqual([]);
  });
});
|
||||
|
||||
// Streaming helper: only a complete block at the very start is removed.
describe('stripActionsBlock', () => {
  it('strips a complete actions block', () => {
    const stripped = stripActionsBlock('<actions><react emoji="eyes" /></actions>\nHello');
    expect(stripped).toBe('Hello');
  });

  it('returns text unchanged if no actions block', () => {
    const stripped = stripActionsBlock('Hello world');
    expect(stripped).toBe('Hello world');
  });

  it('returns empty string for actions-only text', () => {
    const stripped = stripActionsBlock('<actions><react emoji="eyes" /></actions>');
    expect(stripped).toBe('');
  });

  it('does not strip actions block in middle of text', () => {
    const input = 'Before <actions><react emoji="eyes" /></actions> After';
    const stripped = stripActionsBlock(input);
    expect(stripped).toBe(input);
  });
});
|
||||
113
src/core/directives.ts
Normal file
113
src/core/directives.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
/**
|
||||
* XML Directive Parser
|
||||
*
|
||||
* Parses an <actions> block at the start of agent text responses.
|
||||
* Extends the existing <no-reply/> pattern to support richer actions
|
||||
* (reactions, file sends, etc.) without requiring tool calls.
|
||||
*
|
||||
* The <actions> block must appear at the start of the response:
|
||||
*
|
||||
* <actions>
|
||||
* <react emoji="thumbsup" />
|
||||
* </actions>
|
||||
* Great idea!
|
||||
*
|
||||
* → cleanText: "Great idea!"
|
||||
* → directives: [{ type: 'react', emoji: 'thumbsup' }]
|
||||
*/
|
||||
|
||||
/** Directive asking the bot to add an emoji reaction to a message. */
export interface ReactDirective {
  /** Discriminant tag for this directive kind. */
  type: 'react';
  /** Emoji to react with, e.g. a name such as `thumbsup` or a unicode character. */
  emoji: string;
  /** Id of the message to react to; absent when the directive names no target. */
  messageId?: string;
}

// Every directive kind the parser can emit — extend with more directive types later
export type Directive = ReactDirective;

/** Outcome of parsing an agent response for directives. */
export interface ParseResult {
  /** Response text that remains for delivery. */
  cleanText: string;
  /** Directives that were recognised, in document order. */
  directives: Directive[];
}
|
||||
|
||||
/**
 * Wrapper pattern: optional leading whitespace, then `<actions>`, then the
 * shortest possible body (may span lines), then `</actions>`.
 * Anchored to the start of the input; group 1 is the body between the tags.
 */
const ACTIONS_BLOCK_REGEX = /^\s*<actions>([\s\S]*?)<\/actions>/;

/**
 * Child pattern: a self-closing `<react ... />` tag carrying one or more
 * double-quoted attributes. Group 1 is the tag name, group 2 the raw
 * attribute text. Global, so it carries `lastIndex` state between calls.
 */
const CHILD_DIRECTIVE_REGEX = /<(react)\s+((?:[a-zA-Z-]+="[^"]*"\s*)+)\s*\/>/g;
|
||||
|
||||
/**
 * Turn an attribute string such as `emoji="eyes" message="123"` into a
 * name → value map.
 *
 * Only `name="value"` pairs with double quotes are recognised; names may
 * contain ASCII letters and hyphens. When a name repeats, the last value wins.
 *
 * @param attrString - Raw attribute text taken from inside a tag.
 * @returns Map of attribute names to their unquoted values (empty if none match).
 */
function parseAttributes(attrString: string): Record<string, string> {
  const parsed: Record<string, string> = {};
  for (const [, name, value] of attrString.matchAll(/([a-zA-Z-]+)="([^"]*)"/g)) {
    parsed[name] = value;
  }
  return parsed;
}
|
||||
|
||||
/**
 * Extract directives from the body of an `<actions>` block.
 *
 * Every tag matched by CHILD_DIRECTIVE_REGEX is inspected in document order.
 * A `react` tag yields a directive only when its `emoji` attribute is
 * non-empty; a non-empty `message` attribute becomes `messageId`.
 *
 * @param block - Text found between `<actions>` and `</actions>`.
 * @returns The recognised directives (possibly empty).
 */
function parseChildDirectives(block: string): Directive[] {
  // The pattern is global and shared; matchAll begins scanning at its
  // lastIndex, so pin that to the start of the input first.
  CHILD_DIRECTIVE_REGEX.lastIndex = 0;

  const found: Directive[] = [];
  for (const [, tagName, attrString] of block.matchAll(CHILD_DIRECTIVE_REGEX)) {
    if (tagName !== 'react') {
      continue;
    }

    const { emoji, message } = parseAttributes(attrString);
    if (!emoji) {
      continue;
    }

    const reaction: Directive = { type: 'react', emoji };
    if (message) {
      reaction.messageId = message;
    }
    found.push(reaction);
  }

  return found;
}
|
||||
|
||||
/**
 * Split an agent response into its directives and the text to deliver.
 *
 * Only an `<actions>...</actions>` block at the very start of the response
 * (leading whitespace allowed) is honoured. When one is found, the block is
 * removed, the remaining text is trimmed, and the block body is parsed for
 * child directives. When none is found, the input is returned untouched
 * (not trimmed) with an empty directive list.
 *
 * @param text - Full response text produced by the agent.
 * @returns The cleaned text together with the parsed directives.
 */
export function parseDirectives(text: string): ParseResult {
  const leadingBlock = ACTIONS_BLOCK_REGEX.exec(text);
  if (leadingBlock === null) {
    return { cleanText: text, directives: [] };
  }

  const [wholeBlock, innerXml] = leadingBlock;
  return {
    cleanText: text.slice(wholeBlock.length).trim(),
    directives: parseChildDirectives(innerXml),
  };
}
|
||||
|
||||
/**
 * Remove a complete leading `<actions>...</actions>` block so partially
 * streamed text can be displayed without the raw XML.
 *
 * The result is always trimmed: either the text that follows the block, or —
 * when no complete leading block exists — the whole input with surrounding
 * whitespace removed.
 *
 * @param text - Response text accumulated so far.
 * @returns The displayable text, trimmed.
 */
export function stripActionsBlock(text: string): string {
  const remainder = text.replace(ACTIONS_BLOCK_REGEX, '');
  return remainder.trim();
}
|
||||
@@ -181,16 +181,20 @@ describe('formatMessageEnvelope', () => {
|
||||
expect(result).toContain('**Mentioned**: yes');
|
||||
});
|
||||
|
||||
it('includes no-reply hint for group chats', () => {
|
||||
it('includes directives hint for group chats', () => {
|
||||
const msg = createMessage({ isGroup: true });
|
||||
const result = formatMessageEnvelope(msg);
|
||||
expect(result).toContain('Response Directives');
|
||||
expect(result).toContain('<no-reply/>');
|
||||
expect(result).toContain('<actions>');
|
||||
});
|
||||
|
||||
it('omits no-reply hint for DMs', () => {
|
||||
it('includes directives hint for DMs', () => {
|
||||
const msg = createMessage({ isGroup: false });
|
||||
const result = formatMessageEnvelope(msg);
|
||||
expect(result).not.toContain('no-reply');
|
||||
expect(result).toContain('Response Directives');
|
||||
expect(result).toContain('<no-reply/>');
|
||||
expect(result).toContain('<actions>');
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -256,7 +256,7 @@ function buildChatContextLines(msg: InboundMessage, options: EnvelopeOptions): s
|
||||
if (msg.wasMentioned) {
|
||||
lines.push(`- **Mentioned**: yes`);
|
||||
}
|
||||
lines.push(`- **Hint**: To skip replying, respond with exactly: \`<no-reply/>\``);
|
||||
lines.push(`- **Hint**: See Response Directives below for \`<no-reply/>\` and \`<actions>\``);
|
||||
} else {
|
||||
lines.push(`- **Type**: Direct message`);
|
||||
}
|
||||
@@ -351,6 +351,14 @@ export function formatMessageEnvelope(
|
||||
sections.push(`## Chat Context\n${contextLines.join('\n')}`);
|
||||
}
|
||||
|
||||
// Response directives hint
|
||||
const directiveLines = [
|
||||
`- To skip replying: \`<no-reply/>\``,
|
||||
`- To perform actions: wrap in \`<actions>\` at the start of your response`,
|
||||
` Example: \`<actions><react emoji="thumbsup" /></actions>Your text here\``,
|
||||
];
|
||||
sections.push(`## Response Directives\n${directiveLines.join('\n')}`);
|
||||
|
||||
// Build the full system-reminder block
|
||||
const reminderContent = sections.join('\n\n');
|
||||
const reminder = `${SYSTEM_REMINDER_OPEN}\n${reminderContent}\n${SYSTEM_REMINDER_CLOSE}`;
|
||||
|
||||
@@ -85,6 +85,36 @@ This suppresses the message so nothing is sent to the user. Use this for:
|
||||
|
||||
When in doubt, prefer \`<no-reply/>\` over a low-value response. Users appreciate an agent that knows when to stay quiet.
|
||||
|
||||
## Response Directives
|
||||
|
||||
You can include an \`<actions>\` block at the **start** of your response to perform actions alongside your reply. The entire block is stripped before your message is sent.
|
||||
|
||||
\`\`\`
|
||||
<actions>
|
||||
<react emoji="thumbsup" />
|
||||
</actions>
|
||||
Great idea!
|
||||
\`\`\`
|
||||
|
||||
This sends "Great idea!" and reacts with thumbsup.
|
||||
|
||||
### Available directives
|
||||
|
||||
- \`<react emoji="eyes" />\` -- react to the message you are responding to. Emoji names (eyes, thumbsup, heart, fire, tada, clap) or unicode.
|
||||
- \`<react emoji="fire" message="123" />\` -- react to a specific message by ID.
|
||||
|
||||
### Actions-only response
|
||||
|
||||
An \`<actions>\` block with no text after it executes silently (nothing sent to the user), like \`<no-reply/>\`:
|
||||
|
||||
\`\`\`
|
||||
<actions>
|
||||
<react emoji="eyes" />
|
||||
</actions>
|
||||
\`\`\`
|
||||
|
||||
Prefer directives over tool calls for simple actions like reactions. They are faster and cheaper.
|
||||
|
||||
## Available Channels
|
||||
|
||||
- **telegram** - Telegram messenger
|
||||
|
||||
Reference in New Issue
Block a user