diff --git a/src/channels/types.ts b/src/channels/types.ts index 899a695..c75fcc3 100644 --- a/src/channels/types.ts +++ b/src/channels/types.ts @@ -26,6 +26,7 @@ export interface ChannelAdapter { // Capabilities (optional) supportsEditing?(): boolean; sendFile?(file: OutboundFile): Promise<{ messageId: string }>; + addReaction?(chatId: string, messageId: string, emoji: string): Promise; getDmPolicy?(): string; // Event handlers (set by bot core) diff --git a/src/channels/whatsapp/index.ts b/src/channels/whatsapp/index.ts index 42953fb..4c37a85 100644 --- a/src/channels/whatsapp/index.ts +++ b/src/channels/whatsapp/index.ts @@ -995,6 +995,7 @@ export class WhatsAppAdapter implements ChannelAdapter { async addReaction(_chatId: string, _messageId: string, _emoji: string): Promise { // WhatsApp reactions via Baileys are not supported here yet + console.warn('[WhatsApp] addReaction not implemented -- directive skipped'); } async sendFile(file: OutboundFile): Promise<{ messageId: string }> { diff --git a/src/core/bot.ts b/src/core/bot.ts index 2170bbe..d399099 100644 --- a/src/core/bot.ts +++ b/src/core/bot.ts @@ -16,6 +16,7 @@ import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOpt import type { GroupBatcher } from './group-batcher.js'; import { loadMemoryBlocks } from './memory.js'; import { SYSTEM_PROMPT } from './system-prompt.js'; +import { parseDirectives, stripActionsBlock, type Directive } from './directives.js'; /** @@ -151,6 +152,38 @@ export class LettaBot implements AgentSession { // Session lifecycle helpers // ========================================================================= + /** + * Execute parsed directives (reactions, etc.) via the channel adapter. + * Returns true if any directive was successfully executed. + */ + private async executeDirectives( + directives: Directive[], + adapter: ChannelAdapter, + chatId: string, + fallbackMessageId?: string, + ): Promise { + let acted = false; + for (const directive of directives) { + if (directive.type === 'react') { + const targetId = directive.messageId || fallbackMessageId; + if (!adapter.addReaction) { + console.warn(`[Bot] Directive react skipped: ${adapter.name} does not support addReaction`); + continue; + } + if (targetId) { + try { + await adapter.addReaction(chatId, targetId, directive.emoji); + acted = true; + console.log(`[Bot] Directive: reacted with ${directive.emoji}`); + } catch (err) { + console.warn('[Bot] Directive react failed:', err instanceof Error ? err.message : err); + } + } + } + } + return acted; + } + /** * Create or resume a session with automatic fallback. * @@ -563,6 +596,14 @@ export class LettaBot implements AgentSession { lastUpdate = Date.now(); return; } + // Parse and execute XML directives before sending + if (response.trim()) { + const { cleanText, directives } = parseDirectives(response); + response = cleanText; + if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) { + sentAnyMessage = true; + } + } if (response.trim()) { try { if (messageId) { @@ -628,14 +669,20 @@ export class LettaBot implements AgentSession { response += streamMsg.content || ''; // Live-edit streaming for channels that support it + // Hold back streaming edits while response could still be or block const canEdit = adapter.supportsEditing?.() ?? true; - const mayBeNoReply = ''.startsWith(response.trim()); - if (canEdit && !mayBeNoReply && Date.now() - lastUpdate > 500 && response.length > 0) { + const trimmed = response.trim(); + const mayBeHidden = ''.startsWith(trimmed) + || ''.startsWith(trimmed) + || (trimmed.startsWith('')); + // Strip any completed block from the streaming text + const streamText = stripActionsBlock(response).trim(); + if (canEdit && !mayBeHidden && streamText.length > 0 && Date.now() - lastUpdate > 500) { try { if (messageId) { - await adapter.editMessage(msg.chatId, messageId, response); + await adapter.editMessage(msg.chatId, messageId, streamText); } else { - const result = await adapter.sendMessage({ chatId: msg.chatId, text: response, threadId: msg.threadId }); + const result = await adapter.sendMessage({ chatId: msg.chatId, text: streamText, threadId: msg.threadId }); messageId = result.messageId; sentAnyMessage = true; } @@ -686,6 +733,15 @@ export class LettaBot implements AgentSession { response = ''; } + // Parse and execute XML directives (e.g. ) + if (response.trim()) { + const { cleanText, directives } = parseDirectives(response); + response = cleanText; + if (await this.executeDirectives(directives, adapter, msg.chatId, msg.messageId)) { + sentAnyMessage = true; + } + } + // Detect unsupported multimodal if (Array.isArray(messageToSend) && response.includes('[Image omitted]')) { console.warn('[Bot] Model does not support images -- consider a vision-capable model or features.inlineImages: false'); diff --git a/src/core/directives.test.ts b/src/core/directives.test.ts new file mode 100644 index 0000000..1614033 --- /dev/null +++ b/src/core/directives.test.ts @@ -0,0 +1,121 @@ +import { describe, it, expect } from 'vitest'; +import { parseDirectives, stripActionsBlock } from './directives.js'; + +describe('parseDirectives', () => { + it('returns text unchanged when no actions block present', () => { + const result = parseDirectives('Hello world'); + expect(result.cleanText).toBe('Hello world'); + expect(result.directives).toEqual([]); + }); + + it('parses a single react directive in actions block', () => { + const result = parseDirectives('\n \n'); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([{ type: 'react', emoji: 'eyes' }]); + }); + + it('parses react directive with unicode emoji', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([{ type: 'react', emoji: '👀' }]); + }); + + it('extracts text after actions block', () => { + const result = parseDirectives('\n \n\nGreat idea!'); + expect(result.cleanText).toBe('Great idea!'); + expect(result.directives).toEqual([{ type: 'react', emoji: 'thumbsup' }]); + }); + + it('handles multiline text after actions block', () => { + const result = parseDirectives('\nLine 1\nLine 2'); + expect(result.cleanText).toBe('Line 1\nLine 2'); + expect(result.directives).toEqual([{ type: 'react', emoji: 'fire' }]); + }); + + it('parses multiple directives in one actions block', () => { + const input = '\n \n \n\nNice!'; + const result = parseDirectives(input); + expect(result.cleanText).toBe('Nice!'); + expect(result.directives).toHaveLength(2); + expect(result.directives[0]).toEqual({ type: 'react', emoji: 'fire' }); + expect(result.directives[1]).toEqual({ type: 'react', emoji: 'thumbsup' }); + }); + + it('parses react directive with message attribute', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([ + { type: 'react', emoji: 'eyes', messageId: '456' }, + ]); + }); + + it('ignores react directive without emoji attribute', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([]); + }); + + it('ignores actions block NOT at start of response', () => { + const input = 'Some text first '; + const result = parseDirectives(input); + expect(result.cleanText).toBe(input); + expect(result.directives).toEqual([]); + }); + + it('handles leading whitespace before actions block', () => { + const result = parseDirectives(' \n\nHello'); + expect(result.cleanText).toBe('Hello'); + expect(result.directives).toEqual([{ type: 'react', emoji: 'heart' }]); + }); + + it('ignores incomplete/malformed actions block', () => { + const input = ''; + const result = parseDirectives(input); + expect(result.cleanText).toBe(input); + expect(result.directives).toEqual([]); + }); + + it('handles actions-only response (no text after)', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toHaveLength(1); + }); + + it('preserves non-directive XML-like content in text', () => { + const input = 'Use tags for formatting'; + const result = parseDirectives(input); + expect(result.cleanText).toBe(input); + expect(result.directives).toEqual([]); + }); + + it('handles no-space before self-closing slash in child directives', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([{ type: 'react', emoji: 'eyes' }]); + }); + + it('ignores unknown child tag names inside actions block', () => { + const result = parseDirectives(''); + expect(result.cleanText).toBe(''); + expect(result.directives).toEqual([]); + }); +}); + +describe('stripActionsBlock', () => { + it('strips a complete actions block', () => { + expect(stripActionsBlock('\nHello')).toBe('Hello'); + }); + + it('returns text unchanged if no actions block', () => { + expect(stripActionsBlock('Hello world')).toBe('Hello world'); + }); + + it('returns empty string for actions-only text', () => { + expect(stripActionsBlock('')).toBe(''); + }); + + it('does not strip actions block in middle of text', () => { + const input = 'Before After'; + expect(stripActionsBlock(input)).toBe(input); + }); +}); diff --git a/src/core/directives.ts b/src/core/directives.ts new file mode 100644 index 0000000..f112b7b --- /dev/null +++ b/src/core/directives.ts @@ -0,0 +1,113 @@ +/** + * XML Directive Parser + * + * Parses an block at the start of agent text responses. + * Extends the existing pattern to support richer actions + * (reactions, file sends, etc.) without requiring tool calls. + * + * The block must appear at the start of the response: + * + * + * + * + * Great idea! + * + * → cleanText: "Great idea!" + * → directives: [{ type: 'react', emoji: 'thumbsup' }] + */ + +export interface ReactDirective { + type: 'react'; + emoji: string; + messageId?: string; +} + +// Union type — extend with more directive types later +export type Directive = ReactDirective; + +export interface ParseResult { + cleanText: string; + directives: Directive[]; +} + +/** + * Match the ... wrapper at the start of the response. + * Captures the inner content of the block. + */ +const ACTIONS_BLOCK_REGEX = /^\s*([\s\S]*?)<\/actions>/; + +/** + * Match self-closing child directive tags inside the actions block. + * Captures the tag name and the full attributes string. + */ +const CHILD_DIRECTIVE_REGEX = /<(react)\s+((?:[a-zA-Z-]+="[^"]*"\s*)+)\s*\/>/g; + +/** + * Parse a single attribute string like: emoji="eyes" message="123" + */ +function parseAttributes(attrString: string): Record { + const attrs: Record = {}; + const attrRegex = /([a-zA-Z-]+)="([^"]*)"/g; + let match; + while ((match = attrRegex.exec(attrString)) !== null) { + attrs[match[1]] = match[2]; + } + return attrs; +} + +/** + * Parse child directives from the inner content of an block. + */ +function parseChildDirectives(block: string): Directive[] { + const directives: Directive[] = []; + let match; + + // Reset regex state (global flag) + CHILD_DIRECTIVE_REGEX.lastIndex = 0; + + while ((match = CHILD_DIRECTIVE_REGEX.exec(block)) !== null) { + const [, tagName, attrString] = match; + + if (tagName === 'react') { + const attrs = parseAttributes(attrString); + if (attrs.emoji) { + directives.push({ + type: 'react', + emoji: attrs.emoji, + ...(attrs.message ? { messageId: attrs.message } : {}), + }); + } + } + } + + return directives; +} + +/** + * Parse XML directives from agent response text. + * + * Looks for an ... block at the start of the response. + * Returns the cleaned text (block stripped) and an array of parsed directives. + * If no block is found, the text is returned unchanged. + */ +export function parseDirectives(text: string): ParseResult { + const match = text.match(ACTIONS_BLOCK_REGEX); + + if (!match) { + return { cleanText: text, directives: [] }; + } + + const actionsContent = match[1]; + const cleanText = text.slice(match[0].length).trim(); + const directives = parseChildDirectives(actionsContent); + + return { cleanText, directives }; +} + +/** + * Strip a leading ... block from text for streaming display. + * Returns the text after the block, or the original text if no complete block found. + */ +export function stripActionsBlock(text: string): string { + return text.replace(ACTIONS_BLOCK_REGEX, '').trim(); +} diff --git a/src/core/formatter.test.ts b/src/core/formatter.test.ts index 793aea2..f973538 100644 --- a/src/core/formatter.test.ts +++ b/src/core/formatter.test.ts @@ -181,16 +181,20 @@ describe('formatMessageEnvelope', () => { expect(result).toContain('**Mentioned**: yes'); }); - it('includes no-reply hint for group chats', () => { + it('includes directives hint for group chats', () => { const msg = createMessage({ isGroup: true }); const result = formatMessageEnvelope(msg); + expect(result).toContain('Response Directives'); expect(result).toContain(''); + expect(result).toContain(''); }); - it('omits no-reply hint for DMs', () => { + it('includes directives hint for DMs', () => { const msg = createMessage({ isGroup: false }); const result = formatMessageEnvelope(msg); - expect(result).not.toContain('no-reply'); + expect(result).toContain('Response Directives'); + expect(result).toContain(''); + expect(result).toContain(''); }); }); diff --git a/src/core/formatter.ts b/src/core/formatter.ts index 18851e5..f314b19 100644 --- a/src/core/formatter.ts +++ b/src/core/formatter.ts @@ -256,7 +256,7 @@ function buildChatContextLines(msg: InboundMessage, options: EnvelopeOptions): s if (msg.wasMentioned) { lines.push(`- **Mentioned**: yes`); } - lines.push(`- **Hint**: To skip replying, respond with exactly: \`\``); + lines.push(`- **Hint**: See Response Directives below for \`\` and \`\``); } else { lines.push(`- **Type**: Direct message`); } @@ -351,6 +351,14 @@ export function formatMessageEnvelope( sections.push(`## Chat Context\n${contextLines.join('\n')}`); } + // Response directives hint + const directiveLines = [ + `- To skip replying: \`\``, + `- To perform actions: wrap in \`\` at the start of your response`, + ` Example: \`Your text here\``, + ]; + sections.push(`## Response Directives\n${directiveLines.join('\n')}`); + // Build the full system-reminder block const reminderContent = sections.join('\n\n'); const reminder = `${SYSTEM_REMINDER_OPEN}\n${reminderContent}\n${SYSTEM_REMINDER_CLOSE}`; diff --git a/src/core/system-prompt.ts b/src/core/system-prompt.ts index c71883f..0c1d6a7 100644 --- a/src/core/system-prompt.ts +++ b/src/core/system-prompt.ts @@ -85,6 +85,36 @@ This suppresses the message so nothing is sent to the user. Use this for: When in doubt, prefer \`\` over a low-value response. Users appreciate an agent that knows when to stay quiet. +## Response Directives + +You can include an \`\` block at the **start** of your response to perform actions alongside your reply. The entire block is stripped before your message is sent. + +\`\`\` + + + +Great idea! +\`\`\` + +This sends "Great idea!" and reacts with thumbsup. + +### Available directives + +- \`\` -- react to the message you are responding to. Emoji names (eyes, thumbsup, heart, fire, tada, clap) or unicode. +- \`\` -- react to a specific message by ID. + +### Actions-only response + +An \`\` block with no text after it executes silently (nothing sent to the user), like \`\`: + +\`\`\` + + + +\`\`\` + +Prefer directives over tool calls for simple actions like reactions. They are faster and cheaper. + ## Available Channels - **telegram** - Telegram messenger