fix(core): parse <actions> blocks anywhere in responses (#579)

Co-authored-by: Letta Code <noreply@letta.com>
This commit is contained in:
Cameron
2026-03-12 17:02:06 -07:00
committed by GitHub
parent 1d636d6fa9
commit 00a0433358
3 changed files with 117 additions and 26 deletions

View File

@@ -20,7 +20,13 @@ import { getAgentSkillExecutableDirs, isVoiceMemoConfigured } from '../skills/lo
import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOptions } from './formatter.js'; import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOptions } from './formatter.js';
import type { GroupBatcher } from './group-batcher.js'; import type { GroupBatcher } from './group-batcher.js';
import { redactOutbound } from './redact.js'; import { redactOutbound } from './redact.js';
import { parseDirectives, stripActionsBlock, type Directive } from './directives.js'; import {
hasIncompleteActionsTag,
hasUnclosedActionsBlock,
parseDirectives,
stripActionsBlock,
type Directive,
} from './directives.js';
import { resolveEmoji } from './emoji.js'; import { resolveEmoji } from './emoji.js';
import { SessionManager } from './session-manager.js'; import { SessionManager } from './session-manager.js';
import { createDisplayPipeline, type DisplayEvent, type CompleteEvent, type ErrorEvent } from './display-pipeline.js'; import { createDisplayPipeline, type DisplayEvent, type CompleteEvent, type ErrorEvent } from './display-pipeline.js';
@@ -1437,8 +1443,8 @@ export class LettaBot implements AgentSession {
const canEdit = adapter.supportsEditing?.() ?? false; const canEdit = adapter.supportsEditing?.() ?? false;
const trimmed = response.trim(); const trimmed = response.trim();
const mayBeHidden = '<no-reply/>'.startsWith(trimmed) const mayBeHidden = '<no-reply/>'.startsWith(trimmed)
|| '<actions>'.startsWith(trimmed) || hasIncompleteActionsTag(response)
|| (trimmed.startsWith('<actions') && !trimmed.includes('</actions>')); || hasUnclosedActionsBlock(response);
const streamText = stripActionsBlock(response).trim(); const streamText = stripActionsBlock(response).trim();
if (canEdit && !mayBeHidden && !suppressDelivery && !this.cancelledKeys.has(convKey) if (canEdit && !mayBeHidden && !suppressDelivery && !this.cancelledKeys.has(convKey)
&& streamText.length > 0 && Date.now() - lastUpdate > 1500 && Date.now() > rateLimitedUntil) { && streamText.length > 0 && Date.now() - lastUpdate > 1500 && Date.now() > rateLimitedUntil) {

View File

@@ -1,5 +1,10 @@
import { describe, it, expect } from 'vitest'; import { describe, it, expect } from 'vitest';
import { parseDirectives, stripActionsBlock } from './directives.js'; import {
hasIncompleteActionsTag,
hasUnclosedActionsBlock,
parseDirectives,
stripActionsBlock,
} from './directives.js';
describe('parseDirectives', () => { describe('parseDirectives', () => {
it('returns text unchanged when no actions block present', () => { it('returns text unchanged when no actions block present', () => {
@@ -113,11 +118,34 @@ describe('parseDirectives', () => {
expect(result.directives).toEqual([]); expect(result.directives).toEqual([]);
}); });
it('ignores actions block NOT at start of response', () => { it('parses actions block in middle of response', () => {
const input = 'Some text first <actions><react emoji="eyes" /></actions>'; const input = 'Some text first <actions><react emoji="eyes" /></actions>';
const result = parseDirectives(input); const result = parseDirectives(input);
expect(result.cleanText).toBe(input); expect(result.cleanText).toBe('Some text first');
expect(result.directives).toEqual([]); expect(result.directives).toEqual([{ type: 'react', emoji: 'eyes' }]);
});
it('parses trailing actions block after visible text', () => {
const input = 'Message complete. <actions><react emoji="thumbsup" /></actions>';
const result = parseDirectives(input);
expect(result.cleanText).toBe('Message complete.');
expect(result.directives).toEqual([{ type: 'react', emoji: 'thumbsup' }]);
});
it('parses and executes directives across multiple actions blocks in source order', () => {
const input = [
'Start',
'<actions><react emoji="eyes" /></actions>',
'Middle',
'<actions><voice>Hello</voice></actions>',
'End',
].join(' ');
const result = parseDirectives(input);
expect(result.cleanText).toBe('Start Middle End');
expect(result.directives).toEqual([
{ type: 'react', emoji: 'eyes' },
{ type: 'voice', text: 'Hello' },
]);
}); });
it('handles leading whitespace before actions block', () => { it('handles leading whitespace before actions block', () => {
@@ -339,8 +367,37 @@ describe('stripActionsBlock', () => {
expect(stripActionsBlock('<actions><react emoji="eyes" /></actions>')).toBe(''); expect(stripActionsBlock('<actions><react emoji="eyes" /></actions>')).toBe('');
}); });
it('does not strip actions block in middle of text', () => { it('strips actions block in middle of text', () => {
const input = 'Before <actions><react emoji="eyes" /></actions> After'; const input = 'Before <actions><react emoji="eyes" /></actions> After';
expect(stripActionsBlock(input)).toBe(input); expect(stripActionsBlock(input)).toBe('Before After');
});
it('strips multiple actions blocks in one response', () => {
const input = 'A <actions><react emoji="eyes" /></actions> B <actions><voice>Hello</voice></actions> C';
expect(stripActionsBlock(input)).toBe('A B C');
});
});
describe('hasUnclosedActionsBlock', () => {
  // Opening tag has streamed in, but its closing tag has not been seen yet.
  it('detects unmatched opening actions tag', () => {
    const partial = 'Before <actions><react emoji="eyes" />';
    expect(hasUnclosedActionsBlock(partial)).toBe(true);
  });
  // Both the opening and the closing tag are present.
  it('returns false for complete actions block', () => {
    const complete = 'Before <actions><react emoji="eyes" /></actions> After';
    expect(hasUnclosedActionsBlock(complete)).toBe(false);
  });
});
describe('hasIncompleteActionsTag', () => {
// The text stops in the middle of an opening tag.
it('detects partial opening actions tag while streaming', () => {
  const streamed = 'Before <act';
  expect(hasIncompleteActionsTag(streamed)).toBe(true);
});
// The text stops in the middle of a closing tag.
it('detects partial closing actions tag while streaming', () => {
  const streamed = 'Before </act';
  expect(hasIncompleteActionsTag(streamed)).toBe(true);
});
// Every '<' in the text is followed by a later '>', so nothing is pending.
it('returns false when no partial actions tag is present', () => {
  const streamed = 'Before <code>ok</code>';
  expect(hasIncompleteActionsTag(streamed)).toBe(false);
}); });
}); });

View File

@@ -1,11 +1,11 @@
/** /**
* XML Directive Parser * XML Directive Parser
* *
* Parses an <actions> block at the start of agent text responses. * Parses <actions> blocks from agent text responses.
* Extends the existing <no-reply/> pattern to support richer actions * Extends the existing <no-reply/> pattern to support richer actions
* (reactions, file sends, etc.) without requiring tool calls. * (reactions, file sends, etc.) without requiring tool calls.
* *
* The <actions> block must appear at the start of the response: * <actions> blocks can appear anywhere in the response:
* *
* <actions> * <actions>
* <react emoji="thumbsup" /> * <react emoji="thumbsup" />
@@ -53,10 +53,14 @@ export interface ParseResult {
} }
/** /**
* Match the <actions>...</actions> wrapper at the start of the response. * Match complete <actions>...</actions> wrappers anywhere in the response.
* Captures the inner content of the block. * Captures the inner content of each block.
*/ */
const ACTIONS_BLOCK_REGEX = /^\s*<actions>([\s\S]*?)<\/actions>/; const ACTIONS_BLOCK_REGEX_SOURCE = '<actions>([\\s\\S]*?)<\\/actions>';
/**
 * Build a RegExp from ACTIONS_BLOCK_REGEX_SOURCE.
 *
 * A new instance is constructed on every call, so callers never share the
 * `lastIndex` state that a global regex carries between uses.
 *
 * @param flags - RegExp flags to compile with; defaults to 'g'.
 * @returns A newly constructed RegExp.
 */
function createActionsBlockRegex(flags = 'g'): RegExp {
  const actionsPattern = new RegExp(ACTIONS_BLOCK_REGEX_SOURCE, flags);
  return actionsPattern;
}
/** /**
* Match supported directive tags inside the actions block in source order. * Match supported directive tags inside the actions block in source order.
@@ -156,28 +160,52 @@ function parseChildDirectives(block: string): Directive[] {
/** /**
* Parse XML directives from agent response text. * Parse XML directives from agent response text.
* *
* Looks for an <actions>...</actions> block at the start of the response. * Looks for complete <actions>...</actions> blocks anywhere in the response.
* Returns the cleaned text (block stripped) and an array of parsed directives. * Returns the cleaned text (all complete blocks stripped) and parsed directives.
* If no <actions> block is found, the text is returned unchanged. * If no complete block is found, the text is returned unchanged.
*/ */
export function parseDirectives(text: string): ParseResult { export function parseDirectives(text: string): ParseResult {
const match = text.match(ACTIONS_BLOCK_REGEX); const blockRegex = createActionsBlockRegex();
if (!blockRegex.test(text)) {
if (!match) {
return { cleanText: text, directives: [] }; return { cleanText: text, directives: [] };
} }
const actionsContent = match[1]; const directives: Directive[] = [];
const cleanText = text.slice(match[0].length).trim(); const cleanText = text.replace(createActionsBlockRegex(), (_, actionsContent: string) => {
const directives = parseChildDirectives(actionsContent); directives.push(...parseChildDirectives(actionsContent));
return '';
}).trim();
return { cleanText, directives }; return { cleanText, directives };
} }
/** /**
* Strip a leading <actions>...</actions> block from text for streaming display. * Returns true when text contains an opening <actions> tag with no matching
* Returns the text after the block, or the original text if no complete block found. * closing tag yet. Used during streaming to avoid flashing raw XML.
*/
export function hasUnclosedActionsBlock(text: string): boolean {
  // Only the LAST opening and closing tags matter: earlier pairs are complete.
  const openAt = text.lastIndexOf('<actions>');
  const closeAt = text.lastIndexOf('</actions>');
  // Unclosed means an opening tag exists and no closing tag comes after it.
  // (closeAt is -1 when there is no closing tag at all.)
  return openAt >= 0 && openAt > closeAt;
}
/**
 * Reports whether the text ends in a fragment that could still grow into an
 * `<actions>` or `</actions>` tag, i.e. the tag has only partially streamed.
 *
 * @param text - The response text received so far.
 * @returns true when the text after the last `<` (with no `>` following it)
 *   is a prefix of `<actions>` or `</actions>`; false otherwise.
 */
export function hasIncompleteActionsTag(text: string): boolean {
  const tagStart = text.lastIndexOf('<');
  // Either there is no '<' at all, or the last '<' is already followed by a '>'.
  if (tagStart === -1 || text.lastIndexOf('>') >= tagStart) {
    return false;
  }
  const fragment = text.slice(tagStart);
  return ['<actions>', '</actions>'].some((tag) => tag.startsWith(fragment));
}
/**
* Strip complete <actions>...</actions> blocks from text for streaming display.
* Returns the text after stripping blocks, or the original text if none found.
*/ */
export function stripActionsBlock(text: string): string { export function stripActionsBlock(text: string): string {
return text.replace(ACTIONS_BLOCK_REGEX, '').trim(); return text.replace(createActionsBlockRegex(), '').trim();
} }