fix(core): parse <actions> blocks anywhere in responses (#579)

Co-authored-by: Letta Code <noreply@letta.com>
This commit is contained in:
Cameron
2026-03-12 17:02:06 -07:00
committed by GitHub
parent 1d636d6fa9
commit 00a0433358
3 changed files with 117 additions and 26 deletions

View File

@@ -20,7 +20,13 @@ import { getAgentSkillExecutableDirs, isVoiceMemoConfigured } from '../skills/lo
import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOptions } from './formatter.js';
import type { GroupBatcher } from './group-batcher.js';
import { redactOutbound } from './redact.js';
import { parseDirectives, stripActionsBlock, type Directive } from './directives.js';
import {
hasIncompleteActionsTag,
hasUnclosedActionsBlock,
parseDirectives,
stripActionsBlock,
type Directive,
} from './directives.js';
import { resolveEmoji } from './emoji.js';
import { SessionManager } from './session-manager.js';
import { createDisplayPipeline, type DisplayEvent, type CompleteEvent, type ErrorEvent } from './display-pipeline.js';
@@ -1437,8 +1443,8 @@ export class LettaBot implements AgentSession {
const canEdit = adapter.supportsEditing?.() ?? false;
const trimmed = response.trim();
const mayBeHidden = '<no-reply/>'.startsWith(trimmed)
|| '<actions>'.startsWith(trimmed)
|| (trimmed.startsWith('<actions') && !trimmed.includes('</actions>'));
|| hasIncompleteActionsTag(response)
|| hasUnclosedActionsBlock(response);
const streamText = stripActionsBlock(response).trim();
if (canEdit && !mayBeHidden && !suppressDelivery && !this.cancelledKeys.has(convKey)
&& streamText.length > 0 && Date.now() - lastUpdate > 1500 && Date.now() > rateLimitedUntil) {

View File

@@ -1,5 +1,10 @@
import { describe, it, expect } from 'vitest';
import { parseDirectives, stripActionsBlock } from './directives.js';
import {
hasIncompleteActionsTag,
hasUnclosedActionsBlock,
parseDirectives,
stripActionsBlock,
} from './directives.js';
describe('parseDirectives', () => {
it('returns text unchanged when no actions block present', () => {
@@ -113,11 +118,34 @@ describe('parseDirectives', () => {
expect(result.directives).toEqual([]);
});
it('ignores actions block NOT at start of response', () => {
it('parses actions block in middle of response', () => {
const input = 'Some text first <actions><react emoji="eyes" /></actions>';
const result = parseDirectives(input);
expect(result.cleanText).toBe(input);
expect(result.directives).toEqual([]);
expect(result.cleanText).toBe('Some text first');
expect(result.directives).toEqual([{ type: 'react', emoji: 'eyes' }]);
});
it('parses trailing actions block after visible text', () => {
const input = 'Message complete. <actions><react emoji="thumbsup" /></actions>';
const result = parseDirectives(input);
expect(result.cleanText).toBe('Message complete.');
expect(result.directives).toEqual([{ type: 'react', emoji: 'thumbsup' }]);
});
it('parses and executes directives across multiple actions blocks in source order', () => {
const input = [
'Start',
'<actions><react emoji="eyes" /></actions>',
'Middle',
'<actions><voice>Hello</voice></actions>',
'End',
].join(' ');
const result = parseDirectives(input);
expect(result.cleanText).toBe('Start Middle End');
expect(result.directives).toEqual([
{ type: 'react', emoji: 'eyes' },
{ type: 'voice', text: 'Hello' },
]);
});
it('handles leading whitespace before actions block', () => {
@@ -339,8 +367,37 @@ describe('stripActionsBlock', () => {
expect(stripActionsBlock('<actions><react emoji="eyes" /></actions>')).toBe('');
});
it('does not strip actions block in middle of text', () => {
it('strips actions block in middle of text', () => {
const input = 'Before <actions><react emoji="eyes" /></actions> After';
expect(stripActionsBlock(input)).toBe(input);
expect(stripActionsBlock(input)).toBe('Before After');
});
it('strips multiple actions blocks in one response', () => {
const input = 'A <actions><react emoji="eyes" /></actions> B <actions><voice>Hello</voice></actions> C';
expect(stripActionsBlock(input)).toBe('A B C');
});
});
// Covers hasUnclosedActionsBlock: reports whether an <actions> opener is
// present without a closing tag after it.
describe('hasUnclosedActionsBlock', () => {
// Opening tag present, closing tag absent -> true.
it('detects unmatched opening actions tag', () => {
expect(hasUnclosedActionsBlock('Before <actions><react emoji="eyes" />')).toBe(true);
});
// Opening tag followed by its closing tag -> false, even with trailing text.
it('returns false for complete actions block', () => {
expect(hasUnclosedActionsBlock('Before <actions><react emoji="eyes" /></actions> After')).toBe(false);
});
});
// Covers hasIncompleteActionsTag: reports whether the text ends in a
// partially written <actions> or </actions> tag.
describe('hasIncompleteActionsTag', () => {
// Tail '<act' is a prefix of '<actions>' -> true.
it('detects partial opening actions tag while streaming', () => {
expect(hasIncompleteActionsTag('Before <act')).toBe(true);
});
// Tail '</act' is a prefix of '</actions>' -> true.
it('detects partial closing actions tag while streaming', () => {
expect(hasIncompleteActionsTag('Before </act')).toBe(true);
});
// Every '<' in the input is followed by a '>' -> nothing is dangling -> false.
it('returns false when no partial actions tag is present', () => {
expect(hasIncompleteActionsTag('Before <code>ok</code>')).toBe(false);
});
});

View File

@@ -1,11 +1,11 @@
/**
* XML Directive Parser
*
* Parses <actions> blocks from agent text responses.
* Extends the existing <no-reply/> pattern to support richer actions
* (reactions, file sends, etc.) without requiring tool calls.
*
* <actions> blocks can appear anywhere in the response:
*
* <actions>
* <react emoji="thumbsup" />
@@ -53,10 +53,14 @@ export interface ParseResult {
}
/**
* Match the <actions>...</actions> wrapper at the start of the response.
* Captures the inner content of the block.
* Match complete <actions>...</actions> wrappers anywhere in the response.
* Captures the inner content of each block.
*/
const ACTIONS_BLOCK_REGEX = /^\s*<actions>([\s\S]*?)<\/actions>/;
const ACTIONS_BLOCK_REGEX_SOURCE = '<actions>([\\s\\S]*?)<\\/actions>';

/**
 * Build a RegExp that matches one complete <actions>...</actions> block.
 *
 * Capture group 1 holds the block's inner content. The quantifier is lazy,
 * so each match ends at the nearest closing tag.
 *
 * A new RegExp instance is constructed on every call, so no `lastIndex`
 * state is shared between callers.
 *
 * @param flags - RegExp flags; defaults to `'g'` so every block is matched.
 * @returns A freshly constructed RegExp.
 */
function createActionsBlockRegex(flags = 'g'): RegExp {
  const pattern = ACTIONS_BLOCK_REGEX_SOURCE;
  return new RegExp(pattern, flags);
}
/**
* Match supported directive tags inside the actions block in source order.
@@ -156,28 +160,52 @@ function parseChildDirectives(block: string): Directive[] {
/**
 * Parse XML directives from agent response text.
 *
 * Looks for complete <actions>...</actions> blocks anywhere in the response.
 * Every complete block is removed from the text and its inner content is
 * handed to parseChildDirectives; the resulting directives are collected
 * block by block, left to right.
 *
 * @param text - Raw agent response text.
 * @returns The cleaned text and the collected directives. When at least one
 *   complete block is found, the cleaned text is trimmed. When none is found,
 *   the text is returned unchanged (not trimmed) with an empty directive list.
 */
export function parseDirectives(text: string): ParseResult {
  const blockRegex = createActionsBlockRegex();
  if (!blockRegex.test(text)) {
    return { cleanText: text, directives: [] };
  }

  const directives: Directive[] = [];
  // test() on a global regex advances lastIndex, so the replace pass uses a
  // fresh instance instead of reusing blockRegex.
  const cleanText = text
    .replace(createActionsBlockRegex(), (_match, actionsContent: string) => {
      directives.push(...parseChildDirectives(actionsContent));
      return '';
    })
    .trim();

  return { cleanText, directives };
}
/**
 * Report whether the text contains an opening <actions> tag that has no
 * closing </actions> tag after it. Used during streaming to avoid flashing
 * raw XML.
 *
 * Only the last occurrence of each tag is compared.
 *
 * @param text - Response text received so far.
 * @returns true when the last `<actions>` sits after the last `</actions>`
 *   (or no `</actions>` exists); false otherwise, including when there is
 *   no `<actions>` at all.
 */
export function hasUnclosedActionsBlock(text: string): boolean {
  const openIdx = text.lastIndexOf('<actions>');
  const closeIdx = text.lastIndexOf('</actions>');
  return openIdx >= 0 && closeIdx < openIdx;
}
/**
 * Report whether the text ends in a partially written actions tag, i.e. the
 * final '<' has no '>' after it and everything from that '<' to the end of
 * the text is a prefix of `<actions>` or `</actions>`.
 *
 * @param text - Response text received so far.
 * @returns true when the tail could still grow into an opening or closing
 *   actions tag; false otherwise.
 */
export function hasIncompleteActionsTag(text: string): boolean {
  const tagStart = text.lastIndexOf('<');
  // No '<' at all, or the last '<' already has a '>' after it: nothing dangles.
  if (tagStart === -1 || text.indexOf('>', tagStart) !== -1) {
    return false;
  }
  const dangling = text.slice(tagStart);
  return ['<actions>', '</actions>'].some((tag) => tag.startsWith(dangling));
}
/**
 * Strip complete <actions>...</actions> blocks from text for streaming display.
 *
 * Every complete block, wherever it appears, is removed. Text of a block
 * that has not been closed yet is left in place.
 *
 * @param text - Response text received so far.
 * @returns The text with all complete blocks removed and then trimmed. The
 *   trim is applied even when no block was found.
 */
export function stripActionsBlock(text: string): string {
  return text.replace(createActionsBlockRegex(), '').trim();
}