fix(core): parse <actions> blocks anywhere in responses (#579)
Co-authored-by: Letta Code <noreply@letta.com>
This commit is contained in:
@@ -20,7 +20,13 @@ import { getAgentSkillExecutableDirs, isVoiceMemoConfigured } from '../skills/lo
|
||||
import { formatMessageEnvelope, formatGroupBatchEnvelope, type SessionContextOptions } from './formatter.js';
|
||||
import type { GroupBatcher } from './group-batcher.js';
|
||||
import { redactOutbound } from './redact.js';
|
||||
import { parseDirectives, stripActionsBlock, type Directive } from './directives.js';
|
||||
import {
|
||||
hasIncompleteActionsTag,
|
||||
hasUnclosedActionsBlock,
|
||||
parseDirectives,
|
||||
stripActionsBlock,
|
||||
type Directive,
|
||||
} from './directives.js';
|
||||
import { resolveEmoji } from './emoji.js';
|
||||
import { SessionManager } from './session-manager.js';
|
||||
import { createDisplayPipeline, type DisplayEvent, type CompleteEvent, type ErrorEvent } from './display-pipeline.js';
|
||||
@@ -1437,8 +1443,8 @@ export class LettaBot implements AgentSession {
|
||||
const canEdit = adapter.supportsEditing?.() ?? false;
|
||||
const trimmed = response.trim();
|
||||
const mayBeHidden = '<no-reply/>'.startsWith(trimmed)
|
||||
|| '<actions>'.startsWith(trimmed)
|
||||
|| (trimmed.startsWith('<actions') && !trimmed.includes('</actions>'));
|
||||
|| hasIncompleteActionsTag(response)
|
||||
|| hasUnclosedActionsBlock(response);
|
||||
const streamText = stripActionsBlock(response).trim();
|
||||
if (canEdit && !mayBeHidden && !suppressDelivery && !this.cancelledKeys.has(convKey)
|
||||
&& streamText.length > 0 && Date.now() - lastUpdate > 1500 && Date.now() > rateLimitedUntil) {
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { parseDirectives, stripActionsBlock } from './directives.js';
|
||||
import {
|
||||
hasIncompleteActionsTag,
|
||||
hasUnclosedActionsBlock,
|
||||
parseDirectives,
|
||||
stripActionsBlock,
|
||||
} from './directives.js';
|
||||
|
||||
describe('parseDirectives', () => {
|
||||
it('returns text unchanged when no actions block present', () => {
|
||||
@@ -113,11 +118,34 @@ describe('parseDirectives', () => {
|
||||
expect(result.directives).toEqual([]);
|
||||
});
|
||||
|
||||
// A complete <actions> block is honoured even when visible text precedes it:
// the block is removed from cleanText and its directives are returned.
it('parses actions block in middle of response', () => {
  const input = 'Some text first <actions><react emoji="eyes" /></actions>';
  const result = parseDirectives(input);
  // The space left before the removed block is dropped by the final trim().
  expect(result.cleanText).toBe('Some text first');
  expect(result.directives).toEqual([{ type: 'react', emoji: 'eyes' }]);
});
|
||||
|
||||
// A block that closes the response is stripped and parsed like any other.
it('parses trailing actions block after visible text', () => {
  const input = 'Message complete. <actions><react emoji="thumbsup" /></actions>';
  const result = parseDirectives(input);
  expect(result.cleanText).toBe('Message complete.');
  expect(result.directives).toEqual([{ type: 'react', emoji: 'thumbsup' }]);
});
|
||||
|
||||
// Directives from several blocks are collected in the order the blocks appear.
it('parses and executes directives across multiple actions blocks in source order', () => {
  const input = [
    'Start',
    '<actions><react emoji="eyes" /></actions>',
    'Middle',
    '<actions><voice>Hello</voice></actions>',
    'End',
  ].join(' ');
  const result = parseDirectives(input);
  // Each block is replaced by the empty string, so the single spaces that
  // surrounded it remain side by side; only the outer ends are trimmed.
  expect(result.cleanText).toBe('Start  Middle  End');
  expect(result.directives).toEqual([
    { type: 'react', emoji: 'eyes' },
    { type: 'voice', text: 'Hello' },
  ]);
});
|
||||
|
||||
it('handles leading whitespace before actions block', () => {
|
||||
@@ -339,8 +367,37 @@ describe('stripActionsBlock', () => {
|
||||
expect(stripActionsBlock('<actions><react emoji="eyes" /></actions>')).toBe('');
|
||||
});
|
||||
|
||||
// Blocks are removed wherever they occur, not only at the start of the text.
it('strips actions block in middle of text', () => {
  const input = 'Before <actions><react emoji="eyes" /></actions> After';
  // The spaces on either side of the removed block are both preserved.
  expect(stripActionsBlock(input)).toBe('Before  After');
});
|
||||
|
||||
// Every complete block in the text is removed, not just the first one.
it('strips multiple actions blocks in one response', () => {
  const input = 'A <actions><react emoji="eyes" /></actions> B <actions><voice>Hello</voice></actions> C';
  // Whitespace around each removed block is left untouched.
  expect(stripActionsBlock(input)).toBe('A  B  C');
});
|
||||
});
|
||||
|
||||
// Streaming guard: an opening <actions> tag with no closing tag after it.
describe('hasUnclosedActionsBlock', () => {
  it('detects unmatched opening actions tag', () => {
    expect(hasUnclosedActionsBlock('Before <actions><react emoji="eyes" />')).toBe(true);
  });

  it('returns false for complete actions block', () => {
    expect(hasUnclosedActionsBlock('Before <actions><react emoji="eyes" /></actions> After')).toBe(false);
  });

  it('detects a newly opened block after an earlier complete block', () => {
    // Only the last opening/closing tags are compared, so a reopened block counts.
    expect(hasUnclosedActionsBlock('A <actions></actions> B <actions><react emoji="eyes" />')).toBe(true);
  });
});
|
||||
|
||||
// Streaming guard: the text ends in a fragment of an <actions> or </actions> tag.
describe('hasIncompleteActionsTag', () => {
  it('detects partial opening actions tag while streaming', () => {
    expect(hasIncompleteActionsTag('Before <act')).toBe(true);
  });

  it('detects partial closing actions tag while streaming', () => {
    expect(hasIncompleteActionsTag('Before </act')).toBe(true);
  });

  it('returns false when no partial actions tag is present', () => {
    expect(hasIncompleteActionsTag('Before <code>ok</code>')).toBe(false);
  });
});
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/**
|
||||
* XML Directive Parser
|
||||
*
|
||||
* Parses an <actions> block at the start of agent text responses.
|
||||
* Parses <actions> blocks from agent text responses.
|
||||
* Extends the existing <no-reply/> pattern to support richer actions
|
||||
* (reactions, file sends, etc.) without requiring tool calls.
|
||||
*
|
||||
* The <actions> block must appear at the start of the response:
|
||||
* <actions> blocks can appear anywhere in the response:
|
||||
*
|
||||
* <actions>
|
||||
* <react emoji="thumbsup" />
|
||||
@@ -53,10 +53,14 @@ export interface ParseResult {
|
||||
}
|
||||
|
||||
/**
|
||||
* Match the <actions>...</actions> wrapper at the start of the response.
|
||||
* Captures the inner content of the block.
|
||||
* Match complete <actions>...</actions> wrappers anywhere in the response.
|
||||
* Captures the inner content of each block.
|
||||
*/
|
||||
const ACTIONS_BLOCK_REGEX = /^\s*<actions>([\s\S]*?)<\/actions>/;
|
||||
const ACTIONS_BLOCK_REGEX_SOURCE = '<actions>([\\s\\S]*?)<\\/actions>';
|
||||
|
||||
/**
 * Build a fresh RegExp for complete <actions>...</actions> blocks.
 *
 * A new instance is constructed on every call, so callers never share the
 * `lastIndex` state that a global-flag RegExp carries between uses.
 *
 * @param flags - RegExp flags; defaults to `'g'` so every block is matched.
 * @returns A RegExp built from ACTIONS_BLOCK_REGEX_SOURCE with the given flags.
 */
function createActionsBlockRegex(flags = 'g'): RegExp {
  return new RegExp(ACTIONS_BLOCK_REGEX_SOURCE, flags);
}
|
||||
|
||||
/**
|
||||
* Match supported directive tags inside the actions block in source order.
|
||||
@@ -156,28 +160,52 @@ function parseChildDirectives(block: string): Directive[] {
|
||||
/**
 * Parse XML directives from agent response text.
 *
 * Looks for complete <actions>...</actions> blocks anywhere in the response.
 * Every complete block is removed from the text and its child directives are
 * appended to the result in the order the blocks appear.
 *
 * @param text - Raw agent response text.
 * @returns The cleaned text and the parsed directives. When no complete block
 *   is present the text is returned exactly as given (not trimmed) with an
 *   empty directive list; otherwise the stripped text is trimmed at both ends.
 */
export function parseDirectives(text: string): ParseResult {
  // Cheap existence check first so block-free text is returned untouched.
  if (!createActionsBlockRegex().test(text)) {
    return { cleanText: text, directives: [] };
  }

  const directives: Directive[] = [];
  // A separate regex instance is used for the replace pass; the replacer
  // collects directives as a side effect while deleting each matched block.
  const cleanText = text
    .replace(createActionsBlockRegex(), (_match: string, actionsContent: string) => {
      directives.push(...parseChildDirectives(actionsContent));
      return '';
    })
    .trim();

  return { cleanText, directives };
}
|
||||
|
||||
/**
 * Returns true when text contains an opening <actions> tag with no matching
 * closing tag yet. Used during streaming to avoid flashing raw XML.
 *
 * Only the last `<actions>` and the last `</actions>` are compared: the block
 * is considered unclosed when the final opening tag comes after the final
 * closing tag (or when there is no closing tag at all).
 *
 * @param text - Response text accumulated so far.
 * @returns `true` if the most recent `<actions>` has not been closed.
 */
export function hasUnclosedActionsBlock(text: string): boolean {
  const lastOpen = text.lastIndexOf('<actions>');
  if (lastOpen < 0) {
    // No opening tag at all, so nothing can be left open.
    return false;
  }

  // lastIndexOf yields -1 when no closing tag exists, which is < lastOpen.
  const lastClose = text.lastIndexOf('</actions>');
  return lastOpen > lastClose;
}
|
||||
|
||||
/**
 * Returns true when the tail of the text contains a partial actions tag
 * (opening or closing) that has not streamed fully yet.
 *
 * The tail is everything from the last `<` onward, and it is only examined
 * when no `>` follows that `<`. It counts as a partial tag when it is a prefix
 * of `<actions>` or of `</actions>`; a lone trailing `<` therefore matches.
 *
 * @param text - Response text accumulated so far.
 * @returns `true` if the text ends in the beginning of an actions tag.
 */
export function hasIncompleteActionsTag(text: string): boolean {
  const lastLt = text.lastIndexOf('<');
  const lastGt = text.lastIndexOf('>');

  // No '<' at all, or the last '<' has already been closed by a later '>'.
  if (lastLt < 0 || lastLt <= lastGt) {
    return false;
  }

  const tail = text.slice(lastLt);
  return '<actions>'.startsWith(tail) || '</actions>'.startsWith(tail);
}
|
||||
|
||||
/**
 * Strip complete <actions>...</actions> blocks from text for streaming display.
 *
 * Every complete block is replaced with the empty string and the result is
 * trimmed at both ends. Whitespace that surrounded a block in the middle of
 * the text is left in place, and an unclosed block is not removed.
 *
 * @param text - Response text accumulated so far.
 * @returns The trimmed text with all complete actions blocks removed.
 */
export function stripActionsBlock(text: string): string {
  return text.replace(createActionsBlockRegex(), '').trim();
}
|
||||
|
||||
Reference in New Issue
Block a user