fix: custom tool call display with pretty verb headers (#369)
This commit is contained in:
@@ -505,6 +505,21 @@ export class TelegramAdapter implements ChannelAdapter {
|
||||
// Only first chunk replies to the original message
|
||||
const replyId = !lastMessageId && msg.replyToMessageId ? Number(msg.replyToMessageId) : undefined;
|
||||
|
||||
// If caller specified a parse mode, send directly (skip markdown conversion)
|
||||
if (msg.parseMode) {
|
||||
try {
|
||||
const result = await this.bot.api.sendMessage(msg.chatId, chunk, {
|
||||
parse_mode: msg.parseMode as 'MarkdownV2' | 'HTML',
|
||||
reply_to_message_id: replyId,
|
||||
});
|
||||
lastMessageId = String(result.message_id);
|
||||
continue;
|
||||
} catch (e) {
|
||||
console.warn(`[Telegram] ${msg.parseMode} send failed, falling back to default:`, e);
|
||||
// Fall through to default conversion path
|
||||
}
|
||||
}
|
||||
|
||||
// Try MarkdownV2 first
|
||||
try {
|
||||
const formatted = await markdownToTelegramV2(chunk);
|
||||
|
||||
242
src/core/bot.ts
242
src/core/bot.ts
@@ -259,18 +259,148 @@ export class LettaBot implements AgentSession {
|
||||
return `${this.config.displayName}: ${text}`;
|
||||
}
|
||||
|
||||
// ---- Tool call display ----
|
||||
|
||||
/**
|
||||
* Pretty display config for known tools.
|
||||
* `header`: bold verb shown to the user (e.g., "Searching")
|
||||
* `argKeys`: ordered preference list of fields to extract from toolInput
|
||||
* or tool_result JSON as the detail line
|
||||
* `format`: optional -- 'code' wraps the detail in backticks
|
||||
*/
|
||||
private static readonly TOOL_DISPLAY_MAP: Record<string, {
  /** Bold verb shown to the user as the first line of the display. */
  header: string;
  /** Candidate field names, in priority order, used to pick the detail line. */
  argKeys: string[];
  /** When set to 'code', the detail line is wrapped in backticks. */
  format?: 'code';
  /**
   * Only meaningful together with `format: 'code'`: a detail longer than
   * this many characters is not shown as code; the remaining argKeys are
   * tried as plain text instead.
   */
  adaptiveCodeThreshold?: number;
  /**
   * Computes the header from the tool input. When present, its result is
   * the entire display -- `header` is not used and no detail line is added.
   */
  headerFn?: (input: Record<string, unknown>) => string;
}> = {
  web_search: {
    header: 'Searching',
    argKeys: ['query'],
  },
  fetch_webpage: {
    header: 'Reading',
    argKeys: ['url'],
  },
  Bash: {
    header: 'Running',
    argKeys: ['command', 'description'],
    format: 'code',
    adaptiveCodeThreshold: 80,
  },
  Read: {
    header: 'Reading',
    argKeys: ['file_path'],
  },
  Edit: {
    header: 'Editing',
    argKeys: ['file_path'],
  },
  Write: {
    header: 'Writing',
    argKeys: ['file_path'],
  },
  Glob: {
    header: 'Finding files',
    argKeys: ['pattern'],
  },
  Grep: {
    header: 'Searching code',
    argKeys: ['pattern'],
  },
  Task: {
    header: 'Delegating',
    argKeys: ['description'],
  },
  conversation_search: {
    header: 'Searching conversation history',
    argKeys: ['query'],
  },
  archival_memory_search: {
    header: 'Searching archival memory',
    argKeys: ['query'],
  },
  run_code: {
    header: 'Running code',
    argKeys: ['code'],
    format: 'code',
  },
  note: {
    header: 'Taking note',
    argKeys: ['title', 'content'],
  },
  manage_todo: {
    header: 'Updating todos',
    argKeys: [],
  },
  TodoWrite: {
    header: 'Updating todos',
    argKeys: [],
  },
  Skill: {
    header: 'Loading skill',
    argKeys: ['skill'],
    headerFn: (input) => {
      const skillName = input.skill as string | undefined;
      // `command` wins over `args`; an empty/missing command falls back to args.
      const mode = (input.command as string | undefined) || (input.args as string | undefined);
      switch (mode) {
        case 'unload':
          return skillName ? `Unloading ${skillName}` : 'Unloading skill';
        case 'refresh':
          return 'Refreshing skills';
        default:
          return skillName ? `Loading ${skillName}` : 'Loading skill';
      }
    },
  },
};
|
||||
|
||||
/**
|
||||
* Format a tool call for channel display.
|
||||
* Shows tool name + abbreviated key parameters.
|
||||
*
|
||||
* Known tools get a pretty verb-based header (e.g., **Searching**).
|
||||
* Unknown tools fall back to **Tool**\n<name> (<args>).
|
||||
*
|
||||
* When toolInput is empty (SDK streaming limitation -- the CLI only
|
||||
* forwards the first chunk before args are accumulated), we fall back
|
||||
* to extracting the detail from the tool_result content.
|
||||
*/
|
||||
private formatToolCallDisplay(streamMsg: StreamMsg, toolResult?: StreamMsg): string {
  // Builds the channel-facing text for one tool call.
  //   streamMsg  -- the tool_call message (toolName / toolInput are read)
  //   toolResult -- optional tool_result message; its content is used as a
  //                 fallback source for the detail when toolInput yields nothing
  // Returns "**<header>**" optionally followed by "\n<detail>" for tools
  // listed in TOOL_DISPLAY_MAP, or "**Tool**\n<name> (<params>)" otherwise.
  const name = streamMsg.toolName || 'unknown';

  // Look the tool up as an OWN property only. toolName is runtime data, and a
  // plain-object lookup would otherwise resolve names such as "constructor"
  // or "toString" to Object.prototype members, which are truthy but carry no
  // argKeys -- extractToolDetail would then throw on `argKeys.length`.
  const display = Object.prototype.hasOwnProperty.call(LettaBot.TOOL_DISPLAY_MAP, name)
    ? LettaBot.TOOL_DISPLAY_MAP[name]
    : undefined;

  if (display) {
    // --- Dynamic header path (e.g., Skill tool with load/unload/refresh modes) ---
    if (display.headerFn) {
      const input = (streamMsg.toolInput as Record<string, unknown> | undefined) ?? {};
      return `**${display.headerFn(input)}**`;
    }

    // --- Custom display path ---
    const detail = this.extractToolDetail(display.argKeys, streamMsg, toolResult);
    if (!detail) {
      // No usable argument found: show the verb alone.
      return `**${display.header}**`;
    }

    const threshold = display.adaptiveCodeThreshold;
    let formatted: string;
    if (display.format === 'code' && threshold) {
      // Adaptive: short values get code format, long values fall back to the
      // next argKey as plain text (e.g., Bash shows `command` for short
      // commands, but the human-readable `description` for long ones).
      if (detail.length <= threshold) {
        formatted = `\`${detail}\``;
      } else {
        const fallback = this.extractToolDetail(display.argKeys.slice(1), streamMsg, toolResult);
        // No alternative field available: show a clipped plain-text version.
        formatted = fallback || detail.slice(0, threshold) + '...';
      }
    } else {
      formatted = display.format === 'code' ? `\`${detail}\`` : detail;
    }
    return `**${display.header}**\n${formatted}`;
  }

  // --- Generic fallback for unknown tools ---
  let params = this.abbreviateToolInput(streamMsg);
  if (!params && toolResult?.content) {
    params = this.extractInputFromToolResult(toolResult.content);
  }
  return params ? `**Tool**\n${name} (${params})` : `**Tool**\n${name}`;
}
|
||||
|
||||
/**
|
||||
* Extract the first matching detail string from a tool call's input or
|
||||
* the subsequent tool_result content (fallback for empty toolInput).
|
||||
*/
|
||||
private extractToolDetail(
  argKeys: string[],
  streamMsg: StreamMsg,
  toolResult?: StreamMsg,
): string {
  if (argKeys.length === 0) return '';

  // Returns the first non-empty string found under argKeys (in order),
  // clipped to 120 characters, or '' when none of the keys qualifies.
  const firstStringValue = (source: Record<string, unknown>): string => {
    for (const key of argKeys) {
      const candidate = source[key];
      if (typeof candidate === 'string' && candidate.length > 0) {
        return candidate.length > 120 ? candidate.slice(0, 117) + '...' : candidate;
      }
    }
    return '';
  };

  // 1. Preferred source: the tool call's own input object.
  const toolInput = streamMsg.toolInput as Record<string, unknown> | undefined;
  if (toolInput && typeof toolInput === 'object') {
    const fromInput = firstStringValue(toolInput);
    if (fromInput) return fromInput;
  }

  // 2. Fallback source: the tool_result content, when it is a JSON object.
  if (!toolResult?.content) return '';

  let decoded: unknown;
  try {
    decoded = JSON.parse(toolResult.content);
  } catch {
    // Non-JSON result -- nothing to extract.
    return '';
  }
  if (!decoded || typeof decoded !== 'object' || Array.isArray(decoded)) return '';

  return firstStringValue(decoded as Record<string, unknown>);
}
|
||||
|
||||
/**
|
||||
* Extract a brief parameter summary from a tool call's input.
|
||||
* Used only by the generic fallback display path.
|
||||
*/
|
||||
private abbreviateToolInput(streamMsg: StreamMsg): string {
|
||||
const input = streamMsg.toolInput as Record<string, unknown> | undefined;
|
||||
@@ -292,16 +422,68 @@ export class LettaBot implements AgentSession {
|
||||
}
|
||||
|
||||
/**
|
||||
* Format reasoning text for channel display, respecting truncation config.
|
||||
* Fallback: extract input parameters from a tool_result's content.
|
||||
* Some tools echo their input in the result (e.g., web_search includes
|
||||
* `query`). Used only by the generic fallback display path.
|
||||
*/
|
||||
private formatReasoningDisplay(text: string): string {
|
||||
private extractInputFromToolResult(content: string): string {
  // Parse first; anything that is not valid JSON yields an empty summary.
  let decoded: unknown;
  try {
    decoded = JSON.parse(content);
  } catch {
    return '';
  }

  // Only plain JSON objects can carry named input fields.
  if (!decoded || typeof decoded !== 'object' || Array.isArray(decoded)) return '';
  const fields = decoded as Record<string, unknown>;

  // Collect at most two "key: value" pairs, scanning keys in this fixed order
  // and clipping each value to 80 characters.
  const summary: string[] = [];
  for (const key of ['query', 'input', 'prompt', 'url', 'search_query', 'text']) {
    if (summary.length >= 2) break;
    const value = fields[key];
    if (typeof value !== 'string' || value.length === 0) continue;
    const shown = value.length > 80 ? value.slice(0, 77) + '...' : value;
    summary.push(`${key}: ${shown}`);
  }

  return summary.join(', ');
}
|
||||
|
||||
/**
|
||||
* Format reasoning text for channel display, respecting truncation config.
|
||||
* Returns { text, parseMode? } -- Telegram gets HTML with <blockquote> to
|
||||
* bypass telegramify-markdown (which adds unwanted spaces to blockquotes).
|
||||
* Signal falls back to italic (no blockquote support).
|
||||
* Discord/Slack use markdown blockquotes.
|
||||
*/
|
||||
private formatReasoningDisplay(text: string, channelId?: string): { text: string; parseMode?: string } {
  // Builds the "Thinking" display for a reasoning buffer.
  //   text      -- raw reasoning text
  //   channelId -- adapter id; selects the per-channel formatting below
  // Returns the formatted text plus, for Telegram only, parseMode 'HTML'.
  const maxChars = this.config.display?.reasoningMaxChars ?? 0;

  // Trim leading whitespace from each line -- the API often includes leading
  // spaces in reasoning chunks that look wrong in channel output.
  const cleaned = text.split('\n').map(line => line.trimStart()).join('\n').trim();

  // maxChars <= 0 disables truncation.
  const truncated = maxChars > 0 && cleaned.length > maxChars
    ? cleaned.slice(0, maxChars) + '...'
    : cleaned;

  if (channelId === 'signal') {
    // Signal: no blockquote support, use italic
    return { text: `**Thinking**\n_${truncated}_` };
  }

  if (channelId === 'telegram' || channelId === 'telegram-mtproto') {
    // Telegram: use HTML blockquote to bypass telegramify-markdown spacing.
    // The text is sent with parse_mode HTML, so the three HTML-significant
    // characters must become entities; '&' goes first so the entities
    // produced for '<' and '>' are not escaped a second time.
    const escaped = truncated
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    return {
      text: `<blockquote expandable><b>Thinking</b>\n${escaped}</blockquote>`,
      parseMode: 'HTML',
    };
  }

  // Discord, Slack, etc: markdown blockquote (every line needs its own '> ').
  const quoted = truncated
    .split('\n')
    .map(line => `> ${line}`)
    .join('\n');
  return { text: `> **Thinking**\n${quoted}` };
}
|
||||
|
||||
// =========================================================================
|
||||
@@ -1231,9 +1413,7 @@ export class LettaBot implements AgentSession {
|
||||
let lastErrorDetail: { message: string; stopReason: string; apiError?: Record<string, unknown> } | null = null;
|
||||
let retryInfo: { attempt: number; maxAttempts: number; reason: string } | null = null;
|
||||
let reasoningBuffer = '';
|
||||
// Buffer the latest tool_call by ID so we display it once with full args
|
||||
// (the SDK streams multiple tool_call messages per call -- first has empty input).
|
||||
let pendingToolDisplay: { toolCallId: string; msg: any } | null = null;
|
||||
// Tool call displays fire immediately on arrival (SDK now accumulates args).
|
||||
const msgTypeCounts: Record<string, number> = {};
|
||||
|
||||
const parseAndHandleDirectives = async () => {
|
||||
@@ -1312,8 +1492,8 @@ export class LettaBot implements AgentSession {
|
||||
if (isSemanticType && lastMsgType === 'reasoning' && streamMsg.type !== 'reasoning' && reasoningBuffer.trim()) {
|
||||
if (this.config.display?.showReasoning && !suppressDelivery) {
|
||||
try {
|
||||
const text = this.formatReasoningDisplay(reasoningBuffer);
|
||||
await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
|
||||
const reasoning = this.formatReasoningDisplay(reasoningBuffer, adapter.id);
|
||||
await adapter.sendMessage({ chatId: msg.chatId, text: reasoning.text, threadId: msg.threadId, parseMode: reasoning.parseMode });
|
||||
// Note: display messages don't set sentAnyMessage -- they're informational,
|
||||
// not a substitute for an assistant response. Error handling and retry must
|
||||
// still fire even if reasoning was displayed.
|
||||
@@ -1324,21 +1504,7 @@ export class LettaBot implements AgentSession {
|
||||
reasoningBuffer = '';
|
||||
}
|
||||
|
||||
// Flush pending tool call display when type changes away from tool_call.
|
||||
// The SDK streams multiple tool_call messages per call (first has empty args),
|
||||
// so we buffer and display the last one which has the complete input.
|
||||
if (isSemanticType && pendingToolDisplay && streamMsg.type !== 'tool_call') {
|
||||
if (this.config.display?.showToolCalls && !suppressDelivery) {
|
||||
try {
|
||||
const text = this.formatToolCallDisplay(pendingToolDisplay.msg);
|
||||
await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
|
||||
// Display messages don't set sentAnyMessage (see reasoning display comment).
|
||||
} catch (err) {
|
||||
console.warn('[Bot] Failed to send tool call display:', err instanceof Error ? err.message : err);
|
||||
}
|
||||
}
|
||||
pendingToolDisplay = null;
|
||||
}
|
||||
// (Tool call displays fire immediately in the tool_call handler below.)
|
||||
|
||||
// Tool loop detection
|
||||
const maxToolCalls = this.config.maxToolCalls ?? 100;
|
||||
@@ -1356,9 +1522,15 @@ export class LettaBot implements AgentSession {
|
||||
const tcId = streamMsg.toolCallId?.slice(0, 12) || '?';
|
||||
log.info(`>>> TOOL CALL: ${tcName} (id: ${tcId})`);
|
||||
sawNonAssistantSinceLastUuid = true;
|
||||
// Buffer the tool call -- the SDK streams multiple chunks per call
|
||||
// (first has empty args). We display the last chunk when type changes.
|
||||
pendingToolDisplay = { toolCallId: streamMsg.toolCallId || '', msg: streamMsg };
|
||||
// Display tool call immediately (args are now populated by SDK accumulation fix)
|
||||
if (this.config.display?.showToolCalls && !suppressDelivery) {
|
||||
try {
|
||||
const text = this.formatToolCallDisplay(streamMsg);
|
||||
await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
|
||||
} catch (err) {
|
||||
console.warn('[Bot] Failed to send tool call display:', err instanceof Error ? err.message : err);
|
||||
}
|
||||
}
|
||||
} else if (streamMsg.type === 'tool_result') {
|
||||
log.info(`<<< TOOL RESULT: error=${streamMsg.isError}, len=${(streamMsg as any).content?.length || 0}`);
|
||||
sawNonAssistantSinceLastUuid = true;
|
||||
|
||||
@@ -94,6 +94,10 @@ export interface OutboundMessage {
|
||||
text: string;
|
||||
replyToMessageId?: string;
|
||||
threadId?: string; // Slack thread_ts
|
||||
/** When set, tells the adapter which parse mode to use (e.g., 'MarkdownV2',
|
||||
* 'HTML') and to skip its default markdown conversion. Adapters that don't
|
||||
* support the specified mode ignore this and fall back to default. */
|
||||
parseMode?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user