fix: custom tool call display with pretty verb headers (#369)

2026-02-24 10:45:04 -08:00
parent 43f9b332cd
commit 266ded012c
3 changed files with 226 additions and 35 deletions
--- a/src/channels/telegram.ts
+++ b/src/channels/telegram.ts
@@ -505,6 +505,21 @@ export class TelegramAdapter implements ChannelAdapter {
      // Only first chunk replies to the original message
      const replyId = !lastMessageId && msg.replyToMessageId ? Number(msg.replyToMessageId) : undefined;
      
+      // If caller specified a parse mode, send directly (skip markdown conversion)
+      if (msg.parseMode) {
+        try {
+          const result = await this.bot.api.sendMessage(msg.chatId, chunk, {
+            parse_mode: msg.parseMode as 'MarkdownV2' | 'HTML',
+            reply_to_message_id: replyId,
+          });
+          lastMessageId = String(result.message_id);
+          continue;
+        } catch (e) {
+          console.warn(`[Telegram] ${msg.parseMode} send failed, falling back to default:`, e);
+          // Fall through to default conversion path
+        }
+      }
+
      // Try MarkdownV2 first
      try {
        const formatted = await markdownToTelegramV2(chunk);
--- a/src/core/bot.ts
+++ b/src/core/bot.ts
@@ -259,18 +259,148 @@ export class LettaBot implements AgentSession {
    return `${this.config.displayName}: ${text}`;
  }

+  // ---- Tool call display ----
+
+  /**
+   * Pretty display config for known tools.
+   * `header`: bold verb shown to the user (e.g., "Searching")
+   * `argKeys`: ordered preference list of fields to extract from toolInput
+   *            or tool_result JSON as the detail line
+   * `format`: optional -- 'code' wraps the detail in backticks
+   */
+  private static readonly TOOL_DISPLAY_MAP: Record<string, {
+    header: string;
+    argKeys: string[];
+    format?: 'code';
+    /** For 'code' format: if the first argKey value exceeds this length,
+     *  fall back to the next argKey shown as plain text instead. */
+    adaptiveCodeThreshold?: number;
+    /** Dynamic header based on tool input. When provided, the return value
+     *  replaces `header` entirely and no argKey detail is appended. */
+    headerFn?: (input: Record<string, unknown>) => string;
+  }> = {
+    web_search:          { header: 'Searching',      argKeys: ['query'] },
+    fetch_webpage:       { header: 'Reading',         argKeys: ['url'] },
+    Bash:                { header: 'Running',          argKeys: ['command', 'description'], format: 'code', adaptiveCodeThreshold: 80 },
+    Read:                { header: 'Reading',          argKeys: ['file_path'] },
+    Edit:                { header: 'Editing',          argKeys: ['file_path'] },
+    Write:               { header: 'Writing',          argKeys: ['file_path'] },
+    Glob:                { header: 'Finding files',    argKeys: ['pattern'] },
+    Grep:                { header: 'Searching code',   argKeys: ['pattern'] },
+    Task:                { header: 'Delegating',       argKeys: ['description'] },
+    conversation_search: { header: 'Searching conversation history', argKeys: ['query'] },
+    archival_memory_search: { header: 'Searching archival memory', argKeys: ['query'] },
+    run_code:            { header: 'Running code',     argKeys: ['code'], format: 'code' },
+    note:                { header: 'Taking note',      argKeys: ['title', 'content'] },
+    manage_todo:         { header: 'Updating todos',   argKeys: [] },
+    TodoWrite:           { header: 'Updating todos',   argKeys: [] },
+    Skill:               {
+      header: 'Loading skill',
+      argKeys: ['skill'],
+      headerFn: (input) => {
+        const skill = typeof input.skill === 'string' ? input.skill : undefined; // tool input is untyped; ignore non-strings
+        const command = (input.command as string | undefined) || (input.args as string | undefined);
+        if (command === 'unload') return skill ? `Unloading ${skill}` : 'Unloading skill';
+        if (command === 'refresh') return 'Refreshing skills';
+        return skill ? `Loading ${skill}` : 'Loading skill';
+      },
+    },
+  };
+
  /**
   * Format a tool call for channel display.
-   * Shows tool name + abbreviated key parameters.
+   *
+   * Known tools get a pretty verb-based header (e.g., **Searching**).
+   * Unknown tools fall back to **Tool**\n<name> (<args>).
+   *
+   * When toolInput is empty (SDK streaming limitation -- the CLI only
+   * forwards the first chunk before args are accumulated), we fall back
+   * to extracting the detail from the tool_result content, if one is supplied.
   */
-  private formatToolCallDisplay(streamMsg: StreamMsg): string {
+  private formatToolCallDisplay(streamMsg: StreamMsg, toolResult?: StreamMsg): string {
    const name = streamMsg.toolName || 'unknown';
-    const params = this.abbreviateToolInput(streamMsg);
-    return params ? `**Tool:** ${name} (${params})` : `**Tool:** ${name}`;
+    // Own-key lookup only: a tool named e.g. "constructor" must not resolve to Object.prototype members.
+    const display = Object.prototype.hasOwnProperty.call(LettaBot.TOOL_DISPLAY_MAP, name) ? LettaBot.TOOL_DISPLAY_MAP[name] : undefined;
+    if (display) {
+      // --- Dynamic header path (e.g., Skill tool with load/unload/refresh modes) ---
+      if (display.headerFn) {
+        const input = (streamMsg.toolInput as Record<string, unknown> | undefined) ?? {};
+        return `**${display.headerFn(input)}**`;
+      }
+
+      // --- Custom display path ---
+      const detail = this.extractToolDetail(display.argKeys, streamMsg, toolResult);
+      if (detail) {
+        let formatted: string;
+        if (display.format === 'code' && display.adaptiveCodeThreshold) {
+          // Adaptive: short values get code format, long values fall back to
+          // the next argKey as plain text (e.g., Bash shows `command` for short
+          // commands, but the human-readable `description` for long ones).
+          if (detail.length <= display.adaptiveCodeThreshold) {
+            formatted = `\`${detail}\``;
+          } else {
+            const fallback = this.extractToolDetail(display.argKeys.slice(1), streamMsg, toolResult);
+            formatted = fallback || detail.slice(0, display.adaptiveCodeThreshold) + '...';
+          }
+        } else {
+          formatted = display.format === 'code' ? `\`${detail}\`` : detail;
+        }
+        return `**${display.header}**\n${formatted}`;
+      }
+      return `**${display.header}**`;
+    }
+
+    // --- Generic fallback for unknown tools ---
+    let params = this.abbreviateToolInput(streamMsg);
+    if (!params && toolResult?.content) {
+      params = this.extractInputFromToolResult(toolResult.content);
+    }
+    return params ? `**Tool**\n${name} (${params})` : `**Tool**\n${name}`;
+  }
+
+  /**
+   * Return the first non-empty string under `argKeys` (in order), capped at 120 chars, or ''.
+   * toolInput is checked first; tool_result content (parsed as a JSON object) is the fallback.
+   */
+  private extractToolDetail(
+    argKeys: string[],
+    streamMsg: StreamMsg,
+    toolResult?: StreamMsg,
+  ): string {
+    if (argKeys.length === 0) return '';
+
+    // 1. Try toolInput (primary -- when SDK provides args)
+    const input = streamMsg.toolInput as Record<string, unknown> | undefined;
+    if (input && typeof input === 'object') {
+      for (const key of argKeys) {
+        const val = input[key];
+        if (typeof val === 'string' && val.length > 0) {
+          return val.length > 120 ? val.slice(0, 117) + '...' : val;
+        }
+      }
+    }
+
+    // 2. Try tool_result content (fallback for empty toolInput)
+    if (toolResult?.content) {
+      try {
+        const parsed = JSON.parse(toolResult.content);
+        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+          for (const key of argKeys) {
+            const val = (parsed as Record<string, unknown>)[key];
+            if (typeof val === 'string' && val.length > 0) {
+              return val.length > 120 ? val.slice(0, 117) + '...' : val;
+            }
+          }
+        }
+      } catch { /* non-JSON result -- skip */ }
+    }
+
+    return '';
  }

  /**
   * Extract a brief parameter summary from a tool call's input.
+   * Used only by the generic fallback display path.
   */
  private abbreviateToolInput(streamMsg: StreamMsg): string {
    const input = streamMsg.toolInput as Record<string, unknown> | undefined;
@@ -292,16 +422,68 @@ export class LettaBot implements AgentSession {
  }

  /**
-   * Format reasoning text for channel display, respecting truncation config.
+   * Fallback: pull echoed input parameters out of a tool_result's JSON content
+   * (e.g., `query`). Returns up to two `key: value` pairs, values capped at 80
+   * chars, or '' for non-JSON/non-object content. Used only by the generic fallback path.
   */
-  private formatReasoningDisplay(text: string): string {
+  private extractInputFromToolResult(content: string): string {
+    try {
+      const parsed = JSON.parse(content);
+      if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return '';
+
+      const inputKeys = ['query', 'input', 'prompt', 'url', 'search_query', 'text'];
+      const parts: string[] = [];
+
+      for (const key of inputKeys) {
+        const val = (parsed as Record<string, unknown>)[key];
+        if (typeof val === 'string' && val.length > 0) {
+          const truncated = val.length > 80 ? val.slice(0, 77) + '...' : val;
+          parts.push(`${key}: ${truncated}`);
+          if (parts.length >= 2) break;
+        }
+      }
+
+      return parts.join(', ');
+    } catch {
+      return '';
+    }
+  }
+
+  /**
+   * Format reasoning text for channel display, respecting truncation config.
+   * Returns { text, parseMode? } -- Telegram gets HTML with <blockquote> to
+   * bypass telegramify-markdown (which adds unwanted spaces to blockquotes).
+   * Signal falls back to italic (no blockquote support).
+   * Any other channel (Discord, Slack, ...) gets markdown blockquotes; a reasoningMaxChars of 0 disables truncation.
+   */
+  private formatReasoningDisplay(text: string, channelId?: string): { text: string; parseMode?: string } {
    const maxChars = this.config.display?.reasoningMaxChars ?? 0;
-    const truncated = maxChars > 0 && text.length > maxChars
-      ? text.slice(0, maxChars) + '...'
-      : text;
-    // Use italic for reasoning -- works across all channels including Signal
-    // (Signal only supports bold/italic/code, no blockquotes)
-    return `**Thinking**\n_${truncated}_`;
+    // Trim leading whitespace from each line -- the API often includes leading
+    // spaces in reasoning chunks that look wrong in channel output.
+    const cleaned = text.split('\n').map(line => line.trimStart()).join('\n').trim();
+    const truncated = maxChars > 0 && cleaned.length > maxChars
+      ? cleaned.slice(0, maxChars) + '...'
+      : cleaned;
+
+    if (channelId === 'signal') {
+      // Signal: no blockquote support, use italic
+      return { text: `**Thinking**\n_${truncated}_` };
+    }
+    if (channelId === 'telegram' || channelId === 'telegram-mtproto') {
+      // Telegram: HTML blockquote bypasses telegramify-markdown spacing; &, <, > are escaped so the text stays literal
+      const escaped = truncated
+        .replace(/&/g, '&amp;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;');
+      return {
+        text: `<blockquote expandable><b>Thinking</b>\n${escaped}</blockquote>`,
+        parseMode: 'HTML',
+      };
+    }
+    // Discord, Slack, etc: markdown blockquote
+    const lines = truncated.split('\n');
+    const quoted = lines.map(line => `> ${line}`).join('\n');
+    return { text: `> **Thinking**\n${quoted}` };
  }

  // =========================================================================
@@ -1231,9 +1413,7 @@ export class LettaBot implements AgentSession {
      let lastErrorDetail: { message: string; stopReason: string; apiError?: Record<string, unknown> } | null = null;
      let retryInfo: { attempt: number; maxAttempts: number; reason: string } | null = null;
      let reasoningBuffer = '';
-      // Buffer the latest tool_call by ID so we display it once with full args
-      // (the SDK streams multiple tool_call messages per call -- first has empty input).
-      let pendingToolDisplay: { toolCallId: string; msg: any } | null = null;
+      // Tool call displays fire immediately on arrival (SDK now accumulates args).
      const msgTypeCounts: Record<string, number> = {};

      const parseAndHandleDirectives = async () => {
@@ -1312,8 +1492,8 @@ export class LettaBot implements AgentSession {
          if (isSemanticType && lastMsgType === 'reasoning' && streamMsg.type !== 'reasoning' && reasoningBuffer.trim()) {
            if (this.config.display?.showReasoning && !suppressDelivery) {
              try {
-                const text = this.formatReasoningDisplay(reasoningBuffer);
-                await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
+                const reasoning = this.formatReasoningDisplay(reasoningBuffer, adapter.id);
+                await adapter.sendMessage({ chatId: msg.chatId, text: reasoning.text, threadId: msg.threadId, parseMode: reasoning.parseMode });
                // Note: display messages don't set sentAnyMessage -- they're informational,
                // not a substitute for an assistant response. Error handling and retry must
                // still fire even if reasoning was displayed.
@@ -1324,21 +1504,7 @@ export class LettaBot implements AgentSession {
            reasoningBuffer = '';
          }

-          // Flush pending tool call display when type changes away from tool_call.
-          // The SDK streams multiple tool_call messages per call (first has empty args),
-          // so we buffer and display the last one which has the complete input.
-          if (isSemanticType && pendingToolDisplay && streamMsg.type !== 'tool_call') {
-            if (this.config.display?.showToolCalls && !suppressDelivery) {
-              try {
-                const text = this.formatToolCallDisplay(pendingToolDisplay.msg);
-                await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
-                // Display messages don't set sentAnyMessage (see reasoning display comment).
-              } catch (err) {
-                console.warn('[Bot] Failed to send tool call display:', err instanceof Error ? err.message : err);
-              }
-            }
-            pendingToolDisplay = null;
-          }
+          // (Tool call displays fire immediately in the tool_call handler below.)
          
          // Tool loop detection
          const maxToolCalls = this.config.maxToolCalls ?? 100;
@@ -1356,9 +1522,15 @@ export class LettaBot implements AgentSession {
            const tcId = streamMsg.toolCallId?.slice(0, 12) || '?';
            log.info(`>>> TOOL CALL: ${tcName} (id: ${tcId})`);
            sawNonAssistantSinceLastUuid = true;
-            // Buffer the tool call -- the SDK streams multiple chunks per call
-            // (first has empty args). We display the last chunk when type changes.
-            pendingToolDisplay = { toolCallId: streamMsg.toolCallId || '', msg: streamMsg };
+            // Display tool call immediately (args are now populated by SDK accumulation fix)
+            if (this.config.display?.showToolCalls && !suppressDelivery) {
+              try {
+                const text = this.formatToolCallDisplay(streamMsg);
+                await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
+              } catch (err) {
+                console.warn('[Bot] Failed to send tool call display:', err instanceof Error ? err.message : err);
+              }
+            }
          } else if (streamMsg.type === 'tool_result') {
            log.info(`<<< TOOL RESULT: error=${streamMsg.isError}, len=${(streamMsg as any).content?.length || 0}`);
            sawNonAssistantSinceLastUuid = true;
--- a/src/core/types.ts
+++ b/src/core/types.ts
@@ -94,6 +94,10 @@ export interface OutboundMessage {
  text: string;
  replyToMessageId?: string;
  threadId?: string;  // Slack thread_ts
+  /** When set, tells the adapter which parse mode to use (e.g., 'MarkdownV2',
+   *  'HTML') and to skip its default markdown conversion. Adapters that don't
+   *  support the specified mode ignore this and fall back to default. */
+  parseMode?: string;
 }

 /**