NOTE(review): this patch was recovered from a copy in which line breaks were
collapsed and every `<...>` span / HTML entity had been stripped. Generic type
arguments, the escape entities and the <blockquote> markup below are
reconstructed from how the code uses them; the two field comments inside the
TOOL_DISPLAY_MAP type literal and all indentation are best-effort -- confirm
against the original commit before applying.

diff --git a/src/channels/telegram.ts b/src/channels/telegram.ts
index 5050a86..a3a2564 100644
--- a/src/channels/telegram.ts
+++ b/src/channels/telegram.ts
@@ -505,6 +505,21 @@ export class TelegramAdapter implements ChannelAdapter {
       // Only first chunk replies to the original message
       const replyId = !lastMessageId && msg.replyToMessageId ? Number(msg.replyToMessageId) : undefined;
 
+      // If caller specified a parse mode, send directly (skip markdown conversion)
+      if (msg.parseMode) {
+        try {
+          const result = await this.bot.api.sendMessage(msg.chatId, chunk, {
+            parse_mode: msg.parseMode as 'MarkdownV2' | 'HTML',
+            reply_to_message_id: replyId,
+          });
+          lastMessageId = String(result.message_id);
+          continue;
+        } catch (e) {
+          console.warn(`[Telegram] ${msg.parseMode} send failed, falling back to default:`, e);
+          // Fall through to default conversion path
+        }
+      }
+
       // Try MarkdownV2 first
       try {
         const formatted = await markdownToTelegramV2(chunk);
diff --git a/src/core/bot.ts b/src/core/bot.ts
index 9af4db3..2961efa 100644
--- a/src/core/bot.ts
+++ b/src/core/bot.ts
@@ -259,18 +259,148 @@ export class LettaBot implements AgentSession {
     return `${this.config.displayName}: ${text}`;
   }
 
+  // ---- Tool call display ----
+
+  /**
+   * Pretty display config for known tools.
+   * `header`: bold verb shown to the user (e.g., "Searching")
+   * `argKeys`: ordered preference list of fields to extract from toolInput
+   *            or tool_result JSON as the detail line
+   * `format`: optional -- 'code' wraps the detail in backticks
+   */
+  private static readonly TOOL_DISPLAY_MAP: Record<string, {
+    header: string;
+    argKeys: string[];
+    format?: 'code';
+    /** With format 'code': details longer than this fall back to the next
+     *  argKey as plain text, or are cut to this length plus '...'. */
+    adaptiveCodeThreshold?: number;
+    /** Builds the header from the tool input; when present it replaces the
+     *  static `header` and no detail line is shown. */
+    headerFn?: (input: Record<string, unknown>) => string;
+  }> = {
+    web_search: { header: 'Searching', argKeys: ['query'] },
+    fetch_webpage: { header: 'Reading', argKeys: ['url'] },
+    Bash: { header: 'Running', argKeys: ['command', 'description'], format: 'code', adaptiveCodeThreshold: 80 },
+    Read: { header: 'Reading', argKeys: ['file_path'] },
+    Edit: { header: 'Editing', argKeys: ['file_path'] },
+    Write: { header: 'Writing', argKeys: ['file_path'] },
+    Glob: { header: 'Finding files', argKeys: ['pattern'] },
+    Grep: { header: 'Searching code', argKeys: ['pattern'] },
+    Task: { header: 'Delegating', argKeys: ['description'] },
+    conversation_search: { header: 'Searching conversation history', argKeys: ['query'] },
+    archival_memory_search: { header: 'Searching archival memory', argKeys: ['query'] },
+    run_code: { header: 'Running code', argKeys: ['code'], format: 'code' },
+    note: { header: 'Taking note', argKeys: ['title', 'content'] },
+    manage_todo: { header: 'Updating todos', argKeys: [] },
+    TodoWrite: { header: 'Updating todos', argKeys: [] },
+    Skill: {
+      header: 'Loading skill',
+      argKeys: ['skill'],
+      headerFn: (input) => {
+        const skill = input.skill as string | undefined;
+        const command = (input.command as string | undefined) || (input.args as string | undefined);
+        if (command === 'unload') return skill ? `Unloading ${skill}` : 'Unloading skill';
+        if (command === 'refresh') return 'Refreshing skills';
+        return skill ? `Loading ${skill}` : 'Loading skill';
+      },
+    },
+  };
+
   /**
    * Format a tool call for channel display.
-   * Shows tool name + abbreviated key parameters.
+   *
+   * Known tools get a pretty verb-based header (e.g., **Searching**).
+   * Unknown tools fall back to **Tool**\n<name> (<args>).
+   *
+   * When toolInput is empty (SDK streaming limitation -- the CLI only
+   * forwards the first chunk before args are accumulated), we fall back
+   * to extracting the detail from the tool_result content.
    */
-  private formatToolCallDisplay(streamMsg: StreamMsg): string {
+  private formatToolCallDisplay(streamMsg: StreamMsg, toolResult?: StreamMsg): string {
     const name = streamMsg.toolName || 'unknown';
-    const params = this.abbreviateToolInput(streamMsg);
-    return params ? `**Tool:** ${name} (${params})` : `**Tool:** ${name}`;
+    const display = LettaBot.TOOL_DISPLAY_MAP[name];
+
+    if (display) {
+      // --- Dynamic header path (e.g., Skill tool with load/unload/refresh modes) ---
+      if (display.headerFn) {
+        const input = (streamMsg.toolInput as Record<string, unknown> | undefined) ?? {};
+        return `**${display.headerFn(input)}**`;
+      }
+
+      // --- Custom display path ---
+      const detail = this.extractToolDetail(display.argKeys, streamMsg, toolResult);
+      if (detail) {
+        let formatted: string;
+        if (display.format === 'code' && display.adaptiveCodeThreshold) {
+          // Adaptive: short values get code format, long values fall back to
+          // the next argKey as plain text (e.g., Bash shows `command` for short
+          // commands, but the human-readable `description` for long ones).
+          if (detail.length <= display.adaptiveCodeThreshold) {
+            formatted = `\`${detail}\``;
+          } else {
+            const fallback = this.extractToolDetail(display.argKeys.slice(1), streamMsg, toolResult);
+            formatted = fallback || detail.slice(0, display.adaptiveCodeThreshold) + '...';
+          }
+        } else {
+          formatted = display.format === 'code' ? `\`${detail}\`` : detail;
+        }
+        return `**${display.header}**\n${formatted}`;
+      }
+      return `**${display.header}**`;
+    }
+
+    // --- Generic fallback for unknown tools ---
+    let params = this.abbreviateToolInput(streamMsg);
+    if (!params && toolResult?.content) {
+      params = this.extractInputFromToolResult(toolResult.content);
+    }
+    return params ? `**Tool**\n${name} (${params})` : `**Tool**\n${name}`;
+  }
+
+  /**
+   * Extract the first matching detail string from a tool call's input or
+   * the subsequent tool_result content (fallback for empty toolInput).
+   */
+  private extractToolDetail(
+    argKeys: string[],
+    streamMsg: StreamMsg,
+    toolResult?: StreamMsg,
+  ): string {
+    if (argKeys.length === 0) return '';
+
+    // 1. Try toolInput (primary -- when SDK provides args)
+    const input = streamMsg.toolInput as Record<string, unknown> | undefined;
+    if (input && typeof input === 'object') {
+      for (const key of argKeys) {
+        const val = input[key];
+        if (typeof val === 'string' && val.length > 0) {
+          return val.length > 120 ? val.slice(0, 117) + '...' : val;
+        }
+      }
+    }
+
+    // 2. Try tool_result content (fallback for empty toolInput)
+    if (toolResult?.content) {
+      try {
+        const parsed = JSON.parse(toolResult.content);
+        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+          for (const key of argKeys) {
+            const val = (parsed as Record<string, unknown>)[key];
+            if (typeof val === 'string' && val.length > 0) {
+              return val.length > 120 ? val.slice(0, 117) + '...' : val;
+            }
+          }
+        }
+      } catch { /* non-JSON result -- skip */ }
+    }
+
+    return '';
   }
 
   /**
    * Extract a brief parameter summary from a tool call's input.
+   * Used only by the generic fallback display path.
    */
   private abbreviateToolInput(streamMsg: StreamMsg): string {
     const input = streamMsg.toolInput as Record<string, unknown> | undefined;
@@ -292,16 +422,68 @@ export class LettaBot implements AgentSession {
   }
 
   /**
-   * Format reasoning text for channel display, respecting truncation config.
+   * Fallback: extract input parameters from a tool_result's content.
+   * Some tools echo their input in the result (e.g., web_search includes
+   * `query`). Used only by the generic fallback display path.
    */
-  private formatReasoningDisplay(text: string): string {
+  private extractInputFromToolResult(content: string): string {
+    try {
+      const parsed = JSON.parse(content);
+      if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return '';
+
+      const inputKeys = ['query', 'input', 'prompt', 'url', 'search_query', 'text'];
+      const parts: string[] = [];
+
+      for (const key of inputKeys) {
+        const val = (parsed as Record<string, unknown>)[key];
+        if (typeof val === 'string' && val.length > 0) {
+          const truncated = val.length > 80 ? val.slice(0, 77) + '...' : val;
+          parts.push(`${key}: ${truncated}`);
+          if (parts.length >= 2) break;
+        }
+      }
+
+      return parts.join(', ');
+    } catch {
+      return '';
+    }
+  }
+
+  /**
+   * Format reasoning text for channel display, respecting truncation config.
+   * Returns { text, parseMode? } -- Telegram gets HTML with <blockquote> to
+   * bypass telegramify-markdown (which adds unwanted spaces to blockquotes).
+   * Signal falls back to italic (no blockquote support).
+   * Discord/Slack use markdown blockquotes.
+   */
+  private formatReasoningDisplay(text: string, channelId?: string): { text: string; parseMode?: string } {
     const maxChars = this.config.display?.reasoningMaxChars ?? 0;
-    const truncated = maxChars > 0 && text.length > maxChars
-      ? text.slice(0, maxChars) + '...'
-      : text;
-    // Use italic for reasoning -- works across all channels including Signal
-    // (Signal only supports bold/italic/code, no blockquotes)
-    return `**Thinking**\n_${truncated}_`;
+    // Trim leading whitespace from each line -- the API often includes leading
+    // spaces in reasoning chunks that look wrong in channel output.
+    const cleaned = text.split('\n').map(line => line.trimStart()).join('\n').trim();
+    const truncated = maxChars > 0 && cleaned.length > maxChars
+      ? cleaned.slice(0, maxChars) + '...'
+      : cleaned;
+
+    if (channelId === 'signal') {
+      // Signal: no blockquote support, use italic
+      return { text: `**Thinking**\n_${truncated}_` };
+    }
+    if (channelId === 'telegram' || channelId === 'telegram-mtproto') {
+      // Telegram: use HTML blockquote to bypass telegramify-markdown spacing
+      const escaped = truncated
+        .replace(/&/g, '&amp;')
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;');
+      return {
+        text: `<blockquote><b>Thinking</b>\n${escaped}</blockquote>`,
+        parseMode: 'HTML',
+      };
+    }
+    // Discord, Slack, etc: markdown blockquote
+    const lines = truncated.split('\n');
+    const quoted = lines.map(line => `> ${line}`).join('\n');
+    return { text: `> **Thinking**\n${quoted}` };
   }
 
   // =========================================================================
@@ -1231,9 +1413,7 @@ export class LettaBot implements AgentSession {
       let lastErrorDetail: { message: string; stopReason: string; apiError?: Record<string, unknown> } | null = null;
       let retryInfo: { attempt: number; maxAttempts: number; reason: string } | null = null;
       let reasoningBuffer = '';
-      // Buffer the latest tool_call by ID so we display it once with full args
-      // (the SDK streams multiple tool_call messages per call -- first has empty input).
-      let pendingToolDisplay: { toolCallId: string; msg: any } | null = null;
+      // Tool call displays fire immediately on arrival (SDK now accumulates args).
       const msgTypeCounts: Record<string, number> = {};
 
       const parseAndHandleDirectives = async () => {
@@ -1312,8 +1492,8 @@ export class LettaBot implements AgentSession {
           if (isSemanticType && lastMsgType === 'reasoning' && streamMsg.type !== 'reasoning' && reasoningBuffer.trim()) {
             if (this.config.display?.showReasoning && !suppressDelivery) {
               try {
-                const text = this.formatReasoningDisplay(reasoningBuffer);
-                await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
+                const reasoning = this.formatReasoningDisplay(reasoningBuffer, adapter.id);
+                await adapter.sendMessage({ chatId: msg.chatId, text: reasoning.text, threadId: msg.threadId, parseMode: reasoning.parseMode });
                 // Note: display messages don't set sentAnyMessage -- they're informational,
                 // not a substitute for an assistant response. Error handling and retry must
                 // still fire even if reasoning was displayed.
@@ -1324,21 +1504,7 @@ export class LettaBot implements AgentSession {
             reasoningBuffer = '';
           }
 
-          // Flush pending tool call display when type changes away from tool_call.
-          // The SDK streams multiple tool_call messages per call (first has empty args),
-          // so we buffer and display the last one which has the complete input.
-          if (isSemanticType && pendingToolDisplay && streamMsg.type !== 'tool_call') {
-            if (this.config.display?.showToolCalls && !suppressDelivery) {
-              try {
-                const text = this.formatToolCallDisplay(pendingToolDisplay.msg);
-                await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
-                // Display messages don't set sentAnyMessage (see reasoning display comment).
-              } catch (err) {
-                console.warn('[Bot] Failed to send tool call display:', err instanceof Error ? err.message : err);
-              }
-            }
-            pendingToolDisplay = null;
-          }
+          // (Tool call displays fire immediately in the tool_call handler below.)
 
           // Tool loop detection
           const maxToolCalls = this.config.maxToolCalls ?? 100;
@@ -1356,9 +1522,15 @@ export class LettaBot implements AgentSession {
             const tcId = streamMsg.toolCallId?.slice(0, 12) || '?';
             log.info(`>>> TOOL CALL: ${tcName} (id: ${tcId})`);
             sawNonAssistantSinceLastUuid = true;
-            // Buffer the tool call -- the SDK streams multiple chunks per call
-            // (first has empty args). We display the last chunk when type changes.
-            pendingToolDisplay = { toolCallId: streamMsg.toolCallId || '', msg: streamMsg };
+            // Display tool call immediately (args are now populated by SDK accumulation fix)
+            if (this.config.display?.showToolCalls && !suppressDelivery) {
+              try {
+                const text = this.formatToolCallDisplay(streamMsg);
+                await adapter.sendMessage({ chatId: msg.chatId, text, threadId: msg.threadId });
+              } catch (err) {
+                console.warn('[Bot] Failed to send tool call display:', err instanceof Error ? err.message : err);
+              }
+            }
           } else if (streamMsg.type === 'tool_result') {
             log.info(`<<< TOOL RESULT: error=${streamMsg.isError}, len=${(streamMsg as any).content?.length || 0}`);
             sawNonAssistantSinceLastUuid = true;
diff --git a/src/core/types.ts b/src/core/types.ts
index 6509973..aedf6d7 100644
--- a/src/core/types.ts
+++ b/src/core/types.ts
@@ -94,6 +94,10 @@ export interface OutboundMessage {
   text: string;
   replyToMessageId?: string;
   threadId?: string;  // Slack thread_ts
+  /** When set, tells the adapter which parse mode to use (e.g., 'MarkdownV2',
+   *  'HTML') and to skip its default markdown conversion. Adapters that don't
+   *  support the specified mode ignore this and fall back to default. */
+  parseMode?: string;
 }
 
 /**