perf: reuse SDK session subprocess across messages (#289)

2026-02-12 11:24:58 -08:00
parent 01ed38a15d
commit b79d705a99
2 changed files with 117 additions and 48 deletions
--- a/src/core/bot.ts
+++ b/src/core/bot.ts
@@ -130,6 +130,18 @@ export class LettaBot implements AgentSession {

  // AskUserQuestion support: resolves when the next user message arrives
  private pendingQuestionResolver: ((text: string) => void) | null = null;
+
+  // Persistent session: reuse a single CLI subprocess across messages
+  private persistentSession: Session | null = null;
+  private currentCanUseTool: CanUseToolCallback | undefined;
+  // Stable callback wrapper so the Session options never change, but we can
+  // swap out the per-message handler before each send().
+  private readonly sessionCanUseTool: CanUseToolCallback = async (toolName, toolInput) => {
+    if (this.currentCanUseTool) {
+      return this.currentCanUseTool(toolName, toolInput);
+    }
+    return { behavior: 'allow' as const };
+  };
  
  constructor(config: BotConfig) {
    this.config = config;
@@ -292,51 +304,79 @@ export class LettaBot implements AgentSession {
  }

  /**
-   * Create or resume a session with automatic fallback.
-   * 
-   * Priority: conversationId → agentId (default conv) → createAgent
-   * If resume fails (conversation missing), falls back to createSession.
+   * Return the persistent session, creating and initializing it if needed.
+   * The subprocess stays alive across messages -- only recreated on failure.
   */
-  private async getSession(canUseTool?: CanUseToolCallback): Promise<Session> {
-    const opts = this.baseSessionOptions(canUseTool);
+  private async ensureSession(): Promise<Session> {
+    if (this.persistentSession) {
+      return this.persistentSession;
+    }
+
+    const opts = this.baseSessionOptions(this.sessionCanUseTool);
+    let session: Session;

    if (this.store.conversationId) {
      process.env.LETTA_AGENT_ID = this.store.agentId || undefined;
-      return resumeSession(this.store.conversationId, opts);
-    }
-    if (this.store.agentId) {
+      session = resumeSession(this.store.conversationId, opts);
+    } else if (this.store.agentId) {
      process.env.LETTA_AGENT_ID = this.store.agentId;
-      // Create a new conversation instead of resuming the default.
-      // This handles the case where the default conversation was deleted
-      // or never created (e.g., after migrations).
-      return createSession(this.store.agentId, opts);
+      session = createSession(this.store.agentId, opts);
+    } else {
+      // Create new agent -- persist immediately so we don't orphan it on later failures
+      console.log('[Bot] Creating new agent');
+      const newAgentId = await createAgent({
+        systemPrompt: SYSTEM_PROMPT,
+        memory: loadMemoryBlocks(this.config.agentName),
+      });
+      const currentBaseUrl = process.env.LETTA_BASE_URL || 'https://api.letta.com';
+      this.store.setAgent(newAgentId, currentBaseUrl);
+      console.log('[Bot] Saved new agent ID:', newAgentId);
+
+      if (this.config.agentName) {
+        updateAgentName(newAgentId, this.config.agentName).catch(() => {});
+      }
+      installSkillsToAgent(newAgentId, this.config.skills);
+
+      session = createSession(newAgentId, opts);
    }

-    // Create new agent -- persist immediately so we don't orphan it on later failures
-    console.log('[Bot] Creating new agent');
-    const newAgentId = await createAgent({
-      systemPrompt: SYSTEM_PROMPT,
-      memory: loadMemoryBlocks(this.config.agentName),
-    });
-    const currentBaseUrl = process.env.LETTA_BASE_URL || 'https://api.letta.com';
-    this.store.setAgent(newAgentId, currentBaseUrl);
-    console.log('[Bot] Saved new agent ID:', newAgentId);
+    // Initialize eagerly so the subprocess is ready before the first send()
+    console.log('[Bot] Initializing session subprocess...');
+    await session.initialize();
+    console.log('[Bot] Session subprocess ready');
+    this.persistentSession = session;
+    return session;
+  }

-    // First-run setup: name and skills
-    if (this.config.agentName) {
-      updateAgentName(newAgentId, this.config.agentName).catch(() => {});
+  /**
+   * Destroy the persistent session so the next ensureSession() spawns a fresh one.
+   */
+  private invalidateSession(): void {
+    if (this.persistentSession) {
+      console.log('[Bot] Invalidating persistent session');
+      this.persistentSession.close();
+      this.persistentSession = null;
    }
-    installSkillsToAgent(newAgentId, this.config.skills);
+  }

-    return createSession(newAgentId, opts);
+  /**
+   * Pre-warm the session subprocess at startup. Call after config/agent is loaded.
+   */
+  async warmSession(): Promise<void> {
+    if (!this.store.agentId && !this.store.conversationId) return;
+    try {
+      await this.ensureSession();
+    } catch (err) {
+      console.warn('[Bot] Session pre-warm failed:', err instanceof Error ? err.message : err);
+    }
  }

  /**
   * Persist conversation ID after a successful session result.
-   * Agent ID and first-run setup are handled eagerly in getSession().
+   * Agent ID and first-run setup are handled eagerly in ensureSession().
   */
  private persistSessionState(session: Session): void {
-    // Agent ID already persisted in getSession() on creation.
+    // Agent ID already persisted in ensureSession() on creation.
    // Here we only update if the server returned a different one (shouldn't happen).
    if (session.agentId && session.agentId !== this.store.agentId) {
      const currentBaseUrl = process.env.LETTA_BASE_URL || 'https://api.letta.com';
@@ -352,13 +392,11 @@ export class LettaBot implements AgentSession {
   * Send a message and return a deduplicated stream.
   * 
   * Handles:
-   * - Session creation with fallback chain
+   * - Persistent session reuse (subprocess stays alive across messages)
   * - CONFLICT recovery from orphaned approvals (retry once)
   * - Conversation-not-found fallback (create new conversation)
   * - Tool call deduplication
   * - Session persistence after result
-   * 
-   * Caller is responsible for consuming the stream and closing the session.
   */
  private async runSession(
    message: SendMessage,
@@ -366,7 +404,10 @@ export class LettaBot implements AgentSession {
  ): Promise<{ session: Session; stream: () => AsyncGenerator<StreamMsg> }> {
    const { retried = false, canUseTool } = options;

-    let session = await this.getSession(canUseTool);
+    // Update the per-message callback before sending
+    this.currentCanUseTool = canUseTool;
+
+    let session = await this.ensureSession();

    // Send message with fallback chain
    try {
@@ -375,7 +416,7 @@ export class LettaBot implements AgentSession {
      // 409 CONFLICT from orphaned approval
      if (!retried && isApprovalConflictError(error) && this.store.agentId && this.store.conversationId) {
        console.log('[Bot] CONFLICT detected - attempting orphaned approval recovery...');
-        session.close();
+        this.invalidateSession();
        const result = await recoverOrphanedConversationApproval(
          this.store.agentId,
          this.store.conversationId
@@ -393,10 +434,12 @@ export class LettaBot implements AgentSession {
      // on auth, network, or protocol errors (which would just fail again).
      if (this.store.agentId && isConversationMissingError(error)) {
        console.warn('[Bot] Conversation not found, creating a new conversation...');
-        session.close();
-        session = createSession(this.store.agentId, this.baseSessionOptions(canUseTool));
+        this.invalidateSession();
+        session = await this.ensureSession();
        await session.send(message);
      } else {
+        // Unknown error -- invalidate so we get a fresh subprocess next time
+        this.invalidateSession();
        throw error;
      }
    }
@@ -513,6 +556,7 @@ export class LettaBot implements AgentSession {
        const oldConversationId = this.store.conversationId;
        this.store.conversationId = null;
        this.store.resetRecoveryAttempts();
+        this.invalidateSession(); // Subprocess has old conversation baked in
        console.log(`[Command] /reset - conversation cleared (was: ${oldConversationId})`);
        return 'Conversation reset. Send a message to start a new conversation. (Agent memory is preserved.)';
      }
@@ -672,6 +716,11 @@ export class LettaBot implements AgentSession {
  
  private async processMessage(msg: InboundMessage, adapter: ChannelAdapter, retried = false): Promise<void> {
    // Track timing and last target
+    const debugTiming = !!process.env.LETTABOT_DEBUG_TIMING;
+    const t0 = debugTiming ? performance.now() : 0;
+    const lap = (label: string) => {
+      if (debugTiming) console.log(`[Timing] ${label}: ${(performance.now() - t0).toFixed(0)}ms`);
+    };
    this.lastUserMessageTime = new Date();

    // Skip heartbeat target update for listening mode (don't redirect heartbeats)
@@ -684,13 +733,20 @@ export class LettaBot implements AgentSession {
      };
    }

-    // Skip typing indicator for listening mode
+    // Fire-and-forget typing indicator so session creation starts immediately
    if (!msg.isListeningMode) {
-      await adapter.sendTypingIndicator(msg.chatId);
+      adapter.sendTypingIndicator(msg.chatId).catch(() => {});
    }
+    lap('typing indicator');

    // Pre-send approval recovery
-    const recovery = await this.attemptRecovery();
+    // Only run proactive recovery when previous failures were detected.
+    // Clean-path messages skip straight to session creation (the 409 retry
+    // in runSession() still catches stuck states reactively).
+    const recovery = this.store.recoveryAttempts > 0
+      ? await this.attemptRecovery()
+      : { recovered: false, shouldReset: false };
+    lap('recovery check');
    if (recovery.shouldReset) {
      if (!msg.isListeningMode) {
        await adapter.sendMessage({
@@ -714,6 +770,7 @@ export class LettaBot implements AgentSession {
      ? formatGroupBatchEnvelope(msg.batchedMessages, {}, msg.isListeningMode)
      : formatMessageEnvelope(msg, {}, sessionContext);
    const messageToSend = await buildMultimodalMessage(formattedText, msg);
+    lap('format message');

    // Build AskUserQuestion-aware canUseTool callback with channel context.
    // In bypassPermissions mode, this callback is only invoked for interactive
@@ -754,11 +811,12 @@ export class LettaBot implements AgentSession {
    let session: Session | null = null;
    try {
      const run = await this.runSession(messageToSend, { retried, canUseTool });
+      lap('session send');
      session = run.session;

      // Stream response with delivery
      let response = '';
-      let lastUpdate = Date.now();
+      let lastUpdate = 0; // Start at 0 so the first streaming edit fires immediately
      let messageId: string | null = null;
      let lastMsgType: string | null = null;
      let lastAssistantUuid: string | null = null;
@@ -807,7 +865,9 @@ export class LettaBot implements AgentSession {
      }, 4000);
      
      try {
+        let firstChunkLogged = false;
        for await (const streamMsg of run.stream()) {
+          if (!firstChunkLogged) { lap('first stream chunk'); firstChunkLogged = true; }
          receivedAnyData = true;
          msgTypeCounts[streamMsg.type] = (msgTypeCounts[streamMsg.type] || 0) + 1;
          
@@ -930,7 +990,7 @@ export class LettaBot implements AgentSession {
              if (!retried && this.store.agentId && this.store.conversationId) {
                const reason = shouldRetryForErrorResult ? 'error result' : 'empty result';
                console.log(`[Bot] ${reason} - attempting orphaned approval recovery...`);
-                session.close();
+                this.invalidateSession();
                session = null;
                clearInterval(typingInterval);
                const convResult = await recoverOrphanedConversationApproval(
@@ -965,6 +1025,7 @@ export class LettaBot implements AgentSession {
        clearInterval(typingInterval);
        adapter.stopTypingIndicator?.(msg.chatId)?.catch(() => {});
      }
+      lap('stream complete');
      
      // Handle no-reply marker
      if (response.trim() === '<no-reply/>') {
@@ -992,6 +1053,7 @@ export class LettaBot implements AgentSession {
        return;
      }

+      lap('directives done');
      // Send final response
      if (response.trim()) {
        const prefixedFinal = this.prefixResponse(response);
@@ -1015,6 +1077,7 @@ export class LettaBot implements AgentSession {
        }
      }
      
+      lap('message delivered');
      // Handle no response
      if (!sentAnyMessage) {
        if (!receivedAnyData) {
@@ -1051,7 +1114,7 @@ export class LettaBot implements AgentSession {
        console.error('[Bot] Failed to send error message to channel:', sendError);
      }
    } finally {
-      session?.close();
+      // Session stays alive for reuse -- only invalidated on errors
    }
  }

@@ -1071,7 +1134,7 @@ export class LettaBot implements AgentSession {
    this.processing = true;
    
    try {
-      const { session, stream } = await this.runSession(text);
+      const { stream } = await this.runSession(text);
      
      try {
        let response = '';
@@ -1092,8 +1155,10 @@ export class LettaBot implements AgentSession {
          }
        }
        return response;
-      } finally {
-        session.close();
+      } catch (error) {
+        // Invalidate on stream errors so next call gets a fresh subprocess
+        this.invalidateSession();
+        throw error;
      }
    } finally {
      this.processing = false;
@@ -1117,12 +1182,13 @@ export class LettaBot implements AgentSession {
    this.processing = true;

    try {
-      const { session, stream } = await this.runSession(text);
+      const { stream } = await this.runSession(text);

      try {
        yield* stream();
-      } finally {
-        session.close();
+      } catch (error) {
+        this.invalidateSession();
+        throw error;
      }
    } finally {
      this.processing = false;
--- a/src/main.ts
+++ b/src/main.ts
@@ -585,6 +585,9 @@ async function main() {
      services.groupBatchers.push(batcher);
    }

+    // Pre-warm the SDK session subprocess so the first message doesn't pay startup cost
+    bot.warmSession().catch(() => {});
+
    // Per-agent cron
    if (agentConfig.features?.cron ?? globalConfig.cronEnabled) {
      const cronService = new CronService(bot);