From 5a6d8040698502ccc205baf4879d0d23a4e8cbf8 Mon Sep 17 00:00:00 2001
From: paulbettner <paulbettner@playfulstudios.com>
Date: Thu, 5 Mar 2026 19:38:36 -0500
Subject: [PATCH] fix(tui): keep conversation model overrides sticky (#1238)

Co-authored-by: Letta Code <noreply@letta.com>
---
 src/agent/check-approval.ts                   |   7 +-
 src/cli/App.tsx                               | 188 +++++++++++++-----
 src/cli/commands/runner.ts                    |  12 +-
 src/cli/components/ConversationSelector.tsx   |   4 +-
 src/cli/subcommands/messages.ts               |   4 +-
 .../agent/model-preset-refresh.wiring.test.ts |  30 ++-
 src/tests/cli/reasoning-cycle-wiring.test.ts  |   5 +-
 7 files changed, 178 insertions(+), 72 deletions(-)
diff --git a/src/agent/check-approval.ts b/src/agent/check-approval.ts
index 63286cb..c4be991 100644
--- a/src/agent/check-approval.ts
+++ b/src/agent/check-approval.ts
@@ -474,15 +474,14 @@ export async function getResumeData(
       const retrievedMessages = await client.messages.retrieve(lastInContextId);
 
       // Fetch message history for backfill through the default conversation route.
-      // For default conversation, pass agent_id as query parameter.
+      // Default conversation is represented by the agent id at the conversations endpoint.
       // Wrapped in try/catch so backfill failures don't crash the CLI (e.g., older servers
       // may not support this pattern)
       if (includeMessageHistory && isBackfillEnabled()) {
         try {
           const messagesPage = await client.conversations.messages.list(
-            "default",
+            agent.id,
             {
-              agent_id: agent.id,
               limit: BACKFILL_PAGE_LIMIT,
               order: "desc",
             },
@@ -491,7 +490,7 @@ export async function getResumeData(
 
           if (process.env.DEBUG) {
             console.log(
-              `[DEBUG] conversations.messages.list(default, agent_id=${agent.id}) returned ${messages.length} messages`,
+              `[DEBUG] conversations.messages.list(${agent.id}) returned ${messages.length} messages`,
             );
           }
         } catch (backfillError) {
diff --git a/src/cli/App.tsx b/src/cli/App.tsx
index f83ddcc..95fce78 100644
--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -1477,16 +1477,25 @@ export default function App({
   const [currentToolsetPreference, setCurrentToolsetPreference] =
     useState<ToolsetPreference>("auto");
   const [llmConfig, setLlmConfig] = useState<LlmConfig | null>(null);
-  const [hasConversationModelOverride, setHasConversationModelOverride] =
-    useState(false);
+  // Keep state + ref synchronized so async callbacks (e.g. syncAgentState) never
+  // read a stale value and accidentally clobber conversation-scoped overrides.
+  const [
+    hasConversationModelOverride,
+    setHasConversationModelOverride,
+    hasConversationModelOverrideRef,
+  ] = useSyncedState(false);
   const llmConfigRef = useRef(llmConfig);
   useEffect(() => {
     llmConfigRef.current = llmConfig;
   }, [llmConfig]);
-  const hasConversationModelOverrideRef = useRef(hasConversationModelOverride);
-  useEffect(() => {
-    hasConversationModelOverrideRef.current = hasConversationModelOverride;
-  }, [hasConversationModelOverride]);
+
+  // Cache the conversation's model_settings when a conversation-scoped override is active.
+  // On resume, llm_config may omit reasoning_effort even when the conversation model_settings
+  // includes it; this snapshot prevents the footer reasoning tag from missing.
+  const [
+    conversationOverrideModelSettings,
+    setConversationOverrideModelSettings,
+  ] = useState<AgentState["model_settings"] | null>(null);
   const agentStateRef = useRef(agentState);
   useEffect(() => {
     agentStateRef.current = agentState;
@@ -1509,12 +1518,22 @@ export default function App({
       ? `${llmConfig.model_endpoint_type}/${llmConfig.model}`
       : (llmConfig?.model ?? null)) ||
     null;
+
+  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
+  // When a conversation override is active, the server may still return an agent llm_config
+  // with reasoning_effort="none"; prefer the conversation model_settings snapshot.
+  const effectiveModelSettings = hasConversationModelOverride
+    ? conversationOverrideModelSettings
+    : agentState?.model_settings;
+  const derivedReasoningEffort: ModelReasoningEffort | null =
+    deriveReasoningEffort(effectiveModelSettings, llmConfig);
+
   // Use tier-aware resolution so the display matches the agent's reasoning effort
   // (e.g. "GPT-5.3-Codex" not just "GPT-5" for the first match).
   const currentModelDisplay = useMemo(() => {
     if (!currentModelLabel) return null;
     const info = getModelInfoForLlmConfig(currentModelLabel, {
-      reasoning_effort: llmConfig?.reasoning_effort ?? null,
+      reasoning_effort: derivedReasoningEffort ?? null,
       enable_reasoner:
         (llmConfig as { enable_reasoner?: boolean | null })?.enable_reasoner ??
         null,
@@ -1527,16 +1546,10 @@ export default function App({
       currentModelLabel.split("/").pop() ??
       null
     );
-  }, [currentModelLabel, llmConfig]);
+  }, [currentModelLabel, derivedReasoningEffort, llmConfig]);
   const currentModelProvider = llmConfig?.provider_name ?? null;
-  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
-  // Some providers may omit explicit effort for default tiers (e.g., Sonnet 4.6 high),
-  // so fall back to the selected model preset when needed.
-  const effectiveModelSettings = hasConversationModelOverride
-    ? undefined
-    : agentState?.model_settings;
   const currentReasoningEffort: ModelReasoningEffort | null =
-    deriveReasoningEffort(effectiveModelSettings, llmConfig) ??
+    derivedReasoningEffort ??
     inferReasoningEffortFromModelPreset(currentModelId, currentModelLabel);
 
   // Billing tier for conditional UI and error context (fetched once on mount)
@@ -3266,6 +3279,7 @@ export default function App({
   }, [loadingState, agentId]);
 
   // Keep effective model state in sync with the active conversation override.
+  // biome-ignore lint/correctness/useExhaustiveDependencies: ref.current is intentionally read dynamically
   useEffect(() => {
     if (
       loadingState !== "ready" ||
@@ -3283,6 +3297,7 @@ export default function App({
         agentState.model ??
         buildModelHandleFromLlmConfig(agentState.llm_config);
       setHasConversationModelOverride(false);
+      setConversationOverrideModelSettings(null);
       setLlmConfig(agentState.llm_config);
       setCurrentModelHandle(agentModelHandle ?? null);
 
@@ -3351,6 +3366,7 @@ export default function App({
         );
 
         setHasConversationModelOverride(true);
+        setConversationOverrideModelSettings(conversationModelSettings ?? null);
         setCurrentModelHandle(effectiveModelHandle);
         const modelInfo = getModelInfoForLlmConfig(effectiveModelHandle, {
           reasoning_effort: reasoningEffort,
@@ -3391,8 +3407,15 @@ export default function App({
     return () => {
       cancelled = true;
     };
-  }, [agentId, agentState, conversationId, loadingState]);
+  }, [
+    agentId,
+    agentState,
+    conversationId,
+    loadingState,
+    setHasConversationModelOverride,
+  ]);
 
+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
   const maybeCarryOverActiveConversationModel = useCallback(
     async (targetConversationId: string) => {
       if (!hasConversationModelOverrideRef.current) {
@@ -3663,6 +3686,7 @@ export default function App({
   // removed. Git-backed memory uses standard git merge conflict resolution via the agent.
 
   // Core streaming function - iterative loop that processes conversation turns
+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
   const processConversation = useCallback(
     async (
       initialInput: Array<MessageCreate | ApprovalCreate>,
@@ -5841,12 +5865,14 @@ export default function App({
       // causing CONFLICT on the next user message.
       getClient()
         .then((client) => {
-          if (conversationIdRef.current === "default") {
-            return client.conversations.cancel("default", {
-              agent_id: agentIdRef.current,
-            });
+          const cancelConversationId =
+            conversationIdRef.current === "default"
+              ? agentIdRef.current
+              : conversationIdRef.current;
+          if (!cancelConversationId || cancelConversationId === "loading") {
+            return;
           }
-          return client.conversations.cancel(conversationIdRef.current);
+          return client.conversations.cancel(cancelConversationId);
         })
         .catch(() => {
           // Silently ignore - cancellation already happened client-side
@@ -5961,12 +5987,14 @@ export default function App({
       // Don't wait for it or show errors since user already got feedback
       getClient()
         .then((client) => {
-          if (conversationIdRef.current === "default") {
-            return client.conversations.cancel("default", {
-              agent_id: agentIdRef.current,
-            });
+          const cancelConversationId =
+            conversationIdRef.current === "default"
+              ? agentIdRef.current
+              : conversationIdRef.current;
+          if (!cancelConversationId || cancelConversationId === "loading") {
+            return;
           }
-          return client.conversations.cancel(conversationIdRef.current);
+          return client.conversations.cancel(cancelConversationId);
         })
         .catch(() => {
           // Silently ignore - cancellation already happened client-side
@@ -5986,13 +6014,14 @@ export default function App({
       setInterruptRequested(true);
       try {
         const client = await getClient();
-        if (conversationIdRef.current === "default") {
-          await client.conversations.cancel("default", {
-            agent_id: agentIdRef.current,
-          });
-        } else {
-          await client.conversations.cancel(conversationIdRef.current);
+        const cancelConversationId =
+          conversationIdRef.current === "default"
+            ? agentIdRef.current
+            : conversationIdRef.current;
+        if (!cancelConversationId || cancelConversationId === "loading") {
+          return;
         }
+        await client.conversations.cancel(cancelConversationId);
 
         if (abortControllerRef.current) {
           abortControllerRef.current.abort();
@@ -11163,15 +11192,24 @@ ${SYSTEM_REMINDER_CLOSE}
             phase: "running",
           });
 
-          // Persist model change to the backend.
-          // For real conversations, update the conversation-scoped override.
-          // For "default" (virtual sentinel with no real conversation object),
-          // update the agent itself so the model sticks across messages.
+          // "default" is a virtual sentinel for the agent's primary history, not a
+          // real conversation object. When active, model changes must update the agent
+          // itself (otherwise the next agent sync will snap back).
+          const isDefaultConversation = conversationIdRef.current === "default";
           let conversationModelSettings:
             | AgentState["model_settings"]
             | null
             | undefined;
-          if (conversationIdRef.current !== "default") {
+          let updatedAgent: AgentState | null = null;
+          if (isDefaultConversation) {
+            const { updateAgentLLMConfig } = await import("../agent/modify");
+            updatedAgent = await updateAgentLLMConfig(
+              agentIdRef.current,
+              modelHandle,
+              model.updateArgs,
+            );
+            conversationModelSettings = updatedAgent?.model_settings;
+          } else {
             const { updateConversationLLMConfig } = await import(
               "../agent/modify"
             );
@@ -11185,14 +11223,6 @@ ${SYSTEM_REMINDER_CLOSE}
                 model_settings?: AgentState["model_settings"] | null;
               }
             ).model_settings;
-          } else {
-            const { updateAgentLLMConfig } = await import("../agent/modify");
-            const updatedAgent = await updateAgentLLMConfig(
-              agentId,
-              modelHandle,
-              model.updateArgs,
-            );
-            conversationModelSettings = updatedAgent.model_settings;
           }
 
           // The API may not echo reasoning_effort back, so populate it from
@@ -11206,9 +11236,23 @@ ${SYSTEM_REMINDER_CLOSE}
                   llmConfigRef.current,
                 ) ?? null);
 
-          setHasConversationModelOverride(true);
+          if (isDefaultConversation) {
+            setHasConversationModelOverride(false);
+            setConversationOverrideModelSettings(null);
+            if (updatedAgent) {
+              setAgentState(updatedAgent);
+            }
+          } else {
+            setHasConversationModelOverride(true);
+            setConversationOverrideModelSettings(
+              conversationModelSettings ?? null,
+            );
+          }
+
           setLlmConfig({
-            ...(llmConfigRef.current ?? ({} as LlmConfig)),
+            ...(updatedAgent?.llm_config ??
+              llmConfigRef.current ??
+              ({} as LlmConfig)),
             ...mapHandleToLlmConfigPatch(modelHandle),
             ...(typeof resolvedReasoningEffort === "string"
               ? {
@@ -11306,6 +11350,7 @@ ${SYSTEM_REMINDER_CLOSE}
       maybeRecordToolsetChangeReminder,
       resetPendingReasoningCycle,
       withCommandLock,
+      setHasConversationModelOverride,
     ],
   );
 
@@ -11956,13 +12001,25 @@ ${SYSTEM_REMINDER_CLOSE}
         const cmd = commandRunner.start("/reasoning", "Setting reasoning...");
 
         try {
-          // "default" is a virtual sentinel, not a real conversation object —
-          // skip the API call and fall through with undefined model_settings.
+          // "default" is a virtual sentinel for the agent's primary history. When
+          // active, reasoning tier changes must update the agent itself so the next
+          // agent sync doesn't snap back.
+          const isDefaultConversation = conversationIdRef.current === "default";
           let conversationModelSettings:
             | AgentState["model_settings"]
             | null
             | undefined;
-          if (conversationIdRef.current !== "default") {
+          let updatedAgent: AgentState | null = null;
+          if (isDefaultConversation) {
+            const { updateAgentLLMConfig } = await import("../agent/modify");
+            updatedAgent = await updateAgentLLMConfig(
+              agentIdRef.current,
+              desired.modelHandle,
+              {
+                reasoning_effort: desired.effort,
+              },
+            );
+          } else {
             const { updateConversationLLMConfig } = await import(
               "../agent/modify"
             );
@@ -11981,14 +12038,30 @@ ${SYSTEM_REMINDER_CLOSE}
           }
           const resolvedReasoningEffort =
             deriveReasoningEffort(
-              conversationModelSettings,
+              isDefaultConversation
+                ? (updatedAgent?.model_settings ?? null)
+                : conversationModelSettings,
               llmConfigRef.current,
             ) ?? desired.effort;
 
-          setHasConversationModelOverride(true);
+          if (isDefaultConversation) {
+            setHasConversationModelOverride(false);
+            setConversationOverrideModelSettings(null);
+            if (updatedAgent) {
+              setAgentState(updatedAgent);
+            }
+          } else {
+            setHasConversationModelOverride(true);
+            setConversationOverrideModelSettings(
+              conversationModelSettings ?? null,
+            );
+          }
+
           // The API may not echo reasoning_effort back; preserve explicit desired effort.
           setLlmConfig({
-            ...(llmConfigRef.current ?? ({} as LlmConfig)),
+            ...(updatedAgent?.llm_config ??
+              llmConfigRef.current ??
+              ({} as LlmConfig)),
             ...mapHandleToLlmConfigPatch(desired.modelHandle),
             reasoning_effort: resolvedReasoningEffort as ModelReasoningEffort,
           });
@@ -12045,8 +12118,15 @@ ${SYSTEM_REMINDER_CLOSE}
     } finally {
       reasoningCycleInFlightRef.current = false;
     }
-  }, [agentId, commandRunner, isAgentBusy, withCommandLock]);
+  }, [
+    agentId,
+    commandRunner,
+    isAgentBusy,
+    withCommandLock,
+    setHasConversationModelOverride,
+  ]);
 
+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
   const handleCycleReasoningEffort = useCallback(() => {
     void (async () => {
       if (!agentId) return;
diff --git a/src/cli/commands/runner.ts b/src/cli/commands/runner.ts
index ec49c38..c271443 100644
--- a/src/cli/commands/runner.ts
+++ b/src/cli/commands/runner.ts
@@ -76,9 +76,15 @@ export function createCommandRunner({
     const handle: CommandHandle = {
       id,
       input,
-      update: null!,
-      finish: null!,
-      fail: null!,
+      // Placeholders are overwritten below before the handle is returned.
+      update: (_update: CommandUpdate) => {},
+      finish: (
+        _output: string,
+        _success?: boolean,
+        _dimOutput?: boolean,
+        _preformatted?: boolean,
+      ) => {},
+      fail: (_output: string) => {},
     };
 
     const update = (updateData: CommandUpdate) => {
diff --git a/src/cli/components/ConversationSelector.tsx b/src/cli/components/ConversationSelector.tsx
index 1825064..fa7fea2 100644
--- a/src/cli/components/ConversationSelector.tsx
+++ b/src/cli/components/ConversationSelector.tsx
@@ -243,9 +243,9 @@ export function ConversationSelector({
         if (!afterCursor) {
           try {
             const defaultMessages = await client.conversations.messages.list(
-              "default",
+              // Default conversation is represented by the agent id at the conversations endpoint.
+              agentId,
               {
-                agent_id: agentId,
                 limit: 20,
                 order: "desc",
               },
diff --git a/src/cli/subcommands/messages.ts b/src/cli/subcommands/messages.ts
index c8d268b..03874de 100644
--- a/src/cli/subcommands/messages.ts
+++ b/src/cli/subcommands/messages.ts
@@ -159,8 +159,8 @@ export async function runMessagesSubcommand(argv: string[]): Promise<number> {
         return 1;
       }
 
-      const response = await client.conversations.messages.list("default", {
-        agent_id: agentId,
+      // Default conversation is represented by the agent id at the conversations endpoint.
+      const response = await client.conversations.messages.list(agentId, {
         limit: parseLimit(parsed.values.limit, 20),
         after: parsed.values.after,
         before: parsed.values.before,
diff --git a/src/tests/agent/model-preset-refresh.wiring.test.ts b/src/tests/agent/model-preset-refresh.wiring.test.ts
index 3b5252b..e3cfeb5 100644
--- a/src/tests/agent/model-preset-refresh.wiring.test.ts
+++ b/src/tests/agent/model-preset-refresh.wiring.test.ts
@@ -71,7 +71,7 @@ describe("model preset refresh wiring", () => {
     expect(updateSegment).not.toContain("client.agents.update(");
   });
 
-  test("/model handler updates conversation model and falls back to agent for default", () => {
+  test("/model handler updates conversation model (default updates agent)", () => {
     const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
     const source = readFileSync(path, "utf-8");
 
@@ -85,11 +85,9 @@ describe("model preset refresh wiring", () => {
     const segment = source.slice(start, end);
 
     expect(segment).toContain("updateConversationLLMConfig(");
-    expect(segment).toContain("conversationIdRef.current");
-    // For the "default" virtual conversation (no real conversation object),
-    // the handler falls back to updating the agent directly.
     expect(segment).toContain("updateAgentLLMConfig(");
-    expect(segment).toContain('conversationIdRef.current !== "default"');
+    expect(segment).toContain("conversationIdRef.current");
+    expect(segment).toContain('conversationIdRef.current === "default"');
   });
 
   test("App defines helper to carry over active conversation model", () => {
@@ -116,6 +114,28 @@ describe("model preset refresh wiring", () => {
     );
   });
 
+  test("conversation model override flag is synced for async callbacks", () => {
+    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
+    const source = readFileSync(path, "utf-8");
+
+    // The override flag must be safe to read inside async callbacks (e.g. the
+    // first streamed chunk sync) without waiting for a render/effect.
+    expect(source).toMatch(
+      /\[\s*hasConversationModelOverride,\s*setHasConversationModelOverride,\s*hasConversationModelOverrideRef,\s*\]\s*=\s*useSyncedState\(false\)/,
+    );
+  });
+
+  test("reasoning tier prefers conversation override model_settings", () => {
+    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
+    const source = readFileSync(path, "utf-8");
+
+    // When a conversation override is active, prefer the conversation model_settings
+    // snapshot when deriving reasoning effort (not the base agent llm_config).
+    expect(source).toMatch(
+      /const effectiveModelSettings = hasConversationModelOverride\s*\?\s*conversationOverrideModelSettings\s*:\s*agentState\?\.model_settings;/,
+    );
+  });
+
   test("new conversation flows reapply active conversation model before switching", () => {
     const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
     const source = readFileSync(path, "utf-8");
diff --git a/src/tests/cli/reasoning-cycle-wiring.test.ts b/src/tests/cli/reasoning-cycle-wiring.test.ts
index 0e16541..8a88783 100644
--- a/src/tests/cli/reasoning-cycle-wiring.test.ts
+++ b/src/tests/cli/reasoning-cycle-wiring.test.ts
@@ -46,7 +46,7 @@ describe("reasoning tier cycle wiring", () => {
     expect(callbackBlocks.length).toBeGreaterThanOrEqual(2);
   });
 
-  test("flush uses conversation-scoped reasoning updates", () => {
+  test("flush uses conversation-scoped reasoning updates (default updates agent)", () => {
     const appPath = fileURLToPath(
       new URL("../../cli/App.tsx", import.meta.url),
     );
@@ -64,8 +64,9 @@ describe("reasoning tier cycle wiring", () => {
 
     const segment = source.slice(start, end);
     expect(segment).toContain("updateConversationLLMConfig(");
+    expect(segment).toContain("updateAgentLLMConfig(");
     expect(segment).toContain("conversationIdRef.current");
-    expect(segment).not.toContain("updateAgentLLMConfig(");
+    expect(segment).toContain('conversationIdRef.current === "default"');
   });
 
   test("tab-based reasoning cycling is opt-in only", () => {