fix(tui): keep conversation model overrides sticky (#1238)
Co-authored-by: Letta Code <noreply@letta.com>
@@ -474,15 +474,14 @@ export async function getResumeData(
   const retrievedMessages = await client.messages.retrieve(lastInContextId);

   // Fetch message history for backfill through the default conversation route.
-  // For default conversation, pass agent_id as query parameter.
+  // Default conversation is represented by the agent id at the conversations endpoint.
   // Wrapped in try/catch so backfill failures don't crash the CLI (e.g., older servers
   // may not support this pattern)
   if (includeMessageHistory && isBackfillEnabled()) {
     try {
       const messagesPage = await client.conversations.messages.list(
-        "default",
+        agent.id,
         {
-          agent_id: agent.id,
           limit: BACKFILL_PAGE_LIMIT,
           order: "desc",
         },
@@ -491,7 +490,7 @@ export async function getResumeData(
       if (process.env.DEBUG) {
         console.log(
-          `[DEBUG] conversations.messages.list(default, agent_id=${agent.id}) returned ${messages.length} messages`,
+          `[DEBUG] conversations.messages.list(${agent.id}) returned ${messages.length} messages`,
         );
       }
     } catch (backfillError) {
src/cli/App.tsx
@@ -1477,16 +1477,25 @@ export default function App({
   const [currentToolsetPreference, setCurrentToolsetPreference] =
     useState<ToolsetPreference>("auto");
   const [llmConfig, setLlmConfig] = useState<LlmConfig | null>(null);
-  const [hasConversationModelOverride, setHasConversationModelOverride] =
-    useState(false);
+  // Keep state + ref synchronized so async callbacks (e.g. syncAgentState) never
+  // read a stale value and accidentally clobber conversation-scoped overrides.
+  const [
+    hasConversationModelOverride,
+    setHasConversationModelOverride,
+    hasConversationModelOverrideRef,
+  ] = useSyncedState(false);
   const llmConfigRef = useRef(llmConfig);
   useEffect(() => {
     llmConfigRef.current = llmConfig;
   }, [llmConfig]);
-  const hasConversationModelOverrideRef = useRef(hasConversationModelOverride);
-  useEffect(() => {
-    hasConversationModelOverrideRef.current = hasConversationModelOverride;
-  }, [hasConversationModelOverride]);

+  // Cache the conversation's model_settings when a conversation-scoped override is active.
+  // On resume, llm_config may omit reasoning_effort even when the conversation model_settings
+  // includes it; this snapshot prevents the footer reasoning tag from going missing.
+  const [
+    conversationOverrideModelSettings,
+    setConversationOverrideModelSettings,
+  ] = useState<AgentState["model_settings"] | null>(null);
   const agentStateRef = useRef(agentState);
   useEffect(() => {
     agentStateRef.current = agentState;
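The hunk above replaces the separate useState/useRef/useEffect wiring with a useSyncedState helper that returns the value, the setter, and an always-current ref as one tuple. The helper's implementation is not part of this diff; a minimal sketch of what such a hook could look like, with the name and tuple shape taken from the diff and the body assumed:

import { useCallback, useRef, useState } from "react";
import type { MutableRefObject } from "react";

// Sketch only: the real useSyncedState in this repo is not shown in the diff.
// Returns [value, setValue, ref] where ref.current is written synchronously in
// the setter, so async callbacks never observe a stale value between renders.
export function useSyncedState<T>(
  initial: T,
): [T, (next: T) => void, MutableRefObject<T>] {
  const [value, setValue] = useState(initial);
  const ref = useRef(initial);
  const setSynced = useCallback((next: T) => {
    ref.current = next; // update the ref before React re-renders
    setValue(next);
  }, []);
  return [value, setSynced, ref];
}

The design point is that ref.current is written inside the setter rather than in a useEffect after the next render, so an async callback that fires in between (such as the syncAgentState path named in the comment) never reads a stale flag.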
@@ -1509,12 +1518,22 @@ export default function App({
         ? `${llmConfig.model_endpoint_type}/${llmConfig.model}`
         : (llmConfig?.model ?? null)) ||
     null;

+  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
+  // When a conversation override is active, the server may still return an agent llm_config
+  // with reasoning_effort="none"; prefer the conversation model_settings snapshot.
+  const effectiveModelSettings = hasConversationModelOverride
+    ? conversationOverrideModelSettings
+    : agentState?.model_settings;
+  const derivedReasoningEffort: ModelReasoningEffort | null =
+    deriveReasoningEffort(effectiveModelSettings, llmConfig);
+
   // Use tier-aware resolution so the display matches the agent's reasoning effort
   // (e.g. "GPT-5.3-Codex" not just "GPT-5" for the first match).
   const currentModelDisplay = useMemo(() => {
     if (!currentModelLabel) return null;
     const info = getModelInfoForLlmConfig(currentModelLabel, {
-      reasoning_effort: llmConfig?.reasoning_effort ?? null,
+      reasoning_effort: derivedReasoningEffort ?? null,
       enable_reasoner:
         (llmConfig as { enable_reasoner?: boolean | null })?.enable_reasoner ??
         null,
@@ -1527,16 +1546,10 @@ export default function App({
       currentModelLabel.split("/").pop() ??
       null
     );
-  }, [currentModelLabel, llmConfig]);
+  }, [currentModelLabel, derivedReasoningEffort, llmConfig]);
   const currentModelProvider = llmConfig?.provider_name ?? null;
-  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
-  // Some providers may omit explicit effort for default tiers (e.g., Sonnet 4.6 high),
-  // so fall back to the selected model preset when needed.
-  const effectiveModelSettings = hasConversationModelOverride
-    ? undefined
-    : agentState?.model_settings;
   const currentReasoningEffort: ModelReasoningEffort | null =
-    deriveReasoningEffort(effectiveModelSettings, llmConfig) ??
+    derivedReasoningEffort ??
     inferReasoningEffortFromModelPreset(currentModelId, currentModelLabel);

   // Billing tier for conditional UI and error context (fetched once on mount)
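Both of the hunks above route through deriveReasoningEffort(modelSettings, llmConfig), treating model_settings as canonical and llm_config as a legacy fallback. Its implementation is not shown in this commit; a plausible sketch of that precedence follows (types simplified, and the union members other than "none" and "high" are assumptions; the real types live elsewhere in the repo):

// Hypothetical, simplified shapes inferred from usage in this diff.
type ModelReasoningEffort = "none" | "low" | "medium" | "high";
type ModelSettings = { reasoning_effort?: ModelReasoningEffort | null };
type LlmConfig = { reasoning_effort?: ModelReasoningEffort | null };

// model_settings is canonical; llm_config is only a legacy fallback.
function deriveReasoningEffort(
  modelSettings: ModelSettings | null | undefined,
  llmConfig: LlmConfig | null,
): ModelReasoningEffort | null {
  return (
    modelSettings?.reasoning_effort ?? llmConfig?.reasoning_effort ?? null
  );
}

With that precedence, feeding the conversation's model_settings snapshot in as the first argument (as the new effectiveModelSettings does) is what stops a stale agent llm_config with reasoning_effort="none" from winning while an override is active.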
@@ -3266,6 +3279,7 @@ export default function App({
   }, [loadingState, agentId]);

   // Keep effective model state in sync with the active conversation override.
+  // biome-ignore lint/correctness/useExhaustiveDependencies: ref.current is intentionally read dynamically
   useEffect(() => {
     if (
       loadingState !== "ready" ||
@@ -3283,6 +3297,7 @@ export default function App({
         agentState.model ??
         buildModelHandleFromLlmConfig(agentState.llm_config);
       setHasConversationModelOverride(false);
+      setConversationOverrideModelSettings(null);
       setLlmConfig(agentState.llm_config);
       setCurrentModelHandle(agentModelHandle ?? null);

@@ -3351,6 +3366,7 @@ export default function App({
       );

       setHasConversationModelOverride(true);
+      setConversationOverrideModelSettings(conversationModelSettings ?? null);
       setCurrentModelHandle(effectiveModelHandle);
       const modelInfo = getModelInfoForLlmConfig(effectiveModelHandle, {
         reasoning_effort: reasoningEffort,
@@ -3391,8 +3407,15 @@ export default function App({
     return () => {
       cancelled = true;
     };
-  }, [agentId, agentState, conversationId, loadingState]);
+  }, [
+    agentId,
+    agentState,
+    conversationId,
+    loadingState,
+    setHasConversationModelOverride,
+  ]);

+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
   const maybeCarryOverActiveConversationModel = useCallback(
     async (targetConversationId: string) => {
       if (!hasConversationModelOverrideRef.current) {
@@ -3663,6 +3686,7 @@ export default function App({
   // removed. Git-backed memory uses standard git merge conflict resolution via the agent.

   // Core streaming function - iterative loop that processes conversation turns
+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
   const processConversation = useCallback(
     async (
       initialInput: Array<MessageCreate | ApprovalCreate>,
@@ -5841,12 +5865,14 @@ export default function App({
     // causing CONFLICT on the next user message.
     getClient()
       .then((client) => {
-        if (conversationIdRef.current === "default") {
-          return client.conversations.cancel("default", {
-            agent_id: agentIdRef.current,
-          });
-        }
-        return client.conversations.cancel(conversationIdRef.current);
+        const cancelConversationId =
+          conversationIdRef.current === "default"
+            ? agentIdRef.current
+            : conversationIdRef.current;
+        if (!cancelConversationId || cancelConversationId === "loading") {
+          return;
+        }
+        return client.conversations.cancel(cancelConversationId);
       })
       .catch(() => {
         // Silently ignore - cancellation already happened client-side
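The cancel guard introduced here reappears verbatim at two more call sites below (the second interrupt path and the awaited variant). Factored out, the shared logic would be roughly the following; resolveCancelConversationId is a hypothetical helper, not something this diff adds:

// Hypothetical extraction of the guard repeated at three call sites in this commit.
function resolveCancelConversationId(
  conversationId: string | null,
  agentId: string,
): string | null {
  // "default" is a virtual sentinel for the agent's primary history, so the
  // conversations endpoint expects the agent id in its place.
  const resolved = conversationId === "default" ? agentId : conversationId;
  // Skip cancellation while the conversation id is still unresolved.
  if (!resolved || resolved === "loading") return null;
  return resolved;
}

Note the behavioral fix folded into the inline version: the old code passed the literal "default" to conversations.cancel with agent_id as an option, whereas the new code resolves to the agent id up front and bails out entirely on "loading".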
@@ -5961,12 +5987,14 @@ export default function App({
     // Don't wait for it or show errors since user already got feedback
     getClient()
       .then((client) => {
-        if (conversationIdRef.current === "default") {
-          return client.conversations.cancel("default", {
-            agent_id: agentIdRef.current,
-          });
-        }
-        return client.conversations.cancel(conversationIdRef.current);
+        const cancelConversationId =
+          conversationIdRef.current === "default"
+            ? agentIdRef.current
+            : conversationIdRef.current;
+        if (!cancelConversationId || cancelConversationId === "loading") {
+          return;
+        }
+        return client.conversations.cancel(cancelConversationId);
       })
       .catch(() => {
         // Silently ignore - cancellation already happened client-side
@@ -5986,13 +6014,14 @@ export default function App({
     setInterruptRequested(true);
     try {
       const client = await getClient();
-      if (conversationIdRef.current === "default") {
-        await client.conversations.cancel("default", {
-          agent_id: agentIdRef.current,
-        });
-      } else {
-        await client.conversations.cancel(conversationIdRef.current);
-      }
+      const cancelConversationId =
+        conversationIdRef.current === "default"
+          ? agentIdRef.current
+          : conversationIdRef.current;
+      if (!cancelConversationId || cancelConversationId === "loading") {
+        return;
+      }
+      await client.conversations.cancel(cancelConversationId);

       if (abortControllerRef.current) {
         abortControllerRef.current.abort();
@@ -11163,15 +11192,24 @@ ${SYSTEM_REMINDER_CLOSE}
            phase: "running",
          });

-          // Persist model change to the backend.
-          // For real conversations, update the conversation-scoped override.
-          // For "default" (virtual sentinel with no real conversation object),
-          // update the agent itself so the model sticks across messages.
+          // "default" is a virtual sentinel for the agent's primary history, not a
+          // real conversation object. When active, model changes must update the agent
+          // itself (otherwise the next agent sync will snap back).
+          const isDefaultConversation = conversationIdRef.current === "default";
          let conversationModelSettings:
            | AgentState["model_settings"]
            | null
            | undefined;
-          if (conversationIdRef.current !== "default") {
+          let updatedAgent: AgentState | null = null;
+          if (isDefaultConversation) {
+            const { updateAgentLLMConfig } = await import("../agent/modify");
+            updatedAgent = await updateAgentLLMConfig(
+              agentIdRef.current,
+              modelHandle,
+              model.updateArgs,
+            );
+            conversationModelSettings = updatedAgent?.model_settings;
+          } else {
            const { updateConversationLLMConfig } = await import(
              "../agent/modify"
            );
@@ -11185,14 +11223,6 @@ ${SYSTEM_REMINDER_CLOSE}
                model_settings?: AgentState["model_settings"] | null;
              }
            ).model_settings;
-          } else {
-            const { updateAgentLLMConfig } = await import("../agent/modify");
-            const updatedAgent = await updateAgentLLMConfig(
-              agentId,
-              modelHandle,
-              model.updateArgs,
-            );
-            conversationModelSettings = updatedAgent.model_settings;
          }

          // The API may not echo reasoning_effort back, so populate it from
@@ -11206,9 +11236,23 @@ ${SYSTEM_REMINDER_CLOSE}
              llmConfigRef.current,
            ) ?? null);

-          setHasConversationModelOverride(true);
+          if (isDefaultConversation) {
+            setHasConversationModelOverride(false);
+            setConversationOverrideModelSettings(null);
+            if (updatedAgent) {
+              setAgentState(updatedAgent);
+            }
+          } else {
+            setHasConversationModelOverride(true);
+            setConversationOverrideModelSettings(
+              conversationModelSettings ?? null,
+            );
+          }

          setLlmConfig({
-            ...(llmConfigRef.current ?? ({} as LlmConfig)),
+            ...(updatedAgent?.llm_config ??
+              llmConfigRef.current ??
+              ({} as LlmConfig)),
            ...mapHandleToLlmConfigPatch(modelHandle),
            ...(typeof resolvedReasoningEffort === "string"
              ? {
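The setLlmConfig merge above layers the server's llm_config under mapHandleToLlmConfigPatch(modelHandle). That helper is outside this diff; since handles render as `${model_endpoint_type}/${model}` elsewhere in the file, a sketch consistent with that format (an assumption, not the repo's actual implementation) is:

// Hypothetical sketch of a mapHandleToLlmConfigPatch-style helper, based only
// on the "provider/model" handle format visible elsewhere in this diff.
function mapHandleToLlmConfigPatch(
  handle: string,
): { model_endpoint_type?: string; model: string } {
  const slash = handle.indexOf("/");
  if (slash === -1) return { model: handle };
  return {
    model_endpoint_type: handle.slice(0, slash),
    model: handle.slice(slash + 1),
  };
}

Spreading the server-returned llm_config first and the handle patch after it means the user's chosen model always wins over whatever the update call echoed back.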
@@ -11306,6 +11350,7 @@ ${SYSTEM_REMINDER_CLOSE}
      maybeRecordToolsetChangeReminder,
      resetPendingReasoningCycle,
      withCommandLock,
+      setHasConversationModelOverride,
    ],
  );

@@ -11956,13 +12001,25 @@ ${SYSTEM_REMINDER_CLOSE}
      const cmd = commandRunner.start("/reasoning", "Setting reasoning...");

      try {
-        // "default" is a virtual sentinel, not a real conversation object —
-        // skip the API call and fall through with undefined model_settings.
+        // "default" is a virtual sentinel for the agent's primary history. When
+        // active, reasoning tier changes must update the agent itself so the next
+        // agent sync doesn't snap back.
+        const isDefaultConversation = conversationIdRef.current === "default";
        let conversationModelSettings:
          | AgentState["model_settings"]
          | null
          | undefined;
-        if (conversationIdRef.current !== "default") {
+        let updatedAgent: AgentState | null = null;
+        if (isDefaultConversation) {
+          const { updateAgentLLMConfig } = await import("../agent/modify");
+          updatedAgent = await updateAgentLLMConfig(
+            agentIdRef.current,
+            desired.modelHandle,
+            {
+              reasoning_effort: desired.effort,
+            },
+          );
+        } else {
          const { updateConversationLLMConfig } = await import(
            "../agent/modify"
          );
@@ -11981,14 +12038,30 @@ ${SYSTEM_REMINDER_CLOSE}
        }
        const resolvedReasoningEffort =
          deriveReasoningEffort(
-            conversationModelSettings,
+            isDefaultConversation
+              ? (updatedAgent?.model_settings ?? null)
+              : conversationModelSettings,
            llmConfigRef.current,
          ) ?? desired.effort;

-        setHasConversationModelOverride(true);
+        if (isDefaultConversation) {
+          setHasConversationModelOverride(false);
+          setConversationOverrideModelSettings(null);
+          if (updatedAgent) {
+            setAgentState(updatedAgent);
+          }
+        } else {
+          setHasConversationModelOverride(true);
+          setConversationOverrideModelSettings(
+            conversationModelSettings ?? null,
+          );
+        }

+        // The API may not echo reasoning_effort back; preserve explicit desired effort.
        setLlmConfig({
-          ...(llmConfigRef.current ?? ({} as LlmConfig)),
+          ...(updatedAgent?.llm_config ??
+            llmConfigRef.current ??
+            ({} as LlmConfig)),
          ...mapHandleToLlmConfigPatch(desired.modelHandle),
          reasoning_effort: resolvedReasoningEffort as ModelReasoningEffort,
        });
@@ -12045,8 +12118,15 @@ ${SYSTEM_REMINDER_CLOSE}
      } finally {
        reasoningCycleInFlightRef.current = false;
      }
-    }, [agentId, commandRunner, isAgentBusy, withCommandLock]);
+    }, [
+      agentId,
+      commandRunner,
+      isAgentBusy,
+      withCommandLock,
+      setHasConversationModelOverride,
+    ]);

+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
  const handleCycleReasoningEffort = useCallback(() => {
    void (async () => {
      if (!agentId) return;

@@ -76,9 +76,15 @@ export function createCommandRunner({
  const handle: CommandHandle = {
    id,
    input,
-    update: null!,
-    finish: null!,
-    fail: null!,
+    // Placeholders are overwritten below before the handle is returned.
+    update: (_update: CommandUpdate) => {},
+    finish: (
+      _output: string,
+      _success?: boolean,
+      _dimOutput?: boolean,
+      _preformatted?: boolean,
+    ) => {},
+    fail: (_output: string) => {},
  };

  const update = (updateData: CommandUpdate) => {
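This hunk swaps null! non-null assertions for typed no-op placeholders. The difference only shows if the handle escapes before update/finish/fail are reassigned: the assertion version throws at the call site, the no-op version degrades silently. A condensed illustration with hypothetical types, not code from the diff:

type Handle = { fire: (msg: string) => void };

// With a non-null assertion the object lies about its shape;
// an early call throws at runtime:
const unsafe: Handle = { fire: null! };
// unsafe.fire("boom"); // TypeError: unsafe.fire is not a function

// With a typed no-op placeholder an early call is harmless:
const safe: Handle = { fire: (_msg: string) => {} };
safe.fire("ignored"); // does nothing yet
safe.fire = (msg) => console.log(msg); // real implementation assigned later
safe.fire("logged");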
@@ -243,9 +243,9 @@ export function ConversationSelector({
    if (!afterCursor) {
      try {
        const defaultMessages = await client.conversations.messages.list(
-          "default",
+          // Default conversation is represented by the agent id at the conversations endpoint.
+          agentId,
          {
-            agent_id: agentId,
            limit: 20,
            order: "desc",
          },
@@ -159,8 +159,8 @@ export async function runMessagesSubcommand(argv: string[]): Promise<number> {
    return 1;
  }

-  const response = await client.conversations.messages.list("default", {
-    agent_id: agentId,
+  // Default conversation is represented by the agent id at the conversations endpoint.
+  const response = await client.conversations.messages.list(agentId, {
    limit: parseLimit(parsed.values.limit, 20),
    after: parsed.values.after,
    before: parsed.values.before,
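The three call sites above (resume backfill, ConversationSelector, and this messages subcommand) all follow the same route change: the default conversation is now addressed by the agent id itself instead of a "default" sentinel plus an agent_id query parameter. Restating the hunks as before/after shapes, with client being the SDK client already used in the diff:

// Before: sentinel path segment plus agent_id query parameter.
await client.conversations.messages.list("default", {
  agent_id: agentId,
  limit: 20,
  order: "desc",
});

// After: the agent id itself names the default conversation.
await client.conversations.messages.list(agentId, {
  limit: 20,
  order: "desc",
});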
@@ -71,7 +71,7 @@ describe("model preset refresh wiring", () => {
    expect(updateSegment).not.toContain("client.agents.update(");
  });

-  test("/model handler updates conversation model and falls back to agent for default", () => {
+  test("/model handler updates conversation model (default updates agent)", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");

@@ -85,11 +85,9 @@ describe("model preset refresh wiring", () => {
    const segment = source.slice(start, end);

    expect(segment).toContain("updateConversationLLMConfig(");
-    expect(segment).toContain("conversationIdRef.current");
-    // For the "default" virtual conversation (no real conversation object),
-    // the handler falls back to updating the agent directly.
    expect(segment).toContain("updateAgentLLMConfig(");
-    expect(segment).toContain('conversationIdRef.current !== "default"');
+    expect(segment).toContain("conversationIdRef.current");
+    expect(segment).toContain('conversationIdRef.current === "default"');
  });

  test("App defines helper to carry over active conversation model", () => {
@@ -116,6 +114,28 @@ describe("model preset refresh wiring", () => {
    );
  });

+  test("conversation model override flag is synced for async callbacks", () => {
+    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
+    const source = readFileSync(path, "utf-8");
+
+    // The override flag must be safe to read inside async callbacks (e.g. the
+    // first streamed chunk sync) without waiting for a render/effect.
+    expect(source).toMatch(
+      /\[\s*hasConversationModelOverride,\s*setHasConversationModelOverride,\s*hasConversationModelOverrideRef,\s*\]\s*=\s*useSyncedState\(false\)/,
+    );
+  });
+
+  test("reasoning tier prefers conversation override model_settings", () => {
+    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
+    const source = readFileSync(path, "utf-8");
+
+    // When a conversation override is active, prefer the conversation model_settings
+    // snapshot when deriving reasoning effort (not the base agent llm_config).
+    expect(source).toMatch(
+      /const effectiveModelSettings = hasConversationModelOverride\s*\?\s*conversationOverrideModelSettings\s*:\s*agentState\?\.model_settings;/,
+    );
+  });
+
  test("new conversation flows reapply active conversation model before switching", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");
@@ -46,7 +46,7 @@ describe("reasoning tier cycle wiring", () => {
    expect(callbackBlocks.length).toBeGreaterThanOrEqual(2);
  });

-  test("flush uses conversation-scoped reasoning updates", () => {
+  test("flush uses conversation-scoped reasoning updates (default updates agent)", () => {
    const appPath = fileURLToPath(
      new URL("../../cli/App.tsx", import.meta.url),
    );
@@ -64,8 +64,9 @@ describe("reasoning tier cycle wiring", () => {

    const segment = source.slice(start, end);
    expect(segment).toContain("updateConversationLLMConfig(");
+    expect(segment).toContain("updateAgentLLMConfig(");
    expect(segment).toContain("conversationIdRef.current");
-    expect(segment).not.toContain("updateAgentLLMConfig(");
+    expect(segment).toContain('conversationIdRef.current === "default"');
  });

  test("tab-based reasoning cycling is opt-in only", () => {