fix(tui): keep conversation model overrides sticky (#1238)

Co-authored-by: Letta Code <noreply@letta.com>
2026-03-05 19:38:36 -05:00
parent a185d567ba
commit 5a6d804069
7 changed files with 178 additions and 72 deletions
--- a/src/agent/check-approval.ts
+++ b/src/agent/check-approval.ts
@@ -474,15 +474,14 @@ export async function getResumeData(
      const retrievedMessages = await client.messages.retrieve(lastInContextId);
      // Fetch message history for backfill through the default conversation route.
-      // For default conversation, pass agent_id as query parameter.
+      // Default conversation is represented by the agent id at the conversations endpoint.
      // Wrapped in try/catch so backfill failures don't crash the CLI (e.g., older servers
      // may not support this pattern)
      if (includeMessageHistory && isBackfillEnabled()) {
        try {
          const messagesPage = await client.conversations.messages.list(
-            "default",
+            agent.id,
            {
              agent_id: agent.id,
              limit: BACKFILL_PAGE_LIMIT,
              order: "desc",
            },
@@ -491,7 +490,7 @@ export async function getResumeData(
          if (process.env.DEBUG) {
            console.log(
-              `[DEBUG] conversations.messages.list(default, agent_id=${agent.id}) returned ${messages.length} messages`,
+              `[DEBUG] conversations.messages.list(${agent.id}) returned ${messages.length} messages`,
            );
          }
        } catch (backfillError) {
--- a/src/cli/App.tsx
+++ b/src/cli/App.tsx
@@ -1477,16 +1477,25 @@ export default function App({
  const [currentToolsetPreference, setCurrentToolsetPreference] =
    useState<ToolsetPreference>("auto");
  const [llmConfig, setLlmConfig] = useState<LlmConfig | null>(null);
-  const [hasConversationModelOverride, setHasConversationModelOverride] =
+  // Keep state + ref synchronized so async callbacks (e.g. syncAgentState) never
-    useState(false);
+  // read a stale value and accidentally clobber conversation-scoped overrides.
  const [
    hasConversationModelOverride,
    setHasConversationModelOverride,
    hasConversationModelOverrideRef,
  ] = useSyncedState(false);
  const llmConfigRef = useRef(llmConfig);
  useEffect(() => {
    llmConfigRef.current = llmConfig;
  }, [llmConfig]);
-  const hasConversationModelOverrideRef = useRef(hasConversationModelOverride);
+
-  useEffect(() => {
+  // Cache the conversation's model_settings when a conversation-scoped override is active.
-    hasConversationModelOverrideRef.current = hasConversationModelOverride;
+  // On resume, llm_config may omit reasoning_effort even when the conversation model_settings
-  }, [hasConversationModelOverride]);
+  // includes it; this snapshot keeps the footer reasoning tag from going missing.
  const [
    conversationOverrideModelSettings,
    setConversationOverrideModelSettings,
  ] = useState<AgentState["model_settings"] | null>(null);
  const agentStateRef = useRef(agentState);
  useEffect(() => {
    agentStateRef.current = agentState;
@@ -1509,12 +1518,22 @@ export default function App({
      ? `${llmConfig.model_endpoint_type}/${llmConfig.model}`
      : (llmConfig?.model ?? null)) ||
    null;
  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
  // When a conversation override is active, the server may still return an agent llm_config
  // with reasoning_effort="none"; prefer the conversation model_settings snapshot.
  const effectiveModelSettings = hasConversationModelOverride
    ? conversationOverrideModelSettings
    : agentState?.model_settings;
  const derivedReasoningEffort: ModelReasoningEffort | null =
    deriveReasoningEffort(effectiveModelSettings, llmConfig);
  // Use tier-aware resolution so the display matches the agent's reasoning effort
  // (e.g. "GPT-5.3-Codex" not just "GPT-5" for the first match).
  const currentModelDisplay = useMemo(() => {
    if (!currentModelLabel) return null;
    const info = getModelInfoForLlmConfig(currentModelLabel, {
-      reasoning_effort: llmConfig?.reasoning_effort ?? null,
+      reasoning_effort: derivedReasoningEffort ?? null,
      enable_reasoner:
        (llmConfig as { enable_reasoner?: boolean | null })?.enable_reasoner ??
        null,
@@ -1527,16 +1546,10 @@ export default function App({
      currentModelLabel.split("/").pop() ??
      null
    );
-  }, [currentModelLabel, llmConfig]);
+  }, [currentModelLabel, derivedReasoningEffort, llmConfig]);
  const currentModelProvider = llmConfig?.provider_name ?? null;
  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
  // Some providers may omit explicit effort for default tiers (e.g., Sonnet 4.6 high),
  // so fall back to the selected model preset when needed.
  const effectiveModelSettings = hasConversationModelOverride
    ? undefined
    : agentState?.model_settings;
  const currentReasoningEffort: ModelReasoningEffort | null =
-    deriveReasoningEffort(effectiveModelSettings, llmConfig) ??
+    derivedReasoningEffort ??
    inferReasoningEffortFromModelPreset(currentModelId, currentModelLabel);
  // Billing tier for conditional UI and error context (fetched once on mount)
@@ -3266,6 +3279,7 @@ export default function App({
  }, [loadingState, agentId]);
  // Keep effective model state in sync with the active conversation override.
  // biome-ignore lint/correctness/useExhaustiveDependencies: ref.current is intentionally read dynamically
  useEffect(() => {
    if (
      loadingState !== "ready" ||
@@ -3283,6 +3297,7 @@ export default function App({
        agentState.model ??
        buildModelHandleFromLlmConfig(agentState.llm_config);
      setHasConversationModelOverride(false);
      setConversationOverrideModelSettings(null);
      setLlmConfig(agentState.llm_config);
      setCurrentModelHandle(agentModelHandle ?? null);
@@ -3351,6 +3366,7 @@ export default function App({
        );
        setHasConversationModelOverride(true);
        setConversationOverrideModelSettings(conversationModelSettings ?? null);
        setCurrentModelHandle(effectiveModelHandle);
        const modelInfo = getModelInfoForLlmConfig(effectiveModelHandle, {
          reasoning_effort: reasoningEffort,
@@ -3391,8 +3407,15 @@ export default function App({
    return () => {
      cancelled = true;
    };
-  }, [agentId, agentState, conversationId, loadingState]);
+  }, [
    agentId,
    agentState,
    conversationId,
    loadingState,
    setHasConversationModelOverride,
  ]);
  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
  const maybeCarryOverActiveConversationModel = useCallback(
    async (targetConversationId: string) => {
      if (!hasConversationModelOverrideRef.current) {
@@ -3663,6 +3686,7 @@ export default function App({
  // removed. Git-backed memory uses standard git merge conflict resolution via the agent.
  // Core streaming function - iterative loop that processes conversation turns
  // biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
  const processConversation = useCallback(
    async (
      initialInput: Array<MessageCreate | ApprovalCreate>,
@@ -5841,12 +5865,14 @@ export default function App({
      // causing CONFLICT on the next user message.
      getClient()
        .then((client) => {
-          if (conversationIdRef.current === "default") {
+          const cancelConversationId =
-            return client.conversations.cancel("default", {
+            conversationIdRef.current === "default"
-              agent_id: agentIdRef.current,
+              ? agentIdRef.current
-            });
+              : conversationIdRef.current;
          if (!cancelConversationId || cancelConversationId === "loading") {
            return;
          }
-          return client.conversations.cancel(conversationIdRef.current);
+          return client.conversations.cancel(cancelConversationId);
        })
        .catch(() => {
          // Silently ignore - cancellation already happened client-side
@@ -5961,12 +5987,14 @@ export default function App({
      // Don't wait for it or show errors since user already got feedback
      getClient()
        .then((client) => {
-          if (conversationIdRef.current === "default") {
+          const cancelConversationId =
-            return client.conversations.cancel("default", {
+            conversationIdRef.current === "default"
-              agent_id: agentIdRef.current,
+              ? agentIdRef.current
-            });
+              : conversationIdRef.current;
          if (!cancelConversationId || cancelConversationId === "loading") {
            return;
          }
-          return client.conversations.cancel(conversationIdRef.current);
+          return client.conversations.cancel(cancelConversationId);
        })
        .catch(() => {
          // Silently ignore - cancellation already happened client-side
@@ -5986,13 +6014,14 @@ export default function App({
      setInterruptRequested(true);
      try {
        const client = await getClient();
-        if (conversationIdRef.current === "default") {
+        const cancelConversationId =
-          await client.conversations.cancel("default", {
+          conversationIdRef.current === "default"
-            agent_id: agentIdRef.current,
+            ? agentIdRef.current
-          });
+            : conversationIdRef.current;
-        } else {
+        if (!cancelConversationId || cancelConversationId === "loading") {
-          await client.conversations.cancel(conversationIdRef.current);
+          return;
        }
        await client.conversations.cancel(cancelConversationId);
        if (abortControllerRef.current) {
          abortControllerRef.current.abort();
@@ -11163,15 +11192,24 @@ ${SYSTEM_REMINDER_CLOSE}
            phase: "running",
          });
-          // Persist model change to the backend.
+          // "default" is a virtual sentinel for the agent's primary history, not a
-          // For real conversations, update the conversation-scoped override.
+          // real conversation object. When active, model changes must update the agent
-          // For "default" (virtual sentinel with no real conversation object),
+          // itself (otherwise the next agent sync will snap back).
-          // update the agent itself so the model sticks across messages.
+          const isDefaultConversation = conversationIdRef.current === "default";
          let conversationModelSettings:
            | AgentState["model_settings"]
            | null
            | undefined;
-          if (conversationIdRef.current !== "default") {
+          let updatedAgent: AgentState | null = null;
          if (isDefaultConversation) {
            const { updateAgentLLMConfig } = await import("../agent/modify");
            updatedAgent = await updateAgentLLMConfig(
              agentIdRef.current,
              modelHandle,
              model.updateArgs,
            );
            conversationModelSettings = updatedAgent?.model_settings;
          } else {
            const { updateConversationLLMConfig } = await import(
              "../agent/modify"
            );
@@ -11185,14 +11223,6 @@ ${SYSTEM_REMINDER_CLOSE}
                model_settings?: AgentState["model_settings"] | null;
              }
            ).model_settings;
          } else {
            const { updateAgentLLMConfig } = await import("../agent/modify");
            const updatedAgent = await updateAgentLLMConfig(
              agentId,
              modelHandle,
              model.updateArgs,
            );
            conversationModelSettings = updatedAgent.model_settings;
          }
          // The API may not echo reasoning_effort back, so populate it from
@@ -11206,9 +11236,23 @@ ${SYSTEM_REMINDER_CLOSE}
                  llmConfigRef.current,
                ) ?? null);
-          setHasConversationModelOverride(true);
+          if (isDefaultConversation) {
            setHasConversationModelOverride(false);
            setConversationOverrideModelSettings(null);
            if (updatedAgent) {
              setAgentState(updatedAgent);
            }
          } else {
            setHasConversationModelOverride(true);
            setConversationOverrideModelSettings(
              conversationModelSettings ?? null,
            );
          }
          setLlmConfig({
-            ...(llmConfigRef.current ?? ({} as LlmConfig)),
+            ...(updatedAgent?.llm_config ??
              llmConfigRef.current ??
              ({} as LlmConfig)),
            ...mapHandleToLlmConfigPatch(modelHandle),
            ...(typeof resolvedReasoningEffort === "string"
              ? {
@@ -11306,6 +11350,7 @@ ${SYSTEM_REMINDER_CLOSE}
      maybeRecordToolsetChangeReminder,
      resetPendingReasoningCycle,
      withCommandLock,
      setHasConversationModelOverride,
    ],
  );
@@ -11956,13 +12001,25 @@ ${SYSTEM_REMINDER_CLOSE}
        const cmd = commandRunner.start("/reasoning", "Setting reasoning...");
        try {
-          // "default" is a virtual sentinel, not a real conversation object —
+          // "default" is a virtual sentinel for the agent's primary history. When
-          // skip the API call and fall through with undefined model_settings.
+          // active, reasoning tier changes must update the agent itself so the next
          // agent sync doesn't snap back.
          const isDefaultConversation = conversationIdRef.current === "default";
          let conversationModelSettings:
            | AgentState["model_settings"]
            | null
            | undefined;
-          if (conversationIdRef.current !== "default") {
+          let updatedAgent: AgentState | null = null;
          if (isDefaultConversation) {
            const { updateAgentLLMConfig } = await import("../agent/modify");
            updatedAgent = await updateAgentLLMConfig(
              agentIdRef.current,
              desired.modelHandle,
              {
                reasoning_effort: desired.effort,
              },
            );
          } else {
            const { updateConversationLLMConfig } = await import(
              "../agent/modify"
            );
@@ -11981,14 +12038,30 @@ ${SYSTEM_REMINDER_CLOSE}
          }
          const resolvedReasoningEffort =
            deriveReasoningEffort(
-              conversationModelSettings,
+              isDefaultConversation
                ? (updatedAgent?.model_settings ?? null)
                : conversationModelSettings,
              llmConfigRef.current,
            ) ?? desired.effort;
-          setHasConversationModelOverride(true);
+          if (isDefaultConversation) {
            setHasConversationModelOverride(false);
            setConversationOverrideModelSettings(null);
            if (updatedAgent) {
              setAgentState(updatedAgent);
            }
          } else {
            setHasConversationModelOverride(true);
            setConversationOverrideModelSettings(
              conversationModelSettings ?? null,
            );
          }
          // The API may not echo reasoning_effort back; preserve explicit desired effort.
          setLlmConfig({
-            ...(llmConfigRef.current ?? ({} as LlmConfig)),
+            ...(updatedAgent?.llm_config ??
              llmConfigRef.current ??
              ({} as LlmConfig)),
            ...mapHandleToLlmConfigPatch(desired.modelHandle),
            reasoning_effort: resolvedReasoningEffort as ModelReasoningEffort,
          });
@@ -12045,8 +12118,15 @@ ${SYSTEM_REMINDER_CLOSE}
    } finally {
      reasoningCycleInFlightRef.current = false;
    }
-  }, [agentId, commandRunner, isAgentBusy, withCommandLock]);
+  }, [
    agentId,
    commandRunner,
    isAgentBusy,
    withCommandLock,
    setHasConversationModelOverride,
  ]);
  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
  const handleCycleReasoningEffort = useCallback(() => {
    void (async () => {
      if (!agentId) return;
--- a/src/cli/commands/runner.ts
+++ b/src/cli/commands/runner.ts
@@ -76,9 +76,15 @@ export function createCommandRunner({
    const handle: CommandHandle = {
      id,
      input,
-      update: null!,
+      // Placeholders are overwritten below before the handle is returned.
-      finish: null!,
+      update: (_update: CommandUpdate) => {},
-      fail: null!,
+      finish: (
        _output: string,
        _success?: boolean,
        _dimOutput?: boolean,
        _preformatted?: boolean,
      ) => {},
      fail: (_output: string) => {},
    };
    const update = (updateData: CommandUpdate) => {
--- a/src/cli/components/ConversationSelector.tsx
+++ b/src/cli/components/ConversationSelector.tsx
@@ -243,9 +243,9 @@ export function ConversationSelector({
        if (!afterCursor) {
          try {
            const defaultMessages = await client.conversations.messages.list(
-              "default",
+              // Default conversation is represented by the agent id at the conversations endpoint.
              agentId,
              {
                agent_id: agentId,
                limit: 20,
                order: "desc",
              },
--- a/src/cli/subcommands/messages.ts
+++ b/src/cli/subcommands/messages.ts
@@ -159,8 +159,8 @@ export async function runMessagesSubcommand(argv: string[]): Promise<number> {
        return 1;
      }
-      const response = await client.conversations.messages.list("default", {
+      // Default conversation is represented by the agent id at the conversations endpoint.
-        agent_id: agentId,
+      const response = await client.conversations.messages.list(agentId, {
        limit: parseLimit(parsed.values.limit, 20),
        after: parsed.values.after,
        before: parsed.values.before,
--- a/src/tests/agent/model-preset-refresh.wiring.test.ts
+++ b/src/tests/agent/model-preset-refresh.wiring.test.ts
@@ -71,7 +71,7 @@ describe("model preset refresh wiring", () => {
    expect(updateSegment).not.toContain("client.agents.update(");
  });
-  test("/model handler updates conversation model and falls back to agent for default", () => {
+  test("/model handler updates conversation model (default updates agent)", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");
@@ -85,11 +85,9 @@ describe("model preset refresh wiring", () => {
    const segment = source.slice(start, end);
    expect(segment).toContain("updateConversationLLMConfig(");
    expect(segment).toContain("conversationIdRef.current");
    // For the "default" virtual conversation (no real conversation object),
    // the handler falls back to updating the agent directly.
    expect(segment).toContain("updateAgentLLMConfig(");
-    expect(segment).toContain('conversationIdRef.current !== "default"');
+    expect(segment).toContain("conversationIdRef.current");
    expect(segment).toContain('conversationIdRef.current === "default"');
  });
  test("App defines helper to carry over active conversation model", () => {
@@ -116,6 +114,28 @@ describe("model preset refresh wiring", () => {
    );
  });
  test("conversation model override flag is synced for async callbacks", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");
    // The override flag must be safe to read inside async callbacks (e.g. the
    // first streamed chunk sync) without waiting for a render/effect.
    expect(source).toMatch(
      /\[\s*hasConversationModelOverride,\s*setHasConversationModelOverride,\s*hasConversationModelOverrideRef,\s*\]\s*=\s*useSyncedState\(false\)/,
    );
  });
  test("reasoning tier prefers conversation override model_settings", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");
    // When a conversation override is active, prefer the conversation model_settings
    // snapshot when deriving reasoning effort (not the base agent llm_config).
    expect(source).toMatch(
      /const effectiveModelSettings = hasConversationModelOverride\s*\?\s*conversationOverrideModelSettings\s*:\s*agentState\?\.model_settings;/,
    );
  });
  test("new conversation flows reapply active conversation model before switching", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");
--- a/src/tests/cli/reasoning-cycle-wiring.test.ts
+++ b/src/tests/cli/reasoning-cycle-wiring.test.ts
@@ -46,7 +46,7 @@ describe("reasoning tier cycle wiring", () => {
    expect(callbackBlocks.length).toBeGreaterThanOrEqual(2);
  });
-  test("flush uses conversation-scoped reasoning updates", () => {
+  test("flush uses conversation-scoped reasoning updates (default updates agent)", () => {
    const appPath = fileURLToPath(
      new URL("../../cli/App.tsx", import.meta.url),
    );
@@ -64,8 +64,9 @@ describe("reasoning tier cycle wiring", () => {
    const segment = source.slice(start, end);
    expect(segment).toContain("updateConversationLLMConfig(");
    expect(segment).toContain("updateAgentLLMConfig(");
    expect(segment).toContain("conversationIdRef.current");
-    expect(segment).not.toContain("updateAgentLLMConfig(");
+    expect(segment).toContain('conversationIdRef.current === "default"');
  });
  test("tab-based reasoning cycling is opt-in only", () => {