fix(tui): keep conversation model overrides sticky (#1238)
Co-authored-by: Letta Code <noreply@letta.com>
@@ -474,15 +474,14 @@ export async function getResumeData(
   const retrievedMessages = await client.messages.retrieve(lastInContextId);

   // Fetch message history for backfill through the default conversation route.
-  // For default conversation, pass agent_id as query parameter.
+  // Default conversation is represented by the agent id at the conversations endpoint.
   // Wrapped in try/catch so backfill failures don't crash the CLI (e.g., older servers
   // may not support this pattern)
   if (includeMessageHistory && isBackfillEnabled()) {
     try {
       const messagesPage = await client.conversations.messages.list(
-        "default",
+        agent.id,
         {
-          agent_id: agent.id,
           limit: BACKFILL_PAGE_LIMIT,
           order: "desc",
         },
@@ -491,7 +490,7 @@ export async function getResumeData(
       if (process.env.DEBUG) {
         console.log(
-          `[DEBUG] conversations.messages.list(default, agent_id=${agent.id}) returned ${messages.length} messages`,
+          `[DEBUG] conversations.messages.list(${agent.id}) returned ${messages.length} messages`,
         );
       }
     } catch (backfillError) {
src/cli/App.tsx
@@ -1477,16 +1477,25 @@ export default function App({
   const [currentToolsetPreference, setCurrentToolsetPreference] =
     useState<ToolsetPreference>("auto");
   const [llmConfig, setLlmConfig] = useState<LlmConfig | null>(null);
-  const [hasConversationModelOverride, setHasConversationModelOverride] =
-    useState(false);
+  // Keep state + ref synchronized so async callbacks (e.g. syncAgentState) never
+  // read a stale value and accidentally clobber conversation-scoped overrides.
+  const [
+    hasConversationModelOverride,
+    setHasConversationModelOverride,
+    hasConversationModelOverrideRef,
+  ] = useSyncedState(false);
   const llmConfigRef = useRef(llmConfig);
   useEffect(() => {
     llmConfigRef.current = llmConfig;
   }, [llmConfig]);
-  const hasConversationModelOverrideRef = useRef(hasConversationModelOverride);
-  useEffect(() => {
-    hasConversationModelOverrideRef.current = hasConversationModelOverride;
-  }, [hasConversationModelOverride]);

+  // Cache the conversation's model_settings when a conversation-scoped override is active.
+  // On resume, llm_config may omit reasoning_effort even when the conversation model_settings
+  // includes it; this snapshot prevents the footer reasoning tag from going missing.
+  const [
+    conversationOverrideModelSettings,
+    setConversationOverrideModelSettings,
+  ] = useState<AgentState["model_settings"] | null>(null);
   const agentStateRef = useRef(agentState);
   useEffect(() => {
     agentStateRef.current = agentState;
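The hunk above replaces the separate useState/useRef/useEffect wiring with a useSyncedState helper that returns the value, the setter, and an always-current ref as one tuple. The helper's implementation is not part of this diff; a minimal sketch of what such a hook could look like, with the name and tuple shape taken from the diff and the body assumed:

import { useCallback, useRef, useState } from "react";
import type { MutableRefObject } from "react";

// Sketch only: the real useSyncedState in this repo is not shown in the diff.
// Returns [value, setValue, ref] where ref.current is written synchronously in
// the setter, so async callbacks never observe a stale value between renders.
export function useSyncedState<T>(
  initial: T,
): [T, (next: T) => void, MutableRefObject<T>] {
  const [value, setValue] = useState(initial);
  const ref = useRef(initial);
  const setSynced = useCallback((next: T) => {
    ref.current = next; // update the ref before React re-renders
    setValue(next);
  }, []);
  return [value, setSynced, ref];
}

The design point is that ref.current is written inside the setter rather than in a useEffect after the next render, so an async callback that fires in between (such as the syncAgentState path named in the comment) never reads a stale flag.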
@@ -1509,12 +1518,22 @@ export default function App({
         ? `${llmConfig.model_endpoint_type}/${llmConfig.model}`
         : (llmConfig?.model ?? null)) ||
     null;

+  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
+  // When a conversation override is active, the server may still return an agent llm_config
+  // with reasoning_effort="none"; prefer the conversation model_settings snapshot.
+  const effectiveModelSettings = hasConversationModelOverride
+    ? conversationOverrideModelSettings
+    : agentState?.model_settings;
+  const derivedReasoningEffort: ModelReasoningEffort | null =
+    deriveReasoningEffort(effectiveModelSettings, llmConfig);
+
   // Use tier-aware resolution so the display matches the agent's reasoning effort
   // (e.g. "GPT-5.3-Codex" not just "GPT-5" for the first match).
   const currentModelDisplay = useMemo(() => {
     if (!currentModelLabel) return null;
     const info = getModelInfoForLlmConfig(currentModelLabel, {
-      reasoning_effort: llmConfig?.reasoning_effort ?? null,
+      reasoning_effort: derivedReasoningEffort ?? null,
       enable_reasoner:
         (llmConfig as { enable_reasoner?: boolean | null })?.enable_reasoner ??
         null,
@@ -1527,16 +1546,10 @@ export default function App({
       currentModelLabel.split("/").pop() ??
       null
     );
-  }, [currentModelLabel, llmConfig]);
+  }, [currentModelLabel, derivedReasoningEffort, llmConfig]);
   const currentModelProvider = llmConfig?.provider_name ?? null;
-  // Derive reasoning effort from model_settings (canonical) with llm_config as legacy fallback.
-  // Some providers may omit explicit effort for default tiers (e.g., Sonnet 4.6 high),
-  // so fall back to the selected model preset when needed.
-  const effectiveModelSettings = hasConversationModelOverride
-    ? undefined
-    : agentState?.model_settings;
   const currentReasoningEffort: ModelReasoningEffort | null =
-    deriveReasoningEffort(effectiveModelSettings, llmConfig) ??
+    derivedReasoningEffort ??
     inferReasoningEffortFromModelPreset(currentModelId, currentModelLabel);

   // Billing tier for conditional UI and error context (fetched once on mount)
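Both of the hunks above route through deriveReasoningEffort(modelSettings, llmConfig), treating model_settings as canonical and llm_config as a legacy fallback. Its implementation is not shown in this commit; a plausible sketch of that precedence follows (types simplified, and the union members other than "none" and "high" are assumptions; the real types live elsewhere in the repo):

// Hypothetical, simplified shapes inferred from usage in this diff.
type ModelReasoningEffort = "none" | "low" | "medium" | "high";
type ModelSettings = { reasoning_effort?: ModelReasoningEffort | null };
type LlmConfig = { reasoning_effort?: ModelReasoningEffort | null };

// model_settings is canonical; llm_config is only a legacy fallback.
function deriveReasoningEffort(
  modelSettings: ModelSettings | null | undefined,
  llmConfig: LlmConfig | null,
): ModelReasoningEffort | null {
  return (
    modelSettings?.reasoning_effort ?? llmConfig?.reasoning_effort ?? null
  );
}

With that precedence, feeding the conversation's model_settings snapshot in as the first argument (as the new effectiveModelSettings does) is what stops a stale agent llm_config with reasoning_effort="none" from winning while an override is active.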
@@ -3266,6 +3279,7 @@ export default function App({
   }, [loadingState, agentId]);

   // Keep effective model state in sync with the active conversation override.
+  // biome-ignore lint/correctness/useExhaustiveDependencies: ref.current is intentionally read dynamically
   useEffect(() => {
     if (
       loadingState !== "ready" ||
@@ -3283,6 +3297,7 @@ export default function App({
         agentState.model ??
         buildModelHandleFromLlmConfig(agentState.llm_config);
       setHasConversationModelOverride(false);
+      setConversationOverrideModelSettings(null);
       setLlmConfig(agentState.llm_config);
       setCurrentModelHandle(agentModelHandle ?? null);

@@ -3351,6 +3366,7 @@ export default function App({
       );

       setHasConversationModelOverride(true);
+      setConversationOverrideModelSettings(conversationModelSettings ?? null);
       setCurrentModelHandle(effectiveModelHandle);
       const modelInfo = getModelInfoForLlmConfig(effectiveModelHandle, {
         reasoning_effort: reasoningEffort,
@@ -3391,8 +3407,15 @@ export default function App({
     return () => {
       cancelled = true;
     };
-  }, [agentId, agentState, conversationId, loadingState]);
+  }, [
+    agentId,
+    agentState,
+    conversationId,
+    loadingState,
+    setHasConversationModelOverride,
+  ]);

+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
   const maybeCarryOverActiveConversationModel = useCallback(
     async (targetConversationId: string) => {
       if (!hasConversationModelOverrideRef.current) {
@@ -3663,6 +3686,7 @@ export default function App({
   // removed. Git-backed memory uses standard git merge conflict resolution via the agent.

   // Core streaming function - iterative loop that processes conversation turns
+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs read .current dynamically, complex callback with intentional deps
   const processConversation = useCallback(
     async (
       initialInput: Array<MessageCreate | ApprovalCreate>,
@@ -5841,12 +5865,14 @@ export default function App({
     // causing CONFLICT on the next user message.
     getClient()
       .then((client) => {
-        if (conversationIdRef.current === "default") {
-          return client.conversations.cancel("default", {
-            agent_id: agentIdRef.current,
-          });
-        }
-        return client.conversations.cancel(conversationIdRef.current);
+        const cancelConversationId =
+          conversationIdRef.current === "default"
+            ? agentIdRef.current
+            : conversationIdRef.current;
+        if (!cancelConversationId || cancelConversationId === "loading") {
+          return;
+        }
+        return client.conversations.cancel(cancelConversationId);
       })
       .catch(() => {
         // Silently ignore - cancellation already happened client-side
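The cancel guard introduced here reappears verbatim at two more call sites below (the second interrupt path and the awaited variant). Factored out, the shared logic would be roughly the following; resolveCancelConversationId is a hypothetical helper, not something this diff adds:

// Hypothetical extraction of the guard repeated at three call sites in this commit.
function resolveCancelConversationId(
  conversationId: string | null,
  agentId: string,
): string | null {
  // "default" is a virtual sentinel for the agent's primary history, so the
  // conversations endpoint expects the agent id in its place.
  const resolved = conversationId === "default" ? agentId : conversationId;
  // Skip cancellation while the conversation id is still unresolved.
  if (!resolved || resolved === "loading") return null;
  return resolved;
}

Note the behavioral fix folded into the inline version: the old code passed the literal "default" to conversations.cancel with agent_id as an option, whereas the new code resolves to the agent id up front and bails out entirely on "loading".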
@@ -5961,12 +5987,14 @@ export default function App({
     // Don't wait for it or show errors since user already got feedback
     getClient()
       .then((client) => {
-        if (conversationIdRef.current === "default") {
-          return client.conversations.cancel("default", {
-            agent_id: agentIdRef.current,
-          });
-        }
-        return client.conversations.cancel(conversationIdRef.current);
+        const cancelConversationId =
+          conversationIdRef.current === "default"
+            ? agentIdRef.current
+            : conversationIdRef.current;
+        if (!cancelConversationId || cancelConversationId === "loading") {
+          return;
+        }
+        return client.conversations.cancel(cancelConversationId);
       })
       .catch(() => {
         // Silently ignore - cancellation already happened client-side
@@ -5986,13 +6014,14 @@ export default function App({
     setInterruptRequested(true);
     try {
       const client = await getClient();
-      if (conversationIdRef.current === "default") {
-        await client.conversations.cancel("default", {
-          agent_id: agentIdRef.current,
-        });
-      } else {
-        await client.conversations.cancel(conversationIdRef.current);
-      }
+      const cancelConversationId =
+        conversationIdRef.current === "default"
+          ? agentIdRef.current
+          : conversationIdRef.current;
+      if (!cancelConversationId || cancelConversationId === "loading") {
+        return;
+      }
+      await client.conversations.cancel(cancelConversationId);

       if (abortControllerRef.current) {
         abortControllerRef.current.abort();
@@ -11163,15 +11192,24 @@ ${SYSTEM_REMINDER_CLOSE}
            phase: "running",
          });

-          // Persist model change to the backend.
-          // For real conversations, update the conversation-scoped override.
-          // For "default" (virtual sentinel with no real conversation object),
-          // update the agent itself so the model sticks across messages.
+          // "default" is a virtual sentinel for the agent's primary history, not a
+          // real conversation object. When active, model changes must update the agent
+          // itself (otherwise the next agent sync will snap back).
+          const isDefaultConversation = conversationIdRef.current === "default";
          let conversationModelSettings:
            | AgentState["model_settings"]
            | null
            | undefined;
-          if (conversationIdRef.current !== "default") {
+          let updatedAgent: AgentState | null = null;
+          if (isDefaultConversation) {
+            const { updateAgentLLMConfig } = await import("../agent/modify");
+            updatedAgent = await updateAgentLLMConfig(
+              agentIdRef.current,
+              modelHandle,
+              model.updateArgs,
+            );
+            conversationModelSettings = updatedAgent?.model_settings;
+          } else {
            const { updateConversationLLMConfig } = await import(
              "../agent/modify"
            );
@@ -11185,14 +11223,6 @@ ${SYSTEM_REMINDER_CLOSE}
                model_settings?: AgentState["model_settings"] | null;
              }
            ).model_settings;
-          } else {
-            const { updateAgentLLMConfig } = await import("../agent/modify");
-            const updatedAgent = await updateAgentLLMConfig(
-              agentId,
-              modelHandle,
-              model.updateArgs,
-            );
-            conversationModelSettings = updatedAgent.model_settings;
          }

          // The API may not echo reasoning_effort back, so populate it from
@@ -11206,9 +11236,23 @@ ${SYSTEM_REMINDER_CLOSE}
              llmConfigRef.current,
            ) ?? null);

-          setHasConversationModelOverride(true);
+          if (isDefaultConversation) {
+            setHasConversationModelOverride(false);
+            setConversationOverrideModelSettings(null);
+            if (updatedAgent) {
+              setAgentState(updatedAgent);
+            }
+          } else {
+            setHasConversationModelOverride(true);
+            setConversationOverrideModelSettings(
+              conversationModelSettings ?? null,
+            );
+          }

          setLlmConfig({
-            ...(llmConfigRef.current ?? ({} as LlmConfig)),
+            ...(updatedAgent?.llm_config ??
+              llmConfigRef.current ??
+              ({} as LlmConfig)),
            ...mapHandleToLlmConfigPatch(modelHandle),
            ...(typeof resolvedReasoningEffort === "string"
              ? {
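The setLlmConfig merge above layers the server's llm_config under mapHandleToLlmConfigPatch(modelHandle). That helper is outside this diff; since handles render as `${model_endpoint_type}/${model}` elsewhere in the file, a sketch consistent with that format (an assumption, not the repo's actual implementation) is:

// Hypothetical sketch of a mapHandleToLlmConfigPatch-style helper, based only
// on the "provider/model" handle format visible elsewhere in this diff.
function mapHandleToLlmConfigPatch(
  handle: string,
): { model_endpoint_type?: string; model: string } {
  const slash = handle.indexOf("/");
  if (slash === -1) return { model: handle };
  return {
    model_endpoint_type: handle.slice(0, slash),
    model: handle.slice(slash + 1),
  };
}

Spreading the server-returned llm_config first and the handle patch after it means the user's chosen model always wins over whatever the update call echoed back.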
@@ -11306,6 +11350,7 @@ ${SYSTEM_REMINDER_CLOSE}
      maybeRecordToolsetChangeReminder,
      resetPendingReasoningCycle,
      withCommandLock,
+      setHasConversationModelOverride,
    ],
  );

@@ -11956,13 +12001,25 @@ ${SYSTEM_REMINDER_CLOSE}
      const cmd = commandRunner.start("/reasoning", "Setting reasoning...");

      try {
-        // "default" is a virtual sentinel, not a real conversation object —
-        // skip the API call and fall through with undefined model_settings.
+        // "default" is a virtual sentinel for the agent's primary history. When
+        // active, reasoning tier changes must update the agent itself so the next
+        // agent sync doesn't snap back.
+        const isDefaultConversation = conversationIdRef.current === "default";
        let conversationModelSettings:
          | AgentState["model_settings"]
          | null
          | undefined;
-        if (conversationIdRef.current !== "default") {
+        let updatedAgent: AgentState | null = null;
+        if (isDefaultConversation) {
+          const { updateAgentLLMConfig } = await import("../agent/modify");
+          updatedAgent = await updateAgentLLMConfig(
+            agentIdRef.current,
+            desired.modelHandle,
+            {
+              reasoning_effort: desired.effort,
+            },
+          );
+        } else {
          const { updateConversationLLMConfig } = await import(
            "../agent/modify"
          );
@@ -11981,14 +12038,30 @@ ${SYSTEM_REMINDER_CLOSE}
        }
        const resolvedReasoningEffort =
          deriveReasoningEffort(
-            conversationModelSettings,
+            isDefaultConversation
+              ? (updatedAgent?.model_settings ?? null)
+              : conversationModelSettings,
            llmConfigRef.current,
          ) ?? desired.effort;

-        setHasConversationModelOverride(true);
+        if (isDefaultConversation) {
+          setHasConversationModelOverride(false);
+          setConversationOverrideModelSettings(null);
+          if (updatedAgent) {
+            setAgentState(updatedAgent);
+          }
+        } else {
+          setHasConversationModelOverride(true);
+          setConversationOverrideModelSettings(
+            conversationModelSettings ?? null,
+          );
+        }

+        // The API may not echo reasoning_effort back; preserve explicit desired effort.
        setLlmConfig({
-          ...(llmConfigRef.current ?? ({} as LlmConfig)),
+          ...(updatedAgent?.llm_config ??
+            llmConfigRef.current ??
+            ({} as LlmConfig)),
          ...mapHandleToLlmConfigPatch(desired.modelHandle),
          reasoning_effort: resolvedReasoningEffort as ModelReasoningEffort,
        });
@@ -12045,8 +12118,15 @@ ${SYSTEM_REMINDER_CLOSE}
      } finally {
        reasoningCycleInFlightRef.current = false;
      }
-    }, [agentId, commandRunner, isAgentBusy, withCommandLock]);
+    }, [
+      agentId,
+      commandRunner,
+      isAgentBusy,
+      withCommandLock,
+      setHasConversationModelOverride,
+    ]);

+  // biome-ignore lint/correctness/useExhaustiveDependencies: refs are stable objects, .current is read dynamically
  const handleCycleReasoningEffort = useCallback(() => {
    void (async () => {
      if (!agentId) return;

@@ -76,9 +76,15 @@ export function createCommandRunner({
  const handle: CommandHandle = {
    id,
    input,
-    update: null!,
-    finish: null!,
-    fail: null!,
+    // Placeholders are overwritten below before the handle is returned.
+    update: (_update: CommandUpdate) => {},
+    finish: (
+      _output: string,
+      _success?: boolean,
+      _dimOutput?: boolean,
+      _preformatted?: boolean,
+    ) => {},
+    fail: (_output: string) => {},
  };

  const update = (updateData: CommandUpdate) => {
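This hunk swaps null! non-null assertions for typed no-op placeholders. The difference only shows if the handle escapes before update/finish/fail are reassigned: the assertion version throws at the call site, the no-op version degrades silently. A condensed illustration with hypothetical types, not code from the diff:

type Handle = { fire: (msg: string) => void };

// With a non-null assertion the object lies about its shape;
// an early call throws at runtime:
const unsafe: Handle = { fire: null! };
// unsafe.fire("boom"); // TypeError: unsafe.fire is not a function

// With a typed no-op placeholder an early call is harmless:
const safe: Handle = { fire: (_msg: string) => {} };
safe.fire("ignored"); // does nothing yet
safe.fire = (msg) => console.log(msg); // real implementation assigned later
safe.fire("logged");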
@@ -243,9 +243,9 @@ export function ConversationSelector({
    if (!afterCursor) {
      try {
        const defaultMessages = await client.conversations.messages.list(
-          "default",
+          // Default conversation is represented by the agent id at the conversations endpoint.
+          agentId,
          {
-            agent_id: agentId,
            limit: 20,
            order: "desc",
          },
@@ -159,8 +159,8 @@ export async function runMessagesSubcommand(argv: string[]): Promise<number> {
    return 1;
  }

-  const response = await client.conversations.messages.list("default", {
-    agent_id: agentId,
+  // Default conversation is represented by the agent id at the conversations endpoint.
+  const response = await client.conversations.messages.list(agentId, {
    limit: parseLimit(parsed.values.limit, 20),
    after: parsed.values.after,
    before: parsed.values.before,
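The three call sites above (resume backfill, ConversationSelector, and this messages subcommand) all follow the same route change: the default conversation is now addressed by the agent id itself instead of a "default" sentinel plus an agent_id query parameter. Restating the hunks as before/after shapes, with client being the SDK client already used in the diff:

// Before: sentinel path segment plus agent_id query parameter.
await client.conversations.messages.list("default", {
  agent_id: agentId,
  limit: 20,
  order: "desc",
});

// After: the agent id itself names the default conversation.
await client.conversations.messages.list(agentId, {
  limit: 20,
  order: "desc",
});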
@@ -71,7 +71,7 @@ describe("model preset refresh wiring", () => {
    expect(updateSegment).not.toContain("client.agents.update(");
  });

-  test("/model handler updates conversation model and falls back to agent for default", () => {
+  test("/model handler updates conversation model (default updates agent)", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");

@@ -85,11 +85,9 @@ describe("model preset refresh wiring", () => {
    const segment = source.slice(start, end);

    expect(segment).toContain("updateConversationLLMConfig(");
-    expect(segment).toContain("conversationIdRef.current");
-    // For the "default" virtual conversation (no real conversation object),
-    // the handler falls back to updating the agent directly.
    expect(segment).toContain("updateAgentLLMConfig(");
-    expect(segment).toContain('conversationIdRef.current !== "default"');
+    expect(segment).toContain("conversationIdRef.current");
+    expect(segment).toContain('conversationIdRef.current === "default"');
  });

  test("App defines helper to carry over active conversation model", () => {
@@ -116,6 +114,28 @@ describe("model preset refresh wiring", () => {
    );
  });

+  test("conversation model override flag is synced for async callbacks", () => {
+    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
+    const source = readFileSync(path, "utf-8");
+
+    // The override flag must be safe to read inside async callbacks (e.g. the
+    // first streamed chunk sync) without waiting for a render/effect.
+    expect(source).toMatch(
+      /\[\s*hasConversationModelOverride,\s*setHasConversationModelOverride,\s*hasConversationModelOverrideRef,\s*\]\s*=\s*useSyncedState\(false\)/,
+    );
+  });
+
+  test("reasoning tier prefers conversation override model_settings", () => {
+    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
+    const source = readFileSync(path, "utf-8");
+
+    // When a conversation override is active, prefer the conversation model_settings
+    // snapshot when deriving reasoning effort (not the base agent llm_config).
+    expect(source).toMatch(
+      /const effectiveModelSettings = hasConversationModelOverride\s*\?\s*conversationOverrideModelSettings\s*:\s*agentState\?\.model_settings;/,
+    );
+  });
+
  test("new conversation flows reapply active conversation model before switching", () => {
    const path = fileURLToPath(new URL("../../cli/App.tsx", import.meta.url));
    const source = readFileSync(path, "utf-8");
@@ -46,7 +46,7 @@ describe("reasoning tier cycle wiring", () => {
    expect(callbackBlocks.length).toBeGreaterThanOrEqual(2);
  });

-  test("flush uses conversation-scoped reasoning updates", () => {
+  test("flush uses conversation-scoped reasoning updates (default updates agent)", () => {
    const appPath = fileURLToPath(
      new URL("../../cli/App.tsx", import.meta.url),
    );
@@ -64,8 +64,9 @@ describe("reasoning tier cycle wiring", () => {

    const segment = source.slice(start, end);
    expect(segment).toContain("updateConversationLLMConfig(");
+    expect(segment).toContain("updateAgentLLMConfig(");
    expect(segment).toContain("conversationIdRef.current");
-    expect(segment).not.toContain("updateAgentLLMConfig(");
+    expect(segment).toContain('conversationIdRef.current === "default"');
  });

  test("tab-based reasoning cycling is opt-in only", () => {