From caa109c10db32d2007eb9c4bd1517a64785e7eb0 Mon Sep 17 00:00:00 2001 From: jnjpng Date: Mon, 23 Feb 2026 13:38:17 -0800 Subject: [PATCH] fix: skip no-op preset refresh on resume to avoid slow agent recompile (#1110) Co-authored-by: Letta --- src/agent/model.ts | 58 +++++++++++++++++++ src/headless.ts | 28 ++------- src/index.ts | 37 ++++-------- .../agent/model-preset-refresh.wiring.test.ts | 25 ++++++-- 4 files changed, 93 insertions(+), 55 deletions(-) diff --git a/src/agent/model.ts b/src/agent/model.ts index f71d821..0f223a4 100644 --- a/src/agent/model.ts +++ b/src/agent/model.ts @@ -249,6 +249,64 @@ export function getModelPresetUpdateForAgent( }; } +/** + * Fields synced during resume preset refresh. + * This is the single source of truth for which preset fields are + * auto-applied on resume and the comparison logic that decides + * whether an update is needed. + */ +const RESUME_REFRESH_FIELDS = [ + "max_output_tokens", + "parallel_tool_calls", +] as const; + +/** + * Build the subset of preset updateArgs that should be synced on resume, + * and check whether the agent already has those values. + * + * Returns `{ updateArgs, needsUpdate }`: + * - `updateArgs` contains only the resume-scoped fields from the preset. + * - `needsUpdate` is false when the agent already matches, so the caller + * can skip the expensive PATCH. + */ +export function getResumeRefreshArgs( + presetUpdateArgs: Record<string, unknown>, + agent: { + llm_config?: { max_tokens?: number | null } | null; + // Accept the broad AgentState union; we only read parallel_tool_calls.
+ model_settings?: { parallel_tool_calls?: boolean } | null; + }, +): { updateArgs: Record<string, unknown>; needsUpdate: boolean } { + const updateArgs: Record<string, unknown> = {}; + + // Extract only the resume-scoped fields from the full preset + for (const field of RESUME_REFRESH_FIELDS) { + const value = presetUpdateArgs[field]; + if (field === "max_output_tokens" && typeof value === "number") { + updateArgs[field] = value; + } else if (field === "parallel_tool_calls" && typeof value === "boolean") { + updateArgs[field] = value; + } + } + + if (Object.keys(updateArgs).length === 0) { + return { updateArgs, needsUpdate: false }; + } + + // Compare against the agent's current values + const currentMaxTokens = agent.llm_config?.max_tokens; + const wantMaxTokens = updateArgs.max_output_tokens as number | undefined; + const currentParallel = agent.model_settings?.parallel_tool_calls; + const wantParallel = updateArgs.parallel_tool_calls as boolean | undefined; + + const maxTokensMatch = + wantMaxTokens === undefined || currentMaxTokens === wantMaxTokens; + const parallelMatch = + wantParallel === undefined || currentParallel === wantParallel; + + return { updateArgs, needsUpdate: !(maxTokensMatch && parallelMatch) }; +} + /** * Find a model entry by handle with fuzzy matching support * @param handle - The full model handle diff --git a/src/headless.ts b/src/headless.ts index db20e57..10f3ef5 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -28,6 +28,7 @@ import { getStreamToolContextId, sendMessageStream } from "./agent/message"; import { getModelPresetUpdateForAgent, getModelUpdateArgs, + getResumeRefreshArgs, resolveModel, } from "./agent/model"; import { updateAgentLLMConfig, updateAgentSystemPrompt } from "./agent/modify"; @@ -913,36 +914,19 @@ export async function handleHeadlessCommand( // Always apply model update - different model IDs can share the same // handle but have different settings (e.g., gpt-5.2-medium vs gpt-5.2-xhigh) const updateArgs = getModelUpdateArgs(model); - await
updateAgentLLMConfig(agent.id, modelHandle, updateArgs); - // Refresh agent state after model update - agent = await client.agents.retrieve(agent.id); + agent = await updateAgentLLMConfig(agent.id, modelHandle, updateArgs); } else { const presetRefresh = getModelPresetUpdateForAgent(agent); if (presetRefresh) { - // Resume preset refresh is intentionally scoped for now. - // We only force-refresh max_output_tokens + parallel_tool_calls. - // Other preset fields available in models.json (for example: - // context_window, reasoning_effort, enable_reasoner, - // max_reasoning_tokens, verbosity, temperature, - // thinking_budget) are intentionally not auto-applied yet. - const resumeRefreshUpdateArgs: Record<string, unknown> = {}; - if (typeof presetRefresh.updateArgs.max_output_tokens === "number") { - resumeRefreshUpdateArgs.max_output_tokens = - presetRefresh.updateArgs.max_output_tokens; - } - if (typeof presetRefresh.updateArgs.parallel_tool_calls === "boolean") { - resumeRefreshUpdateArgs.parallel_tool_calls = - presetRefresh.updateArgs.parallel_tool_calls; - } + const { updateArgs: resumeRefreshUpdateArgs, needsUpdate } = + getResumeRefreshArgs(presetRefresh.updateArgs, agent); - if (Object.keys(resumeRefreshUpdateArgs).length > 0) { - await updateAgentLLMConfig( + if (needsUpdate) { + agent = await updateAgentLLMConfig( agent.id, presetRefresh.modelHandle, resumeRefreshUpdateArgs, ); - // Refresh agent state after model update - agent = await client.agents.retrieve(agent.id); } } } diff --git a/src/index.ts b/src/index.ts index 316de9f..af16387 100755 --- a/src/index.ts +++ b/src/index.ts @@ -15,6 +15,7 @@ import { ISOLATED_BLOCK_LABELS } from "./agent/memory"; import { getModelPresetUpdateForAgent, getModelUpdateArgs, + getResumeRefreshArgs, resolveModel, } from "./agent/model"; import { updateAgentLLMConfig, updateAgentSystemPrompt } from "./agent/modify"; @@ -1808,41 +1809,23 @@ async function main(): Promise<void> { // Always apply model update - different model IDs can share the
same // handle but have different settings (e.g., gpt-5.2-medium vs gpt-5.2-xhigh) const updateArgs = getModelUpdateArgs(model); - await updateAgentLLMConfig(agent.id, modelHandle, updateArgs); - // Refresh agent state after model update - agent = await client.agents.retrieve(agent.id); + agent = await updateAgentLLMConfig( + agent.id, + modelHandle, + updateArgs, + ); } else { const presetRefresh = getModelPresetUpdateForAgent(agent); if (presetRefresh) { - // Resume preset refresh is intentionally scoped for now. - // We only force-refresh max_output_tokens + parallel_tool_calls. - // Other preset fields available in models.json (for example: - // context_window, reasoning_effort, enable_reasoner, - // max_reasoning_tokens, verbosity, temperature, - // thinking_budget) are intentionally not auto-applied yet. - const resumeRefreshUpdateArgs: Record<string, unknown> = {}; - if ( - typeof presetRefresh.updateArgs.max_output_tokens === "number" - ) { - resumeRefreshUpdateArgs.max_output_tokens = - presetRefresh.updateArgs.max_output_tokens; - } - if ( - typeof presetRefresh.updateArgs.parallel_tool_calls === - "boolean" - ) { - resumeRefreshUpdateArgs.parallel_tool_calls = - presetRefresh.updateArgs.parallel_tool_calls; - } + const { updateArgs: resumeRefreshUpdateArgs, needsUpdate } = + getResumeRefreshArgs(presetRefresh.updateArgs, agent); - if (Object.keys(resumeRefreshUpdateArgs).length > 0) { - await updateAgentLLMConfig( + if (needsUpdate) { + agent = await updateAgentLLMConfig( agent.id, presetRefresh.modelHandle, resumeRefreshUpdateArgs, ); - // Refresh agent state after model update - agent = await client.agents.retrieve(agent.id); } } } diff --git a/src/tests/agent/model-preset-refresh.wiring.test.ts b/src/tests/agent/model-preset-refresh.wiring.test.ts index 42858c8..c84e0f2 100644 --- a/src/tests/agent/model-preset-refresh.wiring.test.ts +++ b/src/tests/agent/model-preset-refresh.wiring.test.ts @@ -50,9 +50,9 @@ describe("model preset refresh wiring", () => {
expect(source).toContain( "const presetRefresh = getModelPresetUpdateForAgent(agent)", ); - expect(source).toContain("resumeRefreshUpdateArgs"); - expect(source).toContain("presetRefresh.updateArgs.max_output_tokens"); - expect(source).toContain("presetRefresh.updateArgs.parallel_tool_calls"); + // Field extraction + skip logic is handled by getResumeRefreshArgs helper + expect(source).toContain("getResumeRefreshArgs(presetRefresh.updateArgs"); + expect(source).toContain("needsUpdate"); expect(source).toContain("await updateAgentLLMConfig("); expect(source).toContain("presetRefresh.modelHandle"); expect(source).not.toContain( @@ -69,13 +69,26 @@ describe("model preset refresh wiring", () => { expect(source).toContain( "const presetRefresh = getModelPresetUpdateForAgent(agent)", ); - expect(source).toContain("resumeRefreshUpdateArgs"); - expect(source).toContain("presetRefresh.updateArgs.max_output_tokens"); - expect(source).toContain("presetRefresh.updateArgs.parallel_tool_calls"); + // Field extraction + skip logic is handled by getResumeRefreshArgs helper + expect(source).toContain("getResumeRefreshArgs(presetRefresh.updateArgs"); + expect(source).toContain("needsUpdate"); expect(source).toContain("await updateAgentLLMConfig("); expect(source).toContain("presetRefresh.modelHandle"); expect(source).not.toContain( "await updateAgentLLMConfig(\n agent.id,\n presetRefresh.modelHandle,\n presetRefresh.updateArgs,", ); }); + + test("getResumeRefreshArgs helper owns field extraction and comparison", () => { + const path = fileURLToPath( + new URL("../../agent/model.ts", import.meta.url), + ); + const source = readFileSync(path, "utf-8"); + + expect(source).toContain("export function getResumeRefreshArgs("); + expect(source).toContain("RESUME_REFRESH_FIELDS"); + expect(source).toContain('"max_output_tokens"'); + expect(source).toContain('"parallel_tool_calls"'); + expect(source).toContain("needsUpdate"); + }); });