fix: skip no-op preset refresh on resume to avoid slow agent recompile (#1110)

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
jnjpng
2026-02-23 13:38:17 -08:00
committed by GitHub
parent 61b586174c
commit caa109c10d
4 changed files with 93 additions and 55 deletions

View File

@@ -249,6 +249,64 @@ export function getModelPresetUpdateForAgent(
};
}
/**
 * Fields synced during resume preset refresh.
 * This is the single source of truth for which preset fields are
 * auto-applied on resume and the comparison logic that decides
 * whether an update is needed.
 */
const RESUME_REFRESH_FIELDS = [
  "max_output_tokens",
  "parallel_tool_calls",
] as const;
/**
 * Build the subset of preset updateArgs that should be synced on resume,
 * and check whether the agent already has those values.
 *
 * Returns `{ updateArgs, needsUpdate }`:
 * - `updateArgs` contains only the resume-scoped fields from the preset.
 * - `needsUpdate` is false when the agent already matches, so the caller
 *   can skip the expensive PATCH.
 */
export function getResumeRefreshArgs(
  presetUpdateArgs: Record<string, unknown>,
  agent: {
    llm_config?: { max_tokens?: number | null } | null;
    // Accept the broad AgentState union; we only read parallel_tool_calls.
    model_settings?: { parallel_tool_calls?: boolean } | null;
  },
): { updateArgs: Record<string, unknown>; needsUpdate: boolean } {
  const [maxTokensField, parallelField] = RESUME_REFRESH_FIELDS;
  const presetMaxTokens = presetUpdateArgs[maxTokensField];
  const presetParallel = presetUpdateArgs[parallelField];

  // Keep only well-typed, resume-scoped values from the full preset.
  const scopedArgs: Record<string, unknown> = {};
  if (typeof presetMaxTokens === "number") {
    scopedArgs[maxTokensField] = presetMaxTokens;
  }
  if (typeof presetParallel === "boolean") {
    scopedArgs[parallelField] = presetParallel;
  }

  // Preset carries nothing resume-scoped: nothing to sync, nothing to PATCH.
  if (Object.keys(scopedArgs).length === 0) {
    return { updateArgs: scopedArgs, needsUpdate: false };
  }

  // A field forces an update only when the preset provides it AND the
  // agent's current value differs (missing/null agent values count as
  // differing from any concrete preset value).
  const maxTokensDiffers =
    typeof presetMaxTokens === "number" &&
    agent.llm_config?.max_tokens !== presetMaxTokens;
  const parallelDiffers =
    typeof presetParallel === "boolean" &&
    agent.model_settings?.parallel_tool_calls !== presetParallel;

  return {
    updateArgs: scopedArgs,
    needsUpdate: maxTokensDiffers || parallelDiffers,
  };
}
/**
* Find a model entry by handle with fuzzy matching support
* @param handle - The full model handle

View File

@@ -28,6 +28,7 @@ import { getStreamToolContextId, sendMessageStream } from "./agent/message";
import {
getModelPresetUpdateForAgent,
getModelUpdateArgs,
getResumeRefreshArgs,
resolveModel,
} from "./agent/model";
import { updateAgentLLMConfig, updateAgentSystemPrompt } from "./agent/modify";
@@ -913,36 +914,19 @@ export async function handleHeadlessCommand(
// Always apply model update - different model IDs can share the same
// handle but have different settings (e.g., gpt-5.2-medium vs gpt-5.2-xhigh)
const updateArgs = getModelUpdateArgs(model);
await updateAgentLLMConfig(agent.id, modelHandle, updateArgs);
// Refresh agent state after model update
agent = await client.agents.retrieve(agent.id);
agent = await updateAgentLLMConfig(agent.id, modelHandle, updateArgs);
} else {
const presetRefresh = getModelPresetUpdateForAgent(agent);
if (presetRefresh) {
// Resume preset refresh is intentionally scoped for now.
// We only force-refresh max_output_tokens + parallel_tool_calls.
// Other preset fields available in models.json (for example:
// context_window, reasoning_effort, enable_reasoner,
// max_reasoning_tokens, verbosity, temperature,
// thinking_budget) are intentionally not auto-applied yet.
const resumeRefreshUpdateArgs: Record<string, unknown> = {};
if (typeof presetRefresh.updateArgs.max_output_tokens === "number") {
resumeRefreshUpdateArgs.max_output_tokens =
presetRefresh.updateArgs.max_output_tokens;
}
if (typeof presetRefresh.updateArgs.parallel_tool_calls === "boolean") {
resumeRefreshUpdateArgs.parallel_tool_calls =
presetRefresh.updateArgs.parallel_tool_calls;
}
const { updateArgs: resumeRefreshUpdateArgs, needsUpdate } =
getResumeRefreshArgs(presetRefresh.updateArgs, agent);
if (Object.keys(resumeRefreshUpdateArgs).length > 0) {
await updateAgentLLMConfig(
if (needsUpdate) {
agent = await updateAgentLLMConfig(
agent.id,
presetRefresh.modelHandle,
resumeRefreshUpdateArgs,
);
// Refresh agent state after model update
agent = await client.agents.retrieve(agent.id);
}
}
}

View File

@@ -15,6 +15,7 @@ import { ISOLATED_BLOCK_LABELS } from "./agent/memory";
import {
getModelPresetUpdateForAgent,
getModelUpdateArgs,
getResumeRefreshArgs,
resolveModel,
} from "./agent/model";
import { updateAgentLLMConfig, updateAgentSystemPrompt } from "./agent/modify";
@@ -1808,41 +1809,23 @@ async function main(): Promise<void> {
// Always apply model update - different model IDs can share the same
// handle but have different settings (e.g., gpt-5.2-medium vs gpt-5.2-xhigh)
const updateArgs = getModelUpdateArgs(model);
await updateAgentLLMConfig(agent.id, modelHandle, updateArgs);
// Refresh agent state after model update
agent = await client.agents.retrieve(agent.id);
agent = await updateAgentLLMConfig(
agent.id,
modelHandle,
updateArgs,
);
} else {
const presetRefresh = getModelPresetUpdateForAgent(agent);
if (presetRefresh) {
// Resume preset refresh is intentionally scoped for now.
// We only force-refresh max_output_tokens + parallel_tool_calls.
// Other preset fields available in models.json (for example:
// context_window, reasoning_effort, enable_reasoner,
// max_reasoning_tokens, verbosity, temperature,
// thinking_budget) are intentionally not auto-applied yet.
const resumeRefreshUpdateArgs: Record<string, unknown> = {};
if (
typeof presetRefresh.updateArgs.max_output_tokens === "number"
) {
resumeRefreshUpdateArgs.max_output_tokens =
presetRefresh.updateArgs.max_output_tokens;
}
if (
typeof presetRefresh.updateArgs.parallel_tool_calls ===
"boolean"
) {
resumeRefreshUpdateArgs.parallel_tool_calls =
presetRefresh.updateArgs.parallel_tool_calls;
}
const { updateArgs: resumeRefreshUpdateArgs, needsUpdate } =
getResumeRefreshArgs(presetRefresh.updateArgs, agent);
if (Object.keys(resumeRefreshUpdateArgs).length > 0) {
await updateAgentLLMConfig(
if (needsUpdate) {
agent = await updateAgentLLMConfig(
agent.id,
presetRefresh.modelHandle,
resumeRefreshUpdateArgs,
);
// Refresh agent state after model update
agent = await client.agents.retrieve(agent.id);
}
}
}

View File

@@ -50,9 +50,9 @@ describe("model preset refresh wiring", () => {
expect(source).toContain(
"const presetRefresh = getModelPresetUpdateForAgent(agent)",
);
expect(source).toContain("resumeRefreshUpdateArgs");
expect(source).toContain("presetRefresh.updateArgs.max_output_tokens");
expect(source).toContain("presetRefresh.updateArgs.parallel_tool_calls");
// Field extraction + skip logic is handled by getResumeRefreshArgs helper
expect(source).toContain("getResumeRefreshArgs(presetRefresh.updateArgs");
expect(source).toContain("needsUpdate");
expect(source).toContain("await updateAgentLLMConfig(");
expect(source).toContain("presetRefresh.modelHandle");
expect(source).not.toContain(
@@ -69,13 +69,26 @@ describe("model preset refresh wiring", () => {
expect(source).toContain(
"const presetRefresh = getModelPresetUpdateForAgent(agent)",
);
expect(source).toContain("resumeRefreshUpdateArgs");
expect(source).toContain("presetRefresh.updateArgs.max_output_tokens");
expect(source).toContain("presetRefresh.updateArgs.parallel_tool_calls");
// Field extraction + skip logic is handled by getResumeRefreshArgs helper
expect(source).toContain("getResumeRefreshArgs(presetRefresh.updateArgs");
expect(source).toContain("needsUpdate");
expect(source).toContain("await updateAgentLLMConfig(");
expect(source).toContain("presetRefresh.modelHandle");
expect(source).not.toContain(
"await updateAgentLLMConfig(\n agent.id,\n presetRefresh.modelHandle,\n presetRefresh.updateArgs,",
);
});
test("getResumeRefreshArgs helper owns field extraction and comparison", () => {
const path = fileURLToPath(
new URL("../../agent/model.ts", import.meta.url),
);
const source = readFileSync(path, "utf-8");
expect(source).toContain("export function getResumeRefreshArgs(");
expect(source).toContain("RESUME_REFRESH_FIELDS");
expect(source).toContain('"max_output_tokens"');
expect(source).toContain('"parallel_tool_calls"');
expect(source).toContain("needsUpdate");
});
});