fix: skip no-op preset refresh on resume to avoid slow agent recompile (#1110)
Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
@@ -249,6 +249,64 @@ export function getModelPresetUpdateForAgent(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Fields synced during resume preset refresh, listed in the order they are
 * copied into the update payload.
 *
 * This list is the single source of truth for which preset fields are
 * auto-applied on resume. Each entry must have a matching rule in
 * RESUME_REFRESH_RULES; the compiler rejects a field that has none, so a
 * newly added field can never be silently skipped.
 */
const RESUME_REFRESH_FIELDS = [
  "max_output_tokens",
  "parallel_tool_calls",
] as const;

/** Name of a preset field that is auto-applied on resume. */
type ResumeRefreshField = (typeof RESUME_REFRESH_FIELDS)[number];

/** The slice of agent state that the resume refresh comparison reads. */
type ResumeRefreshAgent = {
  llm_config?: { max_tokens?: number | null } | null;
  // Accept the broad AgentState union; we only read parallel_tool_calls.
  model_settings?: { parallel_tool_calls?: boolean } | null;
};

/** How a single resume-scoped field is validated and compared. */
type ResumeRefreshRule = {
  /** True when the preset value has the runtime type this field requires. */
  accepts: (value: unknown) => boolean;
  /** Reads the value the agent currently holds for this field. */
  current: (agent: ResumeRefreshAgent) => unknown;
};

/**
 * Per-field validation and comparison rules for the resume refresh.
 * Keyed by ResumeRefreshField so the rule set cannot drift from
 * RESUME_REFRESH_FIELDS.
 */
const RESUME_REFRESH_RULES: Record<ResumeRefreshField, ResumeRefreshRule> = {
  max_output_tokens: {
    accepts: (value) => typeof value === "number",
    // The preset's max_output_tokens is compared with llm_config.max_tokens.
    current: (agent) => agent.llm_config?.max_tokens,
  },
  parallel_tool_calls: {
    accepts: (value) => typeof value === "boolean",
    current: (agent) => agent.model_settings?.parallel_tool_calls,
  },
};

/**
 * Build the subset of preset updateArgs that should be synced on resume,
 * and check whether the agent already has those values.
 *
 * @param presetUpdateArgs - Full set of update args derived from the preset.
 *   Only the fields in RESUME_REFRESH_FIELDS are read; a field whose value
 *   has the wrong runtime type is ignored.
 * @param agent - Current agent state. A null or missing `llm_config` /
 *   `model_settings` counts as "value not set", which differs from any
 *   preset value.
 * @returns `{ updateArgs, needsUpdate }`:
 *   - `updateArgs` contains only the resume-scoped fields from the preset.
 *   - `needsUpdate` is false when the agent already matches (or when there
 *     is nothing to sync), so the caller can skip the expensive PATCH.
 */
export function getResumeRefreshArgs(
  presetUpdateArgs: Record<string, unknown>,
  agent: ResumeRefreshAgent,
): { updateArgs: Record<string, unknown>; needsUpdate: boolean } {
  const updateArgs: Record<string, unknown> = {};
  let needsUpdate = false;

  for (const field of RESUME_REFRESH_FIELDS) {
    const rule = RESUME_REFRESH_RULES[field];
    const wanted = presetUpdateArgs[field];

    // Fields that are absent or have the wrong type are not part of the
    // refresh and therefore can never trigger an update.
    if (!rule.accepts(wanted)) {
      continue;
    }

    updateArgs[field] = wanted;

    // Strict equality: a null/undefined current value never matches a
    // concrete preset value, so the agent gets patched in that case.
    if (rule.current(agent) !== wanted) {
      needsUpdate = true;
    }
  }

  return { updateArgs, needsUpdate };
}
|
||||
|
||||
/**
|
||||
* Find a model entry by handle with fuzzy matching support
|
||||
* @param handle - The full model handle
|
||||
|
||||
@@ -28,6 +28,7 @@ import { getStreamToolContextId, sendMessageStream } from "./agent/message";
|
||||
import {
|
||||
getModelPresetUpdateForAgent,
|
||||
getModelUpdateArgs,
|
||||
getResumeRefreshArgs,
|
||||
resolveModel,
|
||||
} from "./agent/model";
|
||||
import { updateAgentLLMConfig, updateAgentSystemPrompt } from "./agent/modify";
|
||||
@@ -913,36 +914,19 @@ export async function handleHeadlessCommand(
|
||||
// Always apply model update - different model IDs can share the same
|
||||
// handle but have different settings (e.g., gpt-5.2-medium vs gpt-5.2-xhigh)
|
||||
const updateArgs = getModelUpdateArgs(model);
|
||||
await updateAgentLLMConfig(agent.id, modelHandle, updateArgs);
|
||||
// Refresh agent state after model update
|
||||
agent = await client.agents.retrieve(agent.id);
|
||||
agent = await updateAgentLLMConfig(agent.id, modelHandle, updateArgs);
|
||||
} else {
|
||||
const presetRefresh = getModelPresetUpdateForAgent(agent);
|
||||
if (presetRefresh) {
|
||||
// Resume preset refresh is intentionally scoped for now.
|
||||
// We only force-refresh max_output_tokens + parallel_tool_calls.
|
||||
// Other preset fields available in models.json (for example:
|
||||
// context_window, reasoning_effort, enable_reasoner,
|
||||
// max_reasoning_tokens, verbosity, temperature,
|
||||
// thinking_budget) are intentionally not auto-applied yet.
|
||||
const resumeRefreshUpdateArgs: Record<string, unknown> = {};
|
||||
if (typeof presetRefresh.updateArgs.max_output_tokens === "number") {
|
||||
resumeRefreshUpdateArgs.max_output_tokens =
|
||||
presetRefresh.updateArgs.max_output_tokens;
|
||||
}
|
||||
if (typeof presetRefresh.updateArgs.parallel_tool_calls === "boolean") {
|
||||
resumeRefreshUpdateArgs.parallel_tool_calls =
|
||||
presetRefresh.updateArgs.parallel_tool_calls;
|
||||
}
|
||||
const { updateArgs: resumeRefreshUpdateArgs, needsUpdate } =
|
||||
getResumeRefreshArgs(presetRefresh.updateArgs, agent);
|
||||
|
||||
if (Object.keys(resumeRefreshUpdateArgs).length > 0) {
|
||||
await updateAgentLLMConfig(
|
||||
if (needsUpdate) {
|
||||
agent = await updateAgentLLMConfig(
|
||||
agent.id,
|
||||
presetRefresh.modelHandle,
|
||||
resumeRefreshUpdateArgs,
|
||||
);
|
||||
// Refresh agent state after model update
|
||||
agent = await client.agents.retrieve(agent.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
37
src/index.ts
37
src/index.ts
@@ -15,6 +15,7 @@ import { ISOLATED_BLOCK_LABELS } from "./agent/memory";
|
||||
import {
|
||||
getModelPresetUpdateForAgent,
|
||||
getModelUpdateArgs,
|
||||
getResumeRefreshArgs,
|
||||
resolveModel,
|
||||
} from "./agent/model";
|
||||
import { updateAgentLLMConfig, updateAgentSystemPrompt } from "./agent/modify";
|
||||
@@ -1808,41 +1809,23 @@ async function main(): Promise<void> {
|
||||
// Always apply model update - different model IDs can share the same
|
||||
// handle but have different settings (e.g., gpt-5.2-medium vs gpt-5.2-xhigh)
|
||||
const updateArgs = getModelUpdateArgs(model);
|
||||
await updateAgentLLMConfig(agent.id, modelHandle, updateArgs);
|
||||
// Refresh agent state after model update
|
||||
agent = await client.agents.retrieve(agent.id);
|
||||
agent = await updateAgentLLMConfig(
|
||||
agent.id,
|
||||
modelHandle,
|
||||
updateArgs,
|
||||
);
|
||||
} else {
|
||||
const presetRefresh = getModelPresetUpdateForAgent(agent);
|
||||
if (presetRefresh) {
|
||||
// Resume preset refresh is intentionally scoped for now.
|
||||
// We only force-refresh max_output_tokens + parallel_tool_calls.
|
||||
// Other preset fields available in models.json (for example:
|
||||
// context_window, reasoning_effort, enable_reasoner,
|
||||
// max_reasoning_tokens, verbosity, temperature,
|
||||
// thinking_budget) are intentionally not auto-applied yet.
|
||||
const resumeRefreshUpdateArgs: Record<string, unknown> = {};
|
||||
if (
|
||||
typeof presetRefresh.updateArgs.max_output_tokens === "number"
|
||||
) {
|
||||
resumeRefreshUpdateArgs.max_output_tokens =
|
||||
presetRefresh.updateArgs.max_output_tokens;
|
||||
}
|
||||
if (
|
||||
typeof presetRefresh.updateArgs.parallel_tool_calls ===
|
||||
"boolean"
|
||||
) {
|
||||
resumeRefreshUpdateArgs.parallel_tool_calls =
|
||||
presetRefresh.updateArgs.parallel_tool_calls;
|
||||
}
|
||||
const { updateArgs: resumeRefreshUpdateArgs, needsUpdate } =
|
||||
getResumeRefreshArgs(presetRefresh.updateArgs, agent);
|
||||
|
||||
if (Object.keys(resumeRefreshUpdateArgs).length > 0) {
|
||||
await updateAgentLLMConfig(
|
||||
if (needsUpdate) {
|
||||
agent = await updateAgentLLMConfig(
|
||||
agent.id,
|
||||
presetRefresh.modelHandle,
|
||||
resumeRefreshUpdateArgs,
|
||||
);
|
||||
// Refresh agent state after model update
|
||||
agent = await client.agents.retrieve(agent.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,9 +50,9 @@ describe("model preset refresh wiring", () => {
|
||||
expect(source).toContain(
|
||||
"const presetRefresh = getModelPresetUpdateForAgent(agent)",
|
||||
);
|
||||
expect(source).toContain("resumeRefreshUpdateArgs");
|
||||
expect(source).toContain("presetRefresh.updateArgs.max_output_tokens");
|
||||
expect(source).toContain("presetRefresh.updateArgs.parallel_tool_calls");
|
||||
// Field extraction + skip logic is handled by getResumeRefreshArgs helper
|
||||
expect(source).toContain("getResumeRefreshArgs(presetRefresh.updateArgs");
|
||||
expect(source).toContain("needsUpdate");
|
||||
expect(source).toContain("await updateAgentLLMConfig(");
|
||||
expect(source).toContain("presetRefresh.modelHandle");
|
||||
expect(source).not.toContain(
|
||||
@@ -69,13 +69,26 @@ describe("model preset refresh wiring", () => {
|
||||
expect(source).toContain(
|
||||
"const presetRefresh = getModelPresetUpdateForAgent(agent)",
|
||||
);
|
||||
expect(source).toContain("resumeRefreshUpdateArgs");
|
||||
expect(source).toContain("presetRefresh.updateArgs.max_output_tokens");
|
||||
expect(source).toContain("presetRefresh.updateArgs.parallel_tool_calls");
|
||||
// Field extraction + skip logic is handled by getResumeRefreshArgs helper
|
||||
expect(source).toContain("getResumeRefreshArgs(presetRefresh.updateArgs");
|
||||
expect(source).toContain("needsUpdate");
|
||||
expect(source).toContain("await updateAgentLLMConfig(");
|
||||
expect(source).toContain("presetRefresh.modelHandle");
|
||||
expect(source).not.toContain(
|
||||
"await updateAgentLLMConfig(\n agent.id,\n presetRefresh.modelHandle,\n presetRefresh.updateArgs,",
|
||||
);
|
||||
});
|
||||
|
||||
// Source-level wiring check: reads agent/model.ts as text and verifies that
// the resume-refresh helper, its field list, and both scoped field names are
// present there, along with the needsUpdate flag.
test("getResumeRefreshArgs helper owns field extraction and comparison", () => {
  const modelSourceUrl = new URL("../../agent/model.ts", import.meta.url);
  const modelSource = readFileSync(fileURLToPath(modelSourceUrl), "utf-8");

  // Checked in order; the first missing snippet fails the test.
  const requiredSnippets = [
    "export function getResumeRefreshArgs(",
    "RESUME_REFRESH_FIELDS",
    '"max_output_tokens"',
    '"parallel_tool_calls"',
    "needsUpdate",
  ];

  for (const snippet of requiredSnippets) {
    expect(modelSource).toContain(snippet);
  }
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user