From 4e0751f06951f519349a88b6b08f5711341cb911 Mon Sep 17 00:00:00 2001 From: jnjpng Date: Thu, 5 Mar 2026 17:28:25 -0800 Subject: [PATCH] fix: send max_tokens null to explicitly unset stale values on model switch (#1281) Co-authored-by: Letta Code --- src/agent/model.ts | 10 +++++-- src/agent/modify.ts | 8 ++++-- src/models.json | 70 ++++++++++++++++++++++++++++----------------- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/src/agent/model.ts b/src/agent/model.ts index 2830372..ae2871e 100644 --- a/src/agent/model.ts +++ b/src/agent/model.ts @@ -282,7 +282,10 @@ export function getResumeRefreshArgs( // Extract only the resume-scoped fields from the full preset for (const field of RESUME_REFRESH_FIELDS) { const value = presetUpdateArgs[field]; - if (field === "max_output_tokens" && typeof value === "number") { + if ( + field === "max_output_tokens" && + (typeof value === "number" || value === null) + ) { updateArgs[field] = value; } else if (field === "parallel_tool_calls" && typeof value === "boolean") { updateArgs[field] = value; @@ -295,7 +298,10 @@ export function getResumeRefreshArgs( // Compare against the agent's current values const currentMaxTokens = agent.llm_config?.max_tokens; - const wantMaxTokens = updateArgs.max_output_tokens as number | undefined; + const wantMaxTokens = updateArgs.max_output_tokens as + | number + | null + | undefined; const currentParallel = agent.model_settings?.parallel_tool_calls; const wantParallel = updateArgs.parallel_tool_calls as boolean | undefined; diff --git a/src/agent/modify.ts b/src/agent/modify.ts index 08925f4..bae743d 100644 --- a/src/agent/modify.ts +++ b/src/agent/modify.ts @@ -161,8 +161,11 @@ function buildModelSettings( // Apply max_output_tokens only when provider_type is present. // Without provider_type the discriminated union rejects the payload (e.g. MiniMax). + // Pass null through so the server can explicitly unset max_output_tokens + // (prevents stale values lingering from a previous model). if ( - typeof updateArgs?.max_output_tokens === "number" && + (typeof updateArgs?.max_output_tokens === "number" || + updateArgs?.max_output_tokens === null) && "provider_type" in settings ) { (settings as Record).max_output_tokens = @@ -207,7 +210,8 @@ export async function updateAgentLLMConfig( ...(isMinimax && { parallel_tool_calls: true }), ...(hasModelSettings && { model_settings: modelSettings }), ...(contextWindow && { context_window_limit: contextWindow }), - ...(typeof updateArgs?.max_output_tokens === "number" && { + ...((typeof updateArgs?.max_output_tokens === "number" || + updateArgs?.max_output_tokens === null) && { max_tokens: updateArgs.max_output_tokens, }), }); diff --git a/src/models.json b/src/models.json index ea3a345..fd2ec59 100644 --- a/src/models.json +++ b/src/models.json @@ -470,7 +470,7 @@ "reasoning_effort": "none", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -483,7 +483,7 @@ "reasoning_effort": "low", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -496,7 +496,7 @@ "reasoning_effort": "medium", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -509,7 +509,7 @@ "reasoning_effort": "high", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -522,7 +522,7 @@ "reasoning_effort": "xhigh", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -535,7 +535,7 @@ "reasoning_effort": "none", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -548,7 +548,7 @@ "reasoning_effort": "low", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -561,7 +561,7 @@ "reasoning_effort": "medium", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -574,7 +574,7 @@ "reasoning_effort": "high", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -587,7 +587,7 @@ "reasoning_effort": "xhigh", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -600,7 +600,7 @@ "reasoning_effort": "none", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -613,7 +613,7 @@ "reasoning_effort": "low", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -626,7 +626,7 @@ "reasoning_effort": "medium", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -640,7 +640,7 @@ "reasoning_effort": "high", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -653,7 +653,7 @@ "reasoning_effort": "xhigh", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -666,7 +666,7 @@ "reasoning_effort": "none", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -679,7 +679,7 @@ "reasoning_effort": "low", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -692,7 +692,7 @@ "reasoning_effort": "medium", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -706,7 +706,7 @@ "reasoning_effort": "high", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -719,7 +719,7 @@ "reasoning_effort": "xhigh", "verbosity": "medium", "context_window": 272000, - "max_output_tokens": 128000, + "max_output_tokens": null, "parallel_tool_calls": true } }, @@ -1074,42 +1074,60 @@ "handle": "google_ai/gemini-2.5-flash", "label": "Gemini 2.5 Flash", "description": "Google's fastest model", - "updateArgs": { "context_window": 180000, "parallel_tool_calls": true } + "updateArgs": { + "context_window": 180000, + "parallel_tool_calls": true + } }, { "id": "gemini-pro", "handle": "google_ai/gemini-2.5-pro", "label": "Gemini 2.5 Pro", "description": "Google's last generation flagship model", - "updateArgs": { "context_window": 180000, "parallel_tool_calls": true } + "updateArgs": { + "context_window": 180000, + "parallel_tool_calls": true + } }, { "id": "gpt-4.1", "handle": "openai/gpt-4.1", "label": "GPT-4.1", "description": "OpenAI's most recent non-reasoner model", - "updateArgs": { "context_window": 1047576, "parallel_tool_calls": true } + "updateArgs": { + "context_window": 1047576, + "parallel_tool_calls": true + } }, { "id": "gpt-4.1-mini", "handle": "openai/gpt-4.1-mini-2025-04-14", "label": "GPT-4.1-Mini", "description": "OpenAI's most recent non-reasoner model (mini version)", - "updateArgs": { "context_window": 1047576, "parallel_tool_calls": true } + "updateArgs": { + "context_window": 1047576, + "parallel_tool_calls": true + } }, { "id": "gpt-4.1-nano", "handle": "openai/gpt-4.1-nano-2025-04-14", "label": "GPT-4.1-Nano", "description": "OpenAI's most recent non-reasoner model (nano version)", - "updateArgs": { "context_window": 1047576, "parallel_tool_calls": true } + "updateArgs": { + "context_window": 1047576, + "parallel_tool_calls": true + } }, { "id": "o4-mini", "handle": "openai/o4-mini", "label": "o4-mini", "description": "OpenAI's latest o-series reasoning model", - "updateArgs": { "context_window": 180000, "parallel_tool_calls": true } + "updateArgs": { + "context_window": 180000, + "parallel_tool_calls": true + } }, { "id": "gemini-3-vertex",