Fix: Change Z.ai context window to account for max_token subtraction (#9710)

Fix the Z.ai context window (effectively [advertised context window] − [max output tokens]) and properly pass max_tokens so Z.ai doesn't fall back to its 65K default for GLM-5.
This commit is contained in:
amysguan
2026-02-27 17:10:46 -08:00
committed by Caren Thomas
parent aa66e81a71
commit 612a2ae98b
3 changed files with 15 additions and 8 deletions

View File

@@ -253,10 +253,10 @@ LLM_MAX_CONTEXT_WINDOW = {
"deepseek-reasoner": 64000,
# glm (Z.AI)
"glm-4.5": 128000,
"glm-4.6": 200000,
"glm-4.7": 200000,
"glm-5": 200000,
"glm-5-code": 200000,
"glm-4.6": 180000,
"glm-4.7": 180000,
"glm-5": 180000,
"glm-5-code": 180000,
## OpenAI models: https://platform.openai.com/docs/models/overview
# gpt-5
"gpt-5": 272000,

View File

@@ -68,6 +68,12 @@ class ZAIClient(OpenAIClient):
}
}
# Z.ai's API uses max_tokens, not max_completion_tokens.
# If max_completion_tokens is sent, Z.ai ignores it and falls back to its
# default of 65536, silently leaving only ~134K of the 200K context window for input.
if "max_completion_tokens" in data:
data["max_tokens"] = data.pop("max_completion_tokens")
# Sanitize empty text content — ZAI rejects empty text blocks
if "messages" in data:
for msg in data["messages"]:

View File

@@ -12,12 +12,13 @@ from letta.schemas.providers.openai import OpenAIProvider
# Z.ai model context windows
# Reference: https://docs.z.ai/
# GLM-5's advertised max context window is 200K tokens, but max output tokens count
# against that budget, so we advertise 180K to reserve headroom for generation.
MODEL_CONTEXT_WINDOWS = {
"glm-4.5": 128000,
"glm-4.6": 200000,
"glm-4.7": 200000,
"glm-5": 200000,
"glm-5-code": 200000,
"glm-4.6": 180000,
"glm-4.7": 180000,
"glm-5": 180000,
"glm-5-code": 180000,
}