Fix: Change Z.ai context window to account for max_token subtraction (#9710)
Fix the Z.ai context window (effectively [advertised context window] − [max output tokens]) and properly pass max_tokens so Z.ai doesn't fall back to its 65k default output limit for GLM-5.
This commit is contained in:
@@ -253,10 +253,10 @@ LLM_MAX_CONTEXT_WINDOW = {
|
||||
"deepseek-reasoner": 64000,
|
||||
# glm (Z.AI)
|
||||
"glm-4.5": 128000,
|
||||
"glm-4.6": 200000,
|
||||
"glm-4.7": 200000,
|
||||
"glm-5": 200000,
|
||||
"glm-5-code": 200000,
|
||||
"glm-4.6": 180000,
|
||||
"glm-4.7": 180000,
|
||||
"glm-5": 180000,
|
||||
"glm-5-code": 180000,
|
||||
## OpenAI models: https://platform.openai.com/docs/models/overview
|
||||
# gpt-5
|
||||
"gpt-5": 272000,
|
||||
|
||||
@@ -68,6 +68,12 @@ class ZAIClient(OpenAIClient):
|
||||
}
|
||||
}
|
||||
|
||||
# Z.ai's API uses max_tokens, not max_completion_tokens.
|
||||
# If max_completion_tokens is sent, Z.ai ignores it and falls back to its
|
||||
# default of 65536, silently truncating input to ~134K of the 200K context window.
|
||||
if "max_completion_tokens" in data:
|
||||
data["max_tokens"] = data.pop("max_completion_tokens")
|
||||
|
||||
# Sanitize empty text content — ZAI rejects empty text blocks
|
||||
if "messages" in data:
|
||||
for msg in data["messages"]:
|
||||
|
||||
@@ -12,12 +12,13 @@ from letta.schemas.providers.openai import OpenAIProvider
|
||||
|
||||
# Z.ai model context windows
|
||||
# Reference: https://docs.z.ai/
|
||||
# GLM-5's max context window is 200K tokens, but max_output_tokens (default 16k) counts against that, so we advertise 180k to leave headroom.
|
||||
MODEL_CONTEXT_WINDOWS = {
|
||||
"glm-4.5": 128000,
|
||||
"glm-4.6": 200000,
|
||||
"glm-4.7": 200000,
|
||||
"glm-5": 200000,
|
||||
"glm-5-code": 200000,
|
||||
"glm-4.6": 180000,
|
||||
"glm-4.7": 180000,
|
||||
"glm-5": 180000,
|
||||
"glm-5-code": 180000,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user