diff --git a/letta/constants.py b/letta/constants.py
index 7121913a..04ca17d0 100644
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -252,8 +252,11 @@ LLM_MAX_CONTEXT_WINDOW = {
     "deepseek-chat": 64000,
     "deepseek-reasoner": 64000,
     # glm (Z.AI)
-    "glm-4.6": 200000,
     "glm-4.5": 128000,
+    "glm-4.6": 200000,
+    "glm-4.7": 200000,
+    "glm-5": 200000,
+    "glm-5-code": 200000,
     ## OpenAI models: https://platform.openai.com/docs/models/overview
     # gpt-5
     "gpt-5": 272000,
diff --git a/letta/llm_api/zai_client.py b/letta/llm_api/zai_client.py
index e2625e8a..d0ad8075 100644
--- a/letta/llm_api/zai_client.py
+++ b/letta/llm_api/zai_client.py
@@ -17,7 +17,12 @@ from letta.settings import model_settings
 
 def is_zai_reasoning_model(model_name: str) -> bool:
     """Check if the model is a ZAI reasoning model (GLM-4.5+)."""
-    return model_name.startswith("glm-4.5") or model_name.startswith("glm-4.6") or model_name.startswith("glm-4.7")
+    return (
+        model_name.startswith("glm-4.5")
+        or model_name.startswith("glm-4.6")
+        or model_name.startswith("glm-4.7")
+        or model_name.startswith("glm-5")
+    )
 
 
 class ZAIClient(OpenAIClient):
diff --git a/letta/model_specs/model_prices_and_context_window.json b/letta/model_specs/model_prices_and_context_window.json
index 81e4729f..7a63e5a5 100644
--- a/letta/model_specs/model_prices_and_context_window.json
+++ b/letta/model_specs/model_prices_and_context_window.json
@@ -29290,6 +29290,36 @@
         "supports_vision": true,
         "supports_web_search": true
     },
+    "zai/glm-5": {
+        "cache_creation_input_token_cost": 0,
+        "cache_read_input_token_cost": 2e-7,
+        "input_cost_per_token": 1e-6,
+        "output_cost_per_token": 3.2e-6,
+        "litellm_provider": "zai",
+        "max_input_tokens": 200000,
+        "max_output_tokens": 128000,
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_prompt_caching": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true,
+        "source": "https://docs.z.ai/guides/overview/pricing"
+    },
+    "zai/glm-5-code": {
+        "cache_creation_input_token_cost": 0,
+        "cache_read_input_token_cost": 3e-7,
+        "input_cost_per_token": 1.2e-6,
+        "output_cost_per_token": 5e-6,
+        "litellm_provider": "zai",
+        "max_input_tokens": 200000,
+        "max_output_tokens": 128000,
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_prompt_caching": true,
+        "supports_reasoning": true,
+        "supports_tool_choice": true,
+        "source": "https://docs.z.ai/guides/overview/pricing"
+    },
     "zai/glm-4.7": {
         "cache_creation_input_token_cost": 0,
         "cache_read_input_token_cost": 1.1e-7,
diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py
index 5479898f..9639a2a2 100644
--- a/letta/schemas/llm_config.py
+++ b/letta/schemas/llm_config.py
@@ -473,7 +473,10 @@ class LLMConfig(BaseModel):
     @classmethod
     def is_zai_reasoning_model(cls, config: "LLMConfig") -> bool:
         return config.model_endpoint_type == "zai" and (
-            config.model.startswith("glm-4.5") or config.model.startswith("glm-4.6") or config.model.startswith("glm-4.7")
+            config.model.startswith("glm-4.5")
+            or config.model.startswith("glm-4.6")
+            or config.model.startswith("glm-4.7")
+            or config.model.startswith("glm-5")
         )
 
     @classmethod
@@ -499,7 +502,7 @@ class LLMConfig(BaseModel):
         if "gemini" in model:
             return True
         # ZAI GLM reasoning models
-        if "glm-4.5" in model or "glm-4.6" in model or "glm-4.7" in model:
+        if "glm-4.5" in model or "glm-4.6" in model or "glm-4.7" in model or "glm-5" in model:
             return True
         # DeepSeek reasoning models
         if "deepseek-r1" in model or "deepseek-reasoner" in model:
diff --git a/letta/schemas/providers/zai.py b/letta/schemas/providers/zai.py
index f62d119d..8682e4b1 100644
--- a/letta/schemas/providers/zai.py
+++ b/letta/schemas/providers/zai.py
@@ -16,6 +16,8 @@ MODEL_CONTEXT_WINDOWS = {
     "glm-4.5": 128000,
     "glm-4.6": 200000,
     "glm-4.7": 200000,
+    "glm-5": 200000,
+    "glm-5-code": 200000,
 }
 
 
diff --git a/tests/integration_test_send_message_v2.py b/tests/integration_test_send_message_v2.py
index d5c5af9a..4ef0cd02 100644
--- a/tests/integration_test_send_message_v2.py
+++ b/tests/integration_test_send_message_v2.py
@@ -37,7 +37,7 @@ all_configs = [
     "openai-gpt-5.json",
     "claude-4-5-sonnet.json",
     "gemini-2.5-pro.json",
-    "zai-glm-4.6.json",
+    "zai-glm-5.json",
 ]
 
 
diff --git a/tests/model_settings/zai-glm-5.json b/tests/model_settings/zai-glm-5.json
new file mode 100644
index 00000000..7c94aecd
--- /dev/null
+++ b/tests/model_settings/zai-glm-5.json
@@ -0,0 +1,13 @@
+{
+  "handle": "zai/glm-5",
+  "model_settings": {
+    "provider_type": "zai",
+    "temperature": 1.0,
+    "max_output_tokens": 4096,
+    "parallel_tool_calls": false,
+    "thinking": {
+      "type": "enabled",
+      "clear_thinking": false
+    }
+  }
+}