diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py
index 89f94099..0d3f7974 100644
--- a/letta/adapters/simple_llm_stream_adapter.py
+++ b/letta/adapters/simple_llm_stream_adapter.py
@@ -75,7 +75,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
                 run_id=self.run_id,
                 step_id=step_id,
             )
-        elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.deepseek]:
+        elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.deepseek, ProviderType.zai]:
             # Decide interface based on payload shape
             use_responses = "input" in request_data and "messages" not in request_data
             # No support for Responses API proxy
diff --git a/letta/services/streaming_service.py b/letta/services/streaming_service.py
index b589ca46..081051a2 100644
--- a/letta/services/streaming_service.py
+++ b/letta/services/streaming_service.py
@@ -467,7 +467,7 @@ class StreamingService:
 
     def _is_token_streaming_compatible(self, agent: AgentState) -> bool:
         """Check if agent's model supports token-level streaming."""
-        base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek"]
+        base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek", "zai"]
         google_letta_v1 = agent.agent_type == AgentType.letta_v1_agent and agent.llm_config.model_endpoint_type in [
             "google_ai",
             "google_vertex",