diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py index 89f94099..0d3f7974 100644 --- a/letta/adapters/simple_llm_stream_adapter.py +++ b/letta/adapters/simple_llm_stream_adapter.py @@ -75,7 +75,7 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): run_id=self.run_id, step_id=step_id, ) - elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.deepseek]: + elif self.llm_config.model_endpoint_type in [ProviderType.openai, ProviderType.deepseek, ProviderType.zai]: # Decide interface based on payload shape use_responses = "input" in request_data and "messages" not in request_data # No support for Responses API proxy diff --git a/letta/services/streaming_service.py b/letta/services/streaming_service.py index b589ca46..081051a2 100644 --- a/letta/services/streaming_service.py +++ b/letta/services/streaming_service.py @@ -467,7 +467,7 @@ class StreamingService: def _is_token_streaming_compatible(self, agent: AgentState) -> bool: """Check if agent's model supports token-level streaming.""" - base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek"] + base_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock", "deepseek", "zai"] google_letta_v1 = agent.agent_type == AgentType.letta_v1_agent and agent.llm_config.model_endpoint_type in [ "google_ai", "google_vertex",