feat: add streaming support to vertex client (#5106)

This commit is contained in:
cthomas
2025-10-02 21:47:18 -07:00
committed by Caren Thomas
parent af2a28b1b1
commit 6b6b82a07a

View File

@@ -1,6 +1,6 @@
import json
import uuid
from typing import List, Optional
from typing import AsyncIterator, List, Optional
from google import genai
from google.genai import errors
@@ -138,6 +138,15 @@ class GoogleVertexClient(LLMClientBase):
raise RuntimeError("Failed to get response data after all retries")
return response_data
@trace_method
async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
    """Start a streaming generate-content request against Vertex.

    Args:
        request_data: Prepared request payload; the "contents" and "config"
            entries are forwarded to the SDK call as-is.
        llm_config: Supplies the model identifier to stream from.

    Returns:
        An async iterator yielding ``GenerateContentResponse`` chunks as the
        model produces them.
    """
    gemini_client = self._get_client()
    response_stream = await gemini_client.aio.models.generate_content_stream(
        model=llm_config.model,
        contents=request_data["contents"],
        config=request_data["config"],
    )
    return response_stream
@staticmethod
def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
"""Google AI API requires all function call returns are immediately followed by a 'model' role message.