feat: add streaming support to vertex client (#5106)
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import json
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
from typing import AsyncIterator, List, Optional
|
||||
|
||||
from google import genai
|
||||
from google.genai import errors
|
||||
@@ -138,6 +138,15 @@ class GoogleVertexClient(LLMClientBase):
|
||||
raise RuntimeError("Failed to get response data after all retries")
|
||||
return response_data
|
||||
|
||||
@trace_method
async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
    """Open a streaming generate-content request against the Vertex backend.

    Args:
        request_data: Prepared request payload; must contain the
            ``"contents"`` and ``"config"`` entries expected by the
            google-genai SDK.
        llm_config: Configuration object supplying the model identifier
            to stream from.

    Returns:
        An async iterator yielding ``GenerateContentResponse`` chunks as
        the backend produces them.
    """
    vertex_client = self._get_client()
    # The aio streaming entry point must itself be awaited; it resolves
    # to the async iterator of response chunks.
    stream = await vertex_client.aio.models.generate_content_stream(
        model=llm_config.model,
        contents=request_data["contents"],
        config=request_data["config"],
    )
    return stream
|
||||
|
||||
@staticmethod
|
||||
def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
|
||||
"""Google AI API requires all function call returns are immediately followed by a 'model' role message.
|
||||
|
||||
Reference in New Issue
Block a user