fix: upgrade google-genai sdk version and fix gemini 3 streaming (#6437)

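* Bump google-genai from >=1.15.0 to >=1.52.0 (lockfile 1.31.0 -> 1.52.0)

* Make GoogleVertexClient.stream_async an async generator and skip the await for Google providers in SimpleLLMStreamAdapter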

---------

Co-authored-by: Letta Bot <noreply@letta.com>
This commit is contained in:
jnjpng
2025-11-28 18:31:49 -08:00
committed by Caren Thomas
parent c4d5c380d6
commit c6df306ccf
4 changed files with 18 additions and 10 deletions

View File

@@ -112,6 +112,11 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
# Start the streaming request (map provider errors to common LLMError types)
try:
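# The Google AI / Vertex clients implement stream_async as an async generator: calling it
# returns the generator directly (no await), which keeps the SDK connection alive while
# chunks are consumed.
# NOTE: an async generator's body does not run until the first chunk is requested, so Gemini
# request errors surface during iteration rather than inside this try block.
# Other providers return an awaitable that resolves to the stream iterator.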
if self.llm_config.model_endpoint_type in [ProviderType.google_ai, ProviderType.google_vertex]:
stream = self.llm_client.stream_async(request_data, self.llm_config)
else:
stream = await self.llm_client.stream_async(request_data, self.llm_config)
except Exception as e:
raise self.llm_client.handle_llm_error(e)

View File

@@ -3,8 +3,7 @@ import json
import uuid
from typing import AsyncIterator, List, Optional
from google import genai
from google.genai import errors
from google.genai import Client, errors
from google.genai.types import (
FunctionCallingConfig,
FunctionCallingConfigMode,
@@ -51,7 +50,7 @@ class GoogleVertexClient(LLMClientBase):
def _get_client(self):
timeout_ms = int(settings.llm_request_timeout_seconds * 1000)
return genai.Client(
return Client(
vertexai=True,
project=model_settings.google_cloud_project,
location=model_settings.google_cloud_location,
@@ -142,11 +141,15 @@ class GoogleVertexClient(LLMClientBase):
@trace_method
async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]:
client = self._get_client()
return await client.aio.models.generate_content_stream(
response = await client.aio.models.generate_content_stream(
model=llm_config.model,
contents=request_data["contents"],
config=request_data["config"],
)
# Direct yield - keeps response alive in generator's local scope throughout iteration
# This is required because the SDK's connection lifecycle is tied to the response object
async for chunk in response:
yield chunk
@staticmethod
def add_dummy_model_messages(messages: List[dict]) -> List[dict]:

View File

@@ -69,7 +69,7 @@ dependencies = [
"ruff[dev]>=0.12.10",
"trafilatura",
"readability-lxml",
"google-genai>=1.15.0",
"google-genai>=1.52.0",
"datadog>=0.49.1",
"psutil>=5.9.0",
]

8
uv.lock generated
View File

@@ -1487,7 +1487,7 @@ wheels = [
[[package]]
name = "google-genai"
version = "1.31.0"
version = "1.52.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1499,9 +1499,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "websockets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e0/1b/da30fa6e2966942d7028a58eb7aa7d04544dcc3aa66194365b2e0adac570/google_genai-1.31.0.tar.gz", hash = "sha256:8572b47aa684357c3e5e10d290ec772c65414114939e3ad2955203e27cd2fcbc", size = 233482, upload-time = "2025-08-18T23:40:21.733Z" }
sdist = { url = "https://files.pythonhosted.org/packages/09/4e/0ad8585d05312074bb69711b2d81cfed69ce0ae441913d57bf169bed20a7/google_genai-1.52.0.tar.gz", hash = "sha256:a74e8a4b3025f23aa98d6a0f84783119012ca6c336fd68f73c5d2b11465d7fc5", size = 258743, upload-time = "2025-11-21T02:18:55.742Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/41/27/1525bc9cbec58660f0842ebcbfe910a1dde908c2672373804879666e0bb8/google_genai-1.31.0-py3-none-any.whl", hash = "sha256:5c6959bcf862714e8ed0922db3aaf41885bacf6318751b3421bf1e459f78892f", size = 231876, upload-time = "2025-08-18T23:40:20.385Z" },
{ url = "https://files.pythonhosted.org/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" },
]
[[package]]
@@ -2509,7 +2509,7 @@ requires-dist = [
{ name = "faker", specifier = ">=36.1.0" },
{ name = "fastapi", marker = "extra == 'desktop'", specifier = ">=0.115.6" },
{ name = "fastapi", marker = "extra == 'server'", specifier = ">=0.115.6" },
{ name = "google-genai", specifier = ">=1.15.0" },
{ name = "google-genai", specifier = ">=1.52.0" },
{ name = "granian", extras = ["uvloop", "reload"], marker = "extra == 'experimental'", specifier = ">=2.3.2" },
{ name = "grpcio", specifier = ">=1.68.1" },
{ name = "grpcio-tools", specifier = ">=1.68.1" },