diff --git a/letta/adapters/simple_llm_stream_adapter.py b/letta/adapters/simple_llm_stream_adapter.py index 1b575147..1cd2ee23 100644 --- a/letta/adapters/simple_llm_stream_adapter.py +++ b/letta/adapters/simple_llm_stream_adapter.py @@ -112,7 +112,12 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter): # Start the streaming request (map provider errors to common LLMError types) try: - stream = await self.llm_client.stream_async(request_data, self.llm_config) + # Gemini uses async generator pattern (no await) to maintain connection lifecycle + # Other providers return awaitables that resolve to iterators + if self.llm_config.model_endpoint_type in [ProviderType.google_ai, ProviderType.google_vertex]: + stream = self.llm_client.stream_async(request_data, self.llm_config) + else: + stream = await self.llm_client.stream_async(request_data, self.llm_config) except Exception as e: raise self.llm_client.handle_llm_error(e) diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index b9c19e83..26b63f44 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -3,8 +3,7 @@ import json import uuid from typing import AsyncIterator, List, Optional -from google import genai -from google.genai import errors +from google.genai import Client, errors from google.genai.types import ( FunctionCallingConfig, FunctionCallingConfigMode, @@ -51,7 +50,7 @@ class GoogleVertexClient(LLMClientBase): def _get_client(self): timeout_ms = int(settings.llm_request_timeout_seconds * 1000) - return genai.Client( + return Client( vertexai=True, project=model_settings.google_cloud_project, location=model_settings.google_cloud_location, @@ -142,11 +141,15 @@ class GoogleVertexClient(LLMClientBase): @trace_method async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncIterator[GenerateContentResponse]: client = self._get_client() - return await client.aio.models.generate_content_stream( + response = await client.aio.models.generate_content_stream( model=llm_config.model, contents=request_data["contents"], config=request_data["config"], ) + # Direct yield - keeps response alive in generator's local scope throughout iteration + # This is required because the SDK's connection lifecycle is tied to the response object + async for chunk in response: + yield chunk @staticmethod def add_dummy_model_messages(messages: List[dict]) -> List[dict]: diff --git a/pyproject.toml b/pyproject.toml index 192a8bc5..24d1cfe6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "ruff[dev]>=0.12.10", "trafilatura", "readability-lxml", - "google-genai>=1.15.0", + "google-genai>=1.52.0", "datadog>=0.49.1", "psutil>=5.9.0", ] diff --git a/uv.lock b/uv.lock index dc7c51db..fb17b0ca 100644 --- a/uv.lock +++ b/uv.lock @@ -1487,7 +1487,7 @@ wheels = [ [[package]] name = "google-genai" -version = "1.31.0" +version = "1.52.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1499,9 +1499,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e0/1b/da30fa6e2966942d7028a58eb7aa7d04544dcc3aa66194365b2e0adac570/google_genai-1.31.0.tar.gz", hash = "sha256:8572b47aa684357c3e5e10d290ec772c65414114939e3ad2955203e27cd2fcbc", size = 233482, upload-time = "2025-08-18T23:40:21.733Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/4e/0ad8585d05312074bb69711b2d81cfed69ce0ae441913d57bf169bed20a7/google_genai-1.52.0.tar.gz", hash = "sha256:a74e8a4b3025f23aa98d6a0f84783119012ca6c336fd68f73c5d2b11465d7fc5", size = 258743, upload-time = "2025-11-21T02:18:55.742Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/41/27/1525bc9cbec58660f0842ebcbfe910a1dde908c2672373804879666e0bb8/google_genai-1.31.0-py3-none-any.whl", hash = "sha256:5c6959bcf862714e8ed0922db3aaf41885bacf6318751b3421bf1e459f78892f", size = 231876, upload-time = "2025-08-18T23:40:20.385Z" }, + { url = "https://files.pythonhosted.org/packages/ec/66/03f663e7bca7abe9ccfebe6cb3fe7da9a118fd723a5abb278d6117e7990e/google_genai-1.52.0-py3-none-any.whl", hash = "sha256:c8352b9f065ae14b9322b949c7debab8562982f03bf71d44130cd2b798c20743", size = 261219, upload-time = "2025-11-21T02:18:54.515Z" }, ] [[package]] @@ -2509,7 +2509,7 @@ requires-dist = [ { name = "faker", specifier = ">=36.1.0" }, { name = "fastapi", marker = "extra == 'desktop'", specifier = ">=0.115.6" }, { name = "fastapi", marker = "extra == 'server'", specifier = ">=0.115.6" }, - { name = "google-genai", specifier = ">=1.15.0" }, + { name = "google-genai", specifier = ">=1.52.0" }, { name = "granian", extras = ["uvloop", "reload"], marker = "extra == 'experimental'", specifier = ">=2.3.2" }, { name = "grpcio", specifier = ">=1.68.1" }, { name = "grpcio-tools", specifier = ">=1.68.1" },