From a11ba9710ca0624b4bc1c51385d5131e96d3ccbd Mon Sep 17 00:00:00 2001 From: Kevin Lin Date: Fri, 27 Feb 2026 11:15:37 -0800 Subject: [PATCH] feat(core): increase Gemini timeout to 10 minutes (#9714) --- conf.yaml | 1 + letta/llm_api/google_ai_client.py | 6 +++--- letta/settings.py | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conf.yaml b/conf.yaml index b3b2eb82..cbb160c8 100644 --- a/conf.yaml +++ b/conf.yaml @@ -260,6 +260,7 @@ model: base_url: https://generativelanguage.googleapis.com/ force_minimum_thinking_budget: false max_retries: 5 + timeout_seconds: 600.0 # Google Vertex (-> GOOGLE_CLOUD_*) # google_cloud: diff --git a/letta/llm_api/google_ai_client.py b/letta/llm_api/google_ai_client.py index f765a121..c41d9b94 100644 --- a/letta/llm_api/google_ai_client.py +++ b/letta/llm_api/google_ai_client.py @@ -9,7 +9,7 @@ from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK from letta.llm_api.google_vertex_client import GoogleVertexClient from letta.log import get_logger from letta.schemas.llm_config import LLMConfig -from letta.settings import model_settings, settings +from letta.settings import model_settings logger = get_logger(__name__) @@ -18,7 +18,7 @@ class GoogleAIClient(GoogleVertexClient): provider_label = "Google AI" def _get_client(self, llm_config: Optional[LLMConfig] = None): - timeout_ms = int(settings.llm_request_timeout_seconds * 1000) + timeout_ms = int(model_settings.gemini_timeout_seconds * 1000) api_key = None if llm_config: api_key, _, _ = self.get_byok_overrides(llm_config) @@ -30,7 +30,7 @@ class GoogleAIClient(GoogleVertexClient): ) async def _get_client_async(self, llm_config: Optional[LLMConfig] = None): - timeout_ms = int(settings.llm_request_timeout_seconds * 1000) + timeout_ms = int(model_settings.gemini_timeout_seconds * 1000) api_key = None if llm_config: api_key, _, _ = await self.get_byok_overrides_async(llm_config) diff --git a/letta/settings.py b/letta/settings.py index 3f236099..aa316385 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -204,6 +204,7 @@ class ModelSettings(BaseSettings): gemini_base_url: str = "https://generativelanguage.googleapis.com/" gemini_force_minimum_thinking_budget: bool = False gemini_max_retries: int = 5 + gemini_timeout_seconds: float = 600.0 # google vertex google_cloud_project: Optional[str] = None