fix: retry on 500 and 503 for gemini [LET-4185]

* handle 500 and 503

* timeout

---------

Co-authored-by: Letta Bot <noreply@letta.com>
This commit is contained in:
jnjpng
2025-09-03 14:02:58 -07:00
committed by GitHub
parent dd1a9a0b7c
commit 12d8242d94
2 changed files with 17 additions and 5 deletions

View File

@@ -3,6 +3,7 @@ import uuid
from typing import List, Optional
from google import genai
from google.genai import errors
from google.genai.types import (
FunctionCallingConfig,
FunctionCallingConfigMode,
@@ -67,11 +68,21 @@ class GoogleVertexClient(LLMClientBase):
retry_count = 1
should_retry = True
while should_retry and retry_count <= self.MAX_RETRIES:
response = await client.aio.models.generate_content(
model=llm_config.model,
contents=request_data["contents"],
config=request_data["config"],
)
try:
response = await client.aio.models.generate_content(
model=llm_config.model,
contents=request_data["contents"],
config=request_data["config"],
)
except errors.APIError as e:
# Retry on 500 and 503 errors as well; these are usually ephemeral errors from Gemini
if e.code == 503 or e.code == 500:
logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
retry_count += 1
continue
raise e
except Exception as e:
raise e
response_data = response.model_dump()
is_malformed_function_call = self.is_malformed_function_call(response_data)
if is_malformed_function_call:

View File

@@ -1334,6 +1334,7 @@ def test_background_token_streaming_tool_call(
messages=messages_to_send,
stream_tokens=True,
background=True,
request_options={"timeout_in_seconds": 300},
)
verify_token_streaming = (
llm_config.model_endpoint_type in ["anthropic", "openai", "bedrock"] and "claude-3-5-sonnet" not in llm_config.model