fix: gemini 2.5 thinking models fail to call functions if thinking is fully disabled

Co-authored-by: Jin Peng <jinjpeng@Jins-MacBook-Pro.local>
This commit is contained in:
jnjpng
2025-08-08 16:34:32 -07:00
committed by GitHub
parent c6002744e6
commit 243a2b65e0
3 changed files with 14 additions and 3 deletions

View File

@@ -15,7 +15,7 @@ def json_dumps(data, indent=2) -> str:
try:
return obj.decode("utf-8")
except Exception:
print(f"Error decoding bytes as utf-8: {obj}")
# TODO: this is to handle Gemini thought signatures, b64 decode this back to bytes when sending back to Gemini
return base64.b64encode(obj).decode("utf-8")
raise TypeError(f"Type {type(obj)} not serializable")

View File

@@ -254,8 +254,11 @@ class GoogleVertexClient(LLMClientBase):
# If enable_reasoner is False, set thinking_budget to 0
# Otherwise, use the value from max_reasoning_tokens
if "flash" in llm_config.model:
# Gemini flash models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
thinking_config = ThinkingConfig(
thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0,
thinking_budget=(
llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
),
)
request_data["config"]["thinking_config"] = thinking_config.model_dump()
@@ -292,7 +295,6 @@ class GoogleVertexClient(LLMClientBase):
}
}
"""
# print(response_data)
response = GenerateContentResponse(**response_data)
try:
@@ -494,6 +496,14 @@ class GoogleVertexClient(LLMClientBase):
"required": ["name", "args"],
}
def get_thinking_budget(self, model: str) -> int:
    """Return the minimum thinking-token budget to apply when reasoning is disabled.

    Gemini 2.5 flash models may fail to call tools (even with
    FunctionCallingConfigMode.ANY) if thinking is fully disabled, so when the
    ``gemini_force_minimum_thinking_budget`` setting is on we force a small
    non-zero budget instead of 0.

    Args:
        model: Model identifier string (e.g. "gemini-2.5-flash-lite").

    Returns:
        512 for 2.5 flash-lite models, 1 for other 2.5 flash models, and 0
        otherwise (thinking fully disabled). NOTE: the original annotation
        said ``bool``; the method actually returns ``int`` — fixed here.
    """
    if model_settings.gemini_force_minimum_thinking_budget:
        # Check the more specific "lite" variant first: a lite model name
        # would also match the plain flash condition below.
        if all(substring in model for substring in ["2.5", "flash", "lite"]):
            return 512
        elif all(substring in model for substring in ["2.5", "flash"]):
            return 1
    return 0
@trace_method
def handle_llm_error(self, e: Exception) -> Exception:
# Fallback to base implementation

View File

@@ -144,6 +144,7 @@ class ModelSettings(BaseSettings):
# google ai
gemini_api_key: Optional[str] = None
gemini_base_url: str = "https://generativelanguage.googleapis.com/"
gemini_force_minimum_thinking_budget: bool = False
# google vertex
google_cloud_project: Optional[str] = None