feat: set thinking level high for gemini 3 (#6330)
* base
* include thoughts
* update
* clean

---------

Co-authored-by: Letta Bot <noreply@letta.com>
This commit is contained in:
@@ -374,22 +374,28 @@ class GoogleVertexClient(LLMClientBase):
|
||||
# If enable_reasoner is False, set thinking_budget to 0
|
||||
# Otherwise, use the value from max_reasoning_tokens
|
||||
if self.is_reasoning_model(llm_config) or "flash" in llm_config.model:
|
||||
# Gemini reasoning models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
|
||||
thinking_budget = llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
|
||||
if thinking_budget <= 0:
|
||||
logger.warning(
|
||||
f"Thinking budget of {thinking_budget} for Gemini reasoning model {llm_config.model}, this will likely cause tool call failures"
|
||||
if llm_config.model.startswith("gemini-3"):
|
||||
# Let thinking_level default to high by not specifying thinking_budget
|
||||
thinking_config = ThinkingConfig(include_thoughts=True)
|
||||
else:
|
||||
# Gemini reasoning models may fail to call tools even with FunctionCallingConfigMode.ANY if thinking is fully disabled, set to minimum to prevent tool call failure
|
||||
thinking_budget = (
|
||||
llm_config.max_reasoning_tokens if llm_config.enable_reasoner else self.get_thinking_budget(llm_config.model)
|
||||
)
|
||||
# For models that require thinking mode (2.5 Pro, 3.x), override with minimum valid budget
|
||||
if llm_config.model.startswith("gemini-2.5-pro") or llm_config.model.startswith("gemini-3"):
|
||||
thinking_budget = 128
|
||||
if thinking_budget <= 0:
|
||||
logger.warning(
|
||||
f"Overriding thinking_budget to {thinking_budget} for model {llm_config.model} which requires thinking mode"
|
||||
f"Thinking budget of {thinking_budget} for Gemini reasoning model {llm_config.model}, this will likely cause tool call failures"
|
||||
)
|
||||
thinking_config = ThinkingConfig(
|
||||
thinking_budget=(thinking_budget),
|
||||
include_thoughts=(thinking_budget > 1),
|
||||
)
|
||||
# For models that require thinking mode (2.5 Pro), override with minimum valid budget; Gemini 3.x is handled separately above
|
||||
if llm_config.model.startswith("gemini-2.5-pro"):
|
||||
thinking_budget = 128
|
||||
logger.warning(
|
||||
f"Overriding thinking_budget to {thinking_budget} for model {llm_config.model} which requires thinking mode"
|
||||
)
|
||||
thinking_config = ThinkingConfig(
|
||||
thinking_budget=(thinking_budget),
|
||||
include_thoughts=(thinking_budget > 1),
|
||||
)
|
||||
request_data["config"]["thinking_config"] = thinking_config.model_dump()
|
||||
|
||||
return request_data
|
||||
|
||||
Reference in New Issue
Block a user