fix: google clients thinking config (#2414)

Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
This commit is contained in:
cthomas
2025-05-24 09:42:34 -07:00
committed by GitHub
parent 6c9fef84e9
commit eaeac54798
2 changed files with 6 additions and 20 deletions

View File

@@ -7,10 +7,7 @@ from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
from letta.llm_api.google_vertex_client import GoogleVertexClient
from letta.log import get_logger
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.settings import model_settings
from letta.tracing import trace_method
logger = get_logger(__name__)
@@ -20,18 +17,6 @@ class GoogleAIClient(GoogleVertexClient):
def _get_client(self):
    """Return a Gemini client authenticated with the configured API key."""
    api_key = model_settings.gemini_api_key
    return genai.Client(api_key=api_key)
@trace_method
def build_request_data(
    self,
    messages: List[PydanticMessage],
    llm_config: LLMConfig,
    tools: List[dict],
    force_tool_call: Optional[str] = None,
) -> dict:
    """Build the Gemini request payload, without any thinking config.

    Delegates to the Vertex base client to construct the request, then
    strips ``thinking_config`` from the generation config, since this
    client does not pass it through.

    Args:
        messages: Conversation history to send.
        llm_config: Model configuration for the request.
        tools: Tool/function definitions available to the model.
        force_tool_call: Optional tool name the model must call.

    Returns:
        The request payload dict produced by the base client, with
        ``config.thinking_config`` removed if present.
    """
    request = super().build_request_data(messages, llm_config, tools, force_tool_call)
    # Use pop() with a default instead of del: the base client only sets
    # thinking_config for some models (e.g. when "flash" is in the model
    # name), so an unconditional del would raise KeyError when absent.
    request["config"].pop("thinking_config", None)
    return request
def get_gemini_endpoint_and_headers(
base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False

View File

@@ -241,13 +241,14 @@ class GoogleVertexClient(LLMClientBase):
)
request_data["config"]["tool_config"] = tool_config.model_dump()
# Add thinking_config
# Add thinking_config for flash
# If enable_reasoner is False, set thinking_budget to 0
# Otherwise, use the value from max_reasoning_tokens
thinking_config = ThinkingConfig(
thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0,
)
request_data["config"]["thinking_config"] = thinking_config.model_dump()
if "flash" in llm_config.model:
thinking_config = ThinkingConfig(
thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0,
)
request_data["config"]["thinking_config"] = thinking_config.model_dump()
return request_data