fix: set thinking budget for vertex tokens (#2367)

This commit is contained in:
cthomas
2025-05-23 09:07:32 -07:00
committed by GitHub
parent 9ac0fa2aef
commit f9d2793caf
2 changed files with 19 additions and 5 deletions

View File

@@ -7,7 +7,10 @@ from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
from letta.llm_api.google_vertex_client import GoogleVertexClient
from letta.log import get_logger
from letta.schemas.llm_config import LLMConfig
from letta.schemas.message import Message as PydanticMessage
from letta.settings import model_settings
from letta.tracing import trace_method
logger = get_logger(__name__)
@@ -17,6 +20,18 @@ class GoogleAIClient(GoogleVertexClient):
def _get_client(self):
    """Build a Gemini client authenticated via the configured API key."""
    api_key = model_settings.gemini_api_key
    return genai.Client(api_key=api_key)
@trace_method
def build_request_data(
    self,
    messages: List[PydanticMessage],
    llm_config: LLMConfig,
    tools: List[dict],
    force_tool_call: Optional[str] = None,
) -> dict:
    """Build the Gemini API request payload.

    Delegates to the Vertex implementation, then strips the
    ``thinking_config`` entry: the public Gemini API (unlike Vertex)
    does not accept a thinking budget in the generation config.

    Args:
        messages: Conversation history to send to the model.
        llm_config: Model/generation configuration.
        tools: Tool (function-calling) schemas to expose to the model.
        force_tool_call: Optional tool name the model must call.

    Returns:
        The request dict ready to send to the Gemini API.
    """
    request = super().build_request_data(messages, llm_config, tools, force_tool_call)
    # pop() instead of del: tolerate the parent no longer emitting the key,
    # rather than raising KeyError on a missing entry.
    request["config"].pop("thinking_config", None)
    return request
def get_gemini_endpoint_and_headers(
base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False

View File

@@ -244,11 +244,10 @@ class GoogleVertexClient(LLMClientBase):
# Add thinking_config
# If enable_reasoner is False, set thinking_budget to 0
# Otherwise, use the value from max_reasoning_tokens
if llm_config.enable_reasoner:
thinking_config = ThinkingConfig(
thinking_budget=llm_config.max_reasoning_tokens,
)
request_data["config"]["thinking_config"] = thinking_config.model_dump()
thinking_config = ThinkingConfig(
thinking_budget=llm_config.max_reasoning_tokens if llm_config.enable_reasoner else 0,
)
request_data["config"]["thinking_config"] = thinking_config.model_dump()
return request_data