fix(core): update default value [LET-4117] (#4321)

* fix(core): update default value

* fix: just stage just publish

* fix(core): temporary hardcoding of the anthropic max tokens

* fix(core): patch the gemini
This commit is contained in:
Charles Packer
2025-08-29 16:42:19 -07:00
committed by GitHub
parent b8c2f42d33
commit 5f9210b808
4 changed files with 10 additions and 5 deletions

View File

@@ -176,11 +176,14 @@ class AnthropicClient(LLMClientBase):
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
if not llm_config.max_tokens:
raise ValueError("Max tokens must be set for anthropic")
# TODO strip this default once we add provider-specific defaults
max_output_tokens = 4096 # the minimum max tokens (for Haiku 3)
else:
max_output_tokens = llm_config.max_tokens
data = {
"model": llm_config.model,
"max_tokens": llm_config.max_tokens,
"max_tokens": max_output_tokens,
"temperature": llm_config.temperature,
}

View File

@@ -229,10 +229,12 @@ class GoogleVertexClient(LLMClientBase):
"contents": contents,
"config": {
"temperature": llm_config.temperature,
"max_output_tokens": llm_config.max_tokens,
"tools": formatted_tools,
},
}
# Max tokens is optional
if llm_config.max_tokens:
request_data["config"]["max_output_tokens"] = llm_config.max_tokens
if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
request_data["config"]["response_mime_type"] = "application/json"

View File

@@ -51,7 +51,7 @@ class LLMConfig(BaseModel):
description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
)
max_tokens: Optional[int] = Field(
4096,
None,
description="The maximum number of tokens to generate. If not set, the model will use its default value.",
)
enable_reasoner: bool = Field(

View File

@@ -1212,7 +1212,7 @@ def test_preview_payload(client: LettaSDKClient):
assert tool["function"]["strict"] is True
assert payload["frequency_penalty"] == 1.0
assert payload["max_completion_tokens"] == 4096
assert payload["max_completion_tokens"] is None
assert payload["temperature"] == 0.7
assert payload["parallel_tool_calls"] is False
assert payload["tool_choice"] == "required"