fix(core): update default value [LET-4117] (#4321)
* fix(core): update default value * fix: just stage just publish * fix(core): temporary hardcoding of the anthropic max tokens * fix(core): patch the gemini
This commit is contained in:
@@ -176,11 +176,14 @@ class AnthropicClient(LLMClientBase):
|
||||
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
|
||||
|
||||
if not llm_config.max_tokens:
|
||||
raise ValueError("Max tokens must be set for anthropic")
|
||||
# TODO strip this default once we add provider-specific defaults
|
||||
max_output_tokens = 4096 # the minimum max tokens (for Haiku 3)
|
||||
else:
|
||||
max_output_tokens = llm_config.max_tokens
|
||||
|
||||
data = {
|
||||
"model": llm_config.model,
|
||||
"max_tokens": llm_config.max_tokens,
|
||||
"max_tokens": max_output_tokens,
|
||||
"temperature": llm_config.temperature,
|
||||
}
|
||||
|
||||
|
||||
@@ -229,10 +229,12 @@ class GoogleVertexClient(LLMClientBase):
|
||||
"contents": contents,
|
||||
"config": {
|
||||
"temperature": llm_config.temperature,
|
||||
"max_output_tokens": llm_config.max_tokens,
|
||||
"tools": formatted_tools,
|
||||
},
|
||||
}
|
||||
# Max tokens is optional
|
||||
if llm_config.max_tokens:
|
||||
request_data["config"]["max_output_tokens"] = llm_config.max_tokens
|
||||
|
||||
if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
|
||||
request_data["config"]["response_mime_type"] = "application/json"
|
||||
|
||||
@@ -51,7 +51,7 @@ class LLMConfig(BaseModel):
|
||||
description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
|
||||
)
|
||||
max_tokens: Optional[int] = Field(
|
||||
4096,
|
||||
None,
|
||||
description="The maximum number of tokens to generate. If not set, the model will use its default value.",
|
||||
)
|
||||
enable_reasoner: bool = Field(
|
||||
|
||||
@@ -1212,7 +1212,7 @@ def test_preview_payload(client: LettaSDKClient):
|
||||
assert tool["function"]["strict"] is True
|
||||
|
||||
assert payload["frequency_penalty"] == 1.0
|
||||
assert payload["max_completion_tokens"] == 4096
|
||||
assert payload["max_completion_tokens"] is None
|
||||
assert payload["temperature"] == 0.7
|
||||
assert payload["parallel_tool_calls"] is False
|
||||
assert payload["tool_choice"] == "required"
|
||||
|
||||
Reference in New Issue
Block a user