From 5f9210b808420a835e466692b5a79c1e97930640 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Fri, 29 Aug 2025 16:42:19 -0700 Subject: [PATCH] fix(core): update default value [LET-4117] (#4321) * fix(core): update default value * fix: just stage just publish * fix(core): temporary hardcoding of the anthropic max tokens * fix(core): patch the gemini --- letta/llm_api/anthropic_client.py | 7 +++++-- letta/llm_api/google_vertex_client.py | 4 +++- letta/schemas/llm_config.py | 2 +- tests/test_sdk_client.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index e8335495..97198cd7 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -176,11 +176,14 @@ class AnthropicClient(LLMClientBase): raise NotImplementedError("Only tool calling supported on Anthropic API requests") if not llm_config.max_tokens: - raise ValueError("Max tokens must be set for anthropic") + # TODO strip this default once we add provider-specific defaults + max_output_tokens = 4096 # the minimum max tokens (for Haiku 3) + else: + max_output_tokens = llm_config.max_tokens data = { "model": llm_config.model, - "max_tokens": llm_config.max_tokens, + "max_tokens": max_output_tokens, "temperature": llm_config.temperature, } diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 42cbf5de..773d9599 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -229,10 +229,12 @@ class GoogleVertexClient(LLMClientBase): "contents": contents, "config": { "temperature": llm_config.temperature, - "max_output_tokens": llm_config.max_tokens, "tools": formatted_tools, }, } + # Max tokens is optional + if llm_config.max_tokens: + request_data["config"]["max_output_tokens"] = llm_config.max_tokens if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental: 
request_data["config"]["response_mime_type"] = "application/json" diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 20cf676e..3b1c97b2 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -51,7 +51,7 @@ class LLMConfig(BaseModel): description="The temperature to use when generating text with the model. A higher temperature will result in more random text.", ) max_tokens: Optional[int] = Field( - 4096, + None, description="The maximum number of tokens to generate. If not set, the model will use its default value.", ) enable_reasoner: bool = Field( diff --git a/tests/test_sdk_client.py b/tests/test_sdk_client.py index b5bcd674..83522f6e 100644 --- a/tests/test_sdk_client.py +++ b/tests/test_sdk_client.py @@ -1212,7 +1212,7 @@ def test_preview_payload(client: LettaSDKClient): assert tool["function"]["strict"] is True assert payload["frequency_penalty"] == 1.0 - assert payload["max_completion_tokens"] == 4096 + assert payload["max_completion_tokens"] is None assert payload["temperature"] == 0.7 assert payload["parallel_tool_calls"] is False assert payload["tool_choice"] == "required"