From 5f9210b808420a835e466692b5a79c1e97930640 Mon Sep 17 00:00:00 2001 From: Charles Packer Date: Fri, 29 Aug 2025 16:42:19 -0700 Subject: [PATCH] fix(core): update default value [LET-4117] (#4321) * fix(core): update default value * fix: just stage just publish * fix(core): temporary hardcoding of the anthropic max tokens * fix(core): patch the gemini --- letta/llm_api/anthropic_client.py | 7 +++++-- letta/llm_api/google_vertex_client.py | 4 +++- letta/schemas/llm_config.py | 2 +- tests/test_sdk_client.py | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index e8335495..97198cd7 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -176,11 +176,14 @@ class AnthropicClient(LLMClientBase): raise NotImplementedError("Only tool calling supported on Anthropic API requests") if not llm_config.max_tokens: - raise ValueError("Max tokens must be set for anthropic") + # TODO strip this default once we add provider-specific defaults + max_output_tokens = 4096 # the minimum max tokens (for Haiku 3) + else: + max_output_tokens = llm_config.max_tokens data = { "model": llm_config.model, - "max_tokens": llm_config.max_tokens, + "max_tokens": max_output_tokens, "temperature": llm_config.temperature, } diff --git a/letta/llm_api/google_vertex_client.py b/letta/llm_api/google_vertex_client.py index 42cbf5de..773d9599 100644 --- a/letta/llm_api/google_vertex_client.py +++ b/letta/llm_api/google_vertex_client.py @@ -229,10 +229,12 @@ class GoogleVertexClient(LLMClientBase): "contents": contents, "config": { "temperature": llm_config.temperature, - "max_output_tokens": llm_config.max_tokens, "tools": formatted_tools, }, } + # Max tokens is optional + if llm_config.max_tokens: + request_data["config"]["max_output_tokens"] = llm_config.max_tokens if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental: 
request_data["config"]["response_mime_type"] = "application/json" diff --git a/letta/schemas/llm_config.py b/letta/schemas/llm_config.py index 20cf676e..3b1c97b2 100644 --- a/letta/schemas/llm_config.py +++ b/letta/schemas/llm_config.py @@ -51,7 +51,7 @@ class LLMConfig(BaseModel): description="The temperature to use when generating text with the model. A higher temperature will result in more random text.", ) max_tokens: Optional[int] = Field( - 4096, + None, description="The maximum number of tokens to generate. If not set, the model will use its default value.", ) enable_reasoner: bool = Field( diff --git a/tests/test_sdk_client.py b/tests/test_sdk_client.py index b5bcd674..83522f6e 100644 --- a/tests/test_sdk_client.py +++ b/tests/test_sdk_client.py @@ -1212,7 +1212,7 @@ def test_preview_payload(client: LettaSDKClient): assert tool["function"]["strict"] is True assert payload["frequency_penalty"] == 1.0 - assert payload["max_completion_tokens"] == 4096 + assert payload["max_completion_tokens"] is None assert payload["temperature"] == 0.7 assert payload["parallel_tool_calls"] is False assert payload["tool_choice"] == "required"