fix(core): update default value [LET-4117] (#4321)
* fix(core): update default value * fix: just stage just publish * fix(core): temporary hardcoding of the anthropic max tokens * fix(core): patch the gemini
This commit is contained in:
@@ -176,11 +176,14 @@ class AnthropicClient(LLMClientBase):
|
||||
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
|
||||
|
||||
if not llm_config.max_tokens:
|
||||
raise ValueError("Max tokens must be set for anthropic")
|
||||
# TODO strip this default once we add provider-specific defaults
|
||||
max_output_tokens = 4096 # the minimum max tokens (for Haiku 3)
|
||||
else:
|
||||
max_output_tokens = llm_config.max_tokens
|
||||
|
||||
data = {
|
||||
"model": llm_config.model,
|
||||
"max_tokens": llm_config.max_tokens,
|
||||
"max_tokens": max_output_tokens,
|
||||
"temperature": llm_config.temperature,
|
||||
}
|
||||
|
||||
|
||||
@@ -229,10 +229,12 @@ class GoogleVertexClient(LLMClientBase):
|
||||
"contents": contents,
|
||||
"config": {
|
||||
"temperature": llm_config.temperature,
|
||||
"max_output_tokens": llm_config.max_tokens,
|
||||
"tools": formatted_tools,
|
||||
},
|
||||
}
|
||||
# Max tokens is optional
|
||||
if llm_config.max_tokens:
|
||||
request_data["config"]["max_output_tokens"] = llm_config.max_tokens
|
||||
|
||||
if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
|
||||
request_data["config"]["response_mime_type"] = "application/json"
|
||||
|
||||
@@ -51,7 +51,7 @@ class LLMConfig(BaseModel):
|
||||
description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
|
||||
)
|
||||
max_tokens: Optional[int] = Field(
|
||||
4096,
|
||||
None,
|
||||
description="The maximum number of tokens to generate. If not set, the model will use its default value.",
|
||||
)
|
||||
enable_reasoner: bool = Field(
|
||||
|
||||
@@ -1212,7 +1212,7 @@ def test_preview_payload(client: LettaSDKClient):
|
||||
assert tool["function"]["strict"] is True
|
||||
|
||||
assert payload["frequency_penalty"] == 1.0
|
||||
assert payload["max_completion_tokens"] == 4096
|
||||
assert payload["max_completion_tokens"] is None
|
||||
assert payload["temperature"] == 0.7
|
||||
assert payload["parallel_tool_calls"] is False
|
||||
assert payload["tool_choice"] == "required"
|
||||
|
||||
Reference in New Issue
Block a user