From 194fa7d1c6178244823565d04ae21660a38e8371 Mon Sep 17 00:00:00 2001 From: Kian Jones <11655409+kianjones9@users.noreply.github.com> Date: Wed, 21 Jan 2026 17:15:21 -0800 Subject: [PATCH] fix: anthropic message packing bugs (#9017) * fix: anthropic message packing bugs - trailing whitespace and final assistant message missing thinking * revert bug Caren will fix upstream? --- letta/llm_api/anthropic_client.py | 12 ++++++ tests/test_llm_clients.py | 65 +++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 8461167a..dc76a248 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -777,6 +777,18 @@ class AnthropicClient(LLMClientBase): if not block.get("text", "").strip(): block["text"] = "." + # Strip trailing whitespace from final assistant message + # Anthropic API rejects messages where "final assistant content cannot end with trailing whitespace" + if is_final_assistant: + if isinstance(content, str): + msg["content"] = content.rstrip() + elif isinstance(content, list) and len(content) > 0: + # Find and strip trailing whitespace from the last text block + for block in reversed(content): + if isinstance(block, dict) and block.get("type") == "text": + block["text"] = block.get("text", "").rstrip() + break + try: count_params = { "model": model or "claude-3-7-sonnet-20250219", diff --git a/tests/test_llm_clients.py b/tests/test_llm_clients.py index 934b1a95..7ceb278c 100644 --- a/tests/test_llm_clients.py +++ b/tests/test_llm_clients.py @@ -113,13 +113,10 @@ async def test_count_tokens_with_empty_messages(anthropic_client, llm_config): Test that count_tokens properly handles empty messages by replacing them with placeholders, while preserving the exemption for the final assistant message. 
""" - import anthropic - with patch("anthropic.AsyncAnthropic") as mock_anthropic_class: mock_client = AsyncMock() mock_count_tokens = AsyncMock() - # Create a mock return value with input_tokens attribute mock_response = AsyncMock() mock_response.input_tokens = 100 mock_count_tokens.return_value = mock_response @@ -198,3 +195,65 @@ async def test_count_tokens_with_empty_messages(anthropic_client, llm_config): call_args = mock_count_tokens.call_args[1] assert call_args["messages"][0]["content"] == "." assert call_args["messages"][1]["content"] == "response" + + +@pytest.mark.asyncio +async def test_count_tokens_strips_trailing_whitespace_from_final_assistant(anthropic_client, llm_config): + """ + Test that count_tokens strips trailing whitespace from the final assistant message. + Anthropic API rejects: "messages: final assistant content cannot end with trailing whitespace" + """ + with patch("anthropic.AsyncAnthropic") as mock_anthropic_class: + mock_client = AsyncMock() + mock_count_tokens = AsyncMock() + + mock_response = AsyncMock() + mock_response.input_tokens = 100 + mock_count_tokens.return_value = mock_response + + mock_client.beta.messages.count_tokens = mock_count_tokens + mock_anthropic_class.return_value = mock_client + + # Test case 1: String content with trailing whitespace + messages_with_trailing_space = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "response "}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_space, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "response" # trailing space stripped + + # Test case 2: String content with trailing newline + mock_count_tokens.reset_mock() + messages_with_trailing_newline = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "response\n"}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_newline, model=llm_config.model) + + 
call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "response" # trailing newline stripped + + # Test case 3: List content with trailing whitespace in last text block + mock_count_tokens.reset_mock() + messages_with_trailing_space_in_block = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": [{"type": "text", "text": "response "}]}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_space_in_block, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"][0]["text"] == "response" # trailing space stripped + + # Test case 4: Non-final assistant message should NOT have trailing whitespace stripped + mock_count_tokens.reset_mock() + messages_non_final = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "first response "}, + {"role": "user", "content": "followup"}, + ] + await anthropic_client.count_tokens(messages=messages_non_final, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "first response " # preserved for non-final