From 194fa7d1c6178244823565d04ae21660a38e8371 Mon Sep 17 00:00:00 2001 From: Kian Jones <11655409+kianjones9@users.noreply.github.com> Date: Wed, 21 Jan 2026 17:15:21 -0800 Subject: [PATCH] fix: anthropic message packing bugs (#9017) * fix: anthropic message packing bugs - trailing whitespace and final assistant message missing thinking * revert bug Caren will fix upstream? --- letta/llm_api/anthropic_client.py | 12 ++++++ tests/test_llm_clients.py | 65 +++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/letta/llm_api/anthropic_client.py b/letta/llm_api/anthropic_client.py index 8461167a..dc76a248 100644 --- a/letta/llm_api/anthropic_client.py +++ b/letta/llm_api/anthropic_client.py @@ -777,6 +777,18 @@ class AnthropicClient(LLMClientBase): if not block.get("text", "").strip(): block["text"] = "." + # Strip trailing whitespace from final assistant message + # Anthropic API rejects messages where "final assistant content cannot end with trailing whitespace" + if is_final_assistant: + if isinstance(content, str): + msg["content"] = content.rstrip() + elif isinstance(content, list) and len(content) > 0: + # Find and strip trailing whitespace from the last text block + for block in reversed(content): + if isinstance(block, dict) and block.get("type") == "text": + block["text"] = block.get("text", "").rstrip() + break + try: count_params = { "model": model or "claude-3-7-sonnet-20250219", diff --git a/tests/test_llm_clients.py b/tests/test_llm_clients.py index 934b1a95..7ceb278c 100644 --- a/tests/test_llm_clients.py +++ b/tests/test_llm_clients.py @@ -113,13 +113,10 @@ async def test_count_tokens_with_empty_messages(anthropic_client, llm_config): Test that count_tokens properly handles empty messages by replacing them with placeholders, while preserving the exemption for the final assistant message. 
""" - import anthropic - with patch("anthropic.AsyncAnthropic") as mock_anthropic_class: mock_client = AsyncMock() mock_count_tokens = AsyncMock() - # Create a mock return value with input_tokens attribute mock_response = AsyncMock() mock_response.input_tokens = 100 mock_count_tokens.return_value = mock_response @@ -198,3 +195,65 @@ async def test_count_tokens_with_empty_messages(anthropic_client, llm_config): call_args = mock_count_tokens.call_args[1] assert call_args["messages"][0]["content"] == "." assert call_args["messages"][1]["content"] == "response" + + +@pytest.mark.asyncio +async def test_count_tokens_strips_trailing_whitespace_from_final_assistant(anthropic_client, llm_config): + """ + Test that count_tokens strips trailing whitespace from the final assistant message. + Anthropic API rejects: "messages: final assistant content cannot end with trailing whitespace" + """ + with patch("anthropic.AsyncAnthropic") as mock_anthropic_class: + mock_client = AsyncMock() + mock_count_tokens = AsyncMock() + + mock_response = AsyncMock() + mock_response.input_tokens = 100 + mock_count_tokens.return_value = mock_response + + mock_client.beta.messages.count_tokens = mock_count_tokens + mock_anthropic_class.return_value = mock_client + + # Test case 1: String content with trailing whitespace + messages_with_trailing_space = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "response "}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_space, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "response" # trailing space stripped + + # Test case 2: String content with trailing newline + mock_count_tokens.reset_mock() + messages_with_trailing_newline = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "response\n"}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_newline, model=llm_config.model) + + 
call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "response" # trailing newline stripped + + # Test case 3: List content with trailing whitespace in last text block + mock_count_tokens.reset_mock() + messages_with_trailing_space_in_block = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": [{"type": "text", "text": "response "}]}, + ] + await anthropic_client.count_tokens(messages=messages_with_trailing_space_in_block, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"][0]["text"] == "response" # trailing space stripped + + # Test case 4: Non-final assistant message should NOT have trailing whitespace stripped + mock_count_tokens.reset_mock() + messages_non_final = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "first response "}, + {"role": "user", "content": "followup"}, + ] + await anthropic_client.count_tokens(messages=messages_non_final, model=llm_config.model) + + call_args = mock_count_tokens.call_args[1] + assert call_args["messages"][1]["content"] == "first response " # preserved for non-final