feat: change default context window from 32000 to 128000 (#9673)

* feat: change default context window from 32000 to 128000

Update DEFAULT_CONTEXT_WINDOW and global_max_context_window_limit from 32000 to 128000. Also update all .af (agent files), cypress test fixtures, and integration tests to use the new default.

Closes #9672

Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): update conversation manager tests for auto-created system message

create_conversation now auto-creates a system message at position 0 (from #9508), but the test assertions weren't updated. Adjust expected message counts and ordering to account for the initial system message.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): fix mock Anthropic models.list() to return async iterable, not coroutine

The real Anthropic SDK's models.list() returns an AsyncPage (with __aiter__) directly, but the mock used `async def list()` which returns a coroutine. The code does `async for model in client.models.list()` which needs an async iterable, not a coroutine. Fix by making list() a regular method.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
2026-02-25 18:33:11 -08:00
parent 357a3ad15b
commit bf80de214d
15 changed files with 50 additions and 34 deletions
--- a/letta/config_file.py
+++ b/letta/config_file.py
@@ -134,7 +134,7 @@ def _flatten_model_settings(d: dict, env_vars: dict[str, str]) -> None:
            api_base: yyy    -> OPENAI_API_BASE
        anthropic:
            api_key: zzz     -> ANTHROPIC_API_KEY
-        global_max_context_window_limit: 32000  -> GLOBAL_MAX_CONTEXT_WINDOW_LIMIT
+        global_max_context_window_limit: 128000  -> GLOBAL_MAX_CONTEXT_WINDOW_LIMIT
    """
    for key, value in d.items():
        if isinstance(value, dict):
--- a/letta/constants.py
+++ b/letta/constants.py
@@ -74,7 +74,7 @@ DEFAULT_MAX_STEPS = 50

 # context window size
 MIN_CONTEXT_WINDOW = 4096
-DEFAULT_CONTEXT_WINDOW = 32000
+DEFAULT_CONTEXT_WINDOW = 128000

 # Summarization trigger threshold (multiplier of context_window limit)
 # Summarization triggers when step usage > context_window * SUMMARIZATION_TRIGGER_MULTIPLIER
--- a/letta/settings.py
+++ b/letta/settings.py
@@ -114,7 +114,7 @@ class SummarizerSettings(BaseSettings):
 class ModelSettings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

-    global_max_context_window_limit: int = 32000
+    global_max_context_window_limit: int = 128000

    inner_thoughts_kwarg: str | None = Field(default=INNER_THOUGHTS_KWARG, description="Key used for passing in inner thoughts.")

--- a/tests/integration_test_batch_api_cron_jobs.py
+++ b/tests/integration_test_batch_api_cron_jobs.py
@@ -141,7 +141,7 @@ async def create_test_agent(name, actor, test_id: Optional[str] = None, model="a
        model="claude-3-7-sonnet-latest",
        model_endpoint_type="anthropic",
        model_endpoint="https://api.anthropic.com/v1",
-        context_window=32000,
+        context_window=128000,
        handle="anthropic/claude-3-7-sonnet-latest",
        put_inner_thoughts_in_kwargs=True,
        max_tokens=4096,
@@ -193,7 +193,7 @@ async def create_test_batch_item(server, batch_id, agent_id, default_user):
        model="claude-3-7-sonnet-latest",
        model_endpoint_type="anthropic",
        model_endpoint="https://api.anthropic.com/v1",
-        context_window=32000,
+        context_window=128000,
        handle="anthropic/claude-3-7-sonnet-latest",
        put_inner_thoughts_in_kwargs=True,
        max_tokens=4096,
--- a/tests/integration_test_multi_agent.py
+++ b/tests/integration_test_multi_agent.py
@@ -93,7 +93,7 @@ def agent_obj(client: Letta) -> AgentState:
        tool_ids=[send_message_to_agent_tool.id],
        model="openai/gpt-4o",
        embedding="openai/text-embedding-3-small",
-        context_window_limit=32000,
+        context_window_limit=128000,
    )
    yield agent_state_instance

@@ -107,7 +107,7 @@ def other_agent_obj(client: Letta) -> AgentState:
        include_multi_agent_tools=False,
        model="openai/gpt-4o",
        embedding="openai/text-embedding-3-small",
-        context_window_limit=32000,
+        context_window_limit=128000,
    )

    yield agent_state_instance
--- a/tests/managers/test_conversation_manager.py
+++ b/tests/managers/test_conversation_manager.py
@@ -355,8 +355,9 @@ async def test_add_messages_to_conversation(
        actor=default_user,
    )

-    assert len(message_ids) == 1
-    assert message_ids[0] == hello_world_message_fixture.id
+    # create_conversation auto-creates a system message at position 0
+    assert len(message_ids) == 2
+    assert hello_world_message_fixture.id in message_ids


@pytest.mark.asyncio
@@ -385,8 +386,9 @@ async def test_get_messages_for_conversation(
        actor=default_user,
    )

-    assert len(messages) == 1
-    assert messages[0].id == hello_world_message_fixture.id
+    # create_conversation auto-creates a system message at position 0
+    assert len(messages) == 2
+    assert any(m.id == hello_world_message_fixture.id for m in messages)


@pytest.mark.asyncio
@@ -430,7 +432,10 @@ async def test_message_ordering_in_conversation(conversation_manager, server: Sy
        actor=default_user,
    )

-    assert retrieved_ids == [m.id for m in messages]
+    # create_conversation auto-creates a system message at position 0,
+    # so the user messages start at index 1
+    assert len(retrieved_ids) == len(messages) + 1
+    assert retrieved_ids[1:] == [m.id for m in messages]


@pytest.mark.asyncio
@@ -489,7 +494,7 @@ async def test_update_in_context_messages(conversation_manager, server: SyncServ

@pytest.mark.asyncio
 async def test_empty_conversation_message_ids(conversation_manager, server: SyncServer, sarah_agent, default_user):
-    """Test getting message IDs from an empty conversation."""
+    """Test getting message IDs from a newly created conversation (has auto-created system message)."""
    # Create a conversation
    conversation = await conversation_manager.create_conversation(
        agent_id=sarah_agent.id,
@@ -497,13 +502,14 @@ async def test_empty_conversation_message_ids(conversation_manager, server: Sync
        actor=default_user,
    )

-    # Get message IDs (should be empty)
+    # create_conversation auto-creates a system message at position 0,
+    # so a newly created conversation has exactly one message
    message_ids = await conversation_manager.get_message_ids_for_conversation(
        conversation_id=conversation.id,
        actor=default_user,
    )

-    assert message_ids == []
+    assert len(message_ids) == 1


@pytest.mark.asyncio
@@ -551,9 +557,11 @@ async def test_list_conversation_messages(conversation_manager, server: SyncServ
        actor=default_user,
    )

-    assert len(letta_messages) == 2
+    # create_conversation auto-creates a system message, so we get 3 total
+    assert len(letta_messages) == 3
    # Check message types
    message_types = [m.message_type for m in letta_messages]
+    assert "system_message" in message_types
    assert "user_message" in message_types
    assert "assistant_message" in message_types

@@ -902,9 +910,12 @@ async def test_list_conversation_messages_ascending_order(conversation_manager,
        reverse=False,
    )

-    # First message should be "Message 0" (oldest)
-    assert len(letta_messages) == 3
-    assert "Message 0" in letta_messages[0].content
+    # create_conversation auto-creates a system message at position 0,
+    # so we get 4 messages total (system + 3 user messages)
+    assert len(letta_messages) == 4
+    # First message is the auto-created system message; "Message 0" is second
+    assert letta_messages[0].message_type == "system_message"
+    assert "Message 0" in letta_messages[1].content


@pytest.mark.asyncio
@@ -949,8 +960,9 @@ async def test_list_conversation_messages_descending_order(conversation_manager,
        reverse=True,
    )

-    # First message should be "Message 2" (newest)
-    assert len(letta_messages) == 3
+    # create_conversation auto-creates a system message, so 4 total
+    # First message should be "Message 2" (newest) in descending order
+    assert len(letta_messages) == 4
    assert "Message 2" in letta_messages[0].content


@@ -1081,7 +1093,8 @@ async def test_list_conversation_messages_no_group_id_returns_all(conversation_m
        actor=default_user,
    )

-    assert len(all_messages) == 3
+    # create_conversation auto-creates a system message, so 4 total
+    assert len(all_messages) == 4


@pytest.mark.asyncio
@@ -1137,8 +1150,8 @@ async def test_list_conversation_messages_order_with_pagination(conversation_man

    # The first messages should be different
    assert page_asc[0].content != page_desc[0].content
-    # In ascending, first should be "Message 0"
-    assert "Message 0" in page_asc[0].content
+    # In ascending, first is the auto-created system message, second is "Message 0"
+    assert page_asc[0].message_type == "system_message"
    # In descending, first should be "Message 4"
    assert "Message 4" in page_desc[0].content

--- a/tests/managers/test_provider_manager.py
+++ b/tests/managers/test_provider_manager.py
@@ -579,8 +579,11 @@ async def test_server_startup_syncs_base_providers(default_user, default_organiz
                yield item

    # Mock the Anthropic AsyncAnthropic client
+    # NOTE: list() must be a regular (non-async) method that returns an async iterable,
+    # because the real Anthropic SDK's models.list() returns an AsyncPage (which has __aiter__)
+    # directly, and the code uses `async for model in client.models.list()`.
    class MockAnthropicModels:
-        async def list(self):
+        def list(self):
            return MockAnthropicAsyncPage(mock_anthropic_models["data"])

    class MockAsyncAnthropic:
@@ -878,7 +881,7 @@ async def test_server_startup_handles_api_errors_gracefully(default_user, defaul
                yield item

    class MockAnthropicModels:
-        async def list(self):
+        def list(self):
            return MockAnthropicAsyncPage(mock_anthropic_data)

    class MockAsyncAnthropic:
--- a/tests/test_agent_files/customer_service.af
+++ b/tests/test_agent_files/customer_service.af
--- a/tests/test_agent_files/deep_research_agent.af
+++ b/tests/test_agent_files/deep_research_agent.af
--- a/tests/test_agent_files/knowledge-base.af
+++ b/tests/test_agent_files/knowledge-base.af
@@ -44,7 +44,7 @@
    "provider_name": null,
    "provider_category": null,
    "model_wrapper": null,
-    "context_window": 32000,
+    "context_window": 128000,
    "put_inner_thoughts_in_kwargs": false,
    "handle": "anthropic/claude-3.5-sonnet",
    "temperature": 1.0,
--- a/tests/test_agent_files/memgpt_agent_with_convo.af
+++ b/tests/test_agent_files/memgpt_agent_with_convo.af
--- a/tests/test_agent_files/outreach_workflow_agent.af
+++ b/tests/test_agent_files/outreach_workflow_agent.af
--- a/tests/test_agent_files/test_agent_with_files_and_sources.af
+++ b/tests/test_agent_files/test_agent_with_files_and_sources.af
@@ -56,7 +56,7 @@
        "provider_name": "openai",
        "provider_category": "base",
        "model_wrapper": null,
-        "context_window": 32000,
+        "context_window": 128000,
        "put_inner_thoughts_in_kwargs": true,
        "handle": "openai/gpt-4o-mini",
        "temperature": 1.0,
--- a/tests/test_agent_files/test_basic_agent_with_blocks_tools_messages_v2.af
+++ b/tests/test_agent_files/test_basic_agent_with_blocks_tools_messages_v2.af
@@ -55,7 +55,7 @@
        "provider_name": "openai",
        "provider_category": "base",
        "model_wrapper": null,
-        "context_window": 32000,
+        "context_window": 128000,
        "put_inner_thoughts_in_kwargs": true,
        "handle": "openai/gpt-4.1-mini",
        "temperature": 1.0,
--- a/tests/test_llm_clients.py
+++ b/tests/test_llm_clients.py
@@ -16,7 +16,7 @@ def llm_config():
        model="claude-3-7-sonnet-20250219",
        model_endpoint_type="anthropic",
        model_endpoint="https://api.anthropic.com/v1",
-        context_window=32000,
+        context_window=128000,
        handle="anthropic/claude-sonnet-4-20250514",
        put_inner_thoughts_in_kwargs=False,
        max_tokens=4096,