feat: change default context window from 32000 to 128000 (#9673)

* feat: change default context window from 32000 to 128000

Update DEFAULT_CONTEXT_WINDOW and global_max_context_window_limit from
32000 to 128000. Also update all .af agent files, Cypress test
fixtures, and integration tests to use the new default.

Closes #9672

Co-authored-by: Sarah Wooders <sarahwooders@users.noreply.github.com>

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): update conversation manager tests for auto-created system message

create_conversation now auto-creates a system message at position 0
(behavior introduced in #9508), but the test assertions weren't updated.
Adjust the expected message counts and ordering to account for that
initial system message.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix(core): fix mock Anthropic models.list() to return async iterable, not coroutine

The real Anthropic SDK's models.list() returns an AsyncPage (with __aiter__)
directly, but the mock used `async def list()`, so calling it returned a
coroutine instead. The code does `async for model in client.models.list()`,
which needs an async iterable, not a coroutine. Fix by making the mock's
list() a regular (non-async) method that returns the async-iterable page.

🐾 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com>
Co-authored-by: Letta <noreply@letta.com>
Co-authored-by: Sarah Wooders <sarahwooders@gmail.com>
This commit is contained in:
github-actions[bot]
2026-02-25 18:33:11 -08:00
committed by Caren Thomas
parent 357a3ad15b
commit bf80de214d
15 changed files with 50 additions and 34 deletions

View File

@@ -134,7 +134,7 @@ def _flatten_model_settings(d: dict, env_vars: dict[str, str]) -> None:
api_base: yyy -> OPENAI_API_BASE
anthropic:
api_key: zzz -> ANTHROPIC_API_KEY
global_max_context_window_limit: 32000 -> GLOBAL_MAX_CONTEXT_WINDOW_LIMIT
global_max_context_window_limit: 128000 -> GLOBAL_MAX_CONTEXT_WINDOW_LIMIT
"""
for key, value in d.items():
if isinstance(value, dict):

View File

@@ -74,7 +74,7 @@ DEFAULT_MAX_STEPS = 50
# context window size
MIN_CONTEXT_WINDOW = 4096
DEFAULT_CONTEXT_WINDOW = 32000
DEFAULT_CONTEXT_WINDOW = 128000
# Summarization trigger threshold (multiplier of context_window limit)
# Summarization triggers when step usage > context_window * SUMMARIZATION_TRIGGER_MULTIPLIER

View File

@@ -114,7 +114,7 @@ class SummarizerSettings(BaseSettings):
class ModelSettings(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", extra="ignore")
global_max_context_window_limit: int = 32000
global_max_context_window_limit: int = 128000
inner_thoughts_kwarg: str | None = Field(default=INNER_THOUGHTS_KWARG, description="Key used for passing in inner thoughts.")

View File

@@ -141,7 +141,7 @@ async def create_test_agent(name, actor, test_id: Optional[str] = None, model="a
model="claude-3-7-sonnet-latest",
model_endpoint_type="anthropic",
model_endpoint="https://api.anthropic.com/v1",
context_window=32000,
context_window=128000,
handle="anthropic/claude-3-7-sonnet-latest",
put_inner_thoughts_in_kwargs=True,
max_tokens=4096,
@@ -193,7 +193,7 @@ async def create_test_batch_item(server, batch_id, agent_id, default_user):
model="claude-3-7-sonnet-latest",
model_endpoint_type="anthropic",
model_endpoint="https://api.anthropic.com/v1",
context_window=32000,
context_window=128000,
handle="anthropic/claude-3-7-sonnet-latest",
put_inner_thoughts_in_kwargs=True,
max_tokens=4096,

View File

@@ -93,7 +93,7 @@ def agent_obj(client: Letta) -> AgentState:
tool_ids=[send_message_to_agent_tool.id],
model="openai/gpt-4o",
embedding="openai/text-embedding-3-small",
context_window_limit=32000,
context_window_limit=128000,
)
yield agent_state_instance
@@ -107,7 +107,7 @@ def other_agent_obj(client: Letta) -> AgentState:
include_multi_agent_tools=False,
model="openai/gpt-4o",
embedding="openai/text-embedding-3-small",
context_window_limit=32000,
context_window_limit=128000,
)
yield agent_state_instance

View File

@@ -355,8 +355,9 @@ async def test_add_messages_to_conversation(
actor=default_user,
)
assert len(message_ids) == 1
assert message_ids[0] == hello_world_message_fixture.id
# create_conversation auto-creates a system message at position 0
assert len(message_ids) == 2
assert hello_world_message_fixture.id in message_ids
@pytest.mark.asyncio
@@ -385,8 +386,9 @@ async def test_get_messages_for_conversation(
actor=default_user,
)
assert len(messages) == 1
assert messages[0].id == hello_world_message_fixture.id
# create_conversation auto-creates a system message at position 0
assert len(messages) == 2
assert any(m.id == hello_world_message_fixture.id for m in messages)
@pytest.mark.asyncio
@@ -430,7 +432,10 @@ async def test_message_ordering_in_conversation(conversation_manager, server: Sy
actor=default_user,
)
assert retrieved_ids == [m.id for m in messages]
# create_conversation auto-creates a system message at position 0,
# so the user messages start at index 1
assert len(retrieved_ids) == len(messages) + 1
assert retrieved_ids[1:] == [m.id for m in messages]
@pytest.mark.asyncio
@@ -489,7 +494,7 @@ async def test_update_in_context_messages(conversation_manager, server: SyncServ
@pytest.mark.asyncio
async def test_empty_conversation_message_ids(conversation_manager, server: SyncServer, sarah_agent, default_user):
"""Test getting message IDs from an empty conversation."""
"""Test getting message IDs from a newly created conversation (has auto-created system message)."""
# Create a conversation
conversation = await conversation_manager.create_conversation(
agent_id=sarah_agent.id,
@@ -497,13 +502,14 @@ async def test_empty_conversation_message_ids(conversation_manager, server: Sync
actor=default_user,
)
# Get message IDs (should be empty)
# create_conversation auto-creates a system message at position 0,
# so a newly created conversation has exactly one message
message_ids = await conversation_manager.get_message_ids_for_conversation(
conversation_id=conversation.id,
actor=default_user,
)
assert message_ids == []
assert len(message_ids) == 1
@pytest.mark.asyncio
@@ -551,9 +557,11 @@ async def test_list_conversation_messages(conversation_manager, server: SyncServ
actor=default_user,
)
assert len(letta_messages) == 2
# create_conversation auto-creates a system message, so we get 3 total
assert len(letta_messages) == 3
# Check message types
message_types = [m.message_type for m in letta_messages]
assert "system_message" in message_types
assert "user_message" in message_types
assert "assistant_message" in message_types
@@ -902,9 +910,12 @@ async def test_list_conversation_messages_ascending_order(conversation_manager,
reverse=False,
)
# First message should be "Message 0" (oldest)
assert len(letta_messages) == 3
assert "Message 0" in letta_messages[0].content
# create_conversation auto-creates a system message at position 0,
# so we get 4 messages total (system + 3 user messages)
assert len(letta_messages) == 4
# First message is the auto-created system message; "Message 0" is second
assert letta_messages[0].message_type == "system_message"
assert "Message 0" in letta_messages[1].content
@pytest.mark.asyncio
@@ -949,8 +960,9 @@ async def test_list_conversation_messages_descending_order(conversation_manager,
reverse=True,
)
# First message should be "Message 2" (newest)
assert len(letta_messages) == 3
# create_conversation auto-creates a system message, so 4 total
# First message should be "Message 2" (newest) in descending order
assert len(letta_messages) == 4
assert "Message 2" in letta_messages[0].content
@@ -1081,7 +1093,8 @@ async def test_list_conversation_messages_no_group_id_returns_all(conversation_m
actor=default_user,
)
assert len(all_messages) == 3
# create_conversation auto-creates a system message, so 4 total
assert len(all_messages) == 4
@pytest.mark.asyncio
@@ -1137,8 +1150,8 @@ async def test_list_conversation_messages_order_with_pagination(conversation_man
# The first messages should be different
assert page_asc[0].content != page_desc[0].content
# In ascending, first should be "Message 0"
assert "Message 0" in page_asc[0].content
# In ascending, first is the auto-created system message, second is "Message 0"
assert page_asc[0].message_type == "system_message"
# In descending, first should be "Message 4"
assert "Message 4" in page_desc[0].content

View File

@@ -579,8 +579,11 @@ async def test_server_startup_syncs_base_providers(default_user, default_organiz
yield item
# Mock the Anthropic AsyncAnthropic client
# NOTE: list() must be a regular (non-async) method that returns an async iterable,
# because the real Anthropic SDK's models.list() returns an AsyncPage (which has __aiter__)
# directly, and the code uses `async for model in client.models.list()`.
class MockAnthropicModels:
async def list(self):
def list(self):
return MockAnthropicAsyncPage(mock_anthropic_models["data"])
class MockAsyncAnthropic:
@@ -878,7 +881,7 @@ async def test_server_startup_handles_api_errors_gracefully(default_user, defaul
yield item
class MockAnthropicModels:
async def list(self):
def list(self):
return MockAnthropicAsyncPage(mock_anthropic_data)
class MockAsyncAnthropic:

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -44,7 +44,7 @@
"provider_name": null,
"provider_category": null,
"model_wrapper": null,
"context_window": 32000,
"context_window": 128000,
"put_inner_thoughts_in_kwargs": false,
"handle": "anthropic/claude-3.5-sonnet",
"temperature": 1.0,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -56,7 +56,7 @@
"provider_name": "openai",
"provider_category": "base",
"model_wrapper": null,
"context_window": 32000,
"context_window": 128000,
"put_inner_thoughts_in_kwargs": true,
"handle": "openai/gpt-4o-mini",
"temperature": 1.0,

View File

@@ -55,7 +55,7 @@
"provider_name": "openai",
"provider_category": "base",
"model_wrapper": null,
"context_window": 32000,
"context_window": 128000,
"put_inner_thoughts_in_kwargs": true,
"handle": "openai/gpt-4.1-mini",
"temperature": 1.0,

View File

@@ -16,7 +16,7 @@ def llm_config():
model="claude-3-7-sonnet-20250219",
model_endpoint_type="anthropic",
model_endpoint="https://api.anthropic.com/v1",
context_window=32000,
context_window=128000,
handle="anthropic/claude-sonnet-4-20250514",
put_inner_thoughts_in_kwargs=False,
max_tokens=4096,