diff --git a/fern/openapi.json b/fern/openapi.json index 6c15613b..335c832d 100644 --- a/fern/openapi.json +++ b/fern/openapi.json @@ -20645,7 +20645,7 @@ "compaction_settings": { "anyOf": [ { - "$ref": "#/components/schemas/CompactionSettings" + "$ref": "#/components/schemas/CompactionSettings-Output" }, { "type": "null" @@ -24304,11 +24304,70 @@ "required": ["code"], "title": "CodeInput" }, - "CompactionSettings": { + "CompactionSettings-Input": { "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "Model handle to use for summarization (format: provider/model-name)." + }, "model_settings": { - "$ref": "#/components/schemas/ModelSettings", - "description": "The model settings to use for summarization." + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "Optional model settings used to override defaults for the summarizer model." }, "prompt": { "type": "string", @@ -24348,8 +24407,116 @@ } }, "type": "object", - "required": ["model_settings", "prompt", "prompt_acknowledgement"], - "title": "CompactionSettings" + "required": ["model", "prompt", "prompt_acknowledgement"], + "title": "CompactionSettings", + "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." + }, + "CompactionSettings-Output": { + "properties": { + "model": { + "type": "string", + "title": "Model", + "description": "Model handle to use for summarization (format: provider/model-name)." + }, + "model_settings": { + "anyOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/OpenAIModelSettings" + }, + { + "$ref": "#/components/schemas/AnthropicModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleAIModelSettings" + }, + { + "$ref": "#/components/schemas/GoogleVertexModelSettings" + }, + { + "$ref": "#/components/schemas/AzureModelSettings" + }, + { + "$ref": "#/components/schemas/XAIModelSettings" + }, + { + "$ref": "#/components/schemas/GroqModelSettings" + }, + { + "$ref": "#/components/schemas/DeepseekModelSettings" + }, + { + "$ref": "#/components/schemas/TogetherModelSettings" + }, + { + "$ref": "#/components/schemas/BedrockModelSettings" + } + ], + "discriminator": { + "propertyName": "provider_type", + "mapping": { + "anthropic": "#/components/schemas/AnthropicModelSettings", + "azure": "#/components/schemas/AzureModelSettings", + "bedrock": "#/components/schemas/BedrockModelSettings", + "deepseek": "#/components/schemas/DeepseekModelSettings", + "google_ai": "#/components/schemas/GoogleAIModelSettings", + "google_vertex": "#/components/schemas/GoogleVertexModelSettings", + "groq": "#/components/schemas/GroqModelSettings", + "openai": "#/components/schemas/OpenAIModelSettings", + "together": "#/components/schemas/TogetherModelSettings", + "xai": "#/components/schemas/XAIModelSettings" + } + } + }, + { + "type": "null" + } + ], + "title": "Model Settings", + "description": "Optional model settings used to override defaults for the summarizer model." + }, + "prompt": { + "type": "string", + "title": "Prompt", + "description": "The prompt to use for summarization." + }, + "prompt_acknowledgement": { + "type": "string", + "title": "Prompt Acknowledgement", + "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)." + }, + "clip_chars": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "title": "Clip Chars", + "description": "The maximum length of the summary in characters. If none, no clipping is performed.", + "default": 2000 + }, + "mode": { + "type": "string", + "enum": ["all", "sliding_window"], + "title": "Mode", + "description": "The type of summarization technique use.", + "default": "sliding_window" + }, + "sliding_window_percentage": { + "type": "number", + "title": "Sliding Window Percentage", + "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).", + "default": 0.3 + } + }, + "type": "object", + "required": ["model", "prompt", "prompt_acknowledgement"], + "title": "CompactionSettings", + "description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle." }, "ComparisonOperator": { "type": "string", @@ -25153,7 +25320,7 @@ "compaction_settings": { "anyOf": [ { - "$ref": "#/components/schemas/CompactionSettings" + "$ref": "#/components/schemas/CompactionSettings-Input" }, { "type": "null" @@ -29386,7 +29553,7 @@ "compaction_settings": { "anyOf": [ { - "$ref": "#/components/schemas/CompactionSettings" + "$ref": "#/components/schemas/CompactionSettings-Input" }, { "type": "null" @@ -32764,25 +32931,6 @@ ], "title": "Model" }, - "ModelSettings": { - "properties": { - "max_output_tokens": { - "type": "integer", - "title": "Max Output Tokens", - "description": "The maximum number of tokens the model can generate.", - "default": 4096 - }, - "parallel_tool_calls": { - "type": "boolean", - "title": "Parallel Tool Calls", - "description": "Whether to enable parallel tool calling.", - "default": false - } - }, - "type": "object", - "title": "ModelSettings", - "description": "Schema for defining settings for a model" - }, "ModifyApprovalRequest": { "properties": { "requires_approval": { @@ -38722,7 +38870,7 @@ "compaction_settings": { "anyOf": [ { - "$ref": "#/components/schemas/CompactionSettings" + "$ref": "#/components/schemas/CompactionSettings-Input" }, { "type": "null" @@ -39995,7 +40143,7 @@ "compaction_settings": { "anyOf": [ { - "$ref": "#/components/schemas/CompactionSettings" + "$ref": "#/components/schemas/CompactionSettings-Input" }, { "type": "null" diff --git a/letta/agents/letta_agent_v3.py b/letta/agents/letta_agent_v3.py index 176717b1..2c37c208 100644 --- a/letta/agents/letta_agent_v3.py +++ b/letta/agents/letta_agent_v3.py @@ -1331,19 +1331,39 @@ class LettaAgentV3(LettaAgentV2): @trace_method async def compact(self, messages, trigger_threshold: Optional[int] = None) -> Message: + """Compact the current in-context messages for this agent. + + Compaction uses a summarizer LLM configuration derived from + ``compaction_settings.model`` when provided. This mirrors how agent + creation derives defaults from provider-specific ModelSettings, but is + localized to summarization. """ - Simplified compaction method. Does NOT do any persistence (handled in the loop) - """ - # compact the current in-context messages (self.in_context_messages) - # Use agent's compaction_settings if set, otherwise fall back to defaults - summarizer_config = self.agent_state.compaction_settings or get_default_compaction_settings( - self.agent_state.llm_config._to_model_settings() + + # Use agent's compaction_settings if set, otherwise fall back to + # global defaults based on the agent's model handle. + if self.agent_state.compaction_settings is not None: + summarizer_config = self.agent_state.compaction_settings + else: + # Prefer the new handle field if set, otherwise derive from llm_config + if self.agent_state.model is not None: + handle = self.agent_state.model + else: + llm_cfg = self.agent_state.llm_config + handle = llm_cfg.handle or f"{llm_cfg.model_endpoint_type}/{llm_cfg.model}" + + summarizer_config = get_default_compaction_settings(handle) + + # Build the LLMConfig used for summarization + summarizer_llm_config = self._build_summarizer_llm_config( + agent_llm_config=self.agent_state.llm_config, + summarizer_config=summarizer_config, ) + summarization_mode_used = summarizer_config.mode if summarizer_config.mode == "all": summary, compacted_messages = await summarize_all( actor=self.actor, - llm_config=self.agent_state.llm_config, + llm_config=summarizer_llm_config, summarizer_config=summarizer_config, in_context_messages=messages, ) @@ -1351,7 +1371,7 @@ class LettaAgentV3(LettaAgentV2): try: summary, compacted_messages = await summarize_via_sliding_window( actor=self.actor, - llm_config=self.agent_state.llm_config, + llm_config=summarizer_llm_config, summarizer_config=summarizer_config, in_context_messages=messages, ) @@ -1359,7 +1379,7 @@ class LettaAgentV3(LettaAgentV2): self.logger.error(f"Sliding window summarization failed with exception: {str(e)}. Falling back to all mode.") summary, compacted_messages = await summarize_all( actor=self.actor, - llm_config=self.agent_state.llm_config, + llm_config=summarizer_llm_config, summarizer_config=summarizer_config, in_context_messages=messages, ) @@ -1445,3 +1465,46 @@ class LettaAgentV3(LettaAgentV2): final_messages += compacted_messages[1:] return summary_message_obj, final_messages + + @staticmethod + def _build_summarizer_llm_config( + agent_llm_config: LLMConfig, + summarizer_config: CompactionSettings, + ) -> LLMConfig: + """Derive an LLMConfig for summarization from a model handle. + + This mirrors the agent-creation path: start from the agent's LLMConfig, + override provider/model/handle from ``compaction_settings.model``, and + then apply any explicit ``compaction_settings.model_settings`` via + ``_to_legacy_config_params``. + """ + + # If no summarizer model handle is provided, fall back to the agent's config + if not summarizer_config.model: + return agent_llm_config + + try: + # Parse provider/model from the handle, falling back to the agent's + # provider type when only a model name is given. + if "/" in summarizer_config.model: + provider, model_name = summarizer_config.model.split("/", 1) + else: + provider = agent_llm_config.model_endpoint_type + model_name = summarizer_config.model + + # Start from the agent's config and override model + provider + handle + base = agent_llm_config.model_copy() + base.model_endpoint_type = provider + base.model = model_name + base.handle = summarizer_config.model + + # If explicit model_settings are provided for the summarizer, apply + # them just like server.create_agent_async does for agents. + if summarizer_config.model_settings is not None: + update_params = summarizer_config.model_settings._to_legacy_config_params() + return base.model_copy(update=update_params) + + return base + except Exception: + # On any error, do not break the agent – just fall back + return agent_llm_config diff --git a/letta/services/summarizer/summarizer_config.py b/letta/services/summarizer/summarizer_config.py index 00635902..cbc40885 100644 --- a/letta/services/summarizer/summarizer_config.py +++ b/letta/services/summarizer/summarizer_config.py @@ -2,13 +2,30 @@ from typing import Literal from pydantic import BaseModel, Field -from letta.schemas.llm_config import LLMConfig -from letta.schemas.model import ModelSettings +from letta.schemas.model import ModelSettingsUnion class CompactionSettings(BaseModel): - # summarizer_model: LLMConfig = Field(default=..., description="The model to use for summarization.") - model_settings: ModelSettings = Field(default=..., description="The model settings to use for summarization.") + """Configuration for conversation compaction / summarization. + + ``model`` is the only required user-facing field – it specifies the summarizer + model handle (e.g. ``"openai/gpt-4o-mini"``). Per-model settings (temperature, + max tokens, etc.) are derived from the default configuration for that handle. + """ + + # Summarizer model handle (provider/model-name). + # This is required whenever compaction_settings is provided. + model: str = Field( + ..., + description="Model handle to use for summarization (format: provider/model-name).", + ) + + # Optional provider-specific model settings for the summarizer model + model_settings: ModelSettingsUnion | None = Field( + default=None, + description="Optional model settings used to override defaults for the summarizer model.", + ) + prompt: str = Field(default=..., description="The prompt to use for summarization.") prompt_acknowledgement: str = Field( default=..., description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)." @@ -23,22 +40,25 @@ class CompactionSettings(BaseModel): ) -def get_default_compaction_settings(model_settings: ModelSettings) -> CompactionSettings: - """Build a default CompactionSettings from global settings for backward compatibility. +def get_default_compaction_settings(model_handle: str) -> CompactionSettings: + """Build a default :class:`CompactionSettings` from a model handle. Args: - llm_config: The LLMConfig to use for the summarizer model (typically the agent's llm_config). + model_handle: The model handle to use for summarization + (format: provider/model-name). Returns: - A CompactionSettings with default values from global settings. + A :class:`CompactionSettings` populated with sane defaults. """ + from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK from letta.prompts import gpt_summarize from letta.settings import summarizer_settings return CompactionSettings( mode="sliding_window", - model_settings=model_settings, + model=model_handle, + model_settings=None, prompt=gpt_summarize.SYSTEM, prompt_acknowledgement=MESSAGE_SUMMARY_REQUEST_ACK, clip_chars=2000, diff --git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py index 65af7c1b..2fcaecb7 100644 --- a/tests/integration_test_summarizer.py +++ b/tests/integration_test_summarizer.py @@ -675,11 +675,11 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon new_letta_messages = await server.message_manager.create_many_messages_async(new_letta_messages, actor=actor) # Create a custom CompactionSettings with the desired mode - def mock_get_default_compaction_settings(model_settings): - config = get_default_compaction_settings(model_settings) + def mock_get_default_compaction_settings(llm_config_inner): + config = get_default_compaction_settings(llm_config_inner) # Override the mode return CompactionSettings( - model_settings=config.model_settings, + model=config.model, prompt=config.prompt, prompt_acknowledgement=config.prompt_acknowledgement, clip_chars=config.clip_chars, @@ -724,6 +724,86 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon assert result[1].role == MessageRole.user +@pytest.mark.asyncio +async def test_v3_compact_uses_compaction_settings_model_and_model_settings(server: SyncServer, actor): + """Integration test: LettaAgentV3.compact uses the LLMConfig implied by CompactionSettings. + + We set a different summarizer model handle + model_settings and verify that + the LLMConfig passed into simple_summary reflects both the handle and + the model_settings overrides. + """ + + from letta.agents.letta_agent_v3 import LettaAgentV3 + from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning + from letta.services.summarizer import summarizer_all + + base_llm_config = LLMConfig.default_config("gpt-4o-mini") + + messages = [ + PydanticMessage( + role=MessageRole.system, + content=[TextContent(type="text", text="You are a helpful assistant.")], + ), + PydanticMessage( + role=MessageRole.user, + content=[TextContent(type="text", text="Hello")], + ), + PydanticMessage( + role=MessageRole.assistant, + content=[TextContent(type="text", text="Hi there")], + ), + ] + + # Create agent + messages via helper to get a real AgentState + agent_state, in_context_messages = await create_agent_with_messages( + server=server, + actor=actor, + llm_config=base_llm_config, + messages=messages, + ) + + summarizer_handle = "openai/gpt-5-mini" + summarizer_model_settings = OpenAIModelSettings( + max_output_tokens=4321, + temperature=0.05, + reasoning=OpenAIReasoning(reasoning_effort="high"), + response_format=None, + ) + agent_state.compaction_settings = CompactionSettings( + model=summarizer_handle, + model_settings=summarizer_model_settings, + prompt="You are a summarizer.", + prompt_acknowledgement="ack", + clip_chars=2000, + mode="all", + sliding_window_percentage=0.3, + ) + + captured_llm_config: dict = {} + + async def fake_simple_summary(messages, llm_config, actor, include_ack=True, prompt=None): # type: ignore[override] + captured_llm_config["value"] = llm_config + return "summary text" + + # Patch simple_summary so we don't hit the real LLM and can inspect llm_config + with patch.object(summarizer_all, "simple_summary", new=fake_simple_summary): + agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor) + summary_msg, compacted = await agent_loop.compact(messages=in_context_messages) + + assert summary_msg is not None + assert "value" in captured_llm_config + summarizer_llm_config = captured_llm_config["value"] + + # Agent's llm_config remains the base config + assert agent_state.llm_config.model == "gpt-4o-mini" + + # Summarizer llm_config should reflect compaction_settings.model and model_settings + assert summarizer_llm_config.handle == summarizer_handle + assert summarizer_llm_config.model == "gpt-5-mini" + assert summarizer_llm_config.max_tokens == 4321 + assert summarizer_llm_config.temperature == 0.05 + + @pytest.mark.asyncio @pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS]) async def test_v3_summarize_hard_eviction_when_still_over_threshold( @@ -847,14 +927,13 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server: This test uses the real token counter (via create_token_counter) to verify the sliding window logic works with actual token counting. """ - from letta.schemas.model import ModelSettings from letta.services.summarizer.summarizer_config import get_default_compaction_settings from letta.services.summarizer.summarizer_sliding_window import summarize_via_sliding_window # Create a real summarizer config using the default factory # Override sliding_window_percentage to 0.3 for this test - model_settings = ModelSettings() # Use defaults - summarizer_config = get_default_compaction_settings(model_settings) + handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}" + summarizer_config = get_default_compaction_settings(handle) summarizer_config.sliding_window_percentage = 0.3 # Create 65 messages (similar to the failing case in the bug report) @@ -1399,13 +1478,12 @@ async def test_summarize_all(server: SyncServer, actor, llm_config: LLMConfig): This test verifies that the 'all' summarization mode works correctly, summarizing the entire conversation into a single summary string. """ - from letta.schemas.model import ModelSettings from letta.services.summarizer.summarizer_all import summarize_all from letta.services.summarizer.summarizer_config import get_default_compaction_settings # Create a summarizer config with "all" mode - model_settings = ModelSettings() - summarizer_config = get_default_compaction_settings(model_settings) + handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}" + summarizer_config = get_default_compaction_settings(handle) summarizer_config.mode = "all" # Create test messages - a simple conversation diff --git a/tests/managers/test_agent_manager.py b/tests/managers/test_agent_manager.py index 0804bafe..0b567510 100644 --- a/tests/managers/test_agent_manager.py +++ b/tests/managers/test_agent_manager.py @@ -253,6 +253,177 @@ async def test_create_agent_base_tool_rules_non_excluded_providers(server: SyncS assert len(created_agent.tool_rules) > 0 +@pytest.mark.asyncio +async def test_create_agent_with_model_handle_uses_correct_llm_config(server: SyncServer, default_user): + """When CreateAgent.model is provided, ensure the correct handle is used to resolve llm_config. + + This verifies that the model handle passed by the client is forwarded into + SyncServer.get_cached_llm_config_async and that the resulting AgentState + carries an llm_config with the same handle. + """ + + # Track the arguments used to resolve the LLM config + captured_kwargs: dict = {} + + async def fake_get_cached_llm_config_async(self, actor, **kwargs): # type: ignore[override] + from letta.schemas.llm_config import LLMConfig as PydanticLLMConfig + + captured_kwargs.update(kwargs) + handle = kwargs["handle"] + + # Return a minimal but valid LLMConfig with the requested handle + return PydanticLLMConfig( + model="test-model-name", + model_endpoint_type="openai", + model_endpoint="https://api.openai.com/v1", + context_window=8192, + handle=handle, + ) + + model_handle = "openai/gpt-4o-mini" + + # Patch SyncServer.get_cached_llm_config_async so we don't depend on provider DB state + with patch.object(SyncServer, "get_cached_llm_config_async", new=fake_get_cached_llm_config_async): + created_agent = await server.create_agent_async( + request=CreateAgent( + name="agent_with_model_handle", + agent_type="memgpt_v2_agent", + # Use new model handle field instead of llm_config + model=model_handle, + embedding_config=EmbeddingConfig.default_config(provider="openai"), + memory_blocks=[], + include_base_tools=False, + ), + actor=default_user, + ) + + # Ensure we resolved the config using the provided handle + assert captured_kwargs["handle"] == model_handle + + # And that the resulting agent's llm_config reflects the same handle + assert created_agent.llm_config is not None + assert created_agent.llm_config.handle == model_handle + + +@pytest.mark.asyncio +async def test_compaction_settings_model_uses_separate_llm_config_for_summarization(default_user): + """When compaction_settings.model differs from the agent model, use a separate llm_config. + + This test exercises the summarization helpers directly to avoid external + provider dependencies. It verifies that CompactionSettings.model controls + the LLMConfig used for the summarizer request. + """ + + from letta.agents.letta_agent_v3 import LettaAgentV3 + from letta.schemas.agent import AgentState as PydanticAgentState + from letta.schemas.enums import AgentType, MessageRole + from letta.schemas.memory import Memory + from letta.schemas.message import Message as PydanticMessage + from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning + + # Base agent LLM config + base_llm_config = LLMConfig.default_config("gpt-4o-mini") + assert base_llm_config.model == "gpt-4o-mini" + + # Configure compaction to use a different summarizer model + summarizer_handle = "openai/gpt-5-mini" + summarizer_model_settings = OpenAIModelSettings( + max_output_tokens=1234, + temperature=0.1, + reasoning=OpenAIReasoning(reasoning_effort="high"), + response_format=None, + ) + summarizer_config = CompactionSettings( + model=summarizer_handle, + model_settings=summarizer_model_settings, + prompt="You are a summarizer.", + prompt_acknowledgement="ack", + clip_chars=2000, + mode="all", + sliding_window_percentage=0.3, + ) + + # Minimal message buffer: system + one user + one assistant + messages = [ + PydanticMessage( + role=MessageRole.system, + content=[TextContent(type="text", text="You are a helpful assistant.")], + ), + PydanticMessage( + role=MessageRole.user, + content=[TextContent(type="text", text="Hello")], + ), + PydanticMessage( + role=MessageRole.assistant, + content=[TextContent(type="text", text="Hi there")], + ), + ] + + # Build a minimal AgentState for LettaAgentV3 using the base llm_config + agent_state = PydanticAgentState( + id="agent-test-compaction-llm-config", + name="test-agent", + system="You are a helpful assistant.", + agent_type=AgentType.letta_v1_agent, + llm_config=base_llm_config, + embedding_config=EmbeddingConfig.default_config(provider="openai"), + model=None, + embedding=None, + model_settings=None, + compaction_settings=summarizer_config, + response_format=None, + description=None, + metadata=None, + memory=Memory(blocks=[]), + blocks=[], + tools=[], + sources=[], + tags=[], + tool_exec_environment_variables=[], + secrets=[], + project_id=None, + template_id=None, + base_template_id=None, + deployment_id=None, + entity_id=None, + identity_ids=[], + identities=[], + message_ids=[], + message_buffer_autoclear=False, + enable_sleeptime=None, + multi_agent_group=None, + managed_group=None, + last_run_completion=None, + last_run_duration_ms=None, + last_stop_reason=None, + timezone="UTC", + max_files_open=None, + per_file_view_window_char_limit=None, + hidden=None, + created_by_id=None, + last_updated_by_id=None, + created_at=None, + updated_at=None, + tool_rules=None, + ) + + # Use the static helper on LettaAgentV3 to derive summarizer llm_config + summarizer_llm_config = LettaAgentV3._build_summarizer_llm_config( + agent_llm_config=agent_state.llm_config, + summarizer_config=agent_state.compaction_settings, + ) + + # Agent model remains the base model + assert agent_state.llm_config.model == "gpt-4o-mini" + + # Summarizer config should use the handle/model from compaction_settings + assert summarizer_llm_config.handle == summarizer_handle + assert summarizer_llm_config.model == "gpt-5-mini" + # And should reflect overrides from model_settings + assert summarizer_llm_config.max_tokens == 1234 + assert summarizer_llm_config.temperature == 0.1 + + @pytest.mark.asyncio async def test_calculate_multi_agent_tools(set_letta_environment): """Test that calculate_multi_agent_tools excludes local-only tools in production."""