fix: use model instead of model_settings (#6834)

2025-12-14 17:07:10 -08:00
parent a721a00899
commit a731e01e88
5 changed files with 536 additions and 56 deletions
--- a/fern/openapi.json
+++ b/fern/openapi.json
@@ -20645,7 +20645,7 @@
          "compaction_settings": {
            "anyOf": [
              {
-                "$ref": "#/components/schemas/CompactionSettings"
+                "$ref": "#/components/schemas/CompactionSettings-Output"
              },
              {
                "type": "null"
@@ -24304,11 +24304,70 @@
        "required": ["code"],
        "title": "CodeInput"
      },
-      "CompactionSettings": {
+      "CompactionSettings-Input": {
        "properties": {
+          "model": {
+            "type": "string",
+            "title": "Model",
+            "description": "Model handle to use for summarization (format: provider/model-name)."
+          },
          "model_settings": {
-            "$ref": "#/components/schemas/ModelSettings",
-            "description": "The model settings to use for summarization."
+            "anyOf": [
+              {
+                "oneOf": [
+                  {
+                    "$ref": "#/components/schemas/OpenAIModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/AnthropicModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/GoogleAIModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/GoogleVertexModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/AzureModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/XAIModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/GroqModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/DeepseekModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/TogetherModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/BedrockModelSettings"
+                  }
+                ],
+                "discriminator": {
+                  "propertyName": "provider_type",
+                  "mapping": {
+                    "anthropic": "#/components/schemas/AnthropicModelSettings",
+                    "azure": "#/components/schemas/AzureModelSettings",
+                    "bedrock": "#/components/schemas/BedrockModelSettings",
+                    "deepseek": "#/components/schemas/DeepseekModelSettings",
+                    "google_ai": "#/components/schemas/GoogleAIModelSettings",
+                    "google_vertex": "#/components/schemas/GoogleVertexModelSettings",
+                    "groq": "#/components/schemas/GroqModelSettings",
+                    "openai": "#/components/schemas/OpenAIModelSettings",
+                    "together": "#/components/schemas/TogetherModelSettings",
+                    "xai": "#/components/schemas/XAIModelSettings"
+                  }
+                }
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Model Settings",
+            "description": "Optional model settings used to override defaults for the summarizer model."
          },
          "prompt": {
            "type": "string",
@@ -24348,8 +24407,116 @@
          }
        },
        "type": "object",
-        "required": ["model_settings", "prompt", "prompt_acknowledgement"],
-        "title": "CompactionSettings"
+        "required": ["model", "prompt", "prompt_acknowledgement"],
+        "title": "CompactionSettings",
+        "description": "Configuration for conversation compaction / summarization.\n\n``model`` specifies the summarizer model handle (e.g. ``\"openai/gpt-4o-mini\"``)\nand is required, as are ``prompt`` and ``prompt_acknowledgement``. Per-model\nsettings (temperature, max tokens, etc.) can be overridden via ``model_settings``."
+      },
+      "CompactionSettings-Output": {
+        "properties": {
+          "model": {
+            "type": "string",
+            "title": "Model",
+            "description": "Model handle to use for summarization (format: provider/model-name)."
+          },
+          "model_settings": {
+            "anyOf": [
+              {
+                "oneOf": [
+                  {
+                    "$ref": "#/components/schemas/OpenAIModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/AnthropicModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/GoogleAIModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/GoogleVertexModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/AzureModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/XAIModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/GroqModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/DeepseekModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/TogetherModelSettings"
+                  },
+                  {
+                    "$ref": "#/components/schemas/BedrockModelSettings"
+                  }
+                ],
+                "discriminator": {
+                  "propertyName": "provider_type",
+                  "mapping": {
+                    "anthropic": "#/components/schemas/AnthropicModelSettings",
+                    "azure": "#/components/schemas/AzureModelSettings",
+                    "bedrock": "#/components/schemas/BedrockModelSettings",
+                    "deepseek": "#/components/schemas/DeepseekModelSettings",
+                    "google_ai": "#/components/schemas/GoogleAIModelSettings",
+                    "google_vertex": "#/components/schemas/GoogleVertexModelSettings",
+                    "groq": "#/components/schemas/GroqModelSettings",
+                    "openai": "#/components/schemas/OpenAIModelSettings",
+                    "together": "#/components/schemas/TogetherModelSettings",
+                    "xai": "#/components/schemas/XAIModelSettings"
+                  }
+                }
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Model Settings",
+            "description": "Optional model settings used to override defaults for the summarizer model."
+          },
+          "prompt": {
+            "type": "string",
+            "title": "Prompt",
+            "description": "The prompt to use for summarization."
+          },
+          "prompt_acknowledgement": {
+            "type": "string",
+            "title": "Prompt Acknowledgement",
+            "description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
+          },
+          "clip_chars": {
+            "anyOf": [
+              {
+                "type": "integer"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "title": "Clip Chars",
+            "description": "The maximum length of the summary in characters. If none, no clipping is performed.",
+            "default": 2000
+          },
+          "mode": {
+            "type": "string",
+            "enum": ["all", "sliding_window"],
+            "title": "Mode",
+            "description": "The type of summarization technique use.",
+            "default": "sliding_window"
+          },
+          "sliding_window_percentage": {
+            "type": "number",
+            "title": "Sliding Window Percentage",
+            "description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).",
+            "default": 0.3
+          }
+        },
+        "type": "object",
+        "required": ["model", "prompt", "prompt_acknowledgement"],
+        "title": "CompactionSettings",
+        "description": "Configuration for conversation compaction / summarization.\n\n``model`` specifies the summarizer model handle (e.g. ``\"openai/gpt-4o-mini\"``)\nand is required, as are ``prompt`` and ``prompt_acknowledgement``. Per-model\nsettings (temperature, max tokens, etc.) can be overridden via ``model_settings``."
      },
      "ComparisonOperator": {
        "type": "string",
@@ -25153,7 +25320,7 @@
          "compaction_settings": {
            "anyOf": [
              {
-                "$ref": "#/components/schemas/CompactionSettings"
+                "$ref": "#/components/schemas/CompactionSettings-Input"
              },
              {
                "type": "null"
@@ -29386,7 +29553,7 @@
          "compaction_settings": {
            "anyOf": [
              {
-                "$ref": "#/components/schemas/CompactionSettings"
+                "$ref": "#/components/schemas/CompactionSettings-Input"
              },
              {
                "type": "null"
@@ -32764,25 +32931,6 @@
        ],
        "title": "Model"
      },
-      "ModelSettings": {
-        "properties": {
-          "max_output_tokens": {
-            "type": "integer",
-            "title": "Max Output Tokens",
-            "description": "The maximum number of tokens the model can generate.",
-            "default": 4096
-          },
-          "parallel_tool_calls": {
-            "type": "boolean",
-            "title": "Parallel Tool Calls",
-            "description": "Whether to enable parallel tool calling.",
-            "default": false
-          }
-        },
-        "type": "object",
-        "title": "ModelSettings",
-        "description": "Schema for defining settings for a model"
-      },
      "ModifyApprovalRequest": {
        "properties": {
          "requires_approval": {
@@ -38722,7 +38870,7 @@
          "compaction_settings": {
            "anyOf": [
              {
-                "$ref": "#/components/schemas/CompactionSettings"
+                "$ref": "#/components/schemas/CompactionSettings-Input"
              },
              {
                "type": "null"
@@ -39995,7 +40143,7 @@
          "compaction_settings": {
            "anyOf": [
              {
-                "$ref": "#/components/schemas/CompactionSettings"
+                "$ref": "#/components/schemas/CompactionSettings-Input"
              },
              {
                "type": "null"
--- a/letta/agents/letta_agent_v3.py
+++ b/letta/agents/letta_agent_v3.py
@@ -1331,19 +1331,39 @@ class LettaAgentV3(LettaAgentV2):

    @trace_method
    async def compact(self, messages, trigger_threshold: Optional[int] = None) -> Message:
+        """Compact the current in-context messages for this agent.
+
+        Compaction uses a summarizer LLM configuration derived from
+        ``compaction_settings.model`` when provided. This mirrors how agent
+        creation derives defaults from provider-specific ModelSettings, but is
+        localized to summarization.
        """
-        Simplified compaction method. Does NOT do any persistence (handled in the loop)
-        """
-        # compact the current in-context messages (self.in_context_messages)
-        # Use agent's compaction_settings if set, otherwise fall back to defaults
-        summarizer_config = self.agent_state.compaction_settings or get_default_compaction_settings(
-            self.agent_state.llm_config._to_model_settings()
+
+        # Use agent's compaction_settings if set, otherwise fall back to
+        # global defaults based on the agent's model handle.
+        if self.agent_state.compaction_settings is not None:
+            summarizer_config = self.agent_state.compaction_settings
+        else:
+            # Prefer the new handle field if set, otherwise derive from llm_config
+            if self.agent_state.model is not None:
+                handle = self.agent_state.model
+            else:
+                llm_cfg = self.agent_state.llm_config
+                handle = llm_cfg.handle or f"{llm_cfg.model_endpoint_type}/{llm_cfg.model}"
+
+            summarizer_config = get_default_compaction_settings(handle)
+
+        # Build the LLMConfig used for summarization
+        summarizer_llm_config = self._build_summarizer_llm_config(
+            agent_llm_config=self.agent_state.llm_config,
+            summarizer_config=summarizer_config,
        )
+
        summarization_mode_used = summarizer_config.mode
        if summarizer_config.mode == "all":
            summary, compacted_messages = await summarize_all(
                actor=self.actor,
-                llm_config=self.agent_state.llm_config,
+                llm_config=summarizer_llm_config,
                summarizer_config=summarizer_config,
                in_context_messages=messages,
            )
@@ -1351,7 +1371,7 @@ class LettaAgentV3(LettaAgentV2):
            try:
                summary, compacted_messages = await summarize_via_sliding_window(
                    actor=self.actor,
-                    llm_config=self.agent_state.llm_config,
+                    llm_config=summarizer_llm_config,
                    summarizer_config=summarizer_config,
                    in_context_messages=messages,
                )
@@ -1359,7 +1379,7 @@ class LettaAgentV3(LettaAgentV2):
                self.logger.error(f"Sliding window summarization failed with exception: {str(e)}. Falling back to all mode.")
                summary, compacted_messages = await summarize_all(
                    actor=self.actor,
-                    llm_config=self.agent_state.llm_config,
+                    llm_config=summarizer_llm_config,
                    summarizer_config=summarizer_config,
                    in_context_messages=messages,
                )
@@ -1445,3 +1465,46 @@ class LettaAgentV3(LettaAgentV2):
            final_messages += compacted_messages[1:]

        return summary_message_obj, final_messages
+
+    @staticmethod
+    def _build_summarizer_llm_config(
+        agent_llm_config: LLMConfig,
+        summarizer_config: CompactionSettings,
+    ) -> LLMConfig:
+        """Derive an LLMConfig for summarization from a model handle.
+
+        This mirrors the agent-creation path: start from the agent's LLMConfig,
+        override provider/model/handle from ``compaction_settings.model``, and
+        then apply any explicit ``compaction_settings.model_settings`` via
+        ``_to_legacy_config_params``.
+        """
+
+        # If no summarizer model handle is provided, fall back to the agent's config
+        if not summarizer_config.model:
+            return agent_llm_config
+
+        try:
+            # Parse provider/model from the handle, falling back to the agent's
+            # provider type when only a model name is given.
+            if "/" in summarizer_config.model:
+                provider, model_name = summarizer_config.model.split("/", 1)
+            else:
+                provider = agent_llm_config.model_endpoint_type
+                model_name = summarizer_config.model
+
+            # Start from the agent's config and override model + provider + handle
+            base = agent_llm_config.model_copy()
+            base.model_endpoint_type = provider
+            base.model = model_name
+            base.handle = summarizer_config.model
+
+            # If explicit model_settings are provided for the summarizer, apply
+            # them just like server.create_agent_async does for agents.
+            if summarizer_config.model_settings is not None:
+                update_params = summarizer_config.model_settings._to_legacy_config_params()
+                return base.model_copy(update=update_params)
+
+            return base
+        except Exception:
+            # On any error, do not break the agent – just fall back
+            return agent_llm_config
--- a/letta/services/summarizer/summarizer_config.py
+++ b/letta/services/summarizer/summarizer_config.py
@@ -2,13 +2,30 @@ from typing import Literal

 from pydantic import BaseModel, Field

-from letta.schemas.llm_config import LLMConfig
-from letta.schemas.model import ModelSettings
+from letta.schemas.model import ModelSettingsUnion


 class CompactionSettings(BaseModel):
-    # summarizer_model: LLMConfig = Field(default=..., description="The model to use for summarization.")
-    model_settings: ModelSettings = Field(default=..., description="The model settings to use for summarization.")
+    """Configuration for conversation compaction / summarization.
+
+    ``model`` specifies the summarizer model handle (e.g. ``"openai/gpt-4o-mini"``)
+    and is required, as are ``prompt`` and ``prompt_acknowledgement``. Per-model
+    settings (temperature, max tokens, etc.) can be overridden via ``model_settings``.
+    """
+
+    # Summarizer model handle (provider/model-name).
+    # This is required whenever compaction_settings is provided.
+    model: str = Field(
+        ...,
+        description="Model handle to use for summarization (format: provider/model-name).",
+    )
+
+    # Optional provider-specific model settings for the summarizer model
+    model_settings: ModelSettingsUnion | None = Field(
+        default=None,
+        description="Optional model settings used to override defaults for the summarizer model.",
+    )
+
    prompt: str = Field(default=..., description="The prompt to use for summarization.")
    prompt_acknowledgement: str = Field(
        default=..., description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
@@ -23,22 +40,25 @@ class CompactionSettings(BaseModel):
    )


-def get_default_compaction_settings(model_settings: ModelSettings) -> CompactionSettings:
-    """Build a default CompactionSettings from global settings for backward compatibility.
+def get_default_compaction_settings(model_handle: str) -> CompactionSettings:
+    """Build a default :class:`CompactionSettings` from a model handle.

    Args:
-        llm_config: The LLMConfig to use for the summarizer model (typically the agent's llm_config).
+        model_handle: The model handle to use for summarization
+            (format: provider/model-name).

    Returns:
-        A CompactionSettings with default values from global settings.
+        A :class:`CompactionSettings` populated with sane defaults.
    """
+
    from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
    from letta.prompts import gpt_summarize
    from letta.settings import summarizer_settings

    return CompactionSettings(
        mode="sliding_window",
-        model_settings=model_settings,
+        model=model_handle,
+        model_settings=None,
        prompt=gpt_summarize.SYSTEM,
        prompt_acknowledgement=MESSAGE_SUMMARY_REQUEST_ACK,
        clip_chars=2000,
--- a/tests/integration_test_summarizer.py
+++ b/tests/integration_test_summarizer.py
@@ -675,11 +675,11 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
    new_letta_messages = await server.message_manager.create_many_messages_async(new_letta_messages, actor=actor)

    # Create a custom CompactionSettings with the desired mode
-    def mock_get_default_compaction_settings(model_settings):
-        config = get_default_compaction_settings(model_settings)
+    def mock_get_default_compaction_settings(llm_config_inner):
+        config = get_default_compaction_settings(llm_config_inner)
        # Override the mode
        return CompactionSettings(
-            model_settings=config.model_settings,
+            model=config.model,
            prompt=config.prompt,
            prompt_acknowledgement=config.prompt_acknowledgement,
            clip_chars=config.clip_chars,
@@ -724,6 +724,86 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
            assert result[1].role == MessageRole.user


+@pytest.mark.asyncio
+async def test_v3_compact_uses_compaction_settings_model_and_model_settings(server: SyncServer, actor):
+    """Integration test: LettaAgentV3.compact uses the LLMConfig implied by CompactionSettings.
+
+    We set a different summarizer model handle + model_settings and verify that
+    the LLMConfig passed into simple_summary reflects both the handle and
+    the model_settings overrides.
+    """
+
+    from letta.agents.letta_agent_v3 import LettaAgentV3
+    from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
+    from letta.services.summarizer import summarizer_all
+
+    base_llm_config = LLMConfig.default_config("gpt-4o-mini")
+
+    messages = [
+        PydanticMessage(
+            role=MessageRole.system,
+            content=[TextContent(type="text", text="You are a helpful assistant.")],
+        ),
+        PydanticMessage(
+            role=MessageRole.user,
+            content=[TextContent(type="text", text="Hello")],
+        ),
+        PydanticMessage(
+            role=MessageRole.assistant,
+            content=[TextContent(type="text", text="Hi there")],
+        ),
+    ]
+
+    # Create agent + messages via helper to get a real AgentState
+    agent_state, in_context_messages = await create_agent_with_messages(
+        server=server,
+        actor=actor,
+        llm_config=base_llm_config,
+        messages=messages,
+    )
+
+    summarizer_handle = "openai/gpt-5-mini"
+    summarizer_model_settings = OpenAIModelSettings(
+        max_output_tokens=4321,
+        temperature=0.05,
+        reasoning=OpenAIReasoning(reasoning_effort="high"),
+        response_format=None,
+    )
+    agent_state.compaction_settings = CompactionSettings(
+        model=summarizer_handle,
+        model_settings=summarizer_model_settings,
+        prompt="You are a summarizer.",
+        prompt_acknowledgement="ack",
+        clip_chars=2000,
+        mode="all",
+        sliding_window_percentage=0.3,
+    )
+
+    captured_llm_config: dict = {}
+
+    async def fake_simple_summary(messages, llm_config, actor, include_ack=True, prompt=None):  # type: ignore[override]
+        captured_llm_config["value"] = llm_config
+        return "summary text"
+
+    # Patch simple_summary so we don't hit the real LLM and can inspect llm_config
+    with patch.object(summarizer_all, "simple_summary", new=fake_simple_summary):
+        agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
+        summary_msg, compacted = await agent_loop.compact(messages=in_context_messages)
+
+    assert summary_msg is not None
+    assert "value" in captured_llm_config
+    summarizer_llm_config = captured_llm_config["value"]
+
+    # Agent's llm_config remains the base config
+    assert agent_state.llm_config.model == "gpt-4o-mini"
+
+    # Summarizer llm_config should reflect compaction_settings.model and model_settings
+    assert summarizer_llm_config.handle == summarizer_handle
+    assert summarizer_llm_config.model == "gpt-5-mini"
+    assert summarizer_llm_config.max_tokens == 4321
+    assert summarizer_llm_config.temperature == 0.05
+
+
@pytest.mark.asyncio
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
 async def test_v3_summarize_hard_eviction_when_still_over_threshold(
@@ -847,14 +927,13 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server:
    This test uses the real token counter (via create_token_counter) to verify
    the sliding window logic works with actual token counting.
    """
-    from letta.schemas.model import ModelSettings
    from letta.services.summarizer.summarizer_config import get_default_compaction_settings
    from letta.services.summarizer.summarizer_sliding_window import summarize_via_sliding_window

    # Create a real summarizer config using the default factory
    # Override sliding_window_percentage to 0.3 for this test
-    model_settings = ModelSettings()  # Use defaults
-    summarizer_config = get_default_compaction_settings(model_settings)
+    handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
+    summarizer_config = get_default_compaction_settings(handle)
    summarizer_config.sliding_window_percentage = 0.3

    # Create 65 messages (similar to the failing case in the bug report)
@@ -1399,13 +1478,12 @@ async def test_summarize_all(server: SyncServer, actor, llm_config: LLMConfig):
    This test verifies that the 'all' summarization mode works correctly,
    summarizing the entire conversation into a single summary string.
    """
-    from letta.schemas.model import ModelSettings
    from letta.services.summarizer.summarizer_all import summarize_all
    from letta.services.summarizer.summarizer_config import get_default_compaction_settings

    # Create a summarizer config with "all" mode
-    model_settings = ModelSettings()
-    summarizer_config = get_default_compaction_settings(model_settings)
+    handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
+    summarizer_config = get_default_compaction_settings(handle)
    summarizer_config.mode = "all"

    # Create test messages - a simple conversation
--- a/tests/managers/test_agent_manager.py
+++ b/tests/managers/test_agent_manager.py
@@ -253,6 +253,177 @@ async def test_create_agent_base_tool_rules_non_excluded_providers(server: SyncS
    assert len(created_agent.tool_rules) > 0


+@pytest.mark.asyncio
+async def test_create_agent_with_model_handle_uses_correct_llm_config(server: SyncServer, default_user):
+    """When CreateAgent.model is provided, ensure the correct handle is used to resolve llm_config.
+
+    This verifies that the model handle passed by the client is forwarded into
+    SyncServer.get_cached_llm_config_async and that the resulting AgentState
+    carries an llm_config with the same handle.
+    """
+
+    # Track the arguments used to resolve the LLM config
+    captured_kwargs: dict = {}
+
+    async def fake_get_cached_llm_config_async(self, actor, **kwargs):  # type: ignore[override]
+        from letta.schemas.llm_config import LLMConfig as PydanticLLMConfig
+
+        captured_kwargs.update(kwargs)
+        handle = kwargs["handle"]
+
+        # Return a minimal but valid LLMConfig with the requested handle
+        return PydanticLLMConfig(
+            model="test-model-name",
+            model_endpoint_type="openai",
+            model_endpoint="https://api.openai.com/v1",
+            context_window=8192,
+            handle=handle,
+        )
+
+    model_handle = "openai/gpt-4o-mini"
+
+    # Patch SyncServer.get_cached_llm_config_async so we don't depend on provider DB state
+    with patch.object(SyncServer, "get_cached_llm_config_async", new=fake_get_cached_llm_config_async):
+        created_agent = await server.create_agent_async(
+            request=CreateAgent(
+                name="agent_with_model_handle",
+                agent_type="memgpt_v2_agent",
+                # Use new model handle field instead of llm_config
+                model=model_handle,
+                embedding_config=EmbeddingConfig.default_config(provider="openai"),
+                memory_blocks=[],
+                include_base_tools=False,
+            ),
+            actor=default_user,
+        )
+
+    # Ensure we resolved the config using the provided handle
+    assert captured_kwargs["handle"] == model_handle
+
+    # And that the resulting agent's llm_config reflects the same handle
+    assert created_agent.llm_config is not None
+    assert created_agent.llm_config.handle == model_handle
+
+
+@pytest.mark.asyncio
+async def test_compaction_settings_model_uses_separate_llm_config_for_summarization(default_user):
+    """When compaction_settings.model differs from the agent model, use a separate llm_config.
+
+    This test exercises the summarization helpers directly to avoid external
+    provider dependencies. It verifies that CompactionSettings.model controls
+    the LLMConfig used for the summarizer request.
+    """
+
+    from letta.agents.letta_agent_v3 import LettaAgentV3
+    from letta.schemas.agent import AgentState as PydanticAgentState
+    from letta.schemas.enums import AgentType, MessageRole
+    from letta.schemas.memory import Memory
+    from letta.schemas.message import Message as PydanticMessage
+    from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
+
+    # Base agent LLM config
+    base_llm_config = LLMConfig.default_config("gpt-4o-mini")
+    assert base_llm_config.model == "gpt-4o-mini"
+
+    # Configure compaction to use a different summarizer model
+    summarizer_handle = "openai/gpt-5-mini"
+    summarizer_model_settings = OpenAIModelSettings(
+        max_output_tokens=1234,
+        temperature=0.1,
+        reasoning=OpenAIReasoning(reasoning_effort="high"),
+        response_format=None,
+    )
+    summarizer_config = CompactionSettings(
+        model=summarizer_handle,
+        model_settings=summarizer_model_settings,
+        prompt="You are a summarizer.",
+        prompt_acknowledgement="ack",
+        clip_chars=2000,
+        mode="all",
+        sliding_window_percentage=0.3,
+    )
+
+    # Minimal message buffer: system + one user + one assistant
+    messages = [
+        PydanticMessage(
+            role=MessageRole.system,
+            content=[TextContent(type="text", text="You are a helpful assistant.")],
+        ),
+        PydanticMessage(
+            role=MessageRole.user,
+            content=[TextContent(type="text", text="Hello")],
+        ),
+        PydanticMessage(
+            role=MessageRole.assistant,
+            content=[TextContent(type="text", text="Hi there")],
+        ),
+    ]
+
+    # Build a minimal AgentState for LettaAgentV3 using the base llm_config
+    agent_state = PydanticAgentState(
+        id="agent-test-compaction-llm-config",
+        name="test-agent",
+        system="You are a helpful assistant.",
+        agent_type=AgentType.letta_v1_agent,
+        llm_config=base_llm_config,
+        embedding_config=EmbeddingConfig.default_config(provider="openai"),
+        model=None,
+        embedding=None,
+        model_settings=None,
+        compaction_settings=summarizer_config,
+        response_format=None,
+        description=None,
+        metadata=None,
+        memory=Memory(blocks=[]),
+        blocks=[],
+        tools=[],
+        sources=[],
+        tags=[],
+        tool_exec_environment_variables=[],
+        secrets=[],
+        project_id=None,
+        template_id=None,
+        base_template_id=None,
+        deployment_id=None,
+        entity_id=None,
+        identity_ids=[],
+        identities=[],
+        message_ids=[],
+        message_buffer_autoclear=False,
+        enable_sleeptime=None,
+        multi_agent_group=None,
+        managed_group=None,
+        last_run_completion=None,
+        last_run_duration_ms=None,
+        last_stop_reason=None,
+        timezone="UTC",
+        max_files_open=None,
+        per_file_view_window_char_limit=None,
+        hidden=None,
+        created_by_id=None,
+        last_updated_by_id=None,
+        created_at=None,
+        updated_at=None,
+        tool_rules=None,
+    )
+
+    # Use the static helper on LettaAgentV3 to derive summarizer llm_config
+    summarizer_llm_config = LettaAgentV3._build_summarizer_llm_config(
+        agent_llm_config=agent_state.llm_config,
+        summarizer_config=agent_state.compaction_settings,
+    )
+
+    # Agent model remains the base model
+    assert agent_state.llm_config.model == "gpt-4o-mini"
+
+    # Summarizer config should use the handle/model from compaction_settings
+    assert summarizer_llm_config.handle == summarizer_handle
+    assert summarizer_llm_config.model == "gpt-5-mini"
+    # And should reflect overrides from model_settings
+    assert summarizer_llm_config.max_tokens == 1234
+    assert summarizer_llm_config.temperature == 0.1
+
+
@pytest.mark.asyncio
 async def test_calculate_multi_agent_tools(set_letta_environment):
    """Test that calculate_multi_agent_tools excludes local-only tools in production."""