fix: use model instead of model_settings (#6834)

This commit is contained in:
Sarah Wooders
2025-12-14 17:07:10 -08:00
committed by Caren Thomas
parent a721a00899
commit a731e01e88
5 changed files with 536 additions and 56 deletions

View File

@@ -20645,7 +20645,7 @@
"compaction_settings": {
"anyOf": [
{
"$ref": "#/components/schemas/CompactionSettings"
"$ref": "#/components/schemas/CompactionSettings-Output"
},
{
"type": "null"
@@ -24304,11 +24304,70 @@
"required": ["code"],
"title": "CodeInput"
},
"CompactionSettings": {
"CompactionSettings-Input": {
"properties": {
"model": {
"type": "string",
"title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name)."
},
"model_settings": {
"$ref": "#/components/schemas/ModelSettings",
"description": "The model settings to use for summarization."
"anyOf": [
{
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIModelSettings"
},
{
"$ref": "#/components/schemas/AnthropicModelSettings"
},
{
"$ref": "#/components/schemas/GoogleAIModelSettings"
},
{
"$ref": "#/components/schemas/GoogleVertexModelSettings"
},
{
"$ref": "#/components/schemas/AzureModelSettings"
},
{
"$ref": "#/components/schemas/XAIModelSettings"
},
{
"$ref": "#/components/schemas/GroqModelSettings"
},
{
"$ref": "#/components/schemas/DeepseekModelSettings"
},
{
"$ref": "#/components/schemas/TogetherModelSettings"
},
{
"$ref": "#/components/schemas/BedrockModelSettings"
}
],
"discriminator": {
"propertyName": "provider_type",
"mapping": {
"anthropic": "#/components/schemas/AnthropicModelSettings",
"azure": "#/components/schemas/AzureModelSettings",
"bedrock": "#/components/schemas/BedrockModelSettings",
"deepseek": "#/components/schemas/DeepseekModelSettings",
"google_ai": "#/components/schemas/GoogleAIModelSettings",
"google_vertex": "#/components/schemas/GoogleVertexModelSettings",
"groq": "#/components/schemas/GroqModelSettings",
"openai": "#/components/schemas/OpenAIModelSettings",
"together": "#/components/schemas/TogetherModelSettings",
"xai": "#/components/schemas/XAIModelSettings"
}
}
},
{
"type": "null"
}
],
"title": "Model Settings",
"description": "Optional model settings used to override defaults for the summarizer model."
},
"prompt": {
"type": "string",
@@ -24348,8 +24407,116 @@
}
},
"type": "object",
"required": ["model_settings", "prompt", "prompt_acknowledgement"],
"title": "CompactionSettings"
"required": ["model", "prompt", "prompt_acknowledgement"],
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field; it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
"CompactionSettings-Output": {
"properties": {
"model": {
"type": "string",
"title": "Model",
"description": "Model handle to use for summarization (format: provider/model-name)."
},
"model_settings": {
"anyOf": [
{
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIModelSettings"
},
{
"$ref": "#/components/schemas/AnthropicModelSettings"
},
{
"$ref": "#/components/schemas/GoogleAIModelSettings"
},
{
"$ref": "#/components/schemas/GoogleVertexModelSettings"
},
{
"$ref": "#/components/schemas/AzureModelSettings"
},
{
"$ref": "#/components/schemas/XAIModelSettings"
},
{
"$ref": "#/components/schemas/GroqModelSettings"
},
{
"$ref": "#/components/schemas/DeepseekModelSettings"
},
{
"$ref": "#/components/schemas/TogetherModelSettings"
},
{
"$ref": "#/components/schemas/BedrockModelSettings"
}
],
"discriminator": {
"propertyName": "provider_type",
"mapping": {
"anthropic": "#/components/schemas/AnthropicModelSettings",
"azure": "#/components/schemas/AzureModelSettings",
"bedrock": "#/components/schemas/BedrockModelSettings",
"deepseek": "#/components/schemas/DeepseekModelSettings",
"google_ai": "#/components/schemas/GoogleAIModelSettings",
"google_vertex": "#/components/schemas/GoogleVertexModelSettings",
"groq": "#/components/schemas/GroqModelSettings",
"openai": "#/components/schemas/OpenAIModelSettings",
"together": "#/components/schemas/TogetherModelSettings",
"xai": "#/components/schemas/XAIModelSettings"
}
}
},
{
"type": "null"
}
],
"title": "Model Settings",
"description": "Optional model settings used to override defaults for the summarizer model."
},
"prompt": {
"type": "string",
"title": "Prompt",
"description": "The prompt to use for summarization."
},
"prompt_acknowledgement": {
"type": "string",
"title": "Prompt Acknowledgement",
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
},
"clip_chars": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"title": "Clip Chars",
"description": "The maximum length of the summary in characters. If none, no clipping is performed.",
"default": 2000
},
"mode": {
"type": "string",
"enum": ["all", "sliding_window"],
"title": "Mode",
"description": "The type of summarization technique to use.",
"default": "sliding_window"
},
"sliding_window_percentage": {
"type": "number",
"title": "Sliding Window Percentage",
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).",
"default": 0.3
}
},
"type": "object",
"required": ["model", "prompt", "prompt_acknowledgement"],
"title": "CompactionSettings",
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field; it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
},
"ComparisonOperator": {
"type": "string",
@@ -25153,7 +25320,7 @@
"compaction_settings": {
"anyOf": [
{
"$ref": "#/components/schemas/CompactionSettings"
"$ref": "#/components/schemas/CompactionSettings-Input"
},
{
"type": "null"
@@ -29386,7 +29553,7 @@
"compaction_settings": {
"anyOf": [
{
"$ref": "#/components/schemas/CompactionSettings"
"$ref": "#/components/schemas/CompactionSettings-Input"
},
{
"type": "null"
@@ -32764,25 +32931,6 @@
],
"title": "Model"
},
"ModelSettings": {
"properties": {
"max_output_tokens": {
"type": "integer",
"title": "Max Output Tokens",
"description": "The maximum number of tokens the model can generate.",
"default": 4096
},
"parallel_tool_calls": {
"type": "boolean",
"title": "Parallel Tool Calls",
"description": "Whether to enable parallel tool calling.",
"default": false
}
},
"type": "object",
"title": "ModelSettings",
"description": "Schema for defining settings for a model"
},
"ModifyApprovalRequest": {
"properties": {
"requires_approval": {
@@ -38722,7 +38870,7 @@
"compaction_settings": {
"anyOf": [
{
"$ref": "#/components/schemas/CompactionSettings"
"$ref": "#/components/schemas/CompactionSettings-Input"
},
{
"type": "null"
@@ -39995,7 +40143,7 @@
"compaction_settings": {
"anyOf": [
{
"$ref": "#/components/schemas/CompactionSettings"
"$ref": "#/components/schemas/CompactionSettings-Input"
},
{
"type": "null"

View File

@@ -1331,19 +1331,39 @@ class LettaAgentV3(LettaAgentV2):
@trace_method
async def compact(self, messages, trigger_threshold: Optional[int] = None) -> Message:
"""Compact the current in-context messages for this agent.
Compaction uses a summarizer LLM configuration derived from
``compaction_settings.model`` when provided. This mirrors how agent
creation derives defaults from provider-specific ModelSettings, but is
localized to summarization.
"""
Simplified compaction method. Does NOT do any persistence (handled in the loop)
"""
# compact the current in-context messages (self.in_context_messages)
# Use agent's compaction_settings if set, otherwise fall back to defaults
summarizer_config = self.agent_state.compaction_settings or get_default_compaction_settings(
self.agent_state.llm_config._to_model_settings()
# Use agent's compaction_settings if set, otherwise fall back to
# global defaults based on the agent's model handle.
if self.agent_state.compaction_settings is not None:
summarizer_config = self.agent_state.compaction_settings
else:
# Prefer the new handle field if set, otherwise derive from llm_config
if self.agent_state.model is not None:
handle = self.agent_state.model
else:
llm_cfg = self.agent_state.llm_config
handle = llm_cfg.handle or f"{llm_cfg.model_endpoint_type}/{llm_cfg.model}"
summarizer_config = get_default_compaction_settings(handle)
# Build the LLMConfig used for summarization
summarizer_llm_config = self._build_summarizer_llm_config(
agent_llm_config=self.agent_state.llm_config,
summarizer_config=summarizer_config,
)
summarization_mode_used = summarizer_config.mode
if summarizer_config.mode == "all":
summary, compacted_messages = await summarize_all(
actor=self.actor,
llm_config=self.agent_state.llm_config,
llm_config=summarizer_llm_config,
summarizer_config=summarizer_config,
in_context_messages=messages,
)
@@ -1351,7 +1371,7 @@ class LettaAgentV3(LettaAgentV2):
try:
summary, compacted_messages = await summarize_via_sliding_window(
actor=self.actor,
llm_config=self.agent_state.llm_config,
llm_config=summarizer_llm_config,
summarizer_config=summarizer_config,
in_context_messages=messages,
)
@@ -1359,7 +1379,7 @@ class LettaAgentV3(LettaAgentV2):
self.logger.error(f"Sliding window summarization failed with exception: {str(e)}. Falling back to all mode.")
summary, compacted_messages = await summarize_all(
actor=self.actor,
llm_config=self.agent_state.llm_config,
llm_config=summarizer_llm_config,
summarizer_config=summarizer_config,
in_context_messages=messages,
)
@@ -1445,3 +1465,46 @@ class LettaAgentV3(LettaAgentV2):
final_messages += compacted_messages[1:]
return summary_message_obj, final_messages
@staticmethod
def _build_summarizer_llm_config(
agent_llm_config: LLMConfig,
summarizer_config: CompactionSettings,
) -> LLMConfig:
"""Derive an LLMConfig for summarization from a model handle.
This mirrors the agent-creation path: start from the agent's LLMConfig,
override provider/model/handle from ``compaction_settings.model``, and
then apply any explicit ``compaction_settings.model_settings`` via
``_to_legacy_config_params``.
"""
# If no summarizer model handle is provided, fall back to the agent's config
if not summarizer_config.model:
return agent_llm_config
try:
# Parse provider/model from the handle, falling back to the agent's
# provider type when only a model name is given.
if "/" in summarizer_config.model:
provider, model_name = summarizer_config.model.split("/", 1)
else:
provider = agent_llm_config.model_endpoint_type
model_name = summarizer_config.model
# Start from the agent's config and override model + provider + handle
base = agent_llm_config.model_copy()
base.model_endpoint_type = provider
base.model = model_name
base.handle = summarizer_config.model
# If explicit model_settings are provided for the summarizer, apply
# them just like server.create_agent_async does for agents.
if summarizer_config.model_settings is not None:
update_params = summarizer_config.model_settings._to_legacy_config_params()
return base.model_copy(update=update_params)
return base
except Exception:
# On any error, do not break the agent; just fall back
return agent_llm_config

View File

@@ -2,13 +2,30 @@ from typing import Literal
from pydantic import BaseModel, Field
from letta.schemas.llm_config import LLMConfig
from letta.schemas.model import ModelSettings
from letta.schemas.model import ModelSettingsUnion
class CompactionSettings(BaseModel):
# summarizer_model: LLMConfig = Field(default=..., description="The model to use for summarization.")
model_settings: ModelSettings = Field(default=..., description="The model settings to use for summarization.")
"""Configuration for conversation compaction / summarization.
``model`` is the only required user-facing field; it specifies the summarizer
model handle (e.g. ``"openai/gpt-4o-mini"``). Per-model settings (temperature,
max tokens, etc.) are derived from the default configuration for that handle.
"""
# Summarizer model handle (provider/model-name).
# This is required whenever compaction_settings is provided.
model: str = Field(
...,
description="Model handle to use for summarization (format: provider/model-name).",
)
# Optional provider-specific model settings for the summarizer model
model_settings: ModelSettingsUnion | None = Field(
default=None,
description="Optional model settings used to override defaults for the summarizer model.",
)
prompt: str = Field(default=..., description="The prompt to use for summarization.")
prompt_acknowledgement: str = Field(
default=..., description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
@@ -23,22 +40,25 @@ class CompactionSettings(BaseModel):
)
def get_default_compaction_settings(model_settings: ModelSettings) -> CompactionSettings:
"""Build a default CompactionSettings from global settings for backward compatibility.
def get_default_compaction_settings(model_handle: str) -> CompactionSettings:
"""Build a default :class:`CompactionSettings` from a model handle.
Args:
llm_config: The LLMConfig to use for the summarizer model (typically the agent's llm_config).
model_handle: The model handle to use for summarization
(format: provider/model-name).
Returns:
A CompactionSettings with default values from global settings.
A :class:`CompactionSettings` populated with sane defaults.
"""
from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
from letta.prompts import gpt_summarize
from letta.settings import summarizer_settings
return CompactionSettings(
mode="sliding_window",
model_settings=model_settings,
model=model_handle,
model_settings=None,
prompt=gpt_summarize.SYSTEM,
prompt_acknowledgement=MESSAGE_SUMMARY_REQUEST_ACK,
clip_chars=2000,

View File

@@ -675,11 +675,11 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
new_letta_messages = await server.message_manager.create_many_messages_async(new_letta_messages, actor=actor)
# Create a custom CompactionSettings with the desired mode
def mock_get_default_compaction_settings(model_settings):
config = get_default_compaction_settings(model_settings)
def mock_get_default_compaction_settings(llm_config_inner):
config = get_default_compaction_settings(llm_config_inner)
# Override the mode
return CompactionSettings(
model_settings=config.model_settings,
model=config.model,
prompt=config.prompt,
prompt_acknowledgement=config.prompt_acknowledgement,
clip_chars=config.clip_chars,
@@ -724,6 +724,86 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
assert result[1].role == MessageRole.user
@pytest.mark.asyncio
async def test_v3_compact_uses_compaction_settings_model_and_model_settings(server: SyncServer, actor):
"""Integration test: LettaAgentV3.compact uses the LLMConfig implied by CompactionSettings.
We set a different summarizer model handle + model_settings and verify that
the LLMConfig passed into simple_summary reflects both the handle and
the model_settings overrides.
"""
from letta.agents.letta_agent_v3 import LettaAgentV3
from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
from letta.services.summarizer import summarizer_all
base_llm_config = LLMConfig.default_config("gpt-4o-mini")
messages = [
PydanticMessage(
role=MessageRole.system,
content=[TextContent(type="text", text="You are a helpful assistant.")],
),
PydanticMessage(
role=MessageRole.user,
content=[TextContent(type="text", text="Hello")],
),
PydanticMessage(
role=MessageRole.assistant,
content=[TextContent(type="text", text="Hi there")],
),
]
# Create agent + messages via helper to get a real AgentState
agent_state, in_context_messages = await create_agent_with_messages(
server=server,
actor=actor,
llm_config=base_llm_config,
messages=messages,
)
summarizer_handle = "openai/gpt-5-mini"
summarizer_model_settings = OpenAIModelSettings(
max_output_tokens=4321,
temperature=0.05,
reasoning=OpenAIReasoning(reasoning_effort="high"),
response_format=None,
)
agent_state.compaction_settings = CompactionSettings(
model=summarizer_handle,
model_settings=summarizer_model_settings,
prompt="You are a summarizer.",
prompt_acknowledgement="ack",
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
)
captured_llm_config: dict = {}
async def fake_simple_summary(messages, llm_config, actor, include_ack=True, prompt=None): # type: ignore[override]
captured_llm_config["value"] = llm_config
return "summary text"
# Patch simple_summary so we don't hit the real LLM and can inspect llm_config
with patch.object(summarizer_all, "simple_summary", new=fake_simple_summary):
agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
summary_msg, compacted = await agent_loop.compact(messages=in_context_messages)
assert summary_msg is not None
assert "value" in captured_llm_config
summarizer_llm_config = captured_llm_config["value"]
# Agent's llm_config remains the base config
assert agent_state.llm_config.model == "gpt-4o-mini"
# Summarizer llm_config should reflect compaction_settings.model and model_settings
assert summarizer_llm_config.handle == summarizer_handle
assert summarizer_llm_config.model == "gpt-5-mini"
assert summarizer_llm_config.max_tokens == 4321
assert summarizer_llm_config.temperature == 0.05
@pytest.mark.asyncio
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
async def test_v3_summarize_hard_eviction_when_still_over_threshold(
@@ -847,14 +927,13 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server:
This test uses the real token counter (via create_token_counter) to verify
the sliding window logic works with actual token counting.
"""
from letta.schemas.model import ModelSettings
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
from letta.services.summarizer.summarizer_sliding_window import summarize_via_sliding_window
# Create a real summarizer config using the default factory
# Override sliding_window_percentage to 0.3 for this test
model_settings = ModelSettings() # Use defaults
summarizer_config = get_default_compaction_settings(model_settings)
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config.sliding_window_percentage = 0.3
# Create 65 messages (similar to the failing case in the bug report)
@@ -1399,13 +1478,12 @@ async def test_summarize_all(server: SyncServer, actor, llm_config: LLMConfig):
This test verifies that the 'all' summarization mode works correctly,
summarizing the entire conversation into a single summary string.
"""
from letta.schemas.model import ModelSettings
from letta.services.summarizer.summarizer_all import summarize_all
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
# Create a summarizer config with "all" mode
model_settings = ModelSettings()
summarizer_config = get_default_compaction_settings(model_settings)
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
summarizer_config = get_default_compaction_settings(handle)
summarizer_config.mode = "all"
# Create test messages - a simple conversation

View File

@@ -253,6 +253,177 @@ async def test_create_agent_base_tool_rules_non_excluded_providers(server: SyncS
assert len(created_agent.tool_rules) > 0
@pytest.mark.asyncio
async def test_create_agent_with_model_handle_uses_correct_llm_config(server: SyncServer, default_user):
"""When CreateAgent.model is provided, ensure the correct handle is used to resolve llm_config.
This verifies that the model handle passed by the client is forwarded into
SyncServer.get_cached_llm_config_async and that the resulting AgentState
carries an llm_config with the same handle.
"""
# Track the arguments used to resolve the LLM config
captured_kwargs: dict = {}
async def fake_get_cached_llm_config_async(self, actor, **kwargs): # type: ignore[override]
from letta.schemas.llm_config import LLMConfig as PydanticLLMConfig
captured_kwargs.update(kwargs)
handle = kwargs["handle"]
# Return a minimal but valid LLMConfig with the requested handle
return PydanticLLMConfig(
model="test-model-name",
model_endpoint_type="openai",
model_endpoint="https://api.openai.com/v1",
context_window=8192,
handle=handle,
)
model_handle = "openai/gpt-4o-mini"
# Patch SyncServer.get_cached_llm_config_async so we don't depend on provider DB state
with patch.object(SyncServer, "get_cached_llm_config_async", new=fake_get_cached_llm_config_async):
created_agent = await server.create_agent_async(
request=CreateAgent(
name="agent_with_model_handle",
agent_type="memgpt_v2_agent",
# Use new model handle field instead of llm_config
model=model_handle,
embedding_config=EmbeddingConfig.default_config(provider="openai"),
memory_blocks=[],
include_base_tools=False,
),
actor=default_user,
)
# Ensure we resolved the config using the provided handle
assert captured_kwargs["handle"] == model_handle
# And that the resulting agent's llm_config reflects the same handle
assert created_agent.llm_config is not None
assert created_agent.llm_config.handle == model_handle
@pytest.mark.asyncio
async def test_compaction_settings_model_uses_separate_llm_config_for_summarization(default_user):
"""When compaction_settings.model differs from the agent model, use a separate llm_config.
This test exercises the summarization helpers directly to avoid external
provider dependencies. It verifies that CompactionSettings.model controls
the LLMConfig used for the summarizer request.
"""
from letta.agents.letta_agent_v3 import LettaAgentV3
from letta.schemas.agent import AgentState as PydanticAgentState
from letta.schemas.enums import AgentType, MessageRole
from letta.schemas.memory import Memory
from letta.schemas.message import Message as PydanticMessage
from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
# Base agent LLM config
base_llm_config = LLMConfig.default_config("gpt-4o-mini")
assert base_llm_config.model == "gpt-4o-mini"
# Configure compaction to use a different summarizer model
summarizer_handle = "openai/gpt-5-mini"
summarizer_model_settings = OpenAIModelSettings(
max_output_tokens=1234,
temperature=0.1,
reasoning=OpenAIReasoning(reasoning_effort="high"),
response_format=None,
)
summarizer_config = CompactionSettings(
model=summarizer_handle,
model_settings=summarizer_model_settings,
prompt="You are a summarizer.",
prompt_acknowledgement="ack",
clip_chars=2000,
mode="all",
sliding_window_percentage=0.3,
)
# Minimal message buffer: system + one user + one assistant
messages = [
PydanticMessage(
role=MessageRole.system,
content=[TextContent(type="text", text="You are a helpful assistant.")],
),
PydanticMessage(
role=MessageRole.user,
content=[TextContent(type="text", text="Hello")],
),
PydanticMessage(
role=MessageRole.assistant,
content=[TextContent(type="text", text="Hi there")],
),
]
# Build a minimal AgentState for LettaAgentV3 using the base llm_config
agent_state = PydanticAgentState(
id="agent-test-compaction-llm-config",
name="test-agent",
system="You are a helpful assistant.",
agent_type=AgentType.letta_v1_agent,
llm_config=base_llm_config,
embedding_config=EmbeddingConfig.default_config(provider="openai"),
model=None,
embedding=None,
model_settings=None,
compaction_settings=summarizer_config,
response_format=None,
description=None,
metadata=None,
memory=Memory(blocks=[]),
blocks=[],
tools=[],
sources=[],
tags=[],
tool_exec_environment_variables=[],
secrets=[],
project_id=None,
template_id=None,
base_template_id=None,
deployment_id=None,
entity_id=None,
identity_ids=[],
identities=[],
message_ids=[],
message_buffer_autoclear=False,
enable_sleeptime=None,
multi_agent_group=None,
managed_group=None,
last_run_completion=None,
last_run_duration_ms=None,
last_stop_reason=None,
timezone="UTC",
max_files_open=None,
per_file_view_window_char_limit=None,
hidden=None,
created_by_id=None,
last_updated_by_id=None,
created_at=None,
updated_at=None,
tool_rules=None,
)
# Use the static helper on LettaAgentV3 to derive summarizer llm_config
summarizer_llm_config = LettaAgentV3._build_summarizer_llm_config(
agent_llm_config=agent_state.llm_config,
summarizer_config=agent_state.compaction_settings,
)
# Agent model remains the base model
assert agent_state.llm_config.model == "gpt-4o-mini"
# Summarizer config should use the handle/model from compaction_settings
assert summarizer_llm_config.handle == summarizer_handle
assert summarizer_llm_config.model == "gpt-5-mini"
# And should reflect overrides from model_settings
assert summarizer_llm_config.max_tokens == 1234
assert summarizer_llm_config.temperature == 0.1
@pytest.mark.asyncio
async def test_calculate_multi_agent_tools(set_letta_environment):
"""Test that calculate_multi_agent_tools excludes local-only tools in production."""