fix: use model instead of model_settings (#6834)
This commit is contained in:
committed by
Caren Thomas
parent
a721a00899
commit
a731e01e88
@@ -20645,7 +20645,7 @@
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings"
|
||||
"$ref": "#/components/schemas/CompactionSettings-Output"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -24304,11 +24304,70 @@
|
||||
"required": ["code"],
|
||||
"title": "CodeInput"
|
||||
},
|
||||
"CompactionSettings": {
|
||||
"CompactionSettings-Input": {
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"title": "Model",
|
||||
"description": "Model handle to use for summarization (format: provider/model-name)."
|
||||
},
|
||||
"model_settings": {
|
||||
"$ref": "#/components/schemas/ModelSettings",
|
||||
"description": "The model settings to use for summarization."
|
||||
"anyOf": [
|
||||
{
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AnthropicModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GoogleAIModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GoogleVertexModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AzureModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/XAIModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GroqModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/DeepseekModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/TogetherModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/BedrockModelSettings"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "provider_type",
|
||||
"mapping": {
|
||||
"anthropic": "#/components/schemas/AnthropicModelSettings",
|
||||
"azure": "#/components/schemas/AzureModelSettings",
|
||||
"bedrock": "#/components/schemas/BedrockModelSettings",
|
||||
"deepseek": "#/components/schemas/DeepseekModelSettings",
|
||||
"google_ai": "#/components/schemas/GoogleAIModelSettings",
|
||||
"google_vertex": "#/components/schemas/GoogleVertexModelSettings",
|
||||
"groq": "#/components/schemas/GroqModelSettings",
|
||||
"openai": "#/components/schemas/OpenAIModelSettings",
|
||||
"together": "#/components/schemas/TogetherModelSettings",
|
||||
"xai": "#/components/schemas/XAIModelSettings"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Model Settings",
|
||||
"description": "Optional model settings used to override defaults for the summarizer model."
|
||||
},
|
||||
"prompt": {
|
||||
"type": "string",
|
||||
@@ -24348,8 +24407,116 @@
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": ["model_settings", "prompt", "prompt_acknowledgement"],
|
||||
"title": "CompactionSettings"
|
||||
"required": ["model", "prompt", "prompt_acknowledgement"],
|
||||
"title": "CompactionSettings",
|
||||
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
|
||||
},
|
||||
"CompactionSettings-Output": {
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"title": "Model",
|
||||
"description": "Model handle to use for summarization (format: provider/model-name)."
|
||||
},
|
||||
"model_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AnthropicModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GoogleAIModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GoogleVertexModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AzureModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/XAIModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/GroqModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/DeepseekModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/TogetherModelSettings"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/BedrockModelSettings"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "provider_type",
|
||||
"mapping": {
|
||||
"anthropic": "#/components/schemas/AnthropicModelSettings",
|
||||
"azure": "#/components/schemas/AzureModelSettings",
|
||||
"bedrock": "#/components/schemas/BedrockModelSettings",
|
||||
"deepseek": "#/components/schemas/DeepseekModelSettings",
|
||||
"google_ai": "#/components/schemas/GoogleAIModelSettings",
|
||||
"google_vertex": "#/components/schemas/GoogleVertexModelSettings",
|
||||
"groq": "#/components/schemas/GroqModelSettings",
|
||||
"openai": "#/components/schemas/OpenAIModelSettings",
|
||||
"together": "#/components/schemas/TogetherModelSettings",
|
||||
"xai": "#/components/schemas/XAIModelSettings"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Model Settings",
|
||||
"description": "Optional model settings used to override defaults for the summarizer model."
|
||||
},
|
||||
"prompt": {
|
||||
"type": "string",
|
||||
"title": "Prompt",
|
||||
"description": "The prompt to use for summarization."
|
||||
},
|
||||
"prompt_acknowledgement": {
|
||||
"type": "string",
|
||||
"title": "Prompt Acknowledgement",
|
||||
"description": "Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
|
||||
},
|
||||
"clip_chars": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"title": "Clip Chars",
|
||||
"description": "The maximum length of the summary in characters. If none, no clipping is performed.",
|
||||
"default": 2000
|
||||
},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
"enum": ["all", "sliding_window"],
|
||||
"title": "Mode",
|
||||
"description": "The type of summarization technique use.",
|
||||
"default": "sliding_window"
|
||||
},
|
||||
"sliding_window_percentage": {
|
||||
"type": "number",
|
||||
"title": "Sliding Window Percentage",
|
||||
"description": "The percentage of the context window to keep post-summarization (only used in sliding window mode).",
|
||||
"default": 0.3
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": ["model", "prompt", "prompt_acknowledgement"],
|
||||
"title": "CompactionSettings",
|
||||
"description": "Configuration for conversation compaction / summarization.\n\n``model`` is the only required user-facing field – it specifies the summarizer\nmodel handle (e.g. ``\"openai/gpt-4o-mini\"``). Per-model settings (temperature,\nmax tokens, etc.) are derived from the default configuration for that handle."
|
||||
},
|
||||
"ComparisonOperator": {
|
||||
"type": "string",
|
||||
@@ -25153,7 +25320,7 @@
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings"
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -29386,7 +29553,7 @@
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings"
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -32764,25 +32931,6 @@
|
||||
],
|
||||
"title": "Model"
|
||||
},
|
||||
"ModelSettings": {
|
||||
"properties": {
|
||||
"max_output_tokens": {
|
||||
"type": "integer",
|
||||
"title": "Max Output Tokens",
|
||||
"description": "The maximum number of tokens the model can generate.",
|
||||
"default": 4096
|
||||
},
|
||||
"parallel_tool_calls": {
|
||||
"type": "boolean",
|
||||
"title": "Parallel Tool Calls",
|
||||
"description": "Whether to enable parallel tool calling.",
|
||||
"default": false
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"title": "ModelSettings",
|
||||
"description": "Schema for defining settings for a model"
|
||||
},
|
||||
"ModifyApprovalRequest": {
|
||||
"properties": {
|
||||
"requires_approval": {
|
||||
@@ -38722,7 +38870,7 @@
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings"
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
@@ -39995,7 +40143,7 @@
|
||||
"compaction_settings": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/CompactionSettings"
|
||||
"$ref": "#/components/schemas/CompactionSettings-Input"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
|
||||
@@ -1331,19 +1331,39 @@ class LettaAgentV3(LettaAgentV2):
|
||||
|
||||
@trace_method
|
||||
async def compact(self, messages, trigger_threshold: Optional[int] = None) -> Message:
|
||||
"""Compact the current in-context messages for this agent.
|
||||
|
||||
Compaction uses a summarizer LLM configuration derived from
|
||||
``compaction_settings.model`` when provided. This mirrors how agent
|
||||
creation derives defaults from provider-specific ModelSettings, but is
|
||||
localized to summarization.
|
||||
"""
|
||||
Simplified compaction method. Does NOT do any persistence (handled in the loop)
|
||||
"""
|
||||
# compact the current in-context messages (self.in_context_messages)
|
||||
# Use agent's compaction_settings if set, otherwise fall back to defaults
|
||||
summarizer_config = self.agent_state.compaction_settings or get_default_compaction_settings(
|
||||
self.agent_state.llm_config._to_model_settings()
|
||||
|
||||
# Use agent's compaction_settings if set, otherwise fall back to
|
||||
# global defaults based on the agent's model handle.
|
||||
if self.agent_state.compaction_settings is not None:
|
||||
summarizer_config = self.agent_state.compaction_settings
|
||||
else:
|
||||
# Prefer the new handle field if set, otherwise derive from llm_config
|
||||
if self.agent_state.model is not None:
|
||||
handle = self.agent_state.model
|
||||
else:
|
||||
llm_cfg = self.agent_state.llm_config
|
||||
handle = llm_cfg.handle or f"{llm_cfg.model_endpoint_type}/{llm_cfg.model}"
|
||||
|
||||
summarizer_config = get_default_compaction_settings(handle)
|
||||
|
||||
# Build the LLMConfig used for summarization
|
||||
summarizer_llm_config = self._build_summarizer_llm_config(
|
||||
agent_llm_config=self.agent_state.llm_config,
|
||||
summarizer_config=summarizer_config,
|
||||
)
|
||||
|
||||
summarization_mode_used = summarizer_config.mode
|
||||
if summarizer_config.mode == "all":
|
||||
summary, compacted_messages = await summarize_all(
|
||||
actor=self.actor,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
llm_config=summarizer_llm_config,
|
||||
summarizer_config=summarizer_config,
|
||||
in_context_messages=messages,
|
||||
)
|
||||
@@ -1351,7 +1371,7 @@ class LettaAgentV3(LettaAgentV2):
|
||||
try:
|
||||
summary, compacted_messages = await summarize_via_sliding_window(
|
||||
actor=self.actor,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
llm_config=summarizer_llm_config,
|
||||
summarizer_config=summarizer_config,
|
||||
in_context_messages=messages,
|
||||
)
|
||||
@@ -1359,7 +1379,7 @@ class LettaAgentV3(LettaAgentV2):
|
||||
self.logger.error(f"Sliding window summarization failed with exception: {str(e)}. Falling back to all mode.")
|
||||
summary, compacted_messages = await summarize_all(
|
||||
actor=self.actor,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
llm_config=summarizer_llm_config,
|
||||
summarizer_config=summarizer_config,
|
||||
in_context_messages=messages,
|
||||
)
|
||||
@@ -1445,3 +1465,46 @@ class LettaAgentV3(LettaAgentV2):
|
||||
final_messages += compacted_messages[1:]
|
||||
|
||||
return summary_message_obj, final_messages
|
||||
|
||||
@staticmethod
def _build_summarizer_llm_config(
    agent_llm_config: LLMConfig,
    summarizer_config: CompactionSettings,
) -> LLMConfig:
    """Derive the LLMConfig used for summarization from compaction settings.

    Starts from a copy of the agent's config, overrides the provider type,
    model name and handle from ``summarizer_config.model``, and then applies
    ``summarizer_config.model_settings`` (when present) through
    ``_to_legacy_config_params``.

    Args:
        agent_llm_config: The agent's own LLMConfig. Used as the base for the
            copy and returned unchanged as the fallback.
        summarizer_config: Compaction settings carrying the summarizer model
            handle and optional model settings.

    Returns:
        The LLMConfig to use for the summarizer request. ``agent_llm_config``
        itself is returned when no summarizer handle is set or when deriving
        the config fails.
    """
    import logging

    handle = summarizer_config.model

    # No summarizer handle configured: summarize with the agent's own model.
    if not handle:
        return agent_llm_config

    try:
        # A handle is "provider/model-name"; a bare model name inherits the
        # agent's provider type.
        provider, separator, model_name = handle.partition("/")
        if not separator:
            provider = agent_llm_config.model_endpoint_type
            model_name = handle

        # NOTE(review): only provider, model and handle are overridden here;
        # every other field of the copy is inherited from the agent's config.
        # Confirm that is correct when the summarizer uses a different
        # provider than the agent (e.g. for the endpoint URL).
        derived = agent_llm_config.model_copy()
        derived.model_endpoint_type = provider
        derived.model = model_name
        derived.handle = handle

        settings = summarizer_config.model_settings
        if settings is None:
            return derived

        # Explicit summarizer settings take precedence over the copied values.
        return derived.model_copy(update=settings._to_legacy_config_params())
    except Exception:
        # Best effort: summarization must never break the agent, so fall back
        # to the agent's config. Log it so a misconfigured summarizer is not
        # silently ignored.
        logging.getLogger(__name__).warning(
            "Failed to build summarizer LLM config for handle %r; falling back to the agent's LLM config.",
            handle,
            exc_info=True,
        )
        return agent_llm_config
|
||||
|
||||
@@ -2,13 +2,30 @@ from typing import Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from letta.schemas.llm_config import LLMConfig
|
||||
from letta.schemas.model import ModelSettings
|
||||
from letta.schemas.model import ModelSettingsUnion
|
||||
|
||||
|
||||
class CompactionSettings(BaseModel):
|
||||
# summarizer_model: LLMConfig = Field(default=..., description="The model to use for summarization.")
|
||||
model_settings: ModelSettings = Field(default=..., description="The model settings to use for summarization.")
|
||||
"""Configuration for conversation compaction / summarization.
|
||||
|
||||
``model`` is the only required user-facing field – it specifies the summarizer
|
||||
model handle (e.g. ``"openai/gpt-4o-mini"``). Per-model settings (temperature,
|
||||
max tokens, etc.) are derived from the default configuration for that handle.
|
||||
"""
|
||||
|
||||
# Summarizer model handle (provider/model-name).
|
||||
# This is required whenever compaction_settings is provided.
|
||||
model: str = Field(
|
||||
...,
|
||||
description="Model handle to use for summarization (format: provider/model-name).",
|
||||
)
|
||||
|
||||
# Optional provider-specific model settings for the summarizer model
|
||||
model_settings: ModelSettingsUnion | None = Field(
|
||||
default=None,
|
||||
description="Optional model settings used to override defaults for the summarizer model.",
|
||||
)
|
||||
|
||||
prompt: str = Field(default=..., description="The prompt to use for summarization.")
|
||||
prompt_acknowledgement: str = Field(
|
||||
default=..., description="Whether to include an acknowledgement post-prompt (helps prevent non-summary outputs)."
|
||||
@@ -23,22 +40,25 @@ class CompactionSettings(BaseModel):
|
||||
)
|
||||
|
||||
|
||||
def get_default_compaction_settings(model_settings: ModelSettings) -> CompactionSettings:
|
||||
"""Build a default CompactionSettings from global settings for backward compatibility.
|
||||
def get_default_compaction_settings(model_handle: str) -> CompactionSettings:
|
||||
"""Build a default :class:`CompactionSettings` from a model handle.
|
||||
|
||||
Args:
|
||||
llm_config: The LLMConfig to use for the summarizer model (typically the agent's llm_config).
|
||||
model_handle: The model handle to use for summarization
|
||||
(format: provider/model-name).
|
||||
|
||||
Returns:
|
||||
A CompactionSettings with default values from global settings.
|
||||
A :class:`CompactionSettings` populated with sane defaults.
|
||||
"""
|
||||
|
||||
from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
|
||||
from letta.prompts import gpt_summarize
|
||||
from letta.settings import summarizer_settings
|
||||
|
||||
return CompactionSettings(
|
||||
mode="sliding_window",
|
||||
model_settings=model_settings,
|
||||
model=model_handle,
|
||||
model_settings=None,
|
||||
prompt=gpt_summarize.SYSTEM,
|
||||
prompt_acknowledgement=MESSAGE_SUMMARY_REQUEST_ACK,
|
||||
clip_chars=2000,
|
||||
|
||||
@@ -675,11 +675,11 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
|
||||
new_letta_messages = await server.message_manager.create_many_messages_async(new_letta_messages, actor=actor)
|
||||
|
||||
# Create a custom CompactionSettings with the desired mode
|
||||
def mock_get_default_compaction_settings(model_settings):
|
||||
config = get_default_compaction_settings(model_settings)
|
||||
def mock_get_default_compaction_settings(llm_config_inner):
|
||||
config = get_default_compaction_settings(llm_config_inner)
|
||||
# Override the mode
|
||||
return CompactionSettings(
|
||||
model_settings=config.model_settings,
|
||||
model=config.model,
|
||||
prompt=config.prompt,
|
||||
prompt_acknowledgement=config.prompt_acknowledgement,
|
||||
clip_chars=config.clip_chars,
|
||||
@@ -724,6 +724,86 @@ async def test_summarize_with_mode(server: SyncServer, actor, llm_config: LLMCon
|
||||
assert result[1].role == MessageRole.user
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_v3_compact_uses_compaction_settings_model_and_model_settings(server: SyncServer, actor):
    """Check that LettaAgentV3.compact summarizes with the LLMConfig implied by CompactionSettings.

    The agent is given compaction settings whose model handle and model_settings
    differ from the agent's own model. ``simple_summary`` is patched so no real
    LLM call is made, and the ``llm_config`` it receives is inspected to confirm
    it carries both the summarizer handle and the model_settings overrides.
    """
    from letta.agents.letta_agent_v3 import LettaAgentV3
    from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning
    from letta.services.summarizer import summarizer_all

    def text_message(role, text):
        # Small local builder for a single-part text message.
        return PydanticMessage(role=role, content=[TextContent(type="text", text=text)])

    agent_llm_config = LLMConfig.default_config("gpt-4o-mini")

    conversation = [
        text_message(MessageRole.system, "You are a helpful assistant."),
        text_message(MessageRole.user, "Hello"),
        text_message(MessageRole.assistant, "Hi there"),
    ]

    # Go through the shared helper so we work with a real AgentState
    agent_state, in_context_messages = await create_agent_with_messages(
        server=server,
        actor=actor,
        llm_config=agent_llm_config,
        messages=conversation,
    )

    summarizer_handle = "openai/gpt-5-mini"
    agent_state.compaction_settings = CompactionSettings(
        model=summarizer_handle,
        model_settings=OpenAIModelSettings(
            max_output_tokens=4321,
            temperature=0.05,
            reasoning=OpenAIReasoning(reasoning_effort="high"),
            response_format=None,
        ),
        prompt="You are a summarizer.",
        prompt_acknowledgement="ack",
        clip_chars=2000,
        mode="all",
        sliding_window_percentage=0.3,
    )

    # Every llm_config handed to the patched summarizer, in call order
    seen_llm_configs: list = []

    async def fake_simple_summary(messages, llm_config, actor, include_ack=True, prompt=None):  # type: ignore[override]
        seen_llm_configs.append(llm_config)
        return "summary text"

    # Swap out simple_summary: avoids the real LLM and lets us observe llm_config
    with patch.object(summarizer_all, "simple_summary", new=fake_simple_summary):
        agent_loop = LettaAgentV3(agent_state=agent_state, actor=actor)
        summary_msg, _compacted = await agent_loop.compact(messages=in_context_messages)

    assert summary_msg is not None
    assert seen_llm_configs
    summarizer_llm_config = seen_llm_configs[-1]

    # The agent itself must still be on its original model
    assert agent_state.llm_config.model == "gpt-4o-mini"

    # The summarizer call must use the compaction handle plus the settings overrides
    assert summarizer_llm_config.handle == summarizer_handle
    assert summarizer_llm_config.model == "gpt-5-mini"
    assert summarizer_llm_config.max_tokens == 4321
    assert summarizer_llm_config.temperature == 0.05
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("llm_config", TESTED_LLM_CONFIGS, ids=[c.model for c in TESTED_LLM_CONFIGS])
|
||||
async def test_v3_summarize_hard_eviction_when_still_over_threshold(
|
||||
@@ -847,14 +927,13 @@ async def test_sliding_window_cutoff_index_does_not_exceed_message_count(server:
|
||||
This test uses the real token counter (via create_token_counter) to verify
|
||||
the sliding window logic works with actual token counting.
|
||||
"""
|
||||
from letta.schemas.model import ModelSettings
|
||||
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
|
||||
from letta.services.summarizer.summarizer_sliding_window import summarize_via_sliding_window
|
||||
|
||||
# Create a real summarizer config using the default factory
|
||||
# Override sliding_window_percentage to 0.3 for this test
|
||||
model_settings = ModelSettings() # Use defaults
|
||||
summarizer_config = get_default_compaction_settings(model_settings)
|
||||
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
|
||||
summarizer_config = get_default_compaction_settings(handle)
|
||||
summarizer_config.sliding_window_percentage = 0.3
|
||||
|
||||
# Create 65 messages (similar to the failing case in the bug report)
|
||||
@@ -1399,13 +1478,12 @@ async def test_summarize_all(server: SyncServer, actor, llm_config: LLMConfig):
|
||||
This test verifies that the 'all' summarization mode works correctly,
|
||||
summarizing the entire conversation into a single summary string.
|
||||
"""
|
||||
from letta.schemas.model import ModelSettings
|
||||
from letta.services.summarizer.summarizer_all import summarize_all
|
||||
from letta.services.summarizer.summarizer_config import get_default_compaction_settings
|
||||
|
||||
# Create a summarizer config with "all" mode
|
||||
model_settings = ModelSettings()
|
||||
summarizer_config = get_default_compaction_settings(model_settings)
|
||||
handle = llm_config.handle or f"{llm_config.model_endpoint_type}/{llm_config.model}"
|
||||
summarizer_config = get_default_compaction_settings(handle)
|
||||
summarizer_config.mode = "all"
|
||||
|
||||
# Create test messages - a simple conversation
|
||||
|
||||
@@ -253,6 +253,177 @@ async def test_create_agent_base_tool_rules_non_excluded_providers(server: SyncS
|
||||
assert len(created_agent.tool_rules) > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_agent_with_model_handle_uses_correct_llm_config(server: SyncServer, default_user):
    """Creating an agent with ``CreateAgent.model`` must resolve llm_config from that handle.

    ``SyncServer.get_cached_llm_config_async`` is replaced with a fake that
    records its keyword arguments. The test then checks that the client's model
    handle was forwarded to it and that the created AgentState carries an
    llm_config with the same handle.
    """
    requested_handle = "openai/gpt-4o-mini"

    # Keyword arguments the server used when resolving the LLM config
    resolver_kwargs: dict = {}

    async def fake_get_cached_llm_config_async(self, actor, **kwargs):  # type: ignore[override]
        from letta.schemas.llm_config import LLMConfig as PydanticLLMConfig

        resolver_kwargs.update(kwargs)

        # Minimal but valid LLMConfig echoing back the requested handle
        return PydanticLLMConfig(
            model="test-model-name",
            model_endpoint_type="openai",
            model_endpoint="https://api.openai.com/v1",
            context_window=8192,
            handle=kwargs["handle"],
        )

    # Patch the resolver so the test does not depend on provider DB state
    with patch.object(SyncServer, "get_cached_llm_config_async", new=fake_get_cached_llm_config_async):
        create_request = CreateAgent(
            name="agent_with_model_handle",
            agent_type="memgpt_v2_agent",
            # Exercise the model handle field rather than llm_config
            model=requested_handle,
            embedding_config=EmbeddingConfig.default_config(provider="openai"),
            memory_blocks=[],
            include_base_tools=False,
        )
        created_agent = await server.create_agent_async(request=create_request, actor=default_user)

    # The config lookup must have been made with the handle we supplied
    assert resolver_kwargs["handle"] == requested_handle

    # The created agent's llm_config must carry that same handle
    assert created_agent.llm_config is not None
    assert created_agent.llm_config.handle == requested_handle
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_compaction_settings_model_uses_separate_llm_config_for_summarization(default_user):
    """When compaction_settings.model differs from the agent model, use a separate llm_config.

    This test calls ``LettaAgentV3._build_summarizer_llm_config`` directly, so no
    provider or LLM request is involved. It verifies that ``CompactionSettings.model``
    and ``CompactionSettings.model_settings`` control the LLMConfig derived for the
    summarizer, while the agent's own llm_config is left untouched.
    """
    from letta.agents.letta_agent_v3 import LettaAgentV3
    from letta.schemas.agent import AgentState as PydanticAgentState
    from letta.schemas.enums import AgentType
    from letta.schemas.memory import Memory
    from letta.schemas.model import OpenAIModelSettings, OpenAIReasoning

    # Base agent LLM config
    base_llm_config = LLMConfig.default_config("gpt-4o-mini")
    assert base_llm_config.model == "gpt-4o-mini"

    # Configure compaction to use a different summarizer model
    summarizer_handle = "openai/gpt-5-mini"
    summarizer_model_settings = OpenAIModelSettings(
        max_output_tokens=1234,
        temperature=0.1,
        reasoning=OpenAIReasoning(reasoning_effort="high"),
        response_format=None,
    )
    summarizer_config = CompactionSettings(
        model=summarizer_handle,
        model_settings=summarizer_model_settings,
        prompt="You are a summarizer.",
        prompt_acknowledgement="ack",
        clip_chars=2000,
        mode="all",
        sliding_window_percentage=0.3,
    )

    # Build a minimal AgentState for LettaAgentV3 using the base llm_config.
    # No message buffer is needed: the helper under test only reads the
    # agent's llm_config and compaction_settings.
    agent_state = PydanticAgentState(
        id="agent-test-compaction-llm-config",
        name="test-agent",
        system="You are a helpful assistant.",
        agent_type=AgentType.letta_v1_agent,
        llm_config=base_llm_config,
        embedding_config=EmbeddingConfig.default_config(provider="openai"),
        model=None,
        embedding=None,
        model_settings=None,
        compaction_settings=summarizer_config,
        response_format=None,
        description=None,
        metadata=None,
        memory=Memory(blocks=[]),
        blocks=[],
        tools=[],
        sources=[],
        tags=[],
        tool_exec_environment_variables=[],
        secrets=[],
        project_id=None,
        template_id=None,
        base_template_id=None,
        deployment_id=None,
        entity_id=None,
        identity_ids=[],
        identities=[],
        message_ids=[],
        message_buffer_autoclear=False,
        enable_sleeptime=None,
        multi_agent_group=None,
        managed_group=None,
        last_run_completion=None,
        last_run_duration_ms=None,
        last_stop_reason=None,
        timezone="UTC",
        max_files_open=None,
        per_file_view_window_char_limit=None,
        hidden=None,
        created_by_id=None,
        last_updated_by_id=None,
        created_at=None,
        updated_at=None,
        tool_rules=None,
    )

    # Use the static helper on LettaAgentV3 to derive summarizer llm_config
    summarizer_llm_config = LettaAgentV3._build_summarizer_llm_config(
        agent_llm_config=agent_state.llm_config,
        summarizer_config=agent_state.compaction_settings,
    )

    # Agent model remains the base model
    assert agent_state.llm_config.model == "gpt-4o-mini"

    # Summarizer config should use the handle/model from compaction_settings
    assert summarizer_llm_config.handle == summarizer_handle
    assert summarizer_llm_config.model == "gpt-5-mini"
    # And should reflect overrides from model_settings
    assert summarizer_llm_config.max_tokens == 1234
    assert summarizer_llm_config.temperature == 0.1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_calculate_multi_agent_tools(set_letta_environment):
|
||||
"""Test that calculate_multi_agent_tools excludes local-only tools in production."""
|
||||
|
||||
Reference in New Issue
Block a user