feat: add summarization retry to new agent loop (#4477)
This commit is contained in:
@@ -10,6 +10,7 @@ from letta.adapters.letta_llm_adapter import LettaLLMAdapter
|
||||
from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
|
||||
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
|
||||
from letta.agents.base_agent_v2 import BaseAgentV2
|
||||
from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
|
||||
from letta.agents.helpers import (
|
||||
_build_rule_violation_result,
|
||||
_pop_heartbeat,
|
||||
@@ -18,6 +19,7 @@ from letta.agents.helpers import (
|
||||
generate_step_id,
|
||||
)
|
||||
from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
|
||||
from letta.errors import ContextWindowExceededError
|
||||
from letta.helpers import ToolRulesSolver
|
||||
from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
|
||||
from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages
|
||||
@@ -52,7 +54,7 @@ from letta.services.step_manager import StepManager
|
||||
from letta.services.summarizer.summarizer import Summarizer
|
||||
from letta.services.telemetry_manager import TelemetryManager
|
||||
from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
|
||||
from letta.settings import settings, summarizer_settings
|
||||
from letta.settings import model_settings, settings, summarizer_settings
|
||||
from letta.system import package_function_response
|
||||
from letta.types import JsonDict
|
||||
from letta.utils import log_telemetry, united_diff, validate_function_response
|
||||
@@ -93,10 +95,21 @@ class LettaAgentV2(BaseAgentV2):
|
||||
self.step_manager = StepManager()
|
||||
self.telemetry_manager = TelemetryManager()
|
||||
|
||||
# TODO: Expand to more
|
||||
if summarizer_settings.enable_summarization and model_settings.openai_api_key:
|
||||
self.summarization_agent = EphemeralSummaryAgent(
|
||||
target_block_label="conversation_summary",
|
||||
agent_id=self.agent_state.id,
|
||||
block_manager=self.block_manager,
|
||||
message_manager=self.message_manager,
|
||||
agent_manager=self.agent_manager,
|
||||
actor=self.actor,
|
||||
)
|
||||
|
||||
# Initialize summarizer for context window management
|
||||
self.summarizer = Summarizer(
|
||||
mode=summarizer_settings.mode,
|
||||
summarizer_agent=None, # TODO: Add summary agent if needed
|
||||
summarizer_agent=self.summarization_agent,
|
||||
message_buffer_limit=summarizer_settings.message_buffer_limit,
|
||||
message_buffer_min=summarizer_settings.message_buffer_min,
|
||||
partial_evict_summarizer_percentage=summarizer_settings.partial_evict_summarizer_percentage,
|
||||
@@ -366,35 +379,46 @@ class LettaAgentV2(BaseAgentV2):
|
||||
|
||||
messages = await self._refresh_messages(messages)
|
||||
force_tool_call = valid_tools[0]["name"] if len(valid_tools) == 1 else None
|
||||
request_data = self.llm_client.build_request_data(
|
||||
messages=messages,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
tools=valid_tools,
|
||||
force_tool_call=force_tool_call,
|
||||
)
|
||||
if dry_run:
|
||||
yield request_data
|
||||
return
|
||||
for llm_request_attempt in summarizer_settings.max_summarizer_retries:
|
||||
try:
|
||||
request_data = self.llm_client.build_request_data(
|
||||
messages=messages,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
tools=valid_tools,
|
||||
force_tool_call=force_tool_call,
|
||||
)
|
||||
if dry_run:
|
||||
yield request_data
|
||||
return
|
||||
|
||||
step_progression, step_metrics = self._step_checkpoint_llm_request_start(step_metrics, agent_step_span)
|
||||
step_progression, step_metrics = self._step_checkpoint_llm_request_start(step_metrics, agent_step_span)
|
||||
|
||||
try:
|
||||
invocation = llm_adapter.invoke_llm(
|
||||
request_data=request_data,
|
||||
messages=messages,
|
||||
tools=valid_tools,
|
||||
use_assistant_message=use_assistant_message,
|
||||
step_id=step_id,
|
||||
actor=self.actor,
|
||||
)
|
||||
async for chunk in invocation:
|
||||
if llm_adapter.supports_token_streaming():
|
||||
if include_return_message_types is None or chunk.message_type in include_return_message_types:
|
||||
first_chunk = True
|
||||
yield chunk
|
||||
except ValueError:
|
||||
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
|
||||
raise
|
||||
invocation = llm_adapter.invoke_llm(
|
||||
request_data=request_data,
|
||||
messages=messages,
|
||||
tools=valid_tools,
|
||||
use_assistant_message=use_assistant_message,
|
||||
step_id=step_id,
|
||||
actor=self.actor,
|
||||
)
|
||||
async for chunk in invocation:
|
||||
if llm_adapter.supports_token_streaming():
|
||||
if include_return_message_types is None or chunk.message_type in include_return_message_types:
|
||||
first_chunk = True
|
||||
yield chunk
|
||||
except ValueError as e:
|
||||
self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
|
||||
raise e
|
||||
except Exception as e:
|
||||
if isinstance(e, ContextWindowExceededError) and llm_request_attempt < summarizer_settings.max_summarizer_retries:
|
||||
messages = await self._rebuild_context_window(
|
||||
in_context_messages=messages,
|
||||
new_letta_messages=self.response_messages,
|
||||
llm_config=self.agent_state.llm_config,
|
||||
force=True,
|
||||
)
|
||||
else:
|
||||
raise e
|
||||
|
||||
step_progression, step_metrics = self._step_checkpoint_llm_request_finish(
|
||||
step_metrics, agent_step_span, llm_adapter.llm_request_finish_timestamp_ns
|
||||
|
||||
Reference in New Issue
Block a user