diff --git a/letta/agent.py b/letta/agent.py index 978fbda3..3f416e39 100644 --- a/letta/agent.py +++ b/letta/agent.py @@ -13,9 +13,6 @@ from letta.constants import ( LETTA_CORE_TOOL_MODULE_NAME, LETTA_MULTI_AGENT_TOOL_MODULE_NAME, LLM_MAX_TOKENS, - MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST, - MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC, - MESSAGE_SUMMARY_WARNING_FRAC, REQ_HEARTBEAT_MESSAGE, ) from letta.errors import ContextWindowExceededError @@ -23,7 +20,7 @@ from letta.functions.ast_parsers import coerce_dict_args_by_annotations, get_fun from letta.functions.functions import get_function_from_module from letta.helpers import ToolRulesSolver from letta.interface import AgentInterface -from letta.llm_api.helpers import is_context_overflow_error +from letta.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error from letta.llm_api.llm_api_tools import create from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages from letta.log import get_logger @@ -52,6 +49,7 @@ from letta.services.passage_manager import PassageManager from letta.services.provider_manager import ProviderManager from letta.services.step_manager import StepManager from letta.services.tool_execution_sandbox import ToolExecutionSandbox +from letta.settings import summarizer_settings from letta.streaming_interface import StreamingRefreshCLIInterface from letta.system import get_heartbeat, get_token_limit_warning, package_function_response, package_summarize_message, package_user_message from letta.utils import ( @@ -66,6 +64,8 @@ from letta.utils import ( validate_function_response, ) +logger = get_logger(__name__) + class BaseAgent(ABC): """ @@ -635,7 +635,7 @@ class Agent(BaseAgent): self.logger.info(f"Hit max chaining steps, stopping after {counter} steps") break # Chain handlers - elif token_warning: + elif token_warning and summarizer_settings.send_memory_warning_message: assert self.agent_state.created_by_id is not None 
next_input_message = Message.dict_to_message( agent_id=self.agent_state.id, @@ -686,6 +686,7 @@ class Agent(BaseAgent): stream: bool = False, # TODO move to config? step_count: Optional[int] = None, metadata: Optional[dict] = None, + summarize_attempt_count: int = 0, ) -> AgentStepResponse: """Runs a single step in the agent loop (generates at most one LLM call)""" @@ -753,9 +754,9 @@ class Agent(BaseAgent): LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"] ) - if current_total_tokens > MESSAGE_SUMMARY_WARNING_FRAC * int(self.agent_state.llm_config.context_window): - self.logger.warning( - f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {MESSAGE_SUMMARY_WARNING_FRAC * int(self.agent_state.llm_config.context_window)}" + if current_total_tokens > summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window): + printd( + f"{CLI_WARNING_PREFIX}last response total_tokens ({current_total_tokens}) > {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}" ) # Only deliver the alert if we haven't already (this period) @@ -764,8 +765,8 @@ class Agent(BaseAgent): self.agent_alerted_about_memory_pressure = True # it's up to the outer loop to handle this else: - self.logger.warning( - f"last response total_tokens ({current_total_tokens}) < {MESSAGE_SUMMARY_WARNING_FRAC * int(self.agent_state.llm_config.context_window)}" + printd( + f"last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}" ) # Log step - this must happen before messages are persisted @@ -807,28 +808,46 @@ class Agent(BaseAgent): ) except Exception as e: - self.logger.error(f"step() failed\nmessages = {messages}\nerror = {e}") + logger.error(f"step() failed\nmessages = {messages}\nerror = {e}") # If we got a context alert, try trimming the messages 
length, then try again if is_context_overflow_error(e): - self.logger.warning( - f"context window exceeded with limit {self.agent_state.llm_config.context_window}, running summarizer to trim messages" - ) - # A separate API call to run a summarizer - self.summarize_messages_inplace() + in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user) - # Try step again - return self.inner_step( - messages=messages, - first_message=first_message, - first_message_retry_limit=first_message_retry_limit, - skip_verify=skip_verify, - stream=stream, - metadata=metadata, - ) + if summarize_attempt_count <= summarizer_settings.max_summarizer_retries: + logger.warning( + f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries})" + ) + # A separate API call to run a summarizer + self.summarize_messages_inplace() + + # Try step again + return self.inner_step( + messages=messages, + first_message=first_message, + first_message_retry_limit=first_message_retry_limit, + skip_verify=skip_verify, + stream=stream, + metadata=metadata, + summarize_attempt_count=summarize_attempt_count + 1, + ) + else: + err_msg = f"Ran summarizer {summarize_attempt_count - 1} times for agent id={self.agent_state.id}, but messages are still overflowing the context window." 
+ token_counts = get_token_counts_for_messages(in_context_messages) + logger.error(err_msg) + logger.error(f"num_in_context_messages: {len(self.agent_state.message_ids)}") + logger.error(f"token_counts: {token_counts}") + raise ContextWindowExceededError( + err_msg, + details={ + "num_in_context_messages": len(self.agent_state.message_ids), + "in_context_messages_text": [m.text for m in in_context_messages], + "token_counts": token_counts, + }, + ) else: - self.logger.error(f"step() failed with an unrecognized exception: '{str(e)}'") + logger.error(f"step() failed with an unrecognized exception: '{str(e)}'") raise e def step_user_message(self, user_message_str: str, **kwargs) -> AgentStepResponse: @@ -865,109 +884,54 @@ class Agent(BaseAgent): return self.inner_step(messages=[user_message], **kwargs) - def summarize_messages_inplace(self, cutoff=None, preserve_last_N_messages=True, disallow_tool_as_first=True): + def summarize_messages_inplace(self): in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user) in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages] + in_context_messages_openai_no_system = in_context_messages_openai[1:] + token_counts = get_token_counts_for_messages(in_context_messages) + logger.info(f"System message token count={token_counts[0]}") + logger.info(f"token_counts_no_system={token_counts[1:]}") if in_context_messages_openai[0]["role"] != "system": raise RuntimeError(f"in_context_messages_openai[0] should be system (instead got {in_context_messages_openai[0]})") - # Start at index 1 (past the system message), - # and collect messages for summarization until we reach the desired truncation token fraction (eg 50%) - # Do not allow truncation of the last N messages, since these are needed for in-context examples of function calling - token_counts = [count_tokens(str(msg)) for msg in in_context_messages_openai] - message_buffer_token_count = sum(token_counts[1:]) # 
no system message - desired_token_count_to_summarize = int(message_buffer_token_count * MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC) - candidate_messages_to_summarize = in_context_messages_openai[1:] - token_counts = token_counts[1:] - - if preserve_last_N_messages: - candidate_messages_to_summarize = candidate_messages_to_summarize[:-MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST] - token_counts = token_counts[:-MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST] - - printd(f"MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC={MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC}") - printd(f"MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST={MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST}") - printd(f"token_counts={token_counts}") - printd(f"message_buffer_token_count={message_buffer_token_count}") - printd(f"desired_token_count_to_summarize={desired_token_count_to_summarize}") - printd(f"len(candidate_messages_to_summarize)={len(candidate_messages_to_summarize)}") - # If at this point there's nothing to summarize, throw an error - if len(candidate_messages_to_summarize) == 0: + if len(in_context_messages_openai_no_system) == 0: raise ContextWindowExceededError( "Not enough messages to compress for summarization", details={ - "num_candidate_messages": len(candidate_messages_to_summarize), + "num_candidate_messages": len(in_context_messages_openai_no_system), "num_total_messages": len(in_context_messages_openai), - "preserve_N": MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST, }, ) - # Walk down the message buffer (front-to-back) until we hit the target token count - tokens_so_far = 0 - cutoff = 0 - for i, msg in enumerate(candidate_messages_to_summarize): - cutoff = i - tokens_so_far += token_counts[i] - if tokens_so_far > desired_token_count_to_summarize: - break - # Account for system message - cutoff += 1 - - # Try to make an assistant message come after the cutoff - try: - printd(f"Selected cutoff {cutoff} was a 'user', shifting one...") - if in_context_messages_openai[cutoff]["role"] == "user": - new_cutoff = cutoff + 1 - if in_context_messages_openai[new_cutoff]["role"] == 
"user": - printd(f"Shifted cutoff {new_cutoff} is still a 'user', ignoring...") - cutoff = new_cutoff - except IndexError: - pass - - # Make sure the cutoff isn't on a 'tool' or 'function' - if disallow_tool_as_first: - while in_context_messages_openai[cutoff]["role"] in ["tool", "function"] and cutoff < len(in_context_messages_openai): - printd(f"Selected cutoff {cutoff} was a 'tool', shifting one...") - cutoff += 1 - + cutoff = calculate_summarizer_cutoff(in_context_messages=in_context_messages, token_counts=token_counts, logger=logger) message_sequence_to_summarize = in_context_messages[1:cutoff] # do NOT get rid of the system message - if len(message_sequence_to_summarize) <= 1: - # This prevents a potential infinite loop of summarizing the same message over and over - raise ContextWindowExceededError( - "Not enough messages to compress for summarization after determining cutoff", - details={ - "num_candidate_messages": len(message_sequence_to_summarize), - "num_total_messages": len(in_context_messages_openai), - "preserve_N": MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST, - }, - ) - else: - printd(f"Attempting to summarize {len(message_sequence_to_summarize)} messages [1:{cutoff}] of {len(in_context_messages)}") + logger.info(f"Attempting to summarize {len(message_sequence_to_summarize)} messages of {len(in_context_messages)}") # We can't do summarize logic properly if context_window is undefined if self.agent_state.llm_config.context_window is None: # Fallback if for some reason context_window is missing, just set to the default - print(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}") - print(f"{self.agent_state}") + logger.warning(f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}") self.agent_state.llm_config.context_window = ( LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"] ) 
summary = summarize_messages(agent_state=self.agent_state, message_sequence_to_summarize=message_sequence_to_summarize) - printd(f"Got summary: {summary}") + logger.info(f"Got summary: {summary}") # Metadata that's useful for the agent to see all_time_message_count = self.message_manager.size(agent_id=self.agent_state.id, actor=self.user) - remaining_message_count = len(in_context_messages_openai[cutoff:]) + remaining_message_count = 1 + len(in_context_messages) - cutoff # System + remaining hidden_message_count = all_time_message_count - remaining_message_count summary_message_count = len(message_sequence_to_summarize) summary_message = package_summarize_message(summary, summary_message_count, hidden_message_count, all_time_message_count) - printd(f"Packaged into message: {summary_message}") + logger.info(f"Packaged into message: {summary_message}") prior_len = len(in_context_messages_openai) - self.agent_state = self.agent_manager.trim_older_in_context_messages(cutoff, agent_id=self.agent_state.id, actor=self.user) + self.agent_state = self.agent_manager.trim_all_in_context_messages_except_system(agent_id=self.agent_state.id, actor=self.user) packed_summary_message = {"role": "user", "content": summary_message} + # Prepend the summary self.agent_state = self.agent_manager.prepend_to_in_context_messages( messages=[ Message.dict_to_message( @@ -983,8 +947,12 @@ class Agent(BaseAgent): # reset alert self.agent_alerted_about_memory_pressure = False + curr_in_context_messages = self.agent_manager.get_in_context_messages(agent_id=self.agent_state.id, actor=self.user) - printd(f"Ran summarizer, messages length {prior_len} -> {len(in_context_messages_openai)}") + logger.info(f"Ran summarizer, messages length {prior_len} -> {len(curr_in_context_messages)}") + logger.info( + f"Summarizer brought down total token count from {sum(token_counts)} -> {sum(get_token_counts_for_messages(curr_in_context_messages))}" + ) def add_function(self, function_name: str) -> str: # TODO: 
refactor diff --git a/letta/constants.py b/letta/constants.py index 0b46202a..ee062cda 100644 --- a/letta/constants.py +++ b/letta/constants.py @@ -125,8 +125,6 @@ LLM_MAX_TOKENS = { "gpt-3.5-turbo-16k-0613": 16385, # legacy "gpt-3.5-turbo-0301": 4096, # legacy } -# The amount of tokens before a sytem warning about upcoming truncation is sent to Letta -MESSAGE_SUMMARY_WARNING_FRAC = 0.75 # The error message that Letta will receive # MESSAGE_SUMMARY_WARNING_STR = f"Warning: the conversation history will soon reach its maximum length and be trimmed. Make sure to save any important information from the conversation to your memory before it is removed." # Much longer and more specific variant of the prompt @@ -138,15 +136,10 @@ MESSAGE_SUMMARY_WARNING_STR = " ".join( # "Remember to pass request_heartbeat = true if you would like to send a message immediately after.", ] ) -# The fraction of tokens we truncate down to -MESSAGE_SUMMARY_TRUNC_TOKEN_FRAC = 0.75 + # The ackknowledgement message used in the summarize sequence MESSAGE_SUMMARY_REQUEST_ACK = "Understood, I will respond with a summary of the message (and only the summary, nothing else) once I receive the conversation history. I'm ready." 
-# Even when summarizing, we want to keep a handful of recent messages -# These serve as in-context examples of how to use functions / what user messages look like -MESSAGE_SUMMARY_TRUNC_KEEP_N_LAST = 3 - # Maximum length of an error message MAX_ERROR_MESSAGE_CHAR_LIMIT = 500 diff --git a/letta/llm_api/helpers.py b/letta/llm_api/helpers.py index 7c99bbcd..cdb178b9 100644 --- a/letta/llm_api/helpers.py +++ b/letta/llm_api/helpers.py @@ -7,8 +7,10 @@ from typing import Any, List, Union import requests from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING +from letta.schemas.message import Message from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice -from letta.utils import json_dumps, printd +from letta.settings import summarizer_settings +from letta.utils import count_tokens, json_dumps, printd def _convert_to_structured_output_helper(property: dict) -> dict: @@ -287,6 +289,54 @@ def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) - return rewritten_choice +def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts: List[int], logger: "logging.Logger") -> int: + if len(in_context_messages) != len(token_counts): + raise ValueError( + f"Given in_context_messages has different length from given token_counts: {len(in_context_messages)} != {len(token_counts)}" + ) + + in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages] + + if summarizer_settings.evict_all_messages: + logger.info("Evicting all messages...") + return len(in_context_messages) + else: + # Start at index 1 (past the system message), + # and collect messages for summarization until we reach the desired truncation token fraction (eg 50%) + # We do the inverse of `desired_memory_token_pressure` to get what we need to remove + desired_token_count_to_summarize = int(sum(token_counts) * (1 - summarizer_settings.desired_memory_token_pressure)) + 
logger.info(f"desired_token_count_to_summarize={desired_token_count_to_summarize}") + + tokens_so_far = 0 + cutoff = 0 + for i, msg in enumerate(in_context_messages_openai): + # Skip system + if i == 0: + continue + cutoff = i + tokens_so_far += token_counts[i] + + if msg["role"] not in ["user", "tool", "function"] and tokens_so_far >= desired_token_count_to_summarize: + # Break if the role is NOT a user or tool/function and tokens_so_far is enough + break + elif len(in_context_messages) - cutoff - 1 <= summarizer_settings.keep_last_n_messages: + # Also break if we reached the `keep_last_n_messages` threshold + # NOTE: This may be on a user, tool, or function in theory + logger.warning( + f"Breaking summary cutoff early on role={msg['role']} because we hit the `keep_last_n_messages`={summarizer_settings.keep_last_n_messages}" + ) + break + + logger.info(f"Evicting {cutoff}/{len(in_context_messages)} messages...") + return cutoff + 1 + + +def get_token_counts_for_messages(in_context_messages: List[Message]) -> List[int]: + in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages] + token_counts = [count_tokens(str(msg)) for msg in in_context_messages_openai] + return token_counts + + def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool: """Checks if an exception is due to context overflow (based on common OpenAI response messages)""" from letta.utils import printd diff --git a/letta/memory.py b/letta/memory.py index 10799094..b81e5e1d 100644 --- a/letta/memory.py +++ b/letta/memory.py @@ -1,12 +1,13 @@ from typing import Callable, Dict, List -from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK, MESSAGE_SUMMARY_WARNING_FRAC +from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK from letta.llm_api.llm_api_tools import create from letta.prompts.gpt_summarize import SYSTEM as SUMMARY_PROMPT_SYSTEM from letta.schemas.agent import AgentState from letta.schemas.enums import MessageRole from 
letta.schemas.memory import Memory from letta.schemas.message import Message +from letta.settings import summarizer_settings from letta.utils import count_tokens, printd @@ -49,8 +50,8 @@ def summarize_messages( summary_prompt = SUMMARY_PROMPT_SYSTEM summary_input = _format_summary_history(message_sequence_to_summarize) summary_input_tkns = count_tokens(summary_input) - if summary_input_tkns > MESSAGE_SUMMARY_WARNING_FRAC * context_window: - trunc_ratio = (MESSAGE_SUMMARY_WARNING_FRAC * context_window / summary_input_tkns) * 0.8 # For good measure... + if summary_input_tkns > summarizer_settings.memory_warning_threshold * context_window: + trunc_ratio = (summarizer_settings.memory_warning_threshold * context_window / summary_input_tkns) * 0.8 # For good measure... cutoff = int(len(message_sequence_to_summarize) * trunc_ratio) summary_input = str( [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff])] @@ -58,10 +59,11 @@ def summarize_messages( ) dummy_agent_id = agent_state.id - message_sequence = [] - message_sequence.append(Message(agent_id=dummy_agent_id, role=MessageRole.system, text=summary_prompt)) - message_sequence.append(Message(agent_id=dummy_agent_id, role=MessageRole.assistant, text=MESSAGE_SUMMARY_REQUEST_ACK)) - message_sequence.append(Message(agent_id=dummy_agent_id, role=MessageRole.user, text=summary_input)) + message_sequence = [ + Message(agent_id=dummy_agent_id, role=MessageRole.system, text=summary_prompt), + Message(agent_id=dummy_agent_id, role=MessageRole.assistant, text=MESSAGE_SUMMARY_REQUEST_ACK), + Message(agent_id=dummy_agent_id, role=MessageRole.user, text=summary_input), + ] # TODO: We need to eventually have a separate LLM config for the summarizer LLM llm_config_no_inner_thoughts = agent_state.llm_config.model_copy(deep=True) diff --git a/letta/schemas/environment_variables.py b/letta/schemas/environment_variables.py index 9f482c1c..bf423e06 100644 --- 
a/letta/schemas/environment_variables.py +++ b/letta/schemas/environment_variables.py @@ -26,7 +26,7 @@ class EnvironmentVariableUpdateBase(LettaBase): description: Optional[str] = Field(None, description="An optional description of the environment variable.") -# Sandbox-Specific Environment Variable +# Environment Variable class SandboxEnvironmentVariableBase(EnvironmentVariableBase): __id_prefix__ = "sandbox-env" sandbox_config_id: str = Field(..., description="The ID of the sandbox config this environment variable belongs to.") diff --git a/letta/services/agent_manager.py b/letta/services/agent_manager.py index a7dd4507..f288cc62 100644 --- a/letta/services/agent_manager.py +++ b/letta/services/agent_manager.py @@ -464,6 +464,12 @@ class AgentManager: new_messages = [message_ids[0]] + message_ids[num:] # 0 is system message return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor) + @enforce_types + def trim_all_in_context_messages_except_system(self, agent_id: str, actor: PydanticUser) -> PydanticAgentState: + message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids + new_messages = [message_ids[0]] # 0 is system message + return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor) + @enforce_types def prepend_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState: message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids diff --git a/letta/settings.py b/letta/settings.py index da3e429f..1c5f5bfe 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -18,6 +18,34 @@ class ToolSettings(BaseSettings): local_sandbox_dir: Optional[str] = None +class SummarizerSettings(BaseSettings): + model_config = SettingsConfigDict(env_prefix="letta_summarizer_", extra="ignore") + + # Controls if we should evict all messages + # TODO: Can refactor this into an enum if we have a bunch of different 
kinds of summarizers + evict_all_messages: bool = False + + # The maximum number of retries for the summarizer + # If we reach this cutoff, it probably means that the summarizer is not compressing down the in-context messages any further + # And we throw a fatal error + max_summarizer_retries: int = 3 + + # When to warn the model that a summarize command will happen soon + # The amount of tokens before a system warning about upcoming truncation is sent to Letta + memory_warning_threshold: float = 0.75 + + # Whether to send the system memory warning message + send_memory_warning_message: bool = False + + # The desired memory pressure to summarize down to + desired_memory_token_pressure: float = 0.3 + + # The number of messages at the end to keep + # Even when summarizing, we may want to keep a handful of recent messages + # These serve as in-context examples of how to use functions / what user messages look like + keep_last_n_messages: int = 0 + + class ModelSettings(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="ignore") @@ -147,3 +175,4 @@ settings = Settings(_env_parse_none_str="None") test_settings = TestSettings() model_settings = ModelSettings() tool_settings = ToolSettings() +summarizer_settings = SummarizerSettings() diff --git a/letta/system.py b/letta/system.py index d903bf1f..9c795704 100644 --- a/letta/system.py +++ b/letta/system.py @@ -161,10 +161,10 @@ def package_system_message(system_message, message_type="system_alert", time=Non return json.dumps(packaged_message) -def package_summarize_message(summary, summary_length, hidden_message_count, total_message_count, timestamp=None): +def package_summarize_message(summary, summary_message_count, hidden_message_count, total_message_count, timestamp=None): context_message = ( f"Note: prior messages ({hidden_message_count} of {total_message_count} total messages) have been hidden from view due to conversation memory constraints.\n" - + f"The following is a summary of the previous 
{summary_length} messages:\n {summary}" + + f"The following is a summary of the previous {summary_message_count} messages:\n {summary}" ) formatted_time = get_local_time() if timestamp is None else timestamp diff --git a/poetry.lock b/poetry.lock index 36d49021..2904a3b6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -416,10 +416,6 @@ files = [ {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0"}, {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5dab0844f2cf82be357a0eb11a9087f70c5430b2c241493fc122bb6f2bb0917c"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e4fe605b917c70283db7dfe5ada75e04561479075761a0b3866c081d035b01c1"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1e9a65b5736232e7a7f91ff3d02277f11d339bf34099a56cdab6a8b3410a02b2"}, - {file = "Brotli-1.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:58d4b711689366d4a03ac7957ab8c28890415e267f9b6589969e74b6e42225ec"}, {file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2"}, {file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128"}, {file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc"}, @@ -432,14 +428,8 @@ files = [ {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265"}, {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c247dd99d39e0338a604f8c2b3bc7061d5c2e9e2ac7ba9cc1be5a69cb6cd832f"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1b2c248cd517c222d89e74669a4adfa5577e06ab68771a529060cf5a156e9757"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2a24c50840d89ded6c9a8fdc7b6ed3692ed4e86f1c4a4a938e1e92def92933e0"}, - {file = "Brotli-1.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f31859074d57b4639318523d6ffdca586ace54271a73ad23ad021acd807eb14b"}, {file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50"}, {file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1"}, - {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28"}, - {file = "Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409"}, {file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2"}, {file = 
"Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451"}, @@ -450,24 +440,8 @@ files = [ {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248"}, {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111"}, - {file = "Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839"}, {file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0"}, {file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951"}, - {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5"}, - {file = "Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8"}, - {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f"}, - {file = 
"Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648"}, - {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0"}, - {file = "Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284"}, - {file = "Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7"}, - {file = "Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0"}, - {file = "Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b"}, {file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1"}, {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d"}, {file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b"}, @@ -477,10 +451,6 @@ files = [ {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = 
"sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354"}, {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_aarch64.whl", hash = "sha256:aea440a510e14e818e67bfc4027880e2fb500c2ccb20ab21c7a7c8b5b4703d75"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_i686.whl", hash = "sha256:6974f52a02321b36847cd19d1b8e381bf39939c21efd6ee2fc13a28b0d99348c"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_ppc64le.whl", hash = "sha256:a7e53012d2853a07a4a79c00643832161a910674a893d296c9f1259859a289d2"}, - {file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:d7702622a8b40c49bffb46e1e3ba2e81268d5c04a34f460978c6b5517a34dd52"}, {file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460"}, {file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579"}, {file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c"}, @@ -492,10 +462,6 @@ files = [ {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b"}, {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438"}, - {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cb1dac1770878ade83f2ccdf7d25e494f05c9165f5246b46a621cc849341dc01"}, - {file = 
"Brotli-1.1.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:3ee8a80d67a4334482d9712b8e83ca6b1d9bc7e351931252ebef5d8f7335a547"}, - {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:5e55da2c8724191e5b557f8e18943b1b4839b8efc3ef60d65985bcf6f587dd38"}, - {file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:d342778ef319e1026af243ed0a07c97acf3bad33b9f29e7ae6a1f68fd083e90c"}, {file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95"}, {file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68"}, {file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3"}, @@ -508,10 +474,6 @@ files = [ {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088"}, {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d2b35ca2c7f81d173d2fadc2f4f31e88cc5f7a39ae5b6db5513cf3383b0e0ec7"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:af6fa6817889314555aede9a919612b23739395ce767fe7fcbea9a80bf140fe5"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:2feb1d960f760a575dbc5ab3b1c00504b24caaf6986e2dc2b01c09c87866a943"}, - {file = "Brotli-1.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4410f84b33374409552ac9b6903507cdb31cd30d2501fc5ca13d18f73548444a"}, {file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b"}, {file = 
"Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0"}, {file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a"}, @@ -524,10 +486,6 @@ files = [ {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d"}, {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0737ddb3068957cf1b054899b0883830bb1fec522ec76b1098f9b6e0f02d9419"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:4f3607b129417e111e30637af1b56f24f7a49e64763253bbc275c75fa887d4b2"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:6c6e0c425f22c1c719c42670d561ad682f7bfeeef918edea971a79ac5252437f"}, - {file = "Brotli-1.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:494994f807ba0b92092a163a0a283961369a65f6cbe01e8891132b7a320e61eb"}, {file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64"}, {file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467"}, {file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724"}, @@ -2021,7 +1979,7 @@ files = [ name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, @@ 
-3737,7 +3695,7 @@ type = ["mypy (>=1.11.2)"] name = "pluggy" version = "1.5.0" description = "plugin and hook calling mechanisms for python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, @@ -4417,7 +4375,7 @@ websocket-client = "!=0.49" name = "pytest" version = "8.3.4" description = "pytest: simple powerful testing with Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, @@ -4453,6 +4411,23 @@ pytest = ">=7.0.0,<9" docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] +[[package]] +name = "pytest-mock" +version = "3.14.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, +] + +[package.dependencies] +pytest = ">=6.2.5" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + [[package]] name = "pytest-order" version = "1.3.0" @@ -6318,4 +6293,4 @@ tests = ["wikipedia"] [metadata] lock-version = "2.0" python-versions = "<3.14,>=3.10" -content-hash = "f79e70bc03fff20fcd97a1be2c7421d94458df8ffd92096c487b9dbb81f23164" +content-hash = "2f552617ff233fe8b07bdec4dc1679935df30030046984962b69ebe625717815" diff --git a/pyproject.toml b/pyproject.toml index cc22753b..730edd9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,7 @@ bedrock = ["boto3"] black = "^24.4.2" ipykernel = "^6.29.5" ipdb = "^0.13.13" +pytest-mock = "^3.14.0" [tool.black] line-length = 140 diff 
--git a/tests/integration_test_summarizer.py b/tests/integration_test_summarizer.py index b4de0043..07b0e90a 100644 --- a/tests/integration_test_summarizer.py +++ b/tests/integration_test_summarizer.py @@ -1,6 +1,7 @@ import json import os import uuid +from datetime import datetime from typing import List import pytest @@ -8,9 +9,13 @@ import pytest from letta import create_client from letta.agent import Agent from letta.client.client import LocalClient +from letta.errors import ContextWindowExceededError +from letta.llm_api.helpers import calculate_summarizer_cutoff from letta.schemas.embedding_config import EmbeddingConfig +from letta.schemas.enums import MessageRole from letta.schemas.llm_config import LLMConfig from letta.schemas.message import Message +from letta.settings import summarizer_settings from letta.streaming_interface import StreamingRefreshCLIInterface from tests.helpers.endpoints_helper import EMBEDDING_CONFIG_PATH from tests.helpers.utils import cleanup @@ -44,6 +49,101 @@ def agent_state(client): client.delete_agent(agent_state.id) +# Sample data setup +def generate_message(role: str, text: str = None, tool_calls: List = None) -> Message: + """Helper to generate a Message object.""" + return Message( + id="message-" + str(uuid.uuid4()), + role=MessageRole(role), + text=text or f"{role} message text", + created_at=datetime.utcnow(), + tool_calls=tool_calls or [], + ) + + +def test_cutoff_calculation(mocker): + """Test basic scenarios where the function calculates the cutoff correctly.""" + # Arrange + logger = mocker.Mock() # Mock logger + messages = [ + generate_message("system"), + generate_message("user"), + generate_message("assistant"), + generate_message("user"), + generate_message("assistant"), + ] + mocker.patch("letta.settings.summarizer_settings.desired_memory_token_pressure", 0.5) + mocker.patch("letta.settings.summarizer_settings.evict_all_messages", False) + + # Basic tests + token_counts = [4, 2, 8, 2, 2] + cutoff = 
calculate_summarizer_cutoff(messages, token_counts, logger) + assert cutoff == 3 + assert messages[cutoff - 1].role == MessageRole.assistant + + token_counts = [4, 2, 2, 2, 2] + cutoff = calculate_summarizer_cutoff(messages, token_counts, logger) + assert cutoff == 5 + assert messages[cutoff - 1].role == MessageRole.assistant + + token_counts = [2, 2, 3, 2, 2] + cutoff = calculate_summarizer_cutoff(messages, token_counts, logger) + assert cutoff == 3 + assert messages[cutoff - 1].role == MessageRole.assistant + + # Evict all messages + # Should give the end of the token_counts, even though it is not necessary (can just evict up to the 100) + mocker.patch("letta.settings.summarizer_settings.evict_all_messages", True) + token_counts = [1, 1, 100, 1, 1] + cutoff = calculate_summarizer_cutoff(messages, token_counts, logger) + assert cutoff == 5 + assert messages[cutoff - 1].role == MessageRole.assistant + + # Don't evict all messages with same token_counts, cutoff now should be at the 100 + # Should give the end of the token_counts, even though it is not necessary (can just evict up to the 100) + mocker.patch("letta.settings.summarizer_settings.evict_all_messages", False) + cutoff = calculate_summarizer_cutoff(messages, token_counts, logger) + assert cutoff == 3 + assert messages[cutoff - 1].role == MessageRole.assistant + + # Set `keep_last_n_messages` + mocker.patch("letta.settings.summarizer_settings.keep_last_n_messages", 3) + token_counts = [4, 2, 2, 2, 2] + cutoff = calculate_summarizer_cutoff(messages, token_counts, logger) + assert cutoff == 2 + assert messages[cutoff - 1].role == MessageRole.user + + +def test_summarize_many_messages_basic(client, mock_e2b_api_key_none): + small_context_llm_config = LLMConfig.default_config("gpt-4o-mini") + small_context_llm_config.context_window = 3000 + small_agent_state = client.create_agent( + name="small_context_agent", + llm_config=small_context_llm_config, + ) + for _ in range(10): + client.user_message( + 
agent_id=small_agent_state.id, + message="hi " * 60, + ) + client.delete_agent(small_agent_state.id) + + +def test_summarize_large_message_does_not_loop_infinitely(client, mock_e2b_api_key_none): + small_context_llm_config = LLMConfig.default_config("gpt-4o-mini") + small_context_llm_config.context_window = 2000 + small_agent_state = client.create_agent( + name="super_small_context_agent", + llm_config=small_context_llm_config, + ) + with pytest.raises(ContextWindowExceededError, match=f"Ran summarizer {summarizer_settings.max_summarizer_retries}"): + client.user_message( + agent_id=small_agent_state.id, + message="hi " * 1000, + ) + client.delete_agent(small_agent_state.id) + + def test_summarize_messages_inplace(client, agent_state, mock_e2b_api_key_none): """Test summarization via sending the summarize CLI command or via a direct call to the agent object""" # First send a few messages (5) @@ -134,7 +234,7 @@ def test_auto_summarize(client, mock_e2b_api_key_none): # "gemini-pro.json", TODO: Gemini is broken ], ) -def test_summarizer(config_filename): +def test_summarizer(config_filename, client, agent_state): namespace = uuid.NAMESPACE_DNS agent_name = str(uuid.uuid5(namespace, f"integration-test-summarizer-{config_filename}")) @@ -175,6 +275,6 @@ def test_summarizer(config_filename): ) # Invoke a summarize - letta_agent.summarize_messages_inplace(preserve_last_N_messages=False) + letta_agent.summarize_messages_inplace() in_context_messages = client.get_in_context_messages(agent_state.id) assert SUMMARY_KEY_PHRASE in in_context_messages[1].text, f"Test failed for config: {config_filename}"