diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py index dd393eaf..6ea255d7 100644 --- a/letta/agents/letta_agent.py +++ b/letta/agents/letta_agent.py @@ -1352,6 +1352,7 @@ class LettaAgent(BaseAgent): ) -> list[Message]: # If total tokens is reached, we truncate down # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc. + # TODO: `force` and `clear` seem to no longer be used, we should remove if force or (total_tokens and total_tokens > llm_config.context_window): self.logger.warning( f"Total tokens {total_tokens} exceeds configured max tokens {llm_config.context_window}, forcefully clearing message history." @@ -1363,6 +1364,7 @@ class LettaAgent(BaseAgent): clear=True, ) else: + # NOTE (Sarah): Seems like this is doing nothing? self.logger.info( f"Total tokens {total_tokens} does not exceed configured max tokens {llm_config.context_window}, passing summarizing w/o force." ) diff --git a/letta/services/summarizer/summarizer.py b/letta/services/summarizer/summarizer.py index e9c65d0c..b1d7c0ae 100644 --- a/letta/services/summarizer/summarizer.py +++ b/letta/services/summarizer/summarizer.py @@ -354,7 +354,11 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor: # NOTE: we should disable the inner_thoughts_in_kwargs here, because we don't use it # I'm leaving it commented it out for now for safety but is fine assuming the var here is a copy not a reference # llm_config.put_inner_thoughts_in_kwargs = False - response_data = await llm_client.request_async(request_data, llm_config) + try: + response_data = await llm_client.request_async(request_data, llm_config) + except Exception as e: + # handle LLM error (likely a context window exceeded error) + raise llm_client.handle_llm_error(e) response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, llm_config) if response.choices[0].message.content is None: logger.warning("No content returned from summarizer")