diff --git a/letta/agents/letta_agent.py b/letta/agents/letta_agent.py
index dd393eaf..6ea255d7 100644
--- a/letta/agents/letta_agent.py
+++ b/letta/agents/letta_agent.py
@@ -1352,6 +1352,7 @@ class LettaAgent(BaseAgent):
     ) -> list[Message]:
         # If total tokens is reached, we truncate down
         # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
+        # TODO: `force` and `clear` seem to no longer be used; we should remove them
         if force or (total_tokens and total_tokens > llm_config.context_window):
             self.logger.warning(
                 f"Total tokens {total_tokens} exceeds configured max tokens {llm_config.context_window}, forcefully clearing message history."
@@ -1363,6 +1364,7 @@ class LettaAgent(BaseAgent):
                 clear=True,
             )
         else:
+            # NOTE (Sarah): Seems like this is doing nothing?
             self.logger.info(
                 f"Total tokens {total_tokens} does not exceed configured max tokens {llm_config.context_window}, passing summarizing w/o force."
             )
diff --git a/letta/services/summarizer/summarizer.py b/letta/services/summarizer/summarizer.py
index e9c65d0c..b1d7c0ae 100644
--- a/letta/services/summarizer/summarizer.py
+++ b/letta/services/summarizer/summarizer.py
@@ -354,7 +354,11 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor:
     # NOTE: we should disable the inner_thoughts_in_kwargs here, because we don't use it
     # I'm leaving it commented it out for now for safety but is fine assuming the var here is a copy not a reference
     # llm_config.put_inner_thoughts_in_kwargs = False
-    response_data = await llm_client.request_async(request_data, llm_config)
+    try:
+        response_data = await llm_client.request_async(request_data, llm_config)
+    except Exception as e:
+        # translate the LLM error (likely a context window exceeded error) via the client's handler and re-raise it
+        raise llm_client.handle_llm_error(e)
     response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, llm_config)
     if response.choices[0].message.content is None:
         logger.warning("No content returned from summarizer")