fix: add handling for summarizer llm call (#4028)
This commit is contained in:
@@ -1352,6 +1352,7 @@ class LettaAgent(BaseAgent):
|
||||
) -> list[Message]:
|
||||
# If total tokens is reached, we truncate down
|
||||
# TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
|
||||
# TODO: `force` and `clear` seem to no longer be used; we should remove them
|
||||
if force or (total_tokens and total_tokens > llm_config.context_window):
|
||||
self.logger.warning(
|
||||
f"Total tokens {total_tokens} exceeds configured max tokens {llm_config.context_window}, forcefully clearing message history."
|
||||
@@ -1363,6 +1364,7 @@ class LettaAgent(BaseAgent):
|
||||
clear=True,
|
||||
)
|
||||
else:
|
||||
# NOTE (Sarah): Seems like this is doing nothing?
|
||||
self.logger.info(
|
||||
f"Total tokens {total_tokens} does not exceed configured max tokens {llm_config.context_window}, passing summarizing w/o force."
|
||||
)
|
||||
|
||||
@@ -354,7 +354,11 @@ async def simple_summary(messages: List[Message], llm_config: LLMConfig, actor:
|
||||
# NOTE: we should disable the inner_thoughts_in_kwargs here, because we don't use it
|
||||
# I'm leaving it commented out for now for safety, but it is fine assuming the var here is a copy, not a reference
|
||||
# llm_config.put_inner_thoughts_in_kwargs = False
|
||||
response_data = await llm_client.request_async(request_data, llm_config)
|
||||
try:
|
||||
response_data = await llm_client.request_async(request_data, llm_config)
|
||||
except Exception as e:
|
||||
# handle LLM error (likely a context window exceeded error)
|
||||
raise llm_client.handle_llm_error(e)
|
||||
response = llm_client.convert_response_to_chat_completion(response_data, input_messages_obj, llm_config)
|
||||
if response.choices[0].message.content is None:
|
||||
logger.warning("No content returned from summarizer")
|
||||
|
||||
Reference in New Issue
Block a user