From 80a0d1a95f5f819215abc551ddebb96dbab9b770 Mon Sep 17 00:00:00 2001 From: amysguan <64990783+amysguan@users.noreply.github.com> Date: Fri, 13 Feb 2026 15:44:09 -0800 Subject: [PATCH] Add LLM client compaction errors to traces (#9474) * add llm client errors to traces * update response json for telemetry * prevent silent failures and properly log errored responses in streaming path * remove double logging --------- Co-authored-by: Amy Guan Co-authored-by: Kian Jones --- letta/llm_api/llm_client_base.py | 6 +++- letta/services/summarizer/summarizer.py | 44 +++++++++++++++---------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/letta/llm_api/llm_client_base.py b/letta/llm_api/llm_client_base.py index b4b8d69d..43fa9e27 100644 --- a/letta/llm_api/llm_client_base.py +++ b/letta/llm_api/llm_client_base.py @@ -161,7 +161,11 @@ class LLMClientBase: return if response_json is None: - return + if error_msg: + response_json = {"error": error_msg, "error_type": error_type} + else: + logger.warning(f"Skipping telemetry: no response_json or error_msg (call_type={self._telemetry_call_type})") + return try: pydantic_actor = self.actor.to_pydantic() if hasattr(self.actor, "to_pydantic") else self.actor diff --git a/letta/services/summarizer/summarizer.py b/letta/services/summarizer/summarizer.py index e7aabbae..b5a11e02 100644 --- a/letta/services/summarizer/summarizer.py +++ b/letta/services/summarizer/summarizer.py @@ -542,27 +542,35 @@ async def simple_summary( ) # AnthropicClient.stream_async sets request_data["stream"] = True internally. - stream = await llm_client.stream_async(req_data, summarizer_llm_config) - async for _chunk in interface.process(stream): - # We don't emit anything; we just want the fully-accumulated content. - pass + try: + stream = await llm_client.stream_async(req_data, summarizer_llm_config) + async for _chunk in interface.process(stream): + pass - content_parts = interface.get_content() - text = "".join(part.text for part in content_parts if isinstance(part, TextContent)).strip() + content_parts = interface.get_content() + text = "".join(part.text for part in content_parts if isinstance(part, TextContent)).strip() - # Log telemetry after stream processing - await llm_client.log_provider_trace_async( - request_data=req_data, - response_json={ - "content": text, - "model": summarizer_llm_config.model, - "usage": { - "input_tokens": getattr(interface, "input_tokens", None), - "output_tokens": getattr(interface, "output_tokens", None), + await llm_client.log_provider_trace_async( + request_data=req_data, + response_json={ + "content": text, + "model": summarizer_llm_config.model, + "usage": { + "input_tokens": getattr(interface, "input_tokens", None), + "output_tokens": getattr(interface, "output_tokens", None), + }, }, - }, - llm_config=summarizer_llm_config, - ) + llm_config=summarizer_llm_config, + ) + except Exception as e: + await llm_client.log_provider_trace_async( + request_data=req_data, + response_json=None, + llm_config=summarizer_llm_config, + error_msg=str(e), + error_type=type(e).__name__, + ) + raise if not text: logger.warning("No content returned from summarizer (streaming path)")