Add LLM client compaction errors to traces (#9474)

* Add LLM client errors to traces
* Update response JSON for telemetry
* Prevent silent failures and properly log errored responses in the streaming path
* Remove double logging

---------

Co-authored-by: Amy Guan <amy@letta.com>
Co-authored-by: Kian Jones <kian@letta.com>
2026-02-13 15:44:09 -08:00
parent 2f76f2c629
commit 80a0d1a95f
2 changed files with 31 additions and 19 deletions
--- a/letta/llm_api/llm_client_base.py
+++ b/letta/llm_api/llm_client_base.py
@@ -161,7 +161,11 @@ class LLMClientBase:
            return

        if response_json is None:
-            return
+            if error_msg:
+                response_json = {"error": error_msg, "error_type": error_type}
+            else:
+                logger.warning(f"Skipping telemetry: no response_json or error_msg (call_type={self._telemetry_call_type})")
+                return

        try:
            pydantic_actor = self.actor.to_pydantic() if hasattr(self.actor, "to_pydantic") else self.actor
--- a/letta/services/summarizer/summarizer.py
+++ b/letta/services/summarizer/summarizer.py
@@ -542,27 +542,35 @@ async def simple_summary(
            )

            # AnthropicClient.stream_async sets request_data["stream"] = True internally.
-            stream = await llm_client.stream_async(req_data, summarizer_llm_config)
-            async for _chunk in interface.process(stream):
-                # We don't emit anything; we just want the fully-accumulated content.
-                pass
+            try:
+                stream = await llm_client.stream_async(req_data, summarizer_llm_config)
+                async for _chunk in interface.process(stream):
+                    pass

-            content_parts = interface.get_content()
-            text = "".join(part.text for part in content_parts if isinstance(part, TextContent)).strip()
+                content_parts = interface.get_content()
+                text = "".join(part.text for part in content_parts if isinstance(part, TextContent)).strip()

-            # Log telemetry after stream processing
-            await llm_client.log_provider_trace_async(
-                request_data=req_data,
-                response_json={
-                    "content": text,
-                    "model": summarizer_llm_config.model,
-                    "usage": {
-                        "input_tokens": getattr(interface, "input_tokens", None),
-                        "output_tokens": getattr(interface, "output_tokens", None),
+                await llm_client.log_provider_trace_async(
+                    request_data=req_data,
+                    response_json={
+                        "content": text,
+                        "model": summarizer_llm_config.model,
+                        "usage": {
+                            "input_tokens": getattr(interface, "input_tokens", None),
+                            "output_tokens": getattr(interface, "output_tokens", None),
+                        },
                    },
-                },
-                llm_config=summarizer_llm_config,
-            )
+                    llm_config=summarizer_llm_config,
+                )
+            except Exception as e:
+                await llm_client.log_provider_trace_async(
+                    request_data=req_data,
+                    response_json=None,
+                    llm_config=summarizer_llm_config,
+                    error_msg=str(e),
+                    error_type=type(e).__name__,
+                )
+                raise

            if not text:
                logger.warning("No content returned from summarizer (streaming path)")