fix: add error trace logging to simple_llm_stream_adapter (#9247)

Log error traces to ClickHouse when streaming requests fail, matching the behavior in letta_llm_stream_adapter. 🤖 Generated with [Letta Code](https://letta.com) Co-authored-by: Letta <noreply@letta.com>
2026-02-02 17:43:44 -08:00
parent 4096b30cd7
commit cd7e80acc3
1 changed files with 23 additions and 1 deletions
--- a/letta/adapters/simple_llm_stream_adapter.py
+++ b/letta/adapters/simple_llm_stream_adapter.py
@@ -70,6 +70,9 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
        # Store request data
        self.request_data = request_data

+        # Track request start time for latency calculation
+        request_start_ns = get_utc_timestamp_ns()
+
        # Get cancellation event for this run to enable graceful cancellation (before branching)
        cancellation_event = get_cancellation_event_for_run(self.run_id) if self.run_id else None

@@ -138,6 +141,16 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
            else:
                stream = await self.llm_client.stream_async(request_data, self.llm_config)
        except Exception as e:
+            self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+            latency_ms = int((self.llm_request_finish_timestamp_ns - request_start_ns) / 1_000_000)
+            await self.llm_client.log_provider_trace_async(
+                request_data=request_data,
+                response_json=None,
+                llm_config=self.llm_config,
+                latency_ms=latency_ms,
+                error_msg=str(e),
+                error_type=type(e).__name__,
+            )
            raise self.llm_client.handle_llm_error(e)

        # Process the stream and yield chunks immediately for TTFT
@@ -146,7 +159,16 @@ class SimpleLLMStreamAdapter(LettaLLMStreamAdapter):
                # Yield each chunk immediately as it arrives
                yield chunk
        except Exception as e:
-            # Map provider-specific errors during streaming to common LLMError types
+            self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+            latency_ms = int((self.llm_request_finish_timestamp_ns - request_start_ns) / 1_000_000)
+            await self.llm_client.log_provider_trace_async(
+                request_data=request_data,
+                response_json=None,
+                llm_config=self.llm_config,
+                latency_ms=latency_ms,
+                error_msg=str(e),
+                error_type=type(e).__name__,
+            )
            raise self.llm_client.handle_llm_error(e)

        # After streaming completes, extract the accumulated data