feat: centralize telemetry logging at LLM client level (#8815)

* feat: centralize telemetry logging at LLM client level

Moves telemetry logging from individual adapters to LLMClientBase:
- Add TelemetryStreamWrapper to log streaming telemetry when the stream closes
- Add request_async_with_telemetry() for non-streaming requests
- Add stream_async_with_telemetry() for streaming requests
- Add set_telemetry_context() to configure agent_id, run_id, step_id

Updates adapters and agents to use new pattern:
- LettaLLMAdapter now accepts agent_id/run_id in constructor
- Adapters call set_telemetry_context() before LLM requests
- Removes duplicate telemetry logging from adapters
- Enriches traces with agent_id, run_id, call_type metadata

🐙 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: accumulate streaming response content for telemetry

TelemetryStreamWrapper now extracts actual response data from chunks:
- Content text (concatenated from deltas)
- Tool calls (id, name, arguments)
- Model name, finish reason, usage stats

🐙 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* refactor: move streaming telemetry to caller (option 3)

- Remove TelemetryStreamWrapper class
- Add log_provider_trace_async() helper to LLMClientBase
- stream_async_with_telemetry() now just returns the raw stream
- Callers log telemetry after stream processing, using the rich data from the streaming interface

Updated callers:
- summarizer.py: logs content + usage after stream processing
- letta_agent.py: logs tool_call, reasoning, model, usage

🐙 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

* fix: pass agent_id and run_id to parent adapter class

LettaLLMStreamAdapter was not passing agent_id/run_id to its parent class,
causing "unexpected keyword argument" errors.

🐙 Generated with [Letta Code](https://letta.com)

Co-Authored-By: Letta <noreply@letta.com>

---------

Co-authored-by: Letta <noreply@letta.com>
This commit is contained in:
Kian Jones
2026-01-16 22:23:48 -08:00
committed by Sarah Wooders
parent 9418ab9815
commit a92e868ee6
10 changed files with 216 additions and 19 deletions

View File

@@ -86,7 +86,14 @@ class EphemeralSummaryAgent(BaseAgent):
)
request_data = llm_client.build_request_data(agent_state.agent_type, messages, agent_state.llm_config, tools=[])
response_data = await llm_client.request_async(request_data, agent_state.llm_config)
from letta.services.telemetry_manager import TelemetryManager
llm_client.set_telemetry_context(
telemetry_manager=TelemetryManager(),
agent_id=self.agent_id,
call_type="summarization",
)
response_data = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config)
response = await llm_client.convert_response_to_chat_completion(response_data, messages, agent_state.llm_config)
summary = response.choices[0].message.content.strip()

View File

@@ -414,7 +414,9 @@ class LettaAgent(BaseAgent):
provider_trace=ProviderTrace(
request_json=request_data,
response_json=response_data,
step_id=step_id, # Use original step_id for telemetry
step_id=step_id,
agent_id=self.agent_id,
run_id=self.current_run_id,
),
)
step_progression = StepProgression.LOGGED_TRACE
@@ -759,7 +761,9 @@ class LettaAgent(BaseAgent):
provider_trace=ProviderTrace(
request_json=request_data,
response_json=response_data,
step_id=step_id, # Use original step_id for telemetry
step_id=step_id,
agent_id=self.agent_id,
run_id=self.current_run_id,
),
)
step_progression = StepProgression.LOGGED_TRACE
@@ -1117,6 +1121,22 @@ class LettaAgent(BaseAgent):
stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
raise e
reasoning_content = interface.get_reasoning_content()
# Log provider trace telemetry after stream processing
await llm_client.log_provider_trace_async(
request_data=request_data,
response_json={
"content": {
"tool_call": tool_call.model_dump() if tool_call else None,
"reasoning": [c.model_dump() for c in reasoning_content] if reasoning_content else [],
},
"model": getattr(interface, "model", None),
"usage": {
"input_tokens": interface.input_tokens,
"output_tokens": interface.output_tokens,
},
},
)
persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
tool_call,
valid_tool_names,
@@ -1208,7 +1228,9 @@ class LettaAgent(BaseAgent):
"output_tokens": usage.completion_tokens,
},
},
step_id=step_id, # Use original step_id for telemetry
step_id=step_id,
agent_id=self.agent_id,
run_id=self.current_run_id,
),
)
step_progression = StepProgression.LOGGED_TRACE
@@ -1430,8 +1452,14 @@ class LettaAgent(BaseAgent):
log_event("agent.stream_no_tokens.llm_request.created")
async with AsyncTimer() as timer:
# Attempt LLM request
response = await llm_client.request_async(request_data, agent_state.llm_config)
# Attempt LLM request with telemetry
llm_client.set_telemetry_context(
telemetry_manager=self.telemetry_manager,
agent_id=self.agent_id,
run_id=self.current_run_id,
call_type="agent_step",
)
response = await llm_client.request_async_with_telemetry(request_data, agent_state.llm_config)
# Track LLM request time
step_metrics.llm_request_ns = int(timer.elapsed_ns)
@@ -1492,10 +1520,18 @@ class LettaAgent(BaseAgent):
attributes={"request_start_to_provider_request_start_ns": ns_to_ms(request_start_to_provider_request_start_ns)},
)
# Attempt LLM request
# Set telemetry context before streaming
llm_client.set_telemetry_context(
telemetry_manager=self.telemetry_manager,
agent_id=self.agent_id,
run_id=self.current_run_id,
call_type="agent_step",
)
# Attempt LLM request with telemetry wrapper
return (
request_data,
await llm_client.stream_async(request_data, agent_state.llm_config),
await llm_client.stream_async_with_telemetry(request_data, agent_state.llm_config),
current_in_context_messages,
new_in_context_messages,
valid_tool_names,

View File

@@ -205,7 +205,9 @@ class LettaAgentV2(BaseAgentV2):
response = self._step(
messages=in_context_messages + self.response_messages,
input_messages_to_persist=input_messages_to_persist,
llm_adapter=LettaLLMRequestAdapter(llm_client=self.llm_client, llm_config=self.agent_state.llm_config),
llm_adapter=LettaLLMRequestAdapter(
llm_client=self.llm_client, llm_config=self.agent_state.llm_config, agent_id=self.agent_state.id, run_id=run_id
),
run_id=run_id,
use_assistant_message=use_assistant_message,
include_return_message_types=include_return_message_types,
@@ -286,12 +288,15 @@ class LettaAgentV2(BaseAgentV2):
llm_adapter = LettaLLMStreamAdapter(
llm_client=self.llm_client,
llm_config=self.agent_state.llm_config,
agent_id=self.agent_state.id,
run_id=run_id,
)
else:
llm_adapter = LettaLLMRequestAdapter(
llm_client=self.llm_client,
llm_config=self.agent_state.llm_config,
agent_id=self.agent_state.id,
run_id=run_id,
)
try:

View File

@@ -167,7 +167,9 @@ class LettaAgentV3(LettaAgentV2):
messages=list(self.in_context_messages + input_messages_to_persist),
input_messages_to_persist=input_messages_to_persist,
# TODO need to support non-streaming adapter too
llm_adapter=SimpleLLMRequestAdapter(llm_client=self.llm_client, llm_config=self.agent_state.llm_config),
llm_adapter=SimpleLLMRequestAdapter(
llm_client=self.llm_client, llm_config=self.agent_state.llm_config, agent_id=self.agent_state.id, run_id=run_id
),
run_id=run_id,
# use_assistant_message=use_assistant_message,
include_return_message_types=include_return_message_types,
@@ -307,12 +309,15 @@ class LettaAgentV3(LettaAgentV2):
llm_adapter = SimpleLLMStreamAdapter(
llm_client=self.llm_client,
llm_config=self.agent_state.llm_config,
agent_id=self.agent_state.id,
run_id=run_id,
)
else:
llm_adapter = SimpleLLMRequestAdapter(
llm_client=self.llm_client,
llm_config=self.agent_state.llm_config,
agent_id=self.agent_state.id,
run_id=run_id,
)
try: