"""ClickHouse reader for LLM analytics traces. Reads LLM traces from ClickHouse for debugging, analytics, and auditing. """ from __future__ import annotations import asyncio from dataclasses import dataclass from datetime import datetime from typing import Any, List, Optional from urllib.parse import urlparse from letta.helpers.singleton import singleton from letta.log import get_logger from letta.schemas.llm_trace import LLMTrace from letta.settings import settings logger = get_logger(__name__) def _parse_clickhouse_endpoint(endpoint: str) -> tuple[str, int, bool]: """Return (host, port, secure) for clickhouse_connect.get_client. Supports: - http://host:port -> (host, port, False) - https://host:port -> (host, port, True) - host:port -> (host, port, False) # Default to insecure for local dev - host -> (host, 8123, False) # Default HTTP port, insecure """ parsed = urlparse(endpoint) if parsed.scheme in ("http", "https"): host = parsed.hostname or "" port = parsed.port or (8443 if parsed.scheme == "https" else 8123) secure = parsed.scheme == "https" return host, port, secure # Fallback: accept raw hostname (possibly with :port) # Default to insecure (HTTP) for local development if ":" in endpoint: host, port_str = endpoint.rsplit(":", 1) return host, int(port_str), False return endpoint, 8123, False @dataclass(frozen=True) class LLMTraceRow: """Raw row from ClickHouse query.""" id: str organization_id: str project_id: str agent_id: str agent_tags: List[str] run_id: str step_id: str trace_id: str call_type: str provider: str model: str is_byok: bool request_size_bytes: int response_size_bytes: int prompt_tokens: int completion_tokens: int total_tokens: int cached_input_tokens: Optional[int] cache_write_tokens: Optional[int] reasoning_tokens: Optional[int] latency_ms: int is_error: bool error_type: str error_message: str request_json: str response_json: str llm_config_json: str created_at: datetime @singleton class LLMTraceReader: """ ClickHouse reader for raw LLM traces. Provides query methods for debugging, analytics, and auditing. Usage: reader = LLMTraceReader() trace = await reader.get_by_step_id_async(step_id="step-xxx", organization_id="org-xxx") traces = await reader.list_by_agent_async(agent_id="agent-xxx", organization_id="org-xxx") """ def __init__(self): self._client = None def _get_client(self): """Initialize ClickHouse client on first use (lazy loading).""" if self._client is not None: return self._client import clickhouse_connect if not settings.clickhouse_endpoint: raise ValueError("CLICKHOUSE_ENDPOINT is required") host, port, secure = _parse_clickhouse_endpoint(settings.clickhouse_endpoint) if not host: raise ValueError("Invalid CLICKHOUSE_ENDPOINT") database = settings.clickhouse_database or "otel" username = settings.clickhouse_username or "default" password = settings.clickhouse_password if not password: raise ValueError("CLICKHOUSE_PASSWORD is required") self._client = clickhouse_connect.get_client( host=host, port=port, username=username, password=password, database=database, secure=secure, verify=True, ) return self._client def _row_to_trace(self, row: tuple) -> LLMTrace: """Convert a ClickHouse row tuple to LLMTrace.""" return LLMTrace( id=row[0], organization_id=row[1], project_id=row[2] or None, agent_id=row[3] or None, agent_tags=list(row[4]) if row[4] else [], run_id=row[5] or None, step_id=row[6] or None, trace_id=row[7] or None, call_type=row[8], provider=row[9], model=row[10], is_byok=bool(row[11]), request_size_bytes=row[12], response_size_bytes=row[13], prompt_tokens=row[14], completion_tokens=row[15], total_tokens=row[16], cached_input_tokens=row[17], cache_write_tokens=row[18], reasoning_tokens=row[19], latency_ms=row[20], is_error=bool(row[21]), error_type=row[22] or None, error_message=row[23] or None, request_json=row[24], response_json=row[25], llm_config_json=row[26] or "", created_at=row[27], ) def _query_sync(self, query: str, parameters: dict[str, Any]) -> List[tuple]: """Execute a query synchronously.""" client = self._get_client() result = client.query(query, parameters=parameters) return result.result_rows if result else [] # ------------------------------------------------------------------------- # Query Methods # ------------------------------------------------------------------------- async def get_by_step_id_async( self, step_id: str, organization_id: str, ) -> Optional[LLMTrace]: """ Get the most recent trace for a step. Args: step_id: The step ID to look up organization_id: Organization ID for access control Returns: LLMTrace if found, None otherwise """ query = """ SELECT id, organization_id, project_id, agent_id, agent_tags, run_id, step_id, trace_id, call_type, provider, model, is_byok, request_size_bytes, response_size_bytes, prompt_tokens, completion_tokens, total_tokens, cached_input_tokens, cache_write_tokens, reasoning_tokens, latency_ms, is_error, error_type, error_message, request_json, response_json, llm_config_json, created_at FROM llm_traces WHERE step_id = %(step_id)s AND organization_id = %(organization_id)s ORDER BY created_at DESC LIMIT 1 """ rows = await asyncio.to_thread( self._query_sync, query, {"step_id": step_id, "organization_id": organization_id}, ) if not rows: return None return self._row_to_trace(rows[0]) async def get_by_id_async( self, trace_id: str, organization_id: str, ) -> Optional[LLMTrace]: """ Get a trace by its ID. Args: trace_id: The trace ID (UUID) organization_id: Organization ID for access control Returns: LLMTrace if found, None otherwise """ query = """ SELECT id, organization_id, project_id, agent_id, agent_tags, run_id, step_id, trace_id, call_type, provider, model, is_byok, request_size_bytes, response_size_bytes, prompt_tokens, completion_tokens, total_tokens, cached_input_tokens, cache_write_tokens, reasoning_tokens, latency_ms, is_error, error_type, error_message, request_json, response_json, llm_config_json, created_at FROM llm_traces WHERE id = %(trace_id)s AND organization_id = %(organization_id)s LIMIT 1 """ rows = await asyncio.to_thread( self._query_sync, query, {"trace_id": trace_id, "organization_id": organization_id}, ) if not rows: return None return self._row_to_trace(rows[0]) async def list_by_agent_async( self, agent_id: str, organization_id: str, limit: int = 100, offset: int = 0, call_type: Optional[str] = None, is_error: Optional[bool] = None, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None, ) -> List[LLMTrace]: """ List traces for an agent with optional filters. Args: agent_id: Agent ID to filter by organization_id: Organization ID for access control limit: Maximum number of results (default 100) offset: Offset for pagination call_type: Filter by call type ('agent_step', 'summarization') is_error: Filter by error status start_date: Filter by created_at >= start_date end_date: Filter by created_at <= end_date Returns: List of LLMTrace objects """ conditions = [ "agent_id = %(agent_id)s", "organization_id = %(organization_id)s", ] params: dict[str, Any] = { "agent_id": agent_id, "organization_id": organization_id, "limit": limit, "offset": offset, } if call_type: conditions.append("call_type = %(call_type)s") params["call_type"] = call_type if is_error is not None: conditions.append("is_error = %(is_error)s") params["is_error"] = 1 if is_error else 0 if start_date: conditions.append("created_at >= %(start_date)s") params["start_date"] = start_date if end_date: conditions.append("created_at <= %(end_date)s") params["end_date"] = end_date where_clause = " AND ".join(conditions) query = f""" SELECT id, organization_id, project_id, agent_id, agent_tags, run_id, step_id, trace_id, call_type, provider, model, is_byok, request_size_bytes, response_size_bytes, prompt_tokens, completion_tokens, total_tokens, cached_input_tokens, cache_write_tokens, reasoning_tokens, latency_ms, is_error, error_type, error_message, request_json, response_json, llm_config_json, created_at FROM llm_traces WHERE {where_clause} ORDER BY created_at DESC LIMIT %(limit)s OFFSET %(offset)s """ rows = await asyncio.to_thread(self._query_sync, query, params) return [self._row_to_trace(row) for row in rows] async def get_usage_stats_async( self, organization_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None, group_by: str = "model", # 'model', 'agent_id', 'call_type' ) -> List[dict[str, Any]]: """ Get aggregated usage statistics. Args: organization_id: Organization ID for access control start_date: Filter by created_at >= start_date end_date: Filter by created_at <= end_date group_by: Field to group by ('model', 'agent_id', 'call_type') Returns: List of aggregated stats dicts """ valid_group_by = {"model", "agent_id", "call_type", "provider"} if group_by not in valid_group_by: raise ValueError(f"group_by must be one of {valid_group_by}") conditions = ["organization_id = %(organization_id)s"] params: dict[str, Any] = {"organization_id": organization_id} if start_date: conditions.append("created_at >= %(start_date)s") params["start_date"] = start_date if end_date: conditions.append("created_at <= %(end_date)s") params["end_date"] = end_date where_clause = " AND ".join(conditions) query = f""" SELECT {group_by}, count() as request_count, sum(total_tokens) as total_tokens, sum(prompt_tokens) as prompt_tokens, sum(completion_tokens) as completion_tokens, avg(latency_ms) as avg_latency_ms, sum(request_size_bytes) as total_request_bytes, sum(response_size_bytes) as total_response_bytes, countIf(is_error = 1) as error_count FROM llm_traces WHERE {where_clause} GROUP BY {group_by} ORDER BY total_tokens DESC """ rows = await asyncio.to_thread(self._query_sync, query, params) return [ { group_by: row[0], "request_count": row[1], "total_tokens": row[2], "prompt_tokens": row[3], "completion_tokens": row[4], "avg_latency_ms": row[5], "total_request_bytes": row[6], "total_response_bytes": row[7], "error_count": row[8], } for row in rows ] async def find_large_requests_async( self, organization_id: str, min_size_bytes: int = 1_000_000, # 1MB default limit: int = 100, ) -> List[LLMTrace]: """ Find traces with large request payloads (for debugging). Args: organization_id: Organization ID for access control min_size_bytes: Minimum request size in bytes (default 1MB) limit: Maximum number of results Returns: List of LLMTrace objects with large requests """ query = """ SELECT id, organization_id, project_id, agent_id, agent_tags, run_id, step_id, trace_id, call_type, provider, model, is_byok, request_size_bytes, response_size_bytes, prompt_tokens, completion_tokens, total_tokens, cached_input_tokens, cache_write_tokens, reasoning_tokens, latency_ms, is_error, error_type, error_message, request_json, response_json, llm_config_json, created_at FROM llm_traces WHERE organization_id = %(organization_id)s AND request_size_bytes >= %(min_size_bytes)s ORDER BY request_size_bytes DESC LIMIT %(limit)s """ rows = await asyncio.to_thread( self._query_sync, query, { "organization_id": organization_id, "min_size_bytes": min_size_bytes, "limit": limit, }, ) return [self._row_to_trace(row) for row in rows] # Module-level instance for easy access _reader_instance: Optional[LLMTraceReader] = None def get_llm_trace_reader() -> LLMTraceReader: """Get the singleton LLMTraceReader instance.""" global _reader_instance if _reader_instance is None: _reader_instance = LLMTraceReader() return _reader_instance