* feat: add billing context to LLM telemetry traces Add billing metadata (plan type, cost source, customer ID) to LLM traces in ClickHouse for cost analytics and attribution. **Data Flow:** - Cloud-API: Extract billing info from subscription in rate limiting, set x-billing-* headers - Core: Parse headers into BillingContext object via dependencies - Adapters: Flow billing_context through all LLM adapters (blocking & streaming) - Agent: Pass billing_context to step() and stream() methods - ClickHouse: Store in billing_plan_type, billing_cost_source, billing_customer_id columns **Changes:** - Add BillingContext schema to provider_trace.py - Add billing columns to llm_traces ClickHouse table DDL - Update getCustomerSubscription to fetch stripeCustomerId from organization_billing_details - Propagate billing_context through agent step flow, adapters, and streaming service - Update ProviderTrace and LLMTrace to include billing metadata - Regenerate SDK with autogen **Production Deployment:** Requires env vars: LETTA_PROVIDER_TRACE_BACKEND=clickhouse, LETTA_STORE_LLM_TRACES=true, CLICKHOUSE_* 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: add billing_context parameter to agent step methods - Add billing_context to BaseAgent and BaseAgentV2 abstract methods - Update LettaAgent, LettaAgentV2, LettaAgentV3 step methods - Update multi-agent groups: SleeptimeMultiAgentV2, V3, V4 - Fix test_utils.py to include billing header parameters - Import BillingContext in all affected files * fix: add billing_context to stream methods - Add billing_context parameter to BaseAgentV2.stream() - Add billing_context parameter to LettaAgentV2.stream() - LettaAgentV3.stream() already has it from previous commit * fix: exclude billing headers from OpenAPI spec Mark billing headers as internal (include_in_schema=False) so they don't appear in the public API. These are internal headers between cloud-api and core, not part of the public SDK. 
Regenerated SDK with stage-api - removes 10,650 lines of bloat that was causing OOM during Next.js build. * refactor: return billing context from handleUnifiedRateLimiting instead of mutating req Instead of passing req into handleUnifiedRateLimiting and mutating headers inside it: - Return billing context fields (billingPlanType, billingCostSource, billingCustomerId) from handleUnifiedRateLimiting - Set headers in handleMessageRateLimiting (middleware layer) after getting the result - This fixes step-orchestrator compatibility since it doesn't have a real Express req object * chore: remove extra gencode * p --------- Co-authored-by: Letta <noreply@letta.com>
178 lines
7.5 KiB
Python
178 lines
7.5 KiB
Python
"""Schema for LLM request/response traces stored in ClickHouse for analytics."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
from pydantic import Field
|
|
|
|
from letta.helpers.datetime_helpers import get_utc_time
|
|
from letta.schemas.letta_base import LettaBase
|
|
|
|
|
|
class LLMTrace(LettaBase):
    """
    One LLM request/response exchange, shaped for ClickHouse analytics.

    Besides the raw request/response payloads, the trace carries denormalized
    columns (identifiers, model metadata, token usage, billing context) so that
    cost analytics queries can filter and aggregate without parsing JSON.

    Attributes:
        id (str): Trace UUID (the 'provider_trace-' prefix is stripped).
        organization_id (str): Organization this trace belongs to.
        project_id (Optional[str]): Project this trace belongs to.
        agent_id (Optional[str]): Agent that made the request.
        agent_tags (list[str]): Tags associated with the agent.
        run_id (Optional[str]): Run this trace is associated with.
        step_id (Optional[str]): Step that generated this trace.
        trace_id (Optional[str]): OTEL trace ID for correlation.

        call_type (str): Type of LLM call ('agent_step', 'summarization', 'embedding').
        provider (str): LLM provider name ('openai', 'anthropic', 'google_ai', etc.).
        model (str): Model name/identifier.
        is_byok (bool): Whether the request used BYOK (Bring Your Own Key).

        request_size_bytes (int): Size of request_json in bytes.
        response_size_bytes (int): Size of response_json in bytes.
        prompt_tokens (int): Number of prompt tokens.
        completion_tokens (int): Number of completion tokens.
        total_tokens (int): Total tokens (prompt + completion).
        cached_input_tokens (Optional[int]): Input tokens served from cache.
        cache_write_tokens (Optional[int]): Tokens written to cache.
        reasoning_tokens (Optional[int]): Reasoning/thinking tokens generated.
        latency_ms (int): Request latency in milliseconds.

        is_error (bool): Whether the request resulted in an error.
        error_type (Optional[str]): Exception class name if an error occurred.
        error_message (Optional[str]): Error message if an error occurred.

        request_json (str): Full request payload as a JSON string.
        response_json (str): Full response payload as a JSON string.
        llm_config_json (str): LLM config as a JSON string.

        billing_plan_type (Optional[str]): Subscription tier.
        billing_cost_source (Optional[str]): Cost source ('quota' or 'credits').
        billing_customer_id (Optional[str]): Customer ID for billing cross-reference.

        created_at (datetime): Timestamp when the trace was created.
    """

    __id_prefix__ = "llm_trace"

    # --- Identity -----------------------------------------------------------
    # UUID portion of ProviderTrace.id; the prefix is dropped for ClickHouse.
    id: str = Field(..., description="Trace UUID (strip 'provider_trace-' prefix to correlate)")

    # --- Where the call came from -------------------------------------------
    organization_id: str = Field(..., description="Organization this trace belongs to")
    project_id: Optional[str] = Field(default=None, description="Project this trace belongs to")
    agent_id: Optional[str] = Field(default=None, description="Agent that made the request")
    agent_tags: list[str] = Field(default_factory=list, description="Tags associated with the agent")
    run_id: Optional[str] = Field(default=None, description="Run this trace is associated with")
    step_id: Optional[str] = Field(default=None, description="Step that generated this trace")
    trace_id: Optional[str] = Field(default=None, description="OTEL trace ID for correlation")

    # --- Queryable request metadata -----------------------------------------
    call_type: str = Field(..., description="Type of LLM call: 'agent_step', 'summarization', 'embedding'")
    provider: str = Field(..., description="LLM provider: 'openai', 'anthropic', 'google_ai', etc.")
    model: str = Field(..., description="Model name/identifier")
    is_byok: bool = Field(default=False, description="Whether this request used BYOK (Bring Your Own Key)")

    # --- Payload sizes ------------------------------------------------------
    request_size_bytes: int = Field(default=0, description="Size of request_json in bytes")
    response_size_bytes: int = Field(default=0, description="Size of response_json in bytes")

    # --- Token accounting ---------------------------------------------------
    prompt_tokens: int = Field(default=0, description="Number of prompt tokens")
    completion_tokens: int = Field(default=0, description="Number of completion tokens")
    total_tokens: int = Field(default=0, description="Total tokens (prompt + completion)")

    # Cache/reasoning counters (from LettaUsageStatistics); None when not reported.
    cached_input_tokens: Optional[int] = Field(default=None, description="Number of input tokens served from cache")
    cache_write_tokens: Optional[int] = Field(default=None, description="Number of tokens written to cache (Anthropic)")
    reasoning_tokens: Optional[int] = Field(default=None, description="Number of reasoning/thinking tokens generated")

    # --- Timing -------------------------------------------------------------
    latency_ms: int = Field(default=0, description="Request latency in milliseconds")

    # --- Failure details ----------------------------------------------------
    is_error: bool = Field(default=False, description="Whether the request resulted in an error")
    error_type: Optional[str] = Field(default=None, description="Exception class name if error")
    error_message: Optional[str] = Field(default=None, description="Error message if error")

    # --- Raw payloads, stored as JSON strings -------------------------------
    request_json: str = Field(..., description="Full request payload as JSON string")
    response_json: str = Field(..., description="Full response payload as JSON string")
    llm_config_json: str = Field(default="", description="LLM config as JSON string")

    # --- Billing context ----------------------------------------------------
    billing_plan_type: Optional[str] = Field(default=None, description="Subscription tier (e.g., 'basic', 'standard', 'max', 'enterprise')")
    billing_cost_source: Optional[str] = Field(default=None, description="Cost source: 'quota' or 'credits'")
    billing_customer_id: Optional[str] = Field(default=None, description="Customer ID for cross-referencing billing records")

    # --- Creation time ------------------------------------------------------
    created_at: datetime = Field(default_factory=get_utc_time, description="When the trace was created")

    def to_clickhouse_row(self) -> tuple:
        """Flatten this trace into a positional tuple for ClickHouse insertion.

        Values appear in the same order as the names returned by
        ``clickhouse_columns()``. Unset optional string fields are emitted as
        ``""``, the boolean flags as ``1``/``0``, and the optional token
        counters are passed through unchanged (so they may be ``None``).

        Returns:
            tuple: The 31 column values for one row.
        """
        byok_flag = 1 if self.is_byok else 0
        error_flag = 1 if self.is_error else 0

        identifiers = (
            self.id,
            self.organization_id,
            self.project_id or "",
            self.agent_id or "",
            self.agent_tags,
            self.run_id or "",
            self.step_id or "",
            self.trace_id or "",
        )
        request_meta = (self.call_type, self.provider, self.model, byok_flag)
        sizes = (self.request_size_bytes, self.response_size_bytes)
        token_usage = (
            self.prompt_tokens,
            self.completion_tokens,
            self.total_tokens,
            self.cached_input_tokens,
            self.cache_write_tokens,
            self.reasoning_tokens,
        )
        outcome = (
            self.latency_ms,
            error_flag,
            self.error_type or "",
            self.error_message or "",
        )
        payloads = (self.request_json, self.response_json, self.llm_config_json)
        billing = (
            self.billing_plan_type or "",
            self.billing_cost_source or "",
            self.billing_customer_id or "",
        )

        # Concatenation order defines the column order; keep it aligned with
        # clickhouse_columns().
        return identifiers + request_meta + sizes + token_usage + outcome + payloads + billing + (self.created_at,)

    @classmethod
    def clickhouse_columns(cls) -> list[str]:
        """List the ClickHouse column names, in insertion order.

        The order matches the positional values produced by
        ``to_clickhouse_row()``.

        Returns:
            list[str]: A freshly built list of the 31 column names.
        """
        identifiers = [
            "id",
            "organization_id",
            "project_id",
            "agent_id",
            "agent_tags",
            "run_id",
            "step_id",
            "trace_id",
        ]
        request_meta = ["call_type", "provider", "model", "is_byok"]
        sizes = ["request_size_bytes", "response_size_bytes"]
        token_usage = [
            "prompt_tokens",
            "completion_tokens",
            "total_tokens",
            "cached_input_tokens",
            "cache_write_tokens",
            "reasoning_tokens",
        ]
        outcome = ["latency_ms", "is_error", "error_type", "error_message"]
        payloads = ["request_json", "response_json", "llm_config_json"]
        billing = ["billing_plan_type", "billing_cost_source", "billing_customer_id"]

        return [
            *identifiers,
            *request_meta,
            *sizes,
            *token_usage,
            *outcome,
            *payloads,
            *billing,
            "created_at",
        ]
|