* feat: add billing context to LLM telemetry traces Add billing metadata (plan type, cost source, customer ID) to LLM traces in ClickHouse for cost analytics and attribution. **Data Flow:** - Cloud-API: Extract billing info from subscription in rate limiting, set x-billing-* headers - Core: Parse headers into BillingContext object via dependencies - Adapters: Flow billing_context through all LLM adapters (blocking & streaming) - Agent: Pass billing_context to step() and stream() methods - ClickHouse: Store in billing_plan_type, billing_cost_source, billing_customer_id columns **Changes:** - Add BillingContext schema to provider_trace.py - Add billing columns to llm_traces ClickHouse table DDL - Update getCustomerSubscription to fetch stripeCustomerId from organization_billing_details - Propagate billing_context through agent step flow, adapters, and streaming service - Update ProviderTrace and LLMTrace to include billing metadata - Regenerate SDK with autogen **Production Deployment:** Requires env vars: LETTA_PROVIDER_TRACE_BACKEND=clickhouse, LETTA_STORE_LLM_TRACES=true, CLICKHOUSE_* 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: add billing_context parameter to agent step methods - Add billing_context to BaseAgent and BaseAgentV2 abstract methods - Update LettaAgent, LettaAgentV2, LettaAgentV3 step methods - Update multi-agent groups: SleeptimeMultiAgentV2, V3, V4 - Fix test_utils.py to include billing header parameters - Import BillingContext in all affected files * fix: add billing_context to stream methods - Add billing_context parameter to BaseAgentV2.stream() - Add billing_context parameter to LettaAgentV2.stream() - LettaAgentV3.stream() already has it from previous commit * fix: exclude billing headers from OpenAPI spec Mark billing headers as internal (include_in_schema=False) so they don't appear in the public API. These are internal headers between cloud-api and core, not part of the public SDK. 
Regenerated SDK with stage-api - removes 10,650 lines of bloat that was causing OOM during Next.js build. * refactor: return billing context from handleUnifiedRateLimiting instead of mutating req Instead of passing req into handleUnifiedRateLimiting and mutating headers inside it: - Return billing context fields (billingPlanType, billingCostSource, billingCustomerId) from handleUnifiedRateLimiting - Set headers in handleMessageRateLimiting (middleware layer) after getting the result - This fixes step-orchestrator compatibility since it doesn't have a real Express req object * chore: remove extra gencode * p --------- Co-authored-by: Letta <noreply@letta.com>
91 lines
4.1 KiB
Python
91 lines
4.1 KiB
Python
from typing import TYPE_CHECKING, Optional
|
|
|
|
from fastapi import Header
|
|
from pydantic import BaseModel
|
|
|
|
from letta.errors import LettaInvalidArgumentError
|
|
from letta.otel.tracing import tracer
|
|
from letta.schemas.enums import PrimitiveType
|
|
from letta.schemas.provider_trace import BillingContext
|
|
from letta.validators import PRIMITIVE_ID_PATTERNS
|
|
|
|
if TYPE_CHECKING:
|
|
from letta.server.server import SyncServer
|
|
|
|
|
|
class ExperimentalParams(BaseModel):
    """Opt-in experimental switches shared by the REST API endpoints.

    Every field is tri-state: ``None`` means the corresponding
    ``X-Experimental-*`` request header was absent (or empty), otherwise the
    value is the boolean parsed from that header by ``get_headers``.
    """

    # Parsed from the "X-Experimental-Message-Async" header.
    message_async: Optional[bool] = None
    # Parsed from the "X-Experimental-Letta-V1-Agent" header.
    letta_v1_agent: Optional[bool] = None
    # Parsed from the "X-Experimental-Letta-V1-Agent-Message-Async" header.
    letta_v1_agent_message_async: Optional[bool] = None
    # Parsed from the "X-Experimental-Modal-Sandbox" header.
    modal_sandbox: Optional[bool] = None
|
|
|
|
|
|
class HeaderParams(BaseModel):
    """Bundle of request-header values shared by the REST API endpoints.

    Instances are produced by the ``get_headers`` dependency; every field
    defaults to ``None`` when the matching header was not supplied.
    """

    # Value of the "user_id" header (validated against the user ID pattern in get_headers).
    actor_id: Optional[str] = None
    # Value of the "User-Agent" header.
    user_agent: Optional[str] = None
    # Value of the "X-Project-Id" header.
    project_id: Optional[str] = None
    # Value of the "X-Letta-Source" header.
    letta_source: Optional[str] = None
    # Value of the "X-Stainless-Package-Version" header.
    sdk_version: Optional[str] = None
    # Flags parsed from the "X-Experimental-*" headers.
    experimental_params: Optional[ExperimentalParams] = None
    # Built from the "X-Billing-*" headers; None when none of them carries a value.
    billing_context: Optional[BillingContext] = None
|
|
|
|
|
|
def _parse_flag_header(raw: Optional[str]) -> Optional[bool]:
    """Turn an experimental-flag header value into a tri-state boolean.

    A missing or empty header yields ``None``; any other value yields ``True``
    only when it is exactly the string ``"true"`` (case-sensitive).
    """
    if not raw:
        return None
    return raw == "true"


def get_headers(
    actor_id: Optional[str] = Header(None, alias="user_id"),
    user_agent: Optional[str] = Header(None, alias="User-Agent"),
    project_id: Optional[str] = Header(None, alias="X-Project-Id"),
    letta_source: Optional[str] = Header(None, alias="X-Letta-Source", include_in_schema=False),
    sdk_version: Optional[str] = Header(None, alias="X-Stainless-Package-Version", include_in_schema=False),
    message_async: Optional[str] = Header(None, alias="X-Experimental-Message-Async", include_in_schema=False),
    letta_v1_agent: Optional[str] = Header(None, alias="X-Experimental-Letta-V1-Agent", include_in_schema=False),
    letta_v1_agent_message_async: Optional[str] = Header(
        None, alias="X-Experimental-Letta-V1-Agent-Message-Async", include_in_schema=False
    ),
    modal_sandbox: Optional[str] = Header(None, alias="X-Experimental-Modal-Sandbox", include_in_schema=False),
    billing_plan_type: Optional[str] = Header(None, alias="X-Billing-Plan-Type", include_in_schema=False),
    billing_cost_source: Optional[str] = Header(None, alias="X-Billing-Cost-Source", include_in_schema=False),
    billing_customer_id: Optional[str] = Header(None, alias="X-Billing-Customer-Id", include_in_schema=False),
) -> HeaderParams:
    """Gather the common request headers into a single ``HeaderParams`` value.

    Meant to be used as a dependency-injection callable: each parameter is
    bound to the HTTP header named by its ``alias``.

    Returns:
        A ``HeaderParams`` carrying the raw header strings, the parsed
        experimental flags, and a ``BillingContext`` when at least one of the
        ``X-Billing-*`` headers has a non-empty value (``None`` otherwise).

    Raises:
        LettaInvalidArgumentError: if a ``user_id`` header is present but does
            not match the registered user ID pattern.
    """
    with tracer.start_as_current_span("dependency.get_headers"):
        # Reject malformed user IDs up front; an absent header is allowed.
        if actor_id is not None:
            user_id_pattern = PRIMITIVE_ID_PATTERNS[PrimitiveType.USER.value]
            if user_id_pattern.match(actor_id) is None:
                raise LettaInvalidArgumentError(
                    message=(f"Invalid user ID format: {actor_id}. Expected format: '{PrimitiveType.USER.value}-<uuid4>'"),
                    argument_name="user_id",
                )

        experimental = ExperimentalParams(
            message_async=_parse_flag_header(message_async),
            letta_v1_agent=_parse_flag_header(letta_v1_agent),
            letta_v1_agent_message_async=_parse_flag_header(letta_v1_agent_message_async),
            modal_sandbox=_parse_flag_header(modal_sandbox),
        )

        # Only attach billing metadata when at least one billing header has a value.
        billing = None
        if billing_plan_type or billing_cost_source or billing_customer_id:
            billing = BillingContext(
                plan_type=billing_plan_type,
                cost_source=billing_cost_source,
                customer_id=billing_customer_id,
            )

        return HeaderParams(
            actor_id=actor_id,
            user_agent=user_agent,
            project_id=project_id,
            letta_source=letta_source,
            sdk_version=sdk_version,
            experimental_params=experimental,
            billing_context=billing,
        )
|
|
|
|
|
|
# TODO: why does this double up the interface?
|
|
async def get_letta_server() -> "SyncServer":
    """Return the global ``server`` object exposed by ``letta.server.rest_api.app``.

    The lookup runs inside a ``dependency.get_letta_server`` tracing span.
    """
    with tracer.start_as_current_span("dependency.get_letta_server"):
        # Imported at call time rather than at module load
        # (presumably to avoid a circular import -- confirm before hoisting).
        from letta.server.rest_api.app import server as global_server

        # No runtime type check is performed on the returned object.
        return global_server
|