* feat: add metadata-only provider trace storage option Add support for writing provider traces to a lightweight metadata-only table (~1.5GB) instead of the full table (~725GB) since request/response JSON is now stored in GCS. - Add `LETTA_TELEMETRY_PROVIDER_TRACE_PG_METADATA_ONLY` setting - Create `provider_trace_metadata` table via alembic migration - Conditionally write to new table when flag is enabled - Include backfill script for migrating existing data 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * chore: regenerate API spec and SDK * fix: use composite PK (created_at, id) for provider_trace_metadata Aligns with GCS partitioning structure (raw/date=YYYY-MM-DD/{id}.json.gz) and enables efficient date-range queries via the B-tree index. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * amendments * fix: add bulk data copy to migration Copy existing provider_traces metadata in-migration instead of separate backfill script. Creates indexes after bulk insert for better performance. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: remove data copy from migration, create empty table only Old data stays in provider_traces, new writes go to provider_trace_metadata when flag is enabled. Full traces are in GCS anyway. 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: address PR comments - Remove GCS mention from ProviderTraceMetadata docstring - Move metadata object creation outside session context 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * fix: reads always use full provider_traces table The metadata_only flag should only control writes. Reads always go to the full table to avoid returning ProviderTraceMetadata where ProviderTrace is expected. 
🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> * feat: enable metadata-only provider trace writes in prod Add LETTA_TELEMETRY_PROVIDER_TRACE_PG_METADATA_ONLY=true to all Helm values (memgpt-server and lettuce-py, prod and dev). 🐾 Generated with [Letta Code](https://letta.com) Co-Authored-By: Letta <noreply@letta.com> --------- Co-authored-by: Letta <noreply@letta.com>
46 lines
2.3 KiB
Python
46 lines
2.3 KiB
Python
import uuid
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
|
|
from sqlalchemy import JSON, DateTime, Index, String, UniqueConstraint, func
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from letta.orm.mixins import OrganizationMixin
|
|
from letta.orm.sqlalchemy_base import SqlalchemyBase
|
|
from letta.schemas.provider_trace import ProviderTraceMetadata as PydanticProviderTraceMetadata
|
|
|
|
|
|
class ProviderTraceMetadata(SqlalchemyBase, OrganizationMixin):
    """Lightweight provider trace row that carries only metadata columns.

    This class declares identifying and telemetry-context fields for a
    provider trace and no request/response JSON payload columns. Rows are
    keyed by the composite primary key ``(created_at, id)``; ``id`` also has
    its own unique constraint, and ``step_id`` is indexed.
    """

    __tablename__ = "provider_trace_metadata"
    __pydantic_model__ = PydanticProviderTraceMetadata
    __table_args__ = (
        Index("ix_provider_trace_metadata_step_id", "step_id"),
        UniqueConstraint("id", name="uq_provider_trace_metadata_id"),
    )

    # --- Composite primary key: (created_at, id) ---
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        doc="Timestamp when the trace was created",
        server_default=func.now(),
        primary_key=True,
    )
    id: Mapped[str] = mapped_column(
        String,
        doc="Unique provider trace identifier",
        # Generated client-side when the caller does not supply an id.
        default=lambda: f"provider_trace-{uuid.uuid4()}",
        primary_key=True,
    )
    step_id: Mapped[Optional[str]] = mapped_column(
        String,
        doc="ID of the step that this trace is associated with",
        nullable=True,
    )

    # --- Telemetry context fields (all nullable) ---
    agent_id: Mapped[Optional[str]] = mapped_column(
        String,
        doc="ID of the agent that generated this trace",
        nullable=True,
    )
    agent_tags: Mapped[Optional[list]] = mapped_column(
        JSON,
        doc="Tags associated with the agent for filtering",
        nullable=True,
    )
    call_type: Mapped[Optional[str]] = mapped_column(
        String,
        doc="Type of call (agent_step, summarization, etc.)",
        nullable=True,
    )
    run_id: Mapped[Optional[str]] = mapped_column(
        String,
        doc="ID of the run this trace is associated with",
        nullable=True,
    )
    source: Mapped[Optional[str]] = mapped_column(
        String,
        doc="Source service that generated this trace (memgpt-server, lettuce-py)",
        nullable=True,
    )

    # --- v2 protocol fields (all nullable) ---
    org_id: Mapped[Optional[str]] = mapped_column(
        String,
        doc="ID of the organization",
        nullable=True,
    )
    user_id: Mapped[Optional[str]] = mapped_column(
        String,
        doc="ID of the user who initiated the request",
        nullable=True,
    )

    # --- Relationships ---
    organization: Mapped["Organization"] = relationship(
        "Organization",
        lazy="selectin",
    )
|