From 6962a6bcb3f409a281ee552e83dbf59fa41e0a1a Mon Sep 17 00:00:00 2001 From: cthomas Date: Mon, 17 Mar 2025 16:31:54 -0700 Subject: [PATCH] feat: add otel collector to prod deployment (#1320) --- letta/server/rest_api/app.py | 2 +- letta/settings.py | 1 + otel-collector-config-clickhouse-prod.yaml | 74 ++++++++++++++++++++++ otel-collector-config-clickhouse.yaml | 33 ---------- 4 files changed, 76 insertions(+), 34 deletions(-) create mode 100644 otel-collector-config-clickhouse-prod.yaml diff --git a/letta/server/rest_api/app.py b/letta/server/rest_api/app.py index 6212e584..c2b1c137 100644 --- a/letta/server/rest_api/app.py +++ b/letta/server/rest_api/app.py @@ -257,7 +257,7 @@ def create_application() -> "FastAPI": # Set up OpenTelemetry tracing otlp_endpoint = settings.otel_exporter_otlp_endpoint - if otlp_endpoint: + if otlp_endpoint and not settings.disable_tracing: print(f"▶ Using OTLP tracing with endpoint: {otlp_endpoint}") env_name_suffix = os.getenv("ENV_NAME") service_name = f"letta-server-{env_name_suffix.lower()}" if env_name_suffix else "letta-server" diff --git a/letta/settings.py b/letta/settings.py index 5acfd532..f6ade17e 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -180,6 +180,7 @@ class Settings(BaseSettings): # telemetry logging verbose_telemetry_logging: bool = False otel_exporter_otlp_endpoint: str = "http://localhost:4317" + disable_tracing: bool = False # uvicorn settings uvicorn_workers: int = 1 diff --git a/otel-collector-config-clickhouse-prod.yaml b/otel-collector-config-clickhouse-prod.yaml new file mode 100644 index 00000000..84cdcfc9 --- /dev/null +++ b/otel-collector-config-clickhouse-prod.yaml @@ -0,0 +1,74 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + filelog: + include: + - /root/.letta/logs/Letta.log + multiline: + line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} + operators: + # Extract timestamp and other fields + - type: regex_parser + 
regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<logger>[\w\.-]+)\s+-\s+(?P<severity>\w+)\s+-\s+(?P<message>.*)$' + # Parse the timestamp + - type: time_parser + parse_from: attributes.timestamp + layout: '%Y-%m-%d %H:%M:%S,%L' + # Set severity + - type: severity_parser + parse_from: attributes.severity + mapping: + debug: DEBUG + info: INFO + warning: WARN + error: ERROR + critical: FATAL + # Add resource attributes + - type: add + field: resource.service_name + value: letta-server + - type: add + field: resource.environment + value: ${ENV_NAME} + +processors: + batch: + timeout: 1s + send_batch_size: 1024 + +exporters: + clickhouse: + endpoint: ${CLICKHOUSE_ENDPOINT} + database: ${CLICKHOUSE_DATABASE} + username: ${CLICKHOUSE_USERNAME} + password: ${CLICKHOUSE_PASSWORD} + timeout: 5s + sending_queue: + queue_size: 100 + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s + +service: + telemetry: + logs: + level: error + metrics: + level: debug + traces: + level: debug + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [clickhouse] + logs: + receivers: [filelog] + processors: [batch] + exporters: [clickhouse] diff --git a/otel-collector-config-clickhouse.yaml b/otel-collector-config-clickhouse.yaml index 6840610f..aa00ce0d 100644 --- a/otel-collector-config-clickhouse.yaml +++ b/otel-collector-config-clickhouse.yaml @@ -5,35 +5,6 @@ receivers: endpoint: 0.0.0.0:4317 http: endpoint: 0.0.0.0:4318 - filelog: - include: - - /root/.letta/logs/Letta.log - multiline: - line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - operators: - # Extract timestamp and other fields - - type: regex_parser - regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<logger>[\w\.-]+)\s+-\s+(?P<severity>\w+)\s+-\s+(?P<message>.*)$' - # Parse the timestamp - - type: time_parser - parse_from: attributes.timestamp - layout: '%Y-%m-%d %H:%M:%S,%L' - # Set severity - - type: severity_parser - parse_from: attributes.severity - mapping: 
- debug: DEBUG - info: INFO - warning: WARN - error: ERROR - critical: FATAL - # Add resource attributes - - type: add - field: resource.service_name - value: letta-server - - type: add - field: resource.environment - value: ${ENV_NAME} processors: batch: @@ -70,7 +41,3 @@ service: receivers: [otlp] processors: [batch] exporters: [file, clickhouse] - logs: - receivers: [filelog] - processors: [batch] - exporters: [clickhouse]