feat: add otel collector to prod deployment (#1320)

This commit is contained in:
cthomas
2025-03-17 16:31:54 -07:00
committed by GitHub
parent 6720ff6382
commit 6962a6bcb3
4 changed files with 76 additions and 34 deletions

View File

@@ -257,7 +257,7 @@ def create_application() -> "FastAPI":
# Set up OpenTelemetry tracing
otlp_endpoint = settings.otel_exporter_otlp_endpoint
if otlp_endpoint:
if otlp_endpoint and not settings.disable_tracing:
print(f"▶ Using OTLP tracing with endpoint: {otlp_endpoint}")
env_name_suffix = os.getenv("ENV_NAME")
service_name = f"letta-server-{env_name_suffix.lower()}" if env_name_suffix else "letta-server"

View File

@@ -180,6 +180,7 @@ class Settings(BaseSettings):
# telemetry logging
verbose_telemetry_logging: bool = False
otel_exporter_otlp_endpoint: str = "http://localhost:4317"
disable_tracing: bool = False
# uvicorn settings
uvicorn_workers: int = 1

View File

@@ -0,0 +1,74 @@
# OpenTelemetry Collector configuration.
# Two pipelines are defined at the bottom of this file:
#   traces: otlp    -> batch -> clickhouse
#   logs:   filelog -> batch -> clickhouse
receivers:
  # OTLP receiver: accepts telemetry over gRPC (4317) and HTTP (4318),
  # bound to all interfaces (0.0.0.0).
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318
  # Tails the Letta server log file and turns each entry into a log record.
  filelog:
    include:
      - /root/.letta/logs/Letta.log
    multiline:
      # A new entry begins at a line starting with a
      # "YYYY-MM-DD HH:MM:SS,mmm" timestamp; lines that do not match are
      # presumably folded into the preceding entry (e.g. tracebacks).
      line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}
    operators:
      # Extract timestamp and other fields.
      # Expected line shape: "<timestamp> - <component> - <severity> - <body>".
      # The named groups are read below as attributes.timestamp and
      # attributes.severity.
      - type: regex_parser
        regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<component>[\w\.-]+)\s+-\s+(?P<severity>\w+)\s+-\s+(?P<body>.*)$'
      # Parse the timestamp captured above (strptime-style layout; the part
      # after the comma is the 3-digit fraction matched by the regex).
      - type: time_parser
        parse_from: attributes.timestamp
        layout: '%Y-%m-%d %H:%M:%S,%L'
      # Set severity: map the level names captured from the log line onto
      # collector severity levels.
      - type: severity_parser
        parse_from: attributes.severity
        mapping:
          debug: DEBUG
          info: INFO
          warning: WARN
          error: ERROR
          critical: FATAL
      # Add resource attributes identifying the service and environment.
      # NOTE(review): the conventional resource key is "service.name"; this
      # writes a key named "service_name" instead. Confirm downstream queries
      # expect that spelling.
      - type: add
        field: resource.service_name
        value: letta-server
      # ${ENV_NAME} is expected to be substituted from the collector's
      # environment when the config is loaded.
      - type: add
        field: resource.environment
        value: ${ENV_NAME}
processors:
  # Batch telemetry before export: flush after 1s or once 1024 items are
  # queued.
  batch:
    timeout: 1s
    send_batch_size: 1024
exporters:
  # ClickHouse exporter; connection details and credentials are supplied
  # through environment variables rather than stored in this file.
  clickhouse:
    endpoint: ${CLICKHOUSE_ENDPOINT}
    database: ${CLICKHOUSE_DATABASE}
    username: ${CLICKHOUSE_USERNAME}
    password: ${CLICKHOUSE_PASSWORD}
    timeout: 5s
    sending_queue:
      queue_size: 100
    # Retry failed exports with a backoff that starts at 5s and is capped at
    # 30s, giving up after 300s in total.
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s
service:
  # Telemetry emitted by the collector about itself.
  telemetry:
    logs:
      level: error
    # NOTE(review): collector docs list none/basic/normal/detailed as metrics
    # levels - confirm "debug" is accepted by the deployed collector version.
    metrics:
      level: debug
    traces:
      level: debug
  pipelines:
    # Traces received over OTLP are batched and written to ClickHouse.
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [clickhouse]
    # Log records tailed from the Letta log file are batched and written to
    # ClickHouse.
    logs:
      receivers: [filelog]
      processors: [batch]
      exporters: [clickhouse]

View File

@@ -5,35 +5,6 @@ receivers:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
filelog:
include:
- /root/.letta/logs/Letta.log
multiline:
line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}
operators:
# Extract timestamp and other fields
- type: regex_parser
regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<component>[\w\.-]+)\s+-\s+(?P<severity>\w+)\s+-\s+(?P<body>.*)$'
# Parse the timestamp
- type: time_parser
parse_from: attributes.timestamp
layout: '%Y-%m-%d %H:%M:%S,%L'
# Set severity
- type: severity_parser
parse_from: attributes.severity
mapping:
debug: DEBUG
info: INFO
warning: WARN
error: ERROR
critical: FATAL
# Add resource attributes
- type: add
field: resource.service_name
value: letta-server
- type: add
field: resource.environment
value: ${ENV_NAME}
processors:
batch:
@@ -70,7 +41,3 @@ service:
receivers: [otlp]
processors: [batch]
exporters: [file, clickhouse]
logs:
receivers: [filelog]
processors: [batch]
exporters: [clickhouse]