feat: bake otel collector into letta image (#1292)

This commit is contained in:
cthomas
2025-03-14 16:04:03 -07:00
committed by GitHub
parent 56679d2cea
commit 0198201bbe
10 changed files with 141 additions and 59 deletions

View File

@@ -40,12 +40,22 @@ RUN poetry lock --no-update && \
# Runtime stage
FROM ankane/pgvector:v0.5.1 AS runtime
# Install Python packages
# Install Python packages and OpenTelemetry Collector
# Installs the Python runtime and curl, creates /app, then downloads the
# OpenTelemetry Collector (contrib distribution) into /usr/local/bin and
# creates /etc/otel for its configuration files.
#
# The release archive is selected from the image's own architecture instead of
# a hard-coded amd64, so a non-x86 build does not bake in a binary it cannot
# execute. NOTE(review): this assumes the Debian architecture name printed by
# dpkg (amd64, arm64) matches the suffix used by the collector release assets -
# confirm for any other target architecture.
#
# curl runs with -f so an HTTP error aborts the build here rather than saving
# an error page that only fails later, less clearly, inside tar.
RUN apt-get update && apt-get install -y \
    python3 \
    python3-venv \
    curl \
    && rm -rf /var/lib/apt/lists/* \
    && mkdir -p /app \
    # Install OpenTelemetry Collector
    && OTEL_VERSION=0.96.0 \
    && OTEL_ARCH="$(dpkg --print-architecture)" \
    && curl -fsSL "https://github.com/open-telemetry/opentelemetry-collector-releases/releases/download/v${OTEL_VERSION}/otelcol-contrib_${OTEL_VERSION}_linux_${OTEL_ARCH}.tar.gz" -o /tmp/otel-collector.tar.gz \
    && tar xzf /tmp/otel-collector.tar.gz -C /usr/local/bin \
    && rm /tmp/otel-collector.tar.gz \
    && mkdir -p /etc/otel
# Add OpenTelemetry Collector configs
# Two configurations are shipped in /etc/otel:
#   config-file.yaml       - traces are written to a local file only.
#   config-clickhouse.yaml - traces go to the local file and to ClickHouse,
#                            and the Letta log file is shipped to ClickHouse.
# The startup script chooses between them when the container starts, based on
# whether CLICKHOUSE_ENDPOINT and CLICKHOUSE_PASSWORD are set.
COPY otel-collector-config-file.yaml /etc/otel/config-file.yaml
COPY otel-collector-config-clickhouse.yaml /etc/otel/config-clickhouse.yaml
ARG LETTA_ENVIRONMENT=PRODUCTION
ENV LETTA_ENVIRONMENT=${LETTA_ENVIRONMENT} \
@@ -54,7 +64,8 @@ ENV LETTA_ENVIRONMENT=${LETTA_ENVIRONMENT} \
POSTGRES_USER=letta \
POSTGRES_PASSWORD=letta \
POSTGRES_DB=letta \
COMPOSIO_DISABLE_VERSION_CHECK=true
COMPOSIO_DISABLE_VERSION_CHECK=true \
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
WORKDIR /app
@@ -64,7 +75,7 @@ COPY --from=builder /app .
# Copy initialization SQL if it exists
COPY init.sql /docker-entrypoint-initdb.d/
EXPOSE 8283 5432
EXPOSE 8283 5432 4317 4318
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
CMD ["./letta/server/startup.sh"]

View File

@@ -1,19 +0,0 @@
services:
letta_server:
environment:
- ENV_NAME=${ENV_NAME} # optional service name
- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
otel-collector:
image: otel/opentelemetry-collector-contrib:0.92.0
command: ["--config=/etc/otel-collector-config.yaml"]
volumes:
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml
environment:
- CLICKHOUSE_ENDPOINT=${CLICKHOUSE_ENDPOINT}
- CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE}
- CLICKHOUSE_USER=${CLICKHOUSE_USER}
- CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD}
ports:
- "4317:4317"
- "4318:4318"

View File

@@ -49,6 +49,10 @@ services:
- VLLM_API_BASE=${VLLM_API_BASE}
- OPENLLM_AUTH_TYPE=${OPENLLM_AUTH_TYPE}
- OPENLLM_API_KEY=${OPENLLM_API_KEY}
- CLICKHOUSE_ENDPOINT=${CLICKHOUSE_ENDPOINT}
- CLICKHOUSE_DATABASE=${CLICKHOUSE_DATABASE}
- CLICKHOUSE_USERNAME=${CLICKHOUSE_USERNAME}
- CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD}
# volumes:
# - ./configs/server_config.yaml:/root/.letta/config # config file
# - ~/.letta/credentials:/root/.letta/credentials # credentials file

View File

@@ -53,6 +53,26 @@ if [ "${SECURE:-false}" = "true" ]; then
CMD="$CMD --secure"
fi
# Start OpenTelemetry Collector in the background.
# The ClickHouse configuration is used only when both the endpoint and the
# password are provided; otherwise traces are exported to a local file.
if [ -n "$CLICKHOUSE_ENDPOINT" ] && [ -n "$CLICKHOUSE_PASSWORD" ]; then
    echo "Starting OpenTelemetry Collector with Clickhouse export..."
    CONFIG_FILE="/etc/otel/config-clickhouse.yaml"
else
    echo "Starting OpenTelemetry Collector with file export only..."
    CONFIG_FILE="/etc/otel/config-file.yaml"
fi

/usr/local/bin/otelcol-contrib --config "$CONFIG_FILE" &
OTEL_PID=$!

# Stop the collector when this script exits. Errors are ignored because the
# collector may already have exited on its own by the time this runs.
cleanup() {
    echo "Shutting down..."
    kill "$OTEL_PID" 2>/dev/null || true
    wait "$OTEL_PID" 2>/dev/null || true
}
trap cleanup EXIT

echo "Starting Letta server at http://$HOST:$PORT..."
echo "Executing: $CMD"

# Run the server as a child process rather than exec-ing it: exec replaces
# this shell, so the EXIT trap above would never fire and the collector would
# never be stopped by cleanup.
# $CMD is intentionally unquoted so that it word-splits into command + args.
$CMD &
SERVER_PID=$!

# Because the server is no longer this process, stop signals delivered to the
# script have to be passed on to it explicitly.
forward_signal() {
    kill -TERM "$SERVER_PID" 2>/dev/null || true
}
trap forward_signal TERM INT

# `wait` returns early when a trapped signal arrives, before the server has
# actually exited, so keep waiting until the server process is gone. The
# `|| SERVER_STATUS=$?` form keeps this safe if the script runs under `set -e`.
SERVER_STATUS=0
wait "$SERVER_PID" || SERVER_STATUS=$?
while kill -0 "$SERVER_PID" 2>/dev/null; do
    SERVER_STATUS=0
    wait "$SERVER_PID" || SERVER_STATUS=$?
done

# Propagate the server's exit status; the EXIT trap runs cleanup on the way out.
exit "$SERVER_STATUS"

View File

@@ -59,7 +59,6 @@ from letta.services.passage_manager import PassageManager
from letta.services.source_manager import SourceManager
from letta.services.tool_manager import ToolManager
from letta.settings import settings
from letta.tracing import trace_method
from letta.utils import enforce_types, united_diff
logger = get_logger(__name__)
@@ -83,7 +82,6 @@ class AgentManager:
# ======================================================================================================================
# Basic CRUD operations
# ======================================================================================================================
@trace_method
@enforce_types
def create_agent(
self,
@@ -446,7 +444,6 @@ class AgentManager:
agent = AgentModel.read(db_session=session, name=agent_name, actor=actor)
return agent.to_pydantic()
@trace_method
@enforce_types
def delete_agent(self, agent_id: str, actor: PydanticUser) -> None:
"""

View File

@@ -173,6 +173,7 @@ class Settings(BaseSettings):
# telemetry logging
verbose_telemetry_logging: bool = False
otel_exporter_otlp_endpoint: str = "http://localhost:4317"
# uvicorn settings
uvicorn_workers: int = 1

View File

@@ -207,7 +207,7 @@ def log_event(name: str, attributes: Optional[Dict[str, Any]] = None, timestamp:
current_span = trace.get_current_span()
if current_span:
if timestamp is None:
timestamp = int(time.perf_counter_ns())
timestamp = time.time_ns()
def _safe_convert(v):
if isinstance(v, (str, bool, int, float)):

View File

@@ -0,0 +1,73 @@
# OpenTelemetry Collector configuration used when ClickHouse export is enabled.
# Traces arrive over OTLP and are written both to a local file and to
# ClickHouse; the Letta server log file is tailed and shipped to ClickHouse.
receivers:
  # OTLP endpoint the Letta server sends traces to (gRPC 4317, HTTP 4318).
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318
  # Tails the Letta log file and turns each record into a log entry.
  filelog:
    include:
      - /root/.letta/logs/Letta.log
    multiline:
      # A new record starts at a line beginning with "YYYY-MM-DD HH:MM:SS,mmm";
      # anything else is treated as a continuation of the previous record.
      line_start_pattern: ^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}
    operators:
      # Extract timestamp and other fields
      - type: regex_parser
        regex: '^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+-\s+(?P<component>[\w\.-]+)\s+-\s+(?P<severity>\w+)\s+-\s+(?P<body>.*)$'
      # Parse the timestamp
      - type: time_parser
        parse_from: attributes.timestamp
        layout: '%Y-%m-%d %H:%M:%S,%L'
      # Set severity
      # Mapping keys are the collector's severity levels; the values are the
      # strings expected in the log line. The keys must therefore be `warn`
      # and `fatal` - `warning` and `critical` are not severity levels.
      # NOTE(review): WARNING and CRITICAL are presumably the level names the
      # Letta logger writes - confirm against the server's logging format.
      - type: severity_parser
        parse_from: attributes.severity
        mapping:
          debug: DEBUG
          info: INFO
          warn: WARNING
          error: ERROR
          fatal: CRITICAL
      # Add resource attributes
      - type: add
        field: resource.service_name
        value: letta-server
      - type: add
        field: resource.environment
        value: ${ENV_NAME}

processors:
  # Group telemetry into batches before export.
  batch:
    timeout: 1s
    send_batch_size: 1024

exporters:
  # Local copy of the traces, with rotation limits on the output file.
  file:
    path: /root/.letta/logs/traces.json
    rotation:
      max_megabytes: 100
      max_days: 7
      max_backups: 5
  # Connection settings are taken from the container environment.
  clickhouse:
    endpoint: ${CLICKHOUSE_ENDPOINT}
    database: ${CLICKHOUSE_DATABASE}
    username: ${CLICKHOUSE_USERNAME}
    password: ${CLICKHOUSE_PASSWORD}
    timeout: 5s
    sending_queue:
      queue_size: 100
    retry_on_failure:
      enabled: true
      initial_interval: 5s
      max_interval: 30s
      max_elapsed_time: 300s

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [batch]
      exporters: [file, clickhouse]
    logs:
      receivers: [filelog]
      processors: [batch]
      exporters: [clickhouse]

View File

@@ -0,0 +1,27 @@
# OpenTelemetry Collector configuration for file-only export.
# Traces received over OTLP are batched and appended to a local JSON file;
# nothing is sent to an external backend.

receivers:
  otlp:
    protocols:
      # gRPC listener
      grpc:
        endpoint: 0.0.0.0:4317
      # HTTP listener
      http:
        endpoint: 0.0.0.0:4318

processors:
  # Group spans into batches before they are handed to the exporter.
  batch:
    timeout: 1s
    send_batch_size: 1024

exporters:
  file:
    path: /root/.letta/logs/traces.json
    # Rotation limits for the trace file.
    rotation:
      max_megabytes: 100
      max_days: 7
      max_backups: 5

service:
  pipelines:
    # Single pipeline: OTLP in -> batch -> file out.
    traces:
      receivers:
        - otlp
      processors:
        - batch
      exporters:
        - file

View File

@@ -1,32 +0,0 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
processors:
batch:
timeout: 1s
send_batch_size: 1024
exporters:
clickhouse:
endpoint: ${CLICKHOUSE_ENDPOINT}
username: ${CLICKHOUSE_USER}
password: ${CLICKHOUSE_PASSWORD}
database: ${CLICKHOUSE_DATABASE}
timeout: 10s
retry_on_failure:
enabled: true
initial_interval: 5s
max_interval: 30s
max_elapsed_time: 300s
service:
pipelines:
traces:
receivers: [otlp]
processors: [batch]
exporters: [clickhouse]