# Known environment names (compared upper-cased) and the abbreviated tag each maps to.
_ENVIRONMENT_TAG_ALIASES = {
    "PRODUCTION": "prod",
    "DEV": "dev",
    "DEVELOPMENT": "dev",
    "STAGING": "dev",  # Staging maps to dev
}


def _normalize_environment_tag(env: str) -> str:
    """
    Normalize an environment value for the OTEL deployment.environment tag.

    Maps internal environment values to abbreviated lowercase tags for Datadog.
    Matching is case-insensitive and ignores leading/trailing whitespace, so a
    value with a stray space or newline still resolves to the expected tag.

    Args:
        env: Raw environment value. A falsy value (empty string or None) or a
            whitespace-only string is treated as "not set".

    Returns:
        "unknown" when no usable value is given, the abbreviated tag for a
        known environment name, otherwise the stripped value lowercased.

    Examples:
        PRODUCTION  -> prod
        DEV         -> dev
        DEVELOPMENT -> dev
        STAGING     -> dev
        CANARY      -> canary
        LOCAL-TEST  -> local-test
    """
    cleaned = env.strip() if env else ""
    if not cleaned:
        return "unknown"

    # Known names use their abbreviation; anything else (canary, local-test,
    # etc.) is passed through lowercased.
    return _ENVIRONMENT_TAG_ALIASES.get(cleaned.upper(), cleaned.lower())
# Initialize Datadog tracer for APM (distributed tracing) + import ddtrace + + ddtrace.patch_all() # Auto-instrument FastAPI, HTTP, DB, etc. + logger.info( + f"Datadog tracer initialized: env={dd_env}, " + f"service={telemetry_settings.datadog_service_name}, " + f"agent={telemetry_settings.datadog_agent_host}:{telemetry_settings.datadog_agent_port}" + ) + if telemetry_settings.datadog_profiling_enabled: from ddtrace.profiling import Profiler @@ -268,7 +278,7 @@ def create_application() -> "FastAPI": f"agent={telemetry_settings.datadog_agent_host}:{telemetry_settings.datadog_agent_port}{git_info}" ) except Exception as e: - logger.error(f"Failed to initialize Datadog profiling: {e}", exc_info=True) + logger.error(f"Failed to initialize Datadog tracing/profiling: {e}", exc_info=True) if SENTRY_ENABLED: sentry_sdk.capture_exception(e) # Don't fail application startup if Datadog initialization fails diff --git a/letta/settings.py b/letta/settings.py index 9f929eeb..ce096c8b 100644 --- a/letta/settings.py +++ b/letta/settings.py @@ -233,7 +233,10 @@ class Settings(BaseSettings): letta_dir: Optional[Path] = Field(Path.home() / ".letta", alias="LETTA_DIR") debug: Optional[bool] = False cors_origins: Optional[list] = cors_origins - environment: Optional[str] = Field(default=None, description="Application environment (PRODUCTION, DEV, etc.)") + environment: Optional[str] = Field( + default=None, + description="Application environment (PRODUCTION, DEV, CANARY, etc. - normalized to lowercase for OTEL tags)", + ) # SSE Streaming keepalive settings enable_keepalive: bool = Field(True, description="Enable keepalive messages in SSE streams to prevent timeouts")