"""Environment-driven configuration for the Letta server.

Each ``BaseSettings`` subclass below groups related options and is populated by
pydantic-settings from environment variables (using the ``env_prefix`` declared
in the class's ``model_config``, where one is declared). Module-level singleton
instances are created at the bottom of the file.

Importing this module has side effects: it applies the config file to the
environment, reads ``ACCEPTABLE_ORIGINS`` and, when ``--use-file-pg-uri`` is
present in ``sys.argv``, reads ``~/.letta/pg_uri``.
"""

import os
import sys
from enum import Enum
from pathlib import Path
from typing import Optional

from pydantic import AliasChoices, Field
from pydantic_settings import BaseSettings, SettingsConfigDict

# Load config file and apply to environment before settings are created
# This allows YAML config values to be picked up by pydantic-settings
from letta.config_file import apply_config_to_env
from letta.schemas.enums import SandboxType
from letta.services.summarizer.enums import SummarizationMode

apply_config_to_env()

# Define constants here to avoid circular import with letta.log
DEFAULT_WRAPPER_NAME = "chatml"
INNER_THOUGHTS_KWARG = "thinking"

# Fallback connection URI returned by Settings.letta_pg_uri when nothing is configured.
_DEFAULT_PG_URI = "postgresql+pg8000://letta:letta@localhost:5432/letta"


class ToolSettings(BaseSettings):
    """Settings for tool execution: sandboxes, search providers and MCP."""

    # Sandbox Configurations
    e2b_api_key: str | None = Field(default=None, description="API key for using E2B as a tool sandbox")
    e2b_sandbox_template_id: str | None = Field(default=None, description="Template ID for E2B Sandbox. Updated Manually.")

    modal_token_id: str | None = Field(default=None, description="Token id for using Modal as a tool sandbox")
    modal_token_secret: str | None = Field(default=None, description="Token secret for using Modal as a tool sandbox")

    # Search Providers
    tavily_api_key: str | None = Field(default=None, description="API key for using Tavily as a search provider.")
    exa_api_key: str | None = Field(default=None, description="API key for using Exa as a search provider.")

    # Local Sandbox configurations
    tool_exec_dir: Optional[str] = None
    tool_sandbox_timeout: float = 180
    tool_exec_venv_name: Optional[str] = None
    tool_exec_autoreload_venv: bool = True

    # MCP settings
    mcp_connect_to_server_timeout: float = 30.0
    mcp_list_tools_timeout: float = 30.0
    mcp_execute_tool_timeout: float = 60.0
    mcp_read_from_config: bool = False  # if False, will throw if attempting to read/write from file
    mcp_disable_stdio: bool = Field(
        default=True,
        description=(
            "Disable MCP stdio server type. When True (default), creating or connecting to "
            "MCP servers using stdio transport will fail. Stdio MCP servers spawn local "
            "processes, which is not suitable for multi-tenant or shared server deployments. "
            "Set to False for local or single-user deployments where stdio-based MCP servers "
            "are needed (e.g., running local tools via npx or uvx)."
        ),
    )

    @property
    def modal_sandbox_enabled(self) -> bool:
        """Check if Modal credentials are configured."""
        return bool(self.modal_token_id and self.modal_token_secret)

    @property
    def sandbox_type(self) -> SandboxType:
        """Default sandbox type based on available credentials.

        Note: Modal is checked separately via modal_sandbox_enabled property.
        This property determines the fallback behavior (E2B or LOCAL).
        """
        if self.e2b_api_key:
            return SandboxType.E2B
        return SandboxType.LOCAL


class SummarizerSettings(BaseSettings):
    """Settings controlling message summarization (env prefix ``letta_summarizer_``)."""

    model_config = SettingsConfigDict(env_prefix="letta_summarizer_", extra="ignore")

    # mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER
    mode: SummarizationMode = SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
    message_buffer_limit: int = 60
    message_buffer_min: int = 15
    enable_summarization: bool = True
    max_summarization_retries: int = 3

    # partial evict summarizer percentage
    # eviction based on percentage of message count, not token count
    partial_evict_summarizer_percentage: float = 0.30

    # TODO(cliandy): the below settings are tied to old summarization and should be deprecated or moved
    # Controls if we should evict all messages
    # TODO: Can refactor this into an enum if we have a bunch of different kinds of summarizers
    evict_all_messages: bool = False

    # The maximum number of retries for the summarizer
    # If we reach this cutoff, it probably means that the summarizer is not compressing down the in-context messages any further
    # And we throw a fatal error
    max_summarizer_retries: int = 3

    # When to warn the model that a summarize command will happen soon
    # The amount of tokens before a system warning about upcoming truncation is sent to Letta
    memory_warning_threshold: float = 0.75

    # Whether to send the system memory warning message
    send_memory_warning_message: bool = False

    # The desired memory pressure to summarize down to
    desired_memory_token_pressure: float = 0.3

    # The number of messages at the end to keep
    # Even when summarizing, we may want to keep a handful of recent messages
    # These serve as in-context examples of how to use functions / what user messages look like
    keep_last_n_messages: int = 0


class ModelSettings(BaseSettings):
    """Credentials and endpoints for LLM providers (no env prefix; also reads ``.env``)."""

    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    global_max_context_window_limit: int = 128000

    inner_thoughts_kwarg: str | None = Field(default=INNER_THOUGHTS_KWARG, description="Key used for passing in inner thoughts.")

    # env_prefix='my_prefix_'

    # when we use /completions APIs (instead of /chat/completions), we need to specify a model wrapper
    # the "model wrapper" is responsible for prompt formatting and function calling parsing
    default_prompt_formatter: str = DEFAULT_WRAPPER_NAME

    # openai
    openai_api_key: Optional[str] = None
    openai_api_base: str = Field(
        default="https://api.openai.com/v1",
        # NOTE: We previously used OPENAI_API_BASE, but this was deprecated in favor of OPENAI_BASE_URL
        # preferred first, fallback second
        # env=["OPENAI_BASE_URL", "OPENAI_API_BASE"],  # pydantic v1 style (no longer used)
        validation_alias=AliasChoices("OPENAI_BASE_URL", "OPENAI_API_BASE"),  # pydantic-settings v2 style
    )

    # openrouter
    openrouter_api_key: Optional[str] = None
    # Optional additional headers recommended by OpenRouter
    # See https://openrouter.ai/docs/quick-start for details
    openrouter_referer: Optional[str] = None  # e.g., your site URL
    openrouter_title: Optional[str] = None  # e.g., your app name
    openrouter_handle_base: Optional[str] = None

    # deepseek
    deepseek_api_key: Optional[str] = None

    # xAI / Grok
    xai_api_key: Optional[str] = None

    # Z.ai (ZhipuAI)
    zai_api_key: Optional[str] = None
    zai_base_url: str = "https://api.z.ai/api/paas/v4/"

    # groq
    groq_api_key: Optional[str] = None

    # minimax
    minimax_api_key: Optional[str] = None

    # Bedrock
    aws_access_key_id: Optional[str] = None
    aws_secret_access_key: Optional[str] = None
    aws_default_region: str = "us-east-1"
    bedrock_anthropic_version: Optional[str] = "bedrock-2023-05-31"

    # anthropic
    anthropic_api_key: Optional[str] = None
    anthropic_max_retries: int = 3
    anthropic_sonnet_1m: bool = Field(
        default=False,
        description=(
            "Enable 1M-token context window for Claude Sonnet 4/4.5/4.6. When true, adds the"
            " 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window"
            " to 1,000,000 instead of 200,000. Note: This feature is in beta and not available"
            " to all orgs; once GA, this flag can be removed and behavior can default to on."
        ),
        alias="ANTHROPIC_SONNET_1M",
    )
    anthropic_opus_1m: bool = Field(
        default=False,
        description=(
            "Enable 1M-token context window for Claude Opus 4.6. When true, adds the"
            " 'context-1m-2025-08-07' beta to Anthropic requests and sets model context_window"
            " to 1,000,000 instead of 200,000. Note: This feature is in beta and not available"
            " to all orgs; once GA, this flag can be removed and behavior can default to on."
        ),
        alias="ANTHROPIC_OPUS_1M",
    )

    # ollama
    ollama_base_url: Optional[str] = None

    # azure
    azure_api_key: Optional[str] = None
    azure_base_url: Optional[str] = None
    # We provide a default here, since usually people will want to be on the latest API version.
    azure_api_version: Optional[str] = (
        "2024-09-01-preview"  # https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
    )

    # google ai
    gemini_api_key: Optional[str] = None
    gemini_base_url: str = "https://generativelanguage.googleapis.com/"
    gemini_force_minimum_thinking_budget: bool = False
    gemini_max_retries: int = 5
    gemini_timeout_seconds: float = 600.0

    # google vertex
    google_cloud_project: Optional[str] = None
    google_cloud_location: Optional[str] = None

    # together
    together_api_key: Optional[str] = None

    # vLLM
    vllm_api_base: Optional[str] = None
    vllm_handle_base: Optional[str] = None

    # SGLang
    sglang_api_base: Optional[str] = None
    sglang_handle_base: Optional[str] = None

    # lmstudio
    lmstudio_base_url: Optional[str] = None

    # openllm
    openllm_auth_type: Optional[str] = None
    openllm_api_key: Optional[str] = None


env_cors_origins = os.getenv("ACCEPTABLE_ORIGINS")

cors_origins = [
    "http://letta.localhost",
    "http://localhost:8283",
    "http://localhost:8083",
    "http://localhost:3000",
    "http://localhost:4200",
]

# attach the env_cors_origins to the cors_origins if it exists
if env_cors_origins:
    # Strip whitespace and drop empty entries so "a, b," yields ["a", "b"];
    # an origin with stray whitespace would never match a request's Origin header.
    cors_origins.extend(origin.strip() for origin in env_cors_origins.split(",") if origin.strip())

# read pg_uri from ~/.letta/pg_uri or set to none, this is to support Letta Desktop
default_pg_uri = None

## check if --use-file-pg-uri is passed
if "--use-file-pg-uri" in sys.argv:
    try:
        with open(Path.home() / ".letta/pg_uri", "r", encoding="utf-8") as f:
            # Strip the trailing newline most editors/`echo` append; treat an empty file as "not set".
            default_pg_uri = f.read().strip() or None
        # NOTE(review): the URI may embed credentials and is printed verbatim — confirm this is acceptable.
        print(f"Read pg_uri from ~/.letta/pg_uri: {default_pg_uri}")
    except FileNotFoundError:
        pass


class DatabaseChoice(str, Enum):
    """Database backends selectable via ``Settings.database_engine``."""

    POSTGRES = "postgres"
    SQLITE = "sqlite"


class Settings(BaseSettings):
    """Core server settings (env prefix ``letta_``)."""

    model_config = SettingsConfigDict(env_prefix="letta_", extra="ignore")

    letta_dir: Optional[Path] = Field(Path.home() / ".letta", alias="LETTA_DIR")
    debug: Optional[bool] = False
    cors_origins: Optional[list] = cors_origins
    environment: Optional[str] = Field(
        default=None,
        description="Application environment (prod, dev, canary, etc. - lowercase values used for OTEL tags)",
    )

    # SSE Streaming keepalive settings
    enable_keepalive: bool = Field(True, description="Enable keepalive messages in SSE streams to prevent timeouts")
    keepalive_interval: float = Field(20.0, description="Seconds between keepalive messages (default: 20)")

    # SSE Streaming cancellation settings
    enable_cancellation_aware_streaming: bool = Field(True, description="Enable cancellation aware streaming")

    # default handles
    default_llm_handle: Optional[str] = None
    default_embedding_handle: Optional[str] = None

    # database configuration
    pg_db: Optional[str] = None
    pg_user: Optional[str] = None
    pg_password: Optional[str] = None
    pg_host: Optional[str] = None
    pg_port: Optional[int] = None
    pg_uri: Optional[str] = default_pg_uri  # option to specify full uri
    pg_pool_size: int = 25  # Concurrent connections
    pg_max_overflow: int = 10  # Overflow limit
    pg_pool_timeout: int = 30  # Seconds to wait for a connection
    pg_pool_recycle: int = 1800  # When to recycle connections
    pg_echo: bool = False  # Logging
    pool_pre_ping: bool = True  # Pre ping to check for dead connections
    pool_use_lifo: bool = True
    disable_sqlalchemy_pooling: bool = True
    db_max_concurrent_sessions: Optional[int] = None

    redis_host: Optional[str] = Field(default=None, description="Host for Redis instance")
    redis_port: Optional[int] = Field(default=6379, description="Port for Redis instance")

    plugin_register: Optional[str] = None

    # Object storage (used for git-backed memory repos)
    #
    # Prefer configuring a single URI rather than multiple provider-specific env vars.
    # Example:
    #   LETTA_OBJECT_STORE_URI="gs://my-bucket/repository?project=my-gcp-project"
    object_store_uri: str | None = Field(
        default=None,
        validation_alias=AliasChoices("LETTA_OBJECT_STORE_URI"),
        description="Object store URI for memory repositories (e.g., gs://bucket/prefix?project=...).",
    )

    # Optional overrides for URI query params. These are primarily useful for deployments
    # where you want to keep the URI stable but inject environment-specific settings.
    object_store_project: str | None = Field(
        default=None,
        validation_alias=AliasChoices("LETTA_OBJECT_STORE_PROJECT"),
        description="Optional project override for object store clients (e.g., GCS project).",
    )

    # memfs service URL - when set, git memory operations are proxied to the memfs service
    # instead of running locally. This enables separating git/GCS operations into a dedicated service.
    memfs_service_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices("LETTA_MEMFS_SERVICE_URL"),
        description="URL of the memfs service (e.g., http://memfs-py:8285). When set, git memory operations use this service.",
    )

    # multi agent settings
    multi_agent_send_message_max_retries: int = 3
    multi_agent_send_message_timeout: int = 20 * 60
    multi_agent_concurrent_sends: int = 50

    # telemetry logging
    otel_exporter_otlp_endpoint: str | None = None  # otel default: "http://localhost:4317"

    # clickhouse (for OTEL traces reader)
    clickhouse_endpoint: str | None = Field(
        default=None,
        validation_alias=AliasChoices("CLICKHOUSE_ENDPOINT", "letta_clickhouse_endpoint"),
        description="ClickHouse endpoint URL",
    )
    clickhouse_database: str | None = Field(
        default="otel",
        validation_alias=AliasChoices("CLICKHOUSE_DATABASE", "letta_clickhouse_database"),
        description="ClickHouse database name",
    )
    clickhouse_username: str | None = Field(
        default="default",
        validation_alias=AliasChoices("CLICKHOUSE_USERNAME", "letta_clickhouse_username"),
        description="ClickHouse username",
    )
    clickhouse_password: str | None = Field(
        default=None,
        validation_alias=AliasChoices("CLICKHOUSE_PASSWORD", "letta_clickhouse_password"),
        description="ClickHouse password",
    )

    otel_preferred_temporality: int | None = Field(
        default=1, ge=0, le=2, description="Exported metric temporality. {0: UNSPECIFIED, 1: DELTA, 2: CUMULATIVE}"
    )
    disable_tracing: bool = Field(default=False, description="Disable OTEL Tracing")
    llm_api_logging: bool = Field(default=True, description="Enable LLM API logging at each step")
    track_last_agent_run: bool = Field(default=False, description="Update last agent run metrics")
    track_errored_messages: bool = Field(default=True, description="Enable tracking for errored messages")
    track_stop_reason: bool = Field(default=True, description="Enable tracking stop reason on steps.")
    track_agent_run: bool = Field(default=True, description="Enable tracking agent run with cancellation support")
    track_provider_trace: bool = Field(default=True, description="Enable tracking raw llm request and response at each step")

    # LLM trace storage for analytics (direct ClickHouse, bypasses OTEL for large payloads)
    # TTL is configured in the ClickHouse DDL (default 90 days)
    store_llm_traces: bool = Field(
        default=False,
        description="Enable storing LLM traces in ClickHouse for cost analytics",
    )

    # FastAPI Application Settings
    uvicorn_workers: int = 1
    uvicorn_reload: bool = False
    uvicorn_timeout_keep_alive: int = 5

    use_uvloop: bool = Field(default=False, description="Enable uvloop as asyncio event loop.")
    use_granian: bool = Field(default=False, description="Use Granian for workers")
    sqlalchemy_tracing: bool = False

    # event loop parallelism
    event_loop_threadpool_max_workers: int = 43

    # experimental toggle
    use_vertex_structured_outputs_experimental: bool = False
    use_asyncio_shield: bool = True

    # Gate using Temporal (Lettuce) for file uploads via folders endpoint
    use_lettuce_for_file_uploads: bool = False

    # Database pool monitoring
    enable_db_pool_monitoring: bool = True  # Enable connection pool monitoring
    db_pool_monitoring_interval: int = 30  # Seconds between pool stats collection

    # cron job parameters
    enable_batch_job_polling: bool = False
    poll_running_llm_batches_interval_seconds: int = 5 * 60
    poll_lock_retry_interval_seconds: int = 8 * 60
    batch_job_polling_lookback_weeks: int = 2
    batch_job_polling_batch_size: Optional[int] = None

    # for OCR
    mistral_api_key: Optional[str] = None

    # LLM request timeout settings (model + embedding model)
    llm_request_timeout_seconds: float = Field(default=60.0, ge=10.0, le=1800.0, description="Timeout for LLM requests in seconds")
    llm_stream_timeout_seconds: float = Field(
        default=600.0, ge=10.0, le=1800.0, description="Timeout for LLM streaming requests in seconds"
    )

    # For embeddings
    enable_pinecone: bool = False
    pinecone_api_key: Optional[str] = None
    pinecone_source_index: Optional[str] = "sources"
    pinecone_agent_index: Optional[str] = "recall"
    upsert_pinecone_indices: bool = False

    # For tpuf - currently only for archival memories
    use_tpuf: bool = False
    tpuf_api_key: Optional[str] = None
    tpuf_region: str = "gcp-us-central1"
    embed_all_messages: bool = False
    embed_tools: bool = False

    # For encryption
    encryption_key: Optional[str] = None

    # File processing timeout settings
    file_processing_timeout_minutes: int = 30
    file_processing_timeout_error_message: str = "File processing timed out after {} minutes. Please try again."

    # Letta client settings for tool execution
    default_base_url: str = Field(default="http://localhost:8283", description="Default base URL for Letta client in tool execution")
    default_token: Optional[str] = Field(default=None, description="Default token for Letta client in tool execution")

    # enabling letta_agent_v1 architecture
    use_letta_v1_agent: bool = False

    # Archival memory token limit
    archival_memory_token_limit: int = 8192

    # Security: Disable default actor fallback
    no_default_actor: bool = Field(
        default=False,
        description="When true, prevents fallback to default actor in get_actor_or_default_async. Raises NoResultFound if actor_id is None.",
    )

    @property
    def letta_pg_uri(self) -> str:
        """Return the configured Postgres URI, or the local default when none is configured."""
        # Every non-None value from letta_pg_uri_no_default is a non-empty string,
        # so `or` only falls through when nothing is configured.
        return self.letta_pg_uri_no_default or _DEFAULT_PG_URI

    # add this property to avoid being returned the default
    # reference: https://github.com/letta-ai/letta/issues/1362
    @property
    def letta_pg_uri_no_default(self) -> str | None:
        """Return the configured Postgres URI, or ``None`` when nothing is configured.

        ``pg_uri`` wins when set; otherwise a URI is assembled from the individual
        ``pg_*`` fields, but only when all five of them are set.
        """
        if self.pg_uri:
            return self.pg_uri
        if self.pg_db and self.pg_user and self.pg_password and self.pg_host and self.pg_port:
            # NOTE(review): user/password are interpolated without URL-encoding; values containing
            # characters such as '@' or '/' presumably need to be pre-encoded by the caller — confirm.
            return f"postgresql+pg8000://{self.pg_user}:{self.pg_password}@{self.pg_host}:{self.pg_port}/{self.pg_db}"
        return None

    @property
    def database_engine(self) -> DatabaseChoice:
        """Select POSTGRES when a Postgres URI is configured, SQLITE otherwise."""
        return DatabaseChoice.POSTGRES if self.letta_pg_uri_no_default else DatabaseChoice.SQLITE

    @property
    def plugin_register_dict(self) -> dict:
        """Parse ``plugin_register`` (``name=target;name2=target2``) into ``{name: {"target": target}}``.

        Empty entries (e.g. from a trailing ``;``) are skipped, and only the first
        ``=`` separates name from target, so targets may themselves contain ``=``.

        Raises:
            ValueError: if a non-empty entry contains no ``=``.
        """
        plugins = {}
        if not self.plugin_register:
            return plugins
        for raw_entry in self.plugin_register.split(";"):
            entry = raw_entry.strip()
            if not entry:
                continue
            name, separator, target = entry.partition("=")
            if not separator:
                raise ValueError(f"Invalid plugin_register entry {entry!r}; expected 'name=target'")
            plugins[name.strip()] = {"target": target.strip()}
        return plugins

    @property
    def use_clickhouse_for_provider_traces(self) -> bool:
        """Check if ClickHouse backend is configured for provider traces."""
        # telemetry_settings is a module-level singleton defined after this class;
        # the global name is resolved when the property is accessed, not at class creation.
        return "clickhouse" in telemetry_settings.provider_trace_backends


class TestSettings(Settings):
    """Variant of ``Settings`` using the ``letta_test_`` env prefix and a test directory."""

    model_config = SettingsConfigDict(env_prefix="letta_test_", extra="ignore")

    letta_dir: Path | None = Field(Path.home() / ".letta/test", alias="LETTA_TEST_DIR")


class LogSettings(BaseSettings):
    """Logging settings (env prefix ``letta_logging_``)."""

    model_config = SettingsConfigDict(env_prefix="letta_logging_", extra="ignore")

    debug: bool = Field(default=False, description="Enable debugging for logging")
    json_logging: bool = Field(
        default=False,
        description="Enable structured JSON logging (recommended).",
    )
    log_level: str | None = Field("WARNING", description="Logging level")
    letta_log_path: Path | None = Field(Path.home() / ".letta" / "logs" / "Letta.log")
    verbose_telemetry_logging: bool = Field(default=False)


class TelemetrySettings(BaseSettings):
    """Configuration for telemetry and observability integrations."""

    model_config = SettingsConfigDict(env_prefix="letta_telemetry_", extra="ignore")

    # Datadog APM and Profiling
    enable_datadog: bool = Field(default=False, description="Enable Datadog profiling. Environment is pulled from settings.environment.")
    datadog_agent_host: str = Field(
        default="localhost",
        description="Datadog agent hostname or IP address. Use service name for Kubernetes (e.g., 'datadog-cluster-agent').",
    )
    datadog_agent_port: int = Field(default=8126, ge=1, le=65535, description="Datadog trace agent port (typically 8126 for traces).")
    datadog_service_name: str = Field(default="letta-server", description="Service name for Datadog profiling.")
    datadog_profiling_enabled: bool = Field(default=False, description="Enable Datadog profiling.")
    datadog_profiling_memory_enabled: bool = Field(default=False, description="Enable memory profiling in Datadog.")
    datadog_profiling_heap_enabled: bool = Field(default=False, description="Enable heap profiling in Datadog.")

    # Datadog Source Code Integration (optional, tightly coupled with profiling)
    # These settings link profiling data and traces to specific Git commits,
    # enabling code navigation directly from Datadog UI to GitHub/GitLab.
    datadog_git_repository_url: str | None = Field(
        default=None,
        validation_alias=AliasChoices("DD_GIT_REPOSITORY_URL", "datadog_git_repository_url"),
        description="Git repository URL (e.g., 'https://github.com/org/repo'). Set at build time.",
    )
    datadog_git_commit_sha: str | None = Field(
        default=None,
        validation_alias=AliasChoices("DD_GIT_COMMIT_SHA", "datadog_git_commit_sha"),
        description="Git commit SHA for the deployed code. Set at build time with 'git rev-parse HEAD'.",
    )
    datadog_main_package: str = Field(
        default="letta",
        validation_alias=AliasChoices("DD_MAIN_PACKAGE", "datadog_main_package"),
        description="Primary Python package name for source code linking. Datadog uses this setting to determine which code is 'yours' vs. third-party dependencies.",
    )

    # Provider trace backend selection (comma-separated for multi-backend support)
    provider_trace_backend: str = Field(
        default="postgres",
        description="Provider trace storage backends (comma-separated): 'postgres', 'clickhouse', 'socket'. Example: 'postgres,socket' for dual-write.",
    )
    socket_path: str = Field(
        default="/var/run/telemetry/telemetry.sock",
        validation_alias=AliasChoices("TELEMETRY_SOCKET", "socket_path"),
        description="Unix socket path for socket backend.",
    )
    source: str | None = Field(
        default=None,
        description="Source identifier for telemetry (memgpt-server, lettuce-py, etc.).",
    )
    provider_trace_pg_metadata_only: bool = Field(
        default=False,
        description="Write only metadata to Postgres (no request/response JSON). Requires provider_trace_metadata table to exist.",
    )

    @property
    def provider_trace_backends(self) -> list[str]:
        """Parse comma-separated backend list."""
        return [b.strip() for b in self.provider_trace_backend.split(",") if b.strip()]

    @property
    def socket_backend_enabled(self) -> bool:
        """Check if socket backend is enabled."""
        return "socket" in self.provider_trace_backends


# singleton
settings = Settings(_env_parse_none_str="None")
test_settings = TestSettings()
model_settings = ModelSettings()
tool_settings = ToolSettings()
summarizer_settings = SummarizerSettings()
log_settings = LogSettings()
telemetry_settings = TelemetrySettings()