From 0c06dbf047b6a1d6b90b39fdd507e09c48d2fbb0 Mon Sep 17 00:00:00 2001
From: cthomas
Date: Wed, 12 Nov 2025 07:56:55 -0800
Subject: [PATCH] feat: remove ssl allocation from startup (#6127)

---
 letta/server/rest_api/routers/v1/messages.py |  2 ++
 letta/server/server.py                       | 19 ++++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/letta/server/rest_api/routers/v1/messages.py b/letta/server/rest_api/routers/v1/messages.py
index 1b4a2ca9..e4fd7f29 100644
--- a/letta/server/rest_api/routers/v1/messages.py
+++ b/letta/server/rest_api/routers/v1/messages.py
@@ -191,6 +191,8 @@ async def cancel_batch(
     # TODO: Extend to providers beyond anthropic
     # TODO: For now, we only support anthropic
     # Cancel the job
+    if server.anthropic_async_client is None:
+        raise HTTPException(status_code=501, detail="Batch job cancellation is not enabled")
     anthropic_batch_id = llm_batch_job.create_batch_response.id
     await server.anthropic_async_client.messages.batches.cancel(anthropic_batch_id)
 
diff --git a/letta/server/server.py b/letta/server/server.py
index 79968f5e..a834f4c9 100644
--- a/letta/server/server.py
+++ b/letta/server/server.py
@@ -184,10 +184,18 @@ class SyncServer(object):
             message_manager=self.message_manager,
         )
 
-        # A resusable httpx client
-        timeout = httpx.Timeout(connect=10.0, read=20.0, write=10.0, pool=10.0)
-        limits = httpx.Limits(max_connections=100, max_keepalive_connections=80, keepalive_expiry=300)
-        self.httpx_client = httpx.AsyncClient(timeout=timeout, follow_redirects=True, limits=limits)
+        if settings.enable_batch_job_polling:
+            # A reusable httpx client
+            timeout = httpx.Timeout(connect=10.0, read=20.0, write=10.0, pool=10.0)
+            limits = httpx.Limits(max_connections=100, max_keepalive_connections=80, keepalive_expiry=300)
+            self.httpx_client = httpx.AsyncClient(timeout=timeout, follow_redirects=True, limits=limits)
+
+            # TODO: Replace this with the Anthropic client we have in house
+            # Reuse the shared httpx client to prevent duplicate SSL contexts and connection pools
+            self.anthropic_async_client = AsyncAnthropic(http_client=self.httpx_client)
+        else:
+            self.httpx_client = None
+            self.anthropic_async_client = None
 
         # For MCP
         # TODO: remove this
@@ -198,9 +206,6 @@ class SyncServer(object):
         self._llm_config_cache = {}
         self._embedding_config_cache = {}
 
-        # TODO: Replace this with the Anthropic client we have in house
-        self.anthropic_async_client = AsyncAnthropic()
-
         # collect providers (always has Letta as a default)
         self._enabled_providers: List[Provider] = [LettaProvider(name="letta")]
         if model_settings.openai_api_key: