From 43aa97b7d2d7ece18efc3c3de62081e2cd6d1b9e Mon Sep 17 00:00:00 2001
From: Kian Jones <11655409+kianjones9@users.noreply.github.com>
Date: Wed, 10 Dec 2025 16:56:33 -0800
Subject: [PATCH] fix: prevent db connection pool exhaustion in MCP tool creation (#6621)

Problem:
When creating an MCP server with many tools, the code used asyncio.gather
to create all tools concurrently. Each tool creation involves database
operations (INSERT with upsert logic), leading to N concurrent database
connections.

Example: An MCP server with 50 tools creates 50 simultaneous database
connections during server creation, exhausting the connection pool.

Root cause:
asyncio.gather(*[create_mcp_tool_async(...) for tool in tools]) processes
all tool creations concurrently, each opening a DB session.

Solution:
Create tools sequentially instead of concurrently. While this takes longer
for server creation, it prevents database connection pool exhaustion and
maintains error handling by catching exceptions per tool.

Changes:
- apps/core/letta/services/mcp_manager.py:
  - Replaced asyncio.gather with sequential for loop
  - Maintained return_exceptions=True behavior with try/except
  - Added explanatory comment about DB pool exhaustion prevention

Impact:
With 50 MCP tools:
- Before: 50 concurrent DB connections (pool exhaustion)
- After: 1 DB connection at a time (no pool exhaustion)

Note: This follows the same pattern as PR #6617, #6619, and #6620, which
fixed similar issues in file operations, multi-agent execution, and file
status checks.
--- letta/services/mcp_manager.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/letta/services/mcp_manager.py b/letta/services/mcp_manager.py index b344ec3f..37f2d33d 100644 --- a/letta/services/mcp_manager.py +++ b/letta/services/mcp_manager.py @@ -492,17 +492,23 @@ class MCPManager: # Filter out invalid tools valid_tools = [tool for tool in mcp_tools if not (tool.health and tool.health.status == "INVALID")] - # Register in parallel + # Register tools sequentially to avoid exhausting database connection pool + # When an MCP server has many tools (e.g., 50+), concurrent tool creation can create + # too many simultaneous database connections, causing pool exhaustion errors if valid_tools: - tool_tasks = [] + results = [] for mcp_tool in valid_tools: tool_create = ToolCreate.from_mcp(mcp_server_name=created_server.server_name, mcp_tool=mcp_tool) - task = self.tool_manager.create_mcp_tool_async( - tool_create=tool_create, mcp_server_name=created_server.server_name, mcp_server_id=created_server.id, actor=actor - ) - tool_tasks.append(task) - - results = await asyncio.gather(*tool_tasks, return_exceptions=True) + try: + result = await self.tool_manager.create_mcp_tool_async( + tool_create=tool_create, + mcp_server_name=created_server.server_name, + mcp_server_id=created_server.id, + actor=actor, + ) + results.append(result) + except Exception as e: + results.append(e) successful = sum(1 for r in results if not isinstance(r, Exception)) failed = len(results) - successful