From f792ab23c7437a61cca63b61fb8beb01e8a77294 Mon Sep 17 00:00:00 2001 From: Fimeg Date: Sat, 13 Dec 2025 10:55:11 -0500 Subject: [PATCH] Fix version tracking deadlock - allow old agents to check in for updates Problem: The version check middleware blocked old agents from checking in to receive update commands, creating a deadlock where agents couldn't upgrade because they were blocked from checking in. Solution: Modified MachineBindingMiddleware to allow old agents checking in for commands (GET requests whose path ends in /commands) to proceed IF they have a pending update_agent command. This allows agents to receive the update command even when below the minimum version. Changes: - Added a pending-update exception (not a time-based grace period) to the middleware's minimum-version check for command endpoints - Check if the agent has a pending update command before blocking - If an update is pending, allow the check-in and log it; if the lookup fails, log the error and reject as before - Added HasPendingUpdateCommand() to AgentQueries for checking pending updates - Also added the same method to CommandQueries for completeness - Also included in this patch: a public DB field on AgentQueries, an argIdx increment after the os_type filter in ListAgents, CreateSystemEvent() and GetAgentEvents() on AgentQueries, and GetCommandsByAgentID() on CommandQueries This prevents the version tracking deadlock while maintaining security for agents without pending updates. NOTE: Need to test that old agents can actually receive and execute update commands when allowed through this path. 
--- .../api/middleware/machine_binding.go | 17 ++++ .../internal/database/queries/agents.go | 82 ++++++++++++++++++- .../internal/database/queries/commands.go | 39 +++++++++ 3 files changed, 137 insertions(+), 1 deletion(-) diff --git a/aggregator-server/internal/api/middleware/machine_binding.go b/aggregator-server/internal/api/middleware/machine_binding.go index 3e35647..82f8f79 100644 --- a/aggregator-server/internal/api/middleware/machine_binding.go +++ b/aggregator-server/internal/api/middleware/machine_binding.go @@ -91,6 +91,23 @@ func MachineBindingMiddleware(agentQueries *queries.AgentQueries, minAgentVersio // Check minimum version (hard cutoff for legacy de-support) if agent.CurrentVersion != "" && minAgentVersion != "" { if !utils.IsNewerOrEqualVersion(agent.CurrentVersion, minAgentVersion) { + // Allow old agents to check in if they have pending update commands + // This prevents deadlock where agent can't check in to receive the update + if c.Request.Method == "GET" && strings.HasSuffix(c.Request.URL.Path, "/commands") { + // Check if agent has pending update command + hasPendingUpdate, err := agentQueries.HasPendingUpdateCommand(agentID.String()) + if err != nil { + log.Printf("[MachineBinding] Error checking pending updates for agent %s: %v", agentID, err) + } + + if hasPendingUpdate { + log.Printf("[MachineBinding] Allowing old agent %s (%s) to check in for update delivery (v%s < v%s)", + agent.Hostname, agentID, agent.CurrentVersion, minAgentVersion) + c.Next() + return + } + } + log.Printf("[MachineBinding] Agent %s version %s below minimum %s - rejecting", agent.Hostname, agent.CurrentVersion, minAgentVersion) c.JSON(http.StatusUpgradeRequired, gin.H{ diff --git a/aggregator-server/internal/database/queries/agents.go b/aggregator-server/internal/database/queries/agents.go index 39a5019..1eacc41 100644 --- a/aggregator-server/internal/database/queries/agents.go +++ b/aggregator-server/internal/database/queries/agents.go @@ -13,10 +13,14 @@ import 
( type AgentQueries struct { db *sqlx.DB + DB *sqlx.DB // Public field for access by config_builder } func NewAgentQueries(db *sqlx.DB) *AgentQueries { - return &AgentQueries{db: db} + return &AgentQueries{ + db: db, + DB: db, // Expose for external use + } } // CreateAgent inserts a new agent into the database @@ -104,6 +108,7 @@ func (q *AgentQueries) ListAgents(status, osType string) ([]models.Agent, error) if osType != "" { query += ` AND os_type = $` + string(rune(argIdx+'0')) args = append(args, osType) + argIdx++ } query += ` ORDER BY last_seen DESC` @@ -353,6 +358,55 @@ func (q *AgentQueries) CompleteAgentUpdate(agentID string, newVersion string) er return nil } +// CreateSystemEvent creates a new system event entry in the system_events table +func (q *AgentQueries) CreateSystemEvent(event *models.SystemEvent) error { + query := ` + INSERT INTO system_events ( + id, agent_id, event_type, event_subtype, severity, component, message, metadata, created_at + ) VALUES ( + :id, :agent_id, :event_type, :event_subtype, :severity, :component, :message, :metadata, :created_at + ) + ` + _, err := q.db.NamedExec(query, event) + if err != nil { + return fmt.Errorf("failed to create system event: %w", err) + } + return nil +} + +// GetAgentEvents retrieves system events for an agent with optional severity filtering +func (q *AgentQueries) GetAgentEvents(agentID uuid.UUID, severity string, limit int) ([]models.SystemEvent, error) { + query := ` + SELECT id, agent_id, event_type, event_subtype, severity, component, + message, metadata, created_at + FROM system_events + WHERE agent_id = $1 + ORDER BY created_at DESC + LIMIT $2 + ` + args := []interface{}{agentID, limit} + + if severity != "" { + query = ` + SELECT id, agent_id, event_type, event_subtype, severity, component, + message, metadata, created_at + FROM system_events + WHERE agent_id = $1 AND severity = ANY(string_to_array($2, ',')) + ORDER BY created_at DESC + LIMIT $3 + ` + args = []interface{}{agentID, 
severity, limit} + } + + var events []models.SystemEvent + err := q.db.Select(&events, query, args...) + if err != nil { + return nil, fmt.Errorf("failed to fetch agent events: %w", err) + } + + return events, nil +} + // SetAgentUpdating marks an agent as updating with nonce func (q *AgentQueries) SetAgentUpdating(agentID string, isUpdating bool, targetVersion string) error { query := ` @@ -368,3 +422,29 @@ func (q *AgentQueries) SetAgentUpdating(agentID string, isUpdating bool, targetV return nil } + +// HasPendingUpdateCommand checks if an agent has a pending update_agent command +// This is used to allow old agents to check in and receive updates even if they're below minimum version +func (q *AgentQueries) HasPendingUpdateCommand(agentID string) (bool, error) { + // Check if agent_id is a valid UUID + agentUUID, err := uuid.Parse(agentID) + if err != nil { + return false, fmt.Errorf("invalid agent ID: %w", err) + } + + var count int + query := ` + SELECT COUNT(*) + FROM agent_commands + WHERE agent_id = $1 + AND command_type = 'update_agent' + AND status = 'pending' + ` + + err = q.db.Get(&count, query, agentUUID) + if err != nil { + return false, fmt.Errorf("failed to check for pending update commands: %w", err) + } + + return count > 0, nil +} diff --git a/aggregator-server/internal/database/queries/commands.go b/aggregator-server/internal/database/queries/commands.go index e1c1dd7..4764a3d 100644 --- a/aggregator-server/internal/database/queries/commands.go +++ b/aggregator-server/internal/database/queries/commands.go @@ -45,6 +45,20 @@ func (q *CommandQueries) GetPendingCommands(agentID uuid.UUID) ([]models.AgentCo return commands, err } + +// GetCommandsByAgentID retrieves all commands for a specific agent +func (q *CommandQueries) GetCommandsByAgentID(agentID uuid.UUID) ([]models.AgentCommand, error) { + var commands []models.AgentCommand + query := ` + SELECT * FROM agent_commands + WHERE agent_id = $1 + ORDER BY created_at DESC + LIMIT 100 + ` + err := 
q.db.Select(&commands, query, agentID) + return commands, err +} + // MarkCommandSent updates a command's status to sent func (q *CommandQueries) MarkCommandSent(id uuid.UUID) error { now := time.Now() @@ -460,3 +474,28 @@ func (q *CommandQueries) VerifyCommandsCompleted(commandIDs []string) ([]string, return completedIDs, nil } + +// HasPendingUpdateCommand checks if an agent has a pending update_agent command +// This is used to allow old agents to check in and receive updates even if they're below minimum version +func (q *CommandQueries) HasPendingUpdateCommand(agentID string) (bool, error) { + var count int + query := ` + SELECT COUNT(*) + FROM agent_commands + WHERE agent_id = $1 + AND command_type = 'update_agent' + AND status = 'pending' + ` + + agentUUID, err := uuid.Parse(agentID) + if err != nil { + return false, fmt.Errorf("invalid agent ID: %w", err) + } + + err = q.db.Get(&count, query, agentUUID) + if err != nil { + return false, fmt.Errorf("failed to check for pending update commands: %w", err) + } + + return count > 0, nil +}