Fix version tracking deadlock - allow old agents to check in for updates
Problem: The version-check middleware blocked old agents from checking in to receive update commands, creating a deadlock: agents could not upgrade because they were blocked from checking in.

Solution: Modified MachineBindingMiddleware so that an old agent checking in for commands is allowed to proceed IF it has a pending update_agent command. This allows agents to receive the update command even when they are below the minimum version.

Changes:
- Added grace period logic in middleware for command endpoints
- Check if agent has pending update command before blocking
- If update pending, allow check-in and log it
- Added HasPendingUpdateCommand() to AgentQueries for checking pending updates
- Also added same method to CommandQueries for completeness

This prevents the version tracking deadlock while maintaining security for agents without pending updates.

NOTE: Need to test that old agents can actually receive and execute update commands when allowed through this path.
This commit is contained in:
@@ -91,6 +91,23 @@ func MachineBindingMiddleware(agentQueries *queries.AgentQueries, minAgentVersio
|
|||||||
// Check minimum version (hard cutoff for legacy de-support)
|
// Check minimum version (hard cutoff for legacy de-support)
|
||||||
if agent.CurrentVersion != "" && minAgentVersion != "" {
|
if agent.CurrentVersion != "" && minAgentVersion != "" {
|
||||||
if !utils.IsNewerOrEqualVersion(agent.CurrentVersion, minAgentVersion) {
|
if !utils.IsNewerOrEqualVersion(agent.CurrentVersion, minAgentVersion) {
|
||||||
|
// Allow old agents to check in if they have pending update commands
|
||||||
|
// This prevents deadlock where agent can't check in to receive the update
|
||||||
|
if c.Request.Method == "GET" && strings.HasSuffix(c.Request.URL.Path, "/commands") {
|
||||||
|
// Check if agent has pending update command
|
||||||
|
hasPendingUpdate, err := agentQueries.HasPendingUpdateCommand(agentID.String())
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("[MachineBinding] Error checking pending updates for agent %s: %v", agentID, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if hasPendingUpdate {
|
||||||
|
log.Printf("[MachineBinding] Allowing old agent %s (%s) to check in for update delivery (v%s < v%s)",
|
||||||
|
agent.Hostname, agentID, agent.CurrentVersion, minAgentVersion)
|
||||||
|
c.Next()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
log.Printf("[MachineBinding] Agent %s version %s below minimum %s - rejecting",
|
log.Printf("[MachineBinding] Agent %s version %s below minimum %s - rejecting",
|
||||||
agent.Hostname, agent.CurrentVersion, minAgentVersion)
|
agent.Hostname, agent.CurrentVersion, minAgentVersion)
|
||||||
c.JSON(http.StatusUpgradeRequired, gin.H{
|
c.JSON(http.StatusUpgradeRequired, gin.H{
|
||||||
|
|||||||
@@ -13,10 +13,14 @@ import (
|
|||||||
|
|
||||||
type AgentQueries struct {
|
type AgentQueries struct {
|
||||||
db *sqlx.DB
|
db *sqlx.DB
|
||||||
|
DB *sqlx.DB // Public field for access by config_builder
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewAgentQueries(db *sqlx.DB) *AgentQueries {
|
func NewAgentQueries(db *sqlx.DB) *AgentQueries {
|
||||||
return &AgentQueries{db: db}
|
return &AgentQueries{
|
||||||
|
db: db,
|
||||||
|
DB: db, // Expose for external use
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// CreateAgent inserts a new agent into the database
|
// CreateAgent inserts a new agent into the database
|
||||||
@@ -104,6 +108,7 @@ func (q *AgentQueries) ListAgents(status, osType string) ([]models.Agent, error)
|
|||||||
if osType != "" {
|
if osType != "" {
|
||||||
query += ` AND os_type = $` + string(rune(argIdx+'0'))
|
query += ` AND os_type = $` + string(rune(argIdx+'0'))
|
||||||
args = append(args, osType)
|
args = append(args, osType)
|
||||||
|
argIdx++
|
||||||
}
|
}
|
||||||
|
|
||||||
query += ` ORDER BY last_seen DESC`
|
query += ` ORDER BY last_seen DESC`
|
||||||
@@ -353,6 +358,55 @@ func (q *AgentQueries) CompleteAgentUpdate(agentID string, newVersion string) er
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CreateSystemEvent creates a new system event entry in the system_events table
|
||||||
|
func (q *AgentQueries) CreateSystemEvent(event *models.SystemEvent) error {
|
||||||
|
query := `
|
||||||
|
INSERT INTO system_events (
|
||||||
|
id, agent_id, event_type, event_subtype, severity, component, message, metadata, created_at
|
||||||
|
) VALUES (
|
||||||
|
:id, :agent_id, :event_type, :event_subtype, :severity, :component, :message, :metadata, :created_at
|
||||||
|
)
|
||||||
|
`
|
||||||
|
_, err := q.db.NamedExec(query, event)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create system event: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAgentEvents retrieves system events for an agent with optional severity filtering
|
||||||
|
func (q *AgentQueries) GetAgentEvents(agentID uuid.UUID, severity string, limit int) ([]models.SystemEvent, error) {
|
||||||
|
query := `
|
||||||
|
SELECT id, agent_id, event_type, event_subtype, severity, component,
|
||||||
|
message, metadata, created_at
|
||||||
|
FROM system_events
|
||||||
|
WHERE agent_id = $1
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT $2
|
||||||
|
`
|
||||||
|
args := []interface{}{agentID, limit}
|
||||||
|
|
||||||
|
if severity != "" {
|
||||||
|
query = `
|
||||||
|
SELECT id, agent_id, event_type, event_subtype, severity, component,
|
||||||
|
message, metadata, created_at
|
||||||
|
FROM system_events
|
||||||
|
WHERE agent_id = $1 AND severity = ANY(string_to_array($2, ','))
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT $3
|
||||||
|
`
|
||||||
|
args = []interface{}{agentID, severity, limit}
|
||||||
|
}
|
||||||
|
|
||||||
|
var events []models.SystemEvent
|
||||||
|
err := q.db.Select(&events, query, args...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch agent events: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return events, nil
|
||||||
|
}
|
||||||
|
|
||||||
// SetAgentUpdating marks an agent as updating with nonce
|
// SetAgentUpdating marks an agent as updating with nonce
|
||||||
func (q *AgentQueries) SetAgentUpdating(agentID string, isUpdating bool, targetVersion string) error {
|
func (q *AgentQueries) SetAgentUpdating(agentID string, isUpdating bool, targetVersion string) error {
|
||||||
query := `
|
query := `
|
||||||
@@ -368,3 +422,29 @@ func (q *AgentQueries) SetAgentUpdating(agentID string, isUpdating bool, targetV
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasPendingUpdateCommand checks if an agent has a pending update_agent command
|
||||||
|
// This is used to allow old agents to check in and receive updates even if they're below minimum version
|
||||||
|
func (q *AgentQueries) HasPendingUpdateCommand(agentID string) (bool, error) {
|
||||||
|
// Check if agent_id is a valid UUID
|
||||||
|
agentUUID, err := uuid.Parse(agentID)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("invalid agent ID: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var count int
|
||||||
|
query := `
|
||||||
|
SELECT COUNT(*)
|
||||||
|
FROM agent_commands
|
||||||
|
WHERE agent_id = $1
|
||||||
|
AND command_type = 'update_agent'
|
||||||
|
AND status = 'pending'
|
||||||
|
`
|
||||||
|
|
||||||
|
err = q.db.Get(&count, query, agentUUID)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to check for pending update commands: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return count > 0, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -45,6 +45,20 @@ func (q *CommandQueries) GetPendingCommands(agentID uuid.UUID) ([]models.AgentCo
|
|||||||
return commands, err
|
return commands, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// GetCommandsByAgentID retrieves all commands for a specific agent
|
||||||
|
func (q *CommandQueries) GetCommandsByAgentID(agentID uuid.UUID) ([]models.AgentCommand, error) {
|
||||||
|
var commands []models.AgentCommand
|
||||||
|
query := `
|
||||||
|
SELECT * FROM agent_commands
|
||||||
|
WHERE agent_id = $1
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT 100
|
||||||
|
`
|
||||||
|
err := q.db.Select(&commands, query, agentID)
|
||||||
|
return commands, err
|
||||||
|
}
|
||||||
|
|
||||||
// MarkCommandSent updates a command's status to sent
|
// MarkCommandSent updates a command's status to sent
|
||||||
func (q *CommandQueries) MarkCommandSent(id uuid.UUID) error {
|
func (q *CommandQueries) MarkCommandSent(id uuid.UUID) error {
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
@@ -460,3 +474,28 @@ func (q *CommandQueries) VerifyCommandsCompleted(commandIDs []string) ([]string,
|
|||||||
|
|
||||||
return completedIDs, nil
|
return completedIDs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// HasPendingUpdateCommand checks if an agent has a pending update_agent command
|
||||||
|
// This is used to allow old agents to check in and receive updates even if they're below minimum version
|
||||||
|
func (q *CommandQueries) HasPendingUpdateCommand(agentID string) (bool, error) {
|
||||||
|
var count int
|
||||||
|
query := `
|
||||||
|
SELECT COUNT(*)
|
||||||
|
FROM agent_commands
|
||||||
|
WHERE agent_id = $1
|
||||||
|
AND command_type = 'update_agent'
|
||||||
|
AND status = 'pending'
|
||||||
|
`
|
||||||
|
|
||||||
|
agentUUID, err := uuid.Parse(agentID)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("invalid agent ID: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = q.db.Get(&count, query, agentUUID)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to check for pending update commands: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return count > 0, nil
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user