Fix version tracking deadlock - allow old agents to check in for updates

Problem: The version check middleware rejected check-ins from agents below the
minimum version, so those agents could never receive the update command they
needed in order to upgrade. The result was a deadlock.

Solution: Modified MachineBindingMiddleware to allow old agents checking in for
commands to proceed IF they have a pending update_agent command. This allows
agents to receive the update command even when below minimum version.

Changes:
- Added grace period logic in middleware for command endpoints
- Check if agent has pending update command before blocking
- If update pending, allow check-in and log it
- Added HasPendingUpdateCommand() to AgentQueries for checking pending updates
- Also added the same method to CommandQueries for completeness

This prevents the version tracking deadlock while maintaining security for
agents without pending updates.

NOTE: Need to test that old agents can actually receive and execute update
commands when allowed through this path.
This commit is contained in:
Fimeg
2025-12-13 10:55:11 -05:00
parent 40598c2203
commit f792ab23c7
3 changed files with 137 additions and 1 deletions

View File

@@ -13,10 +13,14 @@ import (
// AgentQueries groups the agent-related database queries around an sqlx handle.
type AgentQueries struct {
// db is the handle the query methods on this type execute against.
db *sqlx.DB
// DB is an exported database handle for callers that cannot reach the
// unexported field; the inline note names config_builder as the consumer.
DB *sqlx.DB // Public field for access by config_builder
}
// NewAgentQueries returns an AgentQueries backed by db.
//
// The handle is stored in both the unexported db field, which the query methods
// use, and the exported DB field. A stale early return that populated only db
// has been removed, since it left DB nil and made the full literal unreachable.
func NewAgentQueries(db *sqlx.DB) *AgentQueries {
	return &AgentQueries{
		db: db,
		DB: db, // Expose for external use
	}
}
// CreateAgent inserts a new agent into the database
@@ -104,6 +108,7 @@ func (q *AgentQueries) ListAgents(status, osType string) ([]models.Agent, error)
if osType != "" {
query += ` AND os_type = $` + string(rune(argIdx+'0'))
args = append(args, osType)
argIdx++
}
query += ` ORDER BY last_seen DESC`
@@ -353,6 +358,55 @@ func (q *AgentQueries) CompleteAgentUpdate(agentID string, newVersion string) er
return nil
}
// CreateSystemEvent writes one row into the system_events table.
//
// The statement uses named placeholders, which NamedExec binds from event.
// A database failure is wrapped and returned; nil means the insert succeeded.
func (q *AgentQueries) CreateSystemEvent(event *models.SystemEvent) error {
	const insertStmt = `
INSERT INTO system_events (
id, agent_id, event_type, event_subtype, severity, component, message, metadata, created_at
) VALUES (
:id, :agent_id, :event_type, :event_subtype, :severity, :component, :message, :metadata, :created_at
)
`
	if _, execErr := q.db.NamedExec(insertStmt, event); execErr != nil {
		return fmt.Errorf("failed to create system event: %w", execErr)
	}
	return nil
}
// GetAgentEvents returns up to limit system events for agentID, newest first.
//
// When severity is empty, every event for the agent is eligible. Otherwise
// severity is treated as a comma-separated list and only events whose severity
// matches one of its entries are returned. Query failures are wrapped and
// returned with a nil slice.
func (q *AgentQueries) GetAgentEvents(agentID uuid.UUID, severity string, limit int) ([]models.SystemEvent, error) {
	var (
		stmt   string
		params []interface{}
	)

	if severity == "" {
		// No filter: select on agent only.
		stmt = `
SELECT id, agent_id, event_type, event_subtype, severity, component,
message, metadata, created_at
FROM system_events
WHERE agent_id = $1
ORDER BY created_at DESC
LIMIT $2
`
		params = []interface{}{agentID, limit}
	} else {
		// The filter is split on commas inside the database, so the raw string is passed through.
		stmt = `
SELECT id, agent_id, event_type, event_subtype, severity, component,
message, metadata, created_at
FROM system_events
WHERE agent_id = $1 AND severity = ANY(string_to_array($2, ','))
ORDER BY created_at DESC
LIMIT $3
`
		params = []interface{}{agentID, severity, limit}
	}

	var found []models.SystemEvent
	if selErr := q.db.Select(&found, stmt, params...); selErr != nil {
		return nil, fmt.Errorf("failed to fetch agent events: %w", selErr)
	}
	return found, nil
}
// SetAgentUpdating marks an agent as updating with nonce
func (q *AgentQueries) SetAgentUpdating(agentID string, isUpdating bool, targetVersion string) error {
query := `
@@ -368,3 +422,29 @@ func (q *AgentQueries) SetAgentUpdating(agentID string, isUpdating bool, targetV
return nil
}
// HasPendingUpdateCommand reports whether the agent has at least one row in
// agent_commands with command_type 'update_agent' and status 'pending'.
//
// Per the original note, this is used to let agents below the minimum version
// check in and receive their update instead of being rejected outright.
//
// agentID must parse as a UUID; otherwise an error is returned without touching
// the database. Query failures are wrapped and returned with false.
func (q *AgentQueries) HasPendingUpdateCommand(agentID string) (bool, error) {
	agentUUID, err := uuid.Parse(agentID)
	if err != nil {
		return false, fmt.Errorf("invalid agent ID: %w", err)
	}

	// EXISTS lets the database stop at the first matching row; only presence
	// matters here, so counting every match is unnecessary.
	const query = `
		SELECT EXISTS (
			SELECT 1
			FROM agent_commands
			WHERE agent_id = $1
			  AND command_type = 'update_agent'
			  AND status = 'pending'
		)
	`

	var pending bool
	if err := q.db.Get(&pending, query, agentUUID); err != nil {
		return false, fmt.Errorf("failed to check for pending update commands: %w", err)
	}
	return pending, nil
}