Files
Redflag/docs/historical/OPTION_B_IMPLEMENTATION_PLAN.md

11 KiB

Option B: Remove scan_updates - Complete Implementation Plan

Date: December 22, 2025 · Version: v0.1.28 · Objective: Remove the monolithic scan_updates command and enforce individual subsystem scanning


Executive Summary

Remove the old scan_updates command type entirely. Enforce use of individual subsystem scans (scan_dnf, scan_apt, scan_docker, etc.) across the entire stack.

Impact: Breaking change requiring frontend updates · Benefit: Eliminates confusion, forces explicit subsystem selection


Phase 1: Remove Server Endpoint (10 minutes)

1.1 Delete TriggerScan Handler

File: aggregator-server/internal/api/handlers/agents.go:744-776

// DELETE ENTIRE FUNCTION (lines 744-776)
// Function: TriggerScan(c *gin.Context)
// Purpose: Creates monolithic scan_updates command

// Remove from file:
func (h *AgentHandler) TriggerScan(c *gin.Context) {
    var req struct {
        AgentIDs []uuid.UUID `json:"agent_ids" binding:"required"`
    }

    if err := c.ShouldBindJSON(&req); err != nil {
        c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request"})
        return
    }

    // ... rest of function ...
}

1.2 Remove Route Registration

File: aggregator-server/cmd/server/main.go:484

// REMOVE THIS LINE:
dashboard.POST("/agents/:id/scan", agentHandler.TriggerScan)

// Verify no other routes reference TriggerScan
// Search: grep -r "TriggerScan" aggregator-server/

Phase 2: Fix Docker Handler Command Type (2 minutes)

2.1 Update Command Type for Docker Updates

File: aggregator-server/internal/api/handlers/docker.go:461

// BEFORE (Line 461):
CommandType: models.CommandTypeScanUpdates, // Reuse scan for Docker updates

// AFTER:
CommandType: models.CommandTypeInstallUpdate, // Install Docker image update

Rationale: Docker updates are installations, not scans


Phase 3: Create Migration 024 (5 minutes)

3.1 Create Migration File

File: aggregator-server/internal/database/migrations/024_disable_updates_subsystem.up.sql

-- Migration: Disable legacy updates subsystem
-- Purpose: Clean up from monolithic scan_updates to individual scanners
-- Version: 0.1.28
-- Date: 2025-12-22

-- Disable all 'updates' subsystems (legacy monolithic scanner)
UPDATE agent_subsystems
SET enabled = false,
    auto_run = false,
    deprecated = true,
    updated_at = NOW()
WHERE subsystem = 'updates';

-- Add comment tracking this migration
COMMENT ON TABLE agent_subsystems IS 'Agent subsystems configuration. Legacy updates subsystem disabled in v0.1.28';

-- Log migration completion
INSERT INTO schema_migrations (version) VALUES
('024_disable_updates_subsystem.up.sql');

3.2 Create Down Migration

File: aggregator-server/internal/database/migrations/024_disable_updates_subsystem.down.sql

-- Re-enable updates subsystem (rollback)
UPDATE agent_subsystems
SET enabled = true,
    auto_run = true,
    deprecated = false,
    updated_at = NOW()
WHERE subsystem = 'updates';

Phase 4: Remove Agent Console Support (5 minutes)

4.1 Remove scan_updates from Console Agent

File: aggregator-agent/cmd/agent/main.go:1041-1090

// REMOVE THIS CASE (approximately lines 1041-1090):
case "scan_updates":
    log.Printf("Received scan updates command")

    // Report starting scan
    logReport.Subsystem = "updates"
    logReport.Metadata = map[string]string{
        "scanner_type": "bulk",
        "scanners":     "apt,dnf,windows,winget",
    }

    // Run orchestrated scan
    results, err := scanOrchestrator.ScanAll(ctx)
    if err != nil {
        log.Printf("ScanAll failed: %v", err)
        return fmt.Errorf("scan failed: %w", err)
    }
    // ... rest of handler ...

Phase 5: Remove Agent Windows Service Support (15 minutes)

5.1 Remove scan_updates from Windows Service

File: aggregator-agent/internal/service/windows.go:233-410

// REMOVE THIS CASE (lines 233-410):
case "scan_updates":
    log.Printf("Windows service received scan updates command")

    h.logScanAttempt(cmd.CommandType, agentID)

    ctx, cancel := context.WithTimeout(context.Background(), cmd.Timeout)
    defer cancel()

    results := []orchestrator.ScanResult{}

    // APT scanner (if available)
    if scanner := scanOrchestrator.GetScanner("apt"); scanner != nil {
        result, err := scanner.Scan(ctx)
        if err != nil {
            h.logScannerError("apt", err)
        } else {
            results = append(results, result)
        }
    }

    // DNF scanner
    if scanner := scanOrchestrator.GetScanner("dnf"); scanner != nil {
        result, err := scanner.Scan(ctx)
        if err != nil {
            h.logScannerError("dnf", err)
        } else {
            results = append(results, result)
        }
    }

    // Windows Update scanner
    if scanner := scanOrchestrator.GetScanner("windows"); scanner != nil {
        result, err := scanner.Scan(ctx)
        if err != nil {
            h.logScannerError("windows", err)
        } else {
            results = append(results, result)
        }
    }

    // Winget scanner
    if scanner := scanOrchestrator.GetScanner("winget"); scanner != nil {
        result, err := scanner.Scan(ctx)
        if err != nil {
            h.logScannerError("winget", err)
        } else {
            results = append(results, result)
        }
    }

    // ... error handling and report generation ...

Phase 6: Frontend Updates (10 minutes)

6.1 Update API Client

File: aggregator-web/src/lib/api.ts:119-126

// REMOVE THESE ENDPOINTS (lines 119-126):
export const agentApi = {
  // OLD BULK SCAN - REMOVE
  triggerScan: async (agentIDs: string[]): Promise<void> => {
    await api.post('/agents/scan', { agent_ids: agentIDs });
  },

  // OLD INDIVIDUAL SCAN - REMOVE
  scanAgent: async (id: string): Promise<void> => {
    await api.post(`/agents/${id}/scan`);
  },

  // KEEP THIS - Individual subsystem scans
  triggerSubsystem: async (agentId: string, subsystem: string): Promise<void> => {
    await api.post(`/agents/${agentId}/subsystems/${subsystem}/trigger`);
  },
};

6.2 Update Agent List Scan Button

File: aggregator-web/src/pages/Agents.tsx:1131

// BEFORE (Line 1131):
const handleScanSelected = async () => {
  if (selectedAgents.length === 0) return;

  try {
    setIsScanning(true);
    await scanMultipleMutation.mutateAsync(selectedAgents);
    toast.success(`Scan started for ${selectedAgents.length} agents`);
  } catch (error) {
    toast.error('Failed to start scan');
  } finally {
    setIsScanning(false);
  }
};

// AFTER:
const handleScanSelected = async () => {
  if (selectedAgents.length === 0) return;

  // For each selected agent, scan available subsystems
  try {
    setIsScanning(true);

    for (const agentId of selectedAgents) {
      // Get agent info to determine available subsystems
      const agent = agents.find(a => a.id === agentId);
      if (!agent) continue;

      // Trigger scan for each enabled subsystem
      for (const subsystem of agent.subsystems) {
        if (subsystem.enabled) {
          await agentApi.triggerSubsystem(agentId, subsystem.name);
        }
      }
    }

    toast.success(`Scans started for ${selectedAgents.length} agents`);
  } catch (error) {
    toast.error('Failed to start scans');
  } finally {
    setIsScanning(false);
  }
};

6.3 Update React Query Hook

File: aggregator-web/src/hooks/useAgents.ts:47

// REMOVE THIS HOOK (lines 47-55):
export const useScanMultipleAgents = () => {
  return useMutation({
    mutationFn: async (agentIDs: string[]) => {
      await agentApi.triggerScan(agentIDs);
    },
  });
};

// REPLACED WITH: Use individual subsystem scans instead

Phase 7: Testing (15 minutes)

7.1 Test Individual Subsystem Scans

# Test each subsystem individually:
curl -X POST http://localhost:8080/api/v1/agents/{agent-id}/subsystems/apt/trigger
curl -X POST http://localhost:8080/api/v1/agents/{agent-id}/subsystems/dnf/trigger
curl -X POST http://localhost:8080/api/v1/agents/{agent-id}/subsystems/docker/trigger

# Verify in agent logs:
tail -f /var/log/redflag-agent.log | grep "scan_"

7.2 Verify Old Endpoint Removed

# Should return 404:
curl -X POST http://localhost:8080/api/v1/agents/{agent-id}/scan

# Should return 404:
curl -X POST http://localhost:8080/api/v1/agents/scan

7.3 Test Frontend Scan Button

// Open Agents page
// Select multiple agents
// Click "Scan Selected"
// Verify: Calls triggerSubsystem for each agent's enabled subsystems

Verification Checklist

Before Committing:

  • TriggerScan handler completely removed
  • /agents/:id/scan route removed from router (plus the bulk /agents/scan route, if one is registered; Phase 7.2 expects both to return 404)
  • scan_updates case removed from console agent
  • scan_updates case removed from Windows service agent
  • Docker handler uses CommandTypeInstallUpdate
  • Frontend uses triggerSubsystem() exclusively
  • Migration 024 created and tested
  • All individual subsystem scans tested
  • Old endpoints return 404
  • Build succeeds without errors

After Deployment:

  • Agents receive and process individual scan commands
  • Scan results appear in UI
  • No references to scan_updates in logs
  • All subsystems (apt, dnf, docker, windows, winget) working

Rollback Plan

If critical issues arise:

  1. Restore from Git:

    git revert HEAD
    
  2. Restore scan_updates Support:

    • Revert all changes listed in Phases 1-5
    • Restore TriggerScan handler and route
    • Restore agent scan_updates handlers
  3. Database Rollback:

    cd aggregator-server
    go run cmd/migrate/main.go -migrate-down 1
    

Breaking Changes Documentation

For Users

  • The bulk "Scan" button on the Agents page now triggers an individual scan for each enabled subsystem of every selected agent
  • The old scan_updates command type is no longer supported
  • Each subsystem scan appears as a separate history entry
  • More granular control over what gets scanned

For API Consumers

  • POST /api/v1/agents/:id/scan → Removed (404)
  • POST /api/v1/agents/scan → Removed (bulk scan endpoint; 404)
  • Use POST /api/v1/agents/:id/subsystems/:subsystem/trigger instead

For Developers

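  • CommandTypeScanUpdates constant → Removed (note: Phase 2 only stops using it in docker.go; no phase above deletes the constant itself, so add that step or treat this as a follow-up)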
  • TriggerScan handler → Removed
  • Agent scan_updates switch cases (console agent and Windows service) → Removed
  • Update frontend to use triggerSubsystem() exclusively

Total Time Estimate

Conservative: about 60 minutes (the phase estimates below sum to 62 minutes)

  • Phase 1 (Server): 10 min
  • Phase 2 (Docker): 2 min
  • Phase 3 (Migration): 5 min
  • Phase 4 (Console Agent): 5 min
  • Phase 5 (Windows Service): 15 min
  • Phase 6 (Frontend): 10 min
  • Phase 7 (Testing): 15 min

Realistic with debugging: 90 minutes


Decision Required

Before proceeding, we need to decide:

Q1: Do we want a deprecation period?

  • Option A: Remove immediately (clean break)
  • Option B: Deprecate now, remove in v0.1.29 (grace period)

Q2: What should the "Scan" button on the Agents page do?

  • Option A: Scan all subsystems for each agent
  • Option B: Show submenu to pick which subsystem to scan
  • Option C: Scan only enabled subsystems (current plan)

Q3: Do we keep the old monolithic orchestrator.ScanAll() function?

  • Option A: Delete it entirely
  • Option B: Keep for potential future use (like "emergency scan all")

My recommendations: Q1 → A (remove immediately), Q2 → C (scan only enabled subsystems), Q3 → B (keep ScanAll)


Status: Plan complete, awaiting approval · Next Step: Execute phases if approved · Risk Level: MEDIUM (breaking change, but well-defined scope)