feat: granular subsystem commands with parallel scanner execution
Split the monolithic scan_updates command into individual subsystems (updates/storage/system/docker). Scanners now run in parallel via goroutines, which cuts scan time roughly in half, maybe more.

Agent changes:
- Orchestrator pattern for scanner management (usage sketch below)
- New scanners: storage (disk metrics), system (cpu/mem/processes)
- New commands: scan_storage, scan_system, scan_docker
- Wrapped existing scanners (APT/DNF/Docker/Windows/Winget) in a common interface
- Version bump to 0.1.20

Server changes:
- Migration 015: agent_subsystems table with trigger for auto-init
- Subsystem CRUD: enable/disable, interval (5min-24hr), auto-run toggle
- API routes: /api/v1/agents/:id/subsystems/* (9 endpoints)
- Per-subsystem stats tracking

Web UI changes:
- ChatTimeline shows subsystem-specific labels and icons
- AgentScanners gains interactive toggles, interval dropdowns, and manual trigger buttons
- TypeScript types added for subsystems

Backward compatible with the legacy scan_updates command, for now. Bugs probably exist somewhere.
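For context, a minimal sketch of how the orchestrator is intended to be wired up at agent startup. Only NewOrchestrator, RegisterScanner, ScanAll, and FormatScanSummary come from the diff below; apt.NewScanner, storage.NewScanner, and circuitbreaker.New are assumed constructors for illustration.

// Sketch: wiring scanners into the orchestrator (constructors are assumptions).
func runScans(ctx context.Context) {
    orch := orchestrator.NewOrchestrator()
    orch.RegisterScanner("apt", apt.NewScanner(), circuitbreaker.New(3, time.Minute), 2*time.Minute, true)
    orch.RegisterScanner("storage", storage.NewScanner(), circuitbreaker.New(3, time.Minute), 30*time.Second, true)

    // All registered scanners run concurrently inside ScanAll.
    results, updates := orch.ScanAll(ctx)
    stdout, stderr, exitCode := orchestrator.FormatScanSummary(results)
    log.Printf("scan finished: %d updates, exit=%d\n%s%s", len(updates), exitCode, stdout, stderr)
}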
aggregator-agent/internal/orchestrator/orchestrator.go (new file, 261 lines)
@@ -0,0 +1,261 @@
package orchestrator

import (
    "context"
    "fmt"
    "log"
    "sync"
    "time"

    "github.com/Fimeg/RedFlag/aggregator-agent/internal/circuitbreaker"
    "github.com/Fimeg/RedFlag/aggregator-agent/internal/client"
)

// Scanner represents a generic update scanner
type Scanner interface {
    // IsAvailable checks if the scanner is available on this system
    IsAvailable() bool

    // Scan performs the actual scanning and returns update items
    Scan() ([]client.UpdateReportItem, error)

    // Name returns the scanner name for logging
    Name() string
}
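Any subsystem only has to satisfy these three methods. A minimal sketch of a conforming scanner, assuming os/exec is imported; the LookPath probe and the empty Scan result are placeholders, not the agent's real APT logic:

// Sketch: a trivial Scanner implementation (illustrative only).
type aptScanner struct{}

func (s *aptScanner) Name() string { return "apt" }

func (s *aptScanner) IsAvailable() bool {
    // Only available on systems that ship apt-get.
    _, err := exec.LookPath("apt-get")
    return err == nil
}

func (s *aptScanner) Scan() ([]client.UpdateReportItem, error) {
    // A real implementation would parse `apt-get -s upgrade` output here.
    return nil, nil
}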

// ScannerConfig holds configuration for a single scanner
type ScannerConfig struct {
    Scanner        Scanner
    CircuitBreaker *circuitbreaker.CircuitBreaker
    Timeout        time.Duration
    Enabled        bool
}

// ScanResult holds the result of a scanner execution
type ScanResult struct {
    ScannerName string
    Updates     []client.UpdateReportItem
    Error       error
    Duration    time.Duration
    Status      string // "success", "failed", "disabled", "unavailable", "skipped"
}

// Orchestrator manages and coordinates multiple scanners
type Orchestrator struct {
    scanners map[string]*ScannerConfig
    mu       sync.RWMutex
}

// NewOrchestrator creates a new scanner orchestrator
func NewOrchestrator() *Orchestrator {
    return &Orchestrator{
        scanners: make(map[string]*ScannerConfig),
    }
}

// RegisterScanner adds a scanner to the orchestrator
func (o *Orchestrator) RegisterScanner(name string, scanner Scanner, cb *circuitbreaker.CircuitBreaker, timeout time.Duration, enabled bool) {
    o.mu.Lock()
    defer o.mu.Unlock()

    o.scanners[name] = &ScannerConfig{
        Scanner:        scanner,
        CircuitBreaker: cb,
        Timeout:        timeout,
        Enabled:        enabled,
    }
}

// ScanAll executes all registered scanners in parallel
func (o *Orchestrator) ScanAll(ctx context.Context) ([]ScanResult, []client.UpdateReportItem) {
    o.mu.RLock()
    defer o.mu.RUnlock()

    var wg sync.WaitGroup
    resultsChan := make(chan ScanResult, len(o.scanners))

    // Launch a goroutine for each scanner
    for name, scannerConfig := range o.scanners {
        wg.Add(1)
        go func(name string, cfg *ScannerConfig) {
            defer wg.Done()
            result := o.executeScan(ctx, name, cfg)
            resultsChan <- result
        }(name, scannerConfig)
    }

    // Wait for all scanners to complete
    wg.Wait()
    close(resultsChan)

    // Collect results
    var results []ScanResult
    var allUpdates []client.UpdateReportItem

    for result := range resultsChan {
        results = append(results, result)
        if result.Error == nil && len(result.Updates) > 0 {
            allUpdates = append(allUpdates, result.Updates...)
        }
    }

    return results, allUpdates
}

// ScanSingle executes a single scanner by name
func (o *Orchestrator) ScanSingle(ctx context.Context, scannerName string) (ScanResult, error) {
    o.mu.RLock()
    defer o.mu.RUnlock()

    cfg, exists := o.scanners[scannerName]
    if !exists {
        err := fmt.Errorf("scanner not found: %s", scannerName)
        return ScanResult{
            ScannerName: scannerName,
            Status:      "failed",
            Error:       err,
        }, err
    }

    return o.executeScan(ctx, scannerName, cfg), nil
}
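ScanSingle is what backs the new per-subsystem commands. A sketch of how the dispatch might look; the command names come from the commit message, but the handler shape is an assumption:

// Sketch: mapping agent commands onto ScanSingle (handler shape is assumed).
func handleCommand(ctx context.Context, orch *orchestrator.Orchestrator, cmd string) (string, string, int) {
    scannerByCommand := map[string]string{
        "scan_storage": "storage",
        "scan_system":  "system",
        "scan_docker":  "docker",
    }
    name, ok := scannerByCommand[cmd]
    if !ok {
        return "", fmt.Sprintf("unknown command: %s", cmd), 1
    }
    result, err := orch.ScanSingle(ctx, name)
    if err != nil {
        return "", err.Error(), 1
    }
    return orchestrator.FormatScanSummary([]orchestrator.ScanResult{result})
}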

// executeScan runs a single scanner with circuit breaker and timeout protection
func (o *Orchestrator) executeScan(ctx context.Context, name string, cfg *ScannerConfig) ScanResult {
    result := ScanResult{
        ScannerName: name,
        Status:      "failed",
    }

    startTime := time.Now()
    defer func() {
        result.Duration = time.Since(startTime)
    }()

    // Check if enabled
    if !cfg.Enabled {
        result.Status = "disabled"
        log.Printf("[%s] Scanner disabled via configuration", name)
        return result
    }

    // Check if available
    if !cfg.Scanner.IsAvailable() {
        result.Status = "unavailable"
        log.Printf("[%s] Scanner not available on this system", name)
        return result
    }

    // Execute with circuit breaker and timeout
    log.Printf("[%s] Starting scan...", name)

    var updates []client.UpdateReportItem

    err := cfg.CircuitBreaker.Call(func() error {
        // Create timeout context
        timeoutCtx, cancel := context.WithTimeout(ctx, cfg.Timeout)
        defer cancel()

        // Channel for the scan result; buffered so the goroutine can
        // still send and exit even after we have returned on timeout.
        type scanResult struct {
            updates []client.UpdateReportItem
            err     error
        }
        scanChan := make(chan scanResult, 1)

        // Run scan in goroutine
        go func() {
            u, e := cfg.Scanner.Scan()
            scanChan <- scanResult{updates: u, err: e}
        }()

        // Wait for scan or timeout
        select {
        case <-timeoutCtx.Done():
            return fmt.Errorf("scan timeout after %v", cfg.Timeout)
        case res := <-scanChan:
            if res.err != nil {
                return res.err
            }
            updates = res.updates
            return nil
        }
    })

    if err != nil {
        result.Error = err
        result.Status = "failed"
        log.Printf("[%s] Scan failed: %v", name, err)
        return result
    }

    result.Updates = updates
    result.Status = "success"
    // result.Duration is only set by the deferred func on return, so
    // compute the elapsed time directly for this log line.
    log.Printf("[%s] Scan completed: found %d updates (took %v)", name, len(updates), time.Since(startTime))

    return result
}
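The timeout path is worth exercising directly. A sketch of an in-package test using a deliberately slow fake scanner; the fake type is hypothetical and circuitbreaker.New is an assumed constructor (substitute whatever that package actually provides):

// Sketch: exercising the timeout path with a fake scanner (illustrative).
type slowScanner struct{}

func (s *slowScanner) Name() string      { return "slow" }
func (s *slowScanner) IsAvailable() bool { return true }
func (s *slowScanner) Scan() ([]client.UpdateReportItem, error) {
    time.Sleep(10 * time.Second) // far past the configured timeout
    return nil, nil
}

func TestScanTimeout(t *testing.T) {
    orch := NewOrchestrator()
    orch.RegisterScanner("slow", &slowScanner{}, circuitbreaker.New(3, time.Minute), 100*time.Millisecond, true)

    result, _ := orch.ScanSingle(context.Background(), "slow")
    if result.Status != "failed" || result.Error == nil {
        t.Fatalf("expected timeout failure, got status=%q err=%v", result.Status, result.Error)
    }
}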

// GetScannerNames returns a list of all registered scanner names
func (o *Orchestrator) GetScannerNames() []string {
    o.mu.RLock()
    defer o.mu.RUnlock()

    names := make([]string, 0, len(o.scanners))
    for name := range o.scanners {
        names = append(names, name)
    }
    return names
}

// FormatScanSummary creates a human-readable summary of scan results
func FormatScanSummary(results []ScanResult) (stdout string, stderr string, exitCode int) {
    var successResults []string
    var errorMessages []string
    totalUpdates := 0

    for _, result := range results {
        switch result.Status {
        case "success":
            msg := fmt.Sprintf("%s: Found %d updates (%.2fs)",
                result.ScannerName, len(result.Updates), result.Duration.Seconds())
            successResults = append(successResults, msg)
            totalUpdates += len(result.Updates)

        case "failed":
            msg := fmt.Sprintf("%s: %v", result.ScannerName, result.Error)
            errorMessages = append(errorMessages, msg)

        case "disabled":
            successResults = append(successResults, fmt.Sprintf("%s: Disabled", result.ScannerName))

        case "unavailable":
            successResults = append(successResults, fmt.Sprintf("%s: Not available", result.ScannerName))
        }
    }

    // Build stdout
    if len(successResults) > 0 {
        stdout = "Scan Results:\n"
        for _, msg := range successResults {
            stdout += fmt.Sprintf(" - %s\n", msg)
        }
        stdout += fmt.Sprintf("\nTotal Updates Found: %d\n", totalUpdates)
    }

    // Build stderr
    if len(errorMessages) > 0 {
        stderr = "Scan Errors:\n"
        for _, msg := range errorMessages {
            stderr += fmt.Sprintf(" - %s\n", msg)
        }
    }

    // Exit code is 1 if any scanner failed; the named return defaults to 0.
    if len(errorMessages) > 0 {
        exitCode = 1
    }

    return stdout, stderr, exitCode
}
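For reference, the shape of the summary this produces, given one success and one timed-out scanner (the names and numbers are illustrative, the layout follows the format strings above):

stdout:
Scan Results:
 - apt: Found 12 updates (1.53s)

Total Updates Found: 12

stderr:
Scan Errors:
 - docker: scan timeout after 2m0s

exit code: 1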