feat: granular subsystem commands with parallel scanner execution
Split the monolithic scan_updates command into individual subsystems (updates/storage/system/docker). Scanners now run in parallel via goroutines, which cuts scan time roughly in half, maybe more.

Agent changes:
- Orchestrator pattern for scanner management (usage sketch below)
- New scanners: storage (disk metrics), system (cpu/mem/processes)
- New commands: scan_storage, scan_system, scan_docker
- Wrapped existing scanners (APT/DNF/Docker/Windows/Winget) in a common interface
- Version bump to 0.1.20

Server changes:
- Migration 015: agent_subsystems table with trigger for auto-init
- Subsystem CRUD: enable/disable, interval (5min-24hr), auto-run toggle
- API routes: /api/v1/agents/:id/subsystems/* (9 endpoints)
- Per-subsystem stats tracking

Web UI changes:
- ChatTimeline shows subsystem-specific labels and icons
- AgentScanners gains interactive toggles, interval dropdowns, and manual trigger buttons
- TypeScript types added for subsystems

Backward compatible with the legacy scan_updates command, for now. Bugs probably exist somewhere.
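For context, a minimal sketch of how the orchestrator is intended to be wired up at agent startup. Only NewOrchestrator, RegisterScanner, ScanAll, and FormatScanSummary come from the diff below; apt.NewScanner, storage.NewScanner, and circuitbreaker.New are assumed constructors for illustration.

// Sketch: wiring scanners into the orchestrator (constructors are assumptions).
func runScans(ctx context.Context) {
    orch := orchestrator.NewOrchestrator()
    orch.RegisterScanner("apt", apt.NewScanner(), circuitbreaker.New(3, time.Minute), 2*time.Minute, true)
    orch.RegisterScanner("storage", storage.NewScanner(), circuitbreaker.New(3, time.Minute), 30*time.Second, true)

    // All registered scanners run concurrently inside ScanAll.
    results, updates := orch.ScanAll(ctx)
    stdout, stderr, exitCode := orchestrator.FormatScanSummary(results)
    log.Printf("scan finished: %d updates, exit=%d\n%s%s", len(updates), exitCode, stdout, stderr)
}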
aggregator-agent/internal/orchestrator/orchestrator.go (new file, 261 lines)
@@ -0,0 +1,261 @@
package orchestrator

import (
    "context"
    "fmt"
    "log"
    "sync"
    "time"

    "github.com/Fimeg/RedFlag/aggregator-agent/internal/circuitbreaker"
    "github.com/Fimeg/RedFlag/aggregator-agent/internal/client"
)

// Scanner represents a generic update scanner
type Scanner interface {
    // IsAvailable checks if the scanner is available on this system
    IsAvailable() bool

    // Scan performs the actual scanning and returns update items
    Scan() ([]client.UpdateReportItem, error)

    // Name returns the scanner name for logging
    Name() string
}
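Any subsystem only has to satisfy these three methods. A minimal sketch of a conforming scanner, assuming os/exec is imported; the LookPath probe and the empty Scan result are placeholders, not the agent's real APT logic:

// Sketch: a trivial Scanner implementation (illustrative only).
type aptScanner struct{}

func (s *aptScanner) Name() string { return "apt" }

func (s *aptScanner) IsAvailable() bool {
    // Only available on systems that ship apt-get.
    _, err := exec.LookPath("apt-get")
    return err == nil
}

func (s *aptScanner) Scan() ([]client.UpdateReportItem, error) {
    // A real implementation would parse `apt-get -s upgrade` output here.
    return nil, nil
}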

// ScannerConfig holds configuration for a single scanner
type ScannerConfig struct {
    Scanner        Scanner
    CircuitBreaker *circuitbreaker.CircuitBreaker
    Timeout        time.Duration
    Enabled        bool
}

// ScanResult holds the result of a scanner execution
type ScanResult struct {
    ScannerName string
    Updates     []client.UpdateReportItem
    Error       error
    Duration    time.Duration
    Status      string // "success", "failed", "disabled", "unavailable", "skipped"
}

// Orchestrator manages and coordinates multiple scanners
type Orchestrator struct {
    scanners map[string]*ScannerConfig
    mu       sync.RWMutex
}

// NewOrchestrator creates a new scanner orchestrator
func NewOrchestrator() *Orchestrator {
    return &Orchestrator{
        scanners: make(map[string]*ScannerConfig),
    }
}

// RegisterScanner adds a scanner to the orchestrator
func (o *Orchestrator) RegisterScanner(name string, scanner Scanner, cb *circuitbreaker.CircuitBreaker, timeout time.Duration, enabled bool) {
    o.mu.Lock()
    defer o.mu.Unlock()

    o.scanners[name] = &ScannerConfig{
        Scanner:        scanner,
        CircuitBreaker: cb,
        Timeout:        timeout,
        Enabled:        enabled,
    }
}

// ScanAll executes all registered scanners in parallel
func (o *Orchestrator) ScanAll(ctx context.Context) ([]ScanResult, []client.UpdateReportItem) {
    o.mu.RLock()
    defer o.mu.RUnlock()

    var wg sync.WaitGroup
    resultsChan := make(chan ScanResult, len(o.scanners))

    // Launch a goroutine for each scanner
    for name, scannerConfig := range o.scanners {
        wg.Add(1)
        go func(name string, cfg *ScannerConfig) {
            defer wg.Done()
            result := o.executeScan(ctx, name, cfg)
            resultsChan <- result
        }(name, scannerConfig)
    }

    // Wait for all scanners to complete
    wg.Wait()
    close(resultsChan)

    // Collect results
    var results []ScanResult
    var allUpdates []client.UpdateReportItem

    for result := range resultsChan {
        results = append(results, result)
        if result.Error == nil && len(result.Updates) > 0 {
            allUpdates = append(allUpdates, result.Updates...)
        }
    }

    return results, allUpdates
}

// ScanSingle executes a single scanner by name
func (o *Orchestrator) ScanSingle(ctx context.Context, scannerName string) (ScanResult, error) {
    o.mu.RLock()
    defer o.mu.RUnlock()

    cfg, exists := o.scanners[scannerName]
    if !exists {
        err := fmt.Errorf("scanner not found: %s", scannerName)
        return ScanResult{
            ScannerName: scannerName,
            Status:      "failed",
            Error:       err,
        }, err
    }

    return o.executeScan(ctx, scannerName, cfg), nil
}
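ScanSingle is what backs the new per-subsystem commands. A sketch of how the dispatch might look; the command names come from the commit message, but the handler shape is an assumption:

// Sketch: mapping agent commands onto ScanSingle (handler shape is assumed).
func handleCommand(ctx context.Context, orch *orchestrator.Orchestrator, cmd string) (string, string, int) {
    scannerByCommand := map[string]string{
        "scan_storage": "storage",
        "scan_system":  "system",
        "scan_docker":  "docker",
    }
    name, ok := scannerByCommand[cmd]
    if !ok {
        return "", fmt.Sprintf("unknown command: %s", cmd), 1
    }
    result, err := orch.ScanSingle(ctx, name)
    if err != nil {
        return "", err.Error(), 1
    }
    return orchestrator.FormatScanSummary([]orchestrator.ScanResult{result})
}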

// executeScan runs a single scanner with circuit breaker and timeout protection
func (o *Orchestrator) executeScan(ctx context.Context, name string, cfg *ScannerConfig) ScanResult {
    result := ScanResult{
        ScannerName: name,
        Status:      "failed",
    }

    startTime := time.Now()
    defer func() {
        result.Duration = time.Since(startTime)
    }()

    // Check if enabled
    if !cfg.Enabled {
        result.Status = "disabled"
        log.Printf("[%s] Scanner disabled via configuration", name)
        return result
    }

    // Check if available
    if !cfg.Scanner.IsAvailable() {
        result.Status = "unavailable"
        log.Printf("[%s] Scanner not available on this system", name)
        return result
    }

    // Execute with circuit breaker and timeout
    log.Printf("[%s] Starting scan...", name)

    var updates []client.UpdateReportItem

    err := cfg.CircuitBreaker.Call(func() error {
        // Create timeout context
        timeoutCtx, cancel := context.WithTimeout(ctx, cfg.Timeout)
        defer cancel()

        // Channel for the scan result; buffered so the goroutine can
        // still send and exit even after we have returned on timeout.
        type scanResult struct {
            updates []client.UpdateReportItem
            err     error
        }
        scanChan := make(chan scanResult, 1)

        // Run scan in goroutine
        go func() {
            u, e := cfg.Scanner.Scan()
            scanChan <- scanResult{updates: u, err: e}
        }()

        // Wait for scan or timeout
        select {
        case <-timeoutCtx.Done():
            return fmt.Errorf("scan timeout after %v", cfg.Timeout)
        case res := <-scanChan:
            if res.err != nil {
                return res.err
            }
            updates = res.updates
            return nil
        }
    })

    if err != nil {
        result.Error = err
        result.Status = "failed"
        log.Printf("[%s] Scan failed: %v", name, err)
        return result
    }

    result.Updates = updates
    result.Status = "success"
    // result.Duration is only set by the deferred func on return, so
    // compute the elapsed time directly for this log line.
    log.Printf("[%s] Scan completed: found %d updates (took %v)", name, len(updates), time.Since(startTime))

    return result
}
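The timeout path is worth exercising directly. A sketch of an in-package test using a deliberately slow fake scanner; the fake type is hypothetical and circuitbreaker.New is an assumed constructor (substitute whatever that package actually provides):

// Sketch: exercising the timeout path with a fake scanner (illustrative).
type slowScanner struct{}

func (s *slowScanner) Name() string      { return "slow" }
func (s *slowScanner) IsAvailable() bool { return true }
func (s *slowScanner) Scan() ([]client.UpdateReportItem, error) {
    time.Sleep(10 * time.Second) // far past the configured timeout
    return nil, nil
}

func TestScanTimeout(t *testing.T) {
    orch := NewOrchestrator()
    orch.RegisterScanner("slow", &slowScanner{}, circuitbreaker.New(3, time.Minute), 100*time.Millisecond, true)

    result, _ := orch.ScanSingle(context.Background(), "slow")
    if result.Status != "failed" || result.Error == nil {
        t.Fatalf("expected timeout failure, got status=%q err=%v", result.Status, result.Error)
    }
}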

// GetScannerNames returns a list of all registered scanner names
func (o *Orchestrator) GetScannerNames() []string {
    o.mu.RLock()
    defer o.mu.RUnlock()

    names := make([]string, 0, len(o.scanners))
    for name := range o.scanners {
        names = append(names, name)
    }
    return names
}

// FormatScanSummary creates a human-readable summary of scan results
func FormatScanSummary(results []ScanResult) (stdout string, stderr string, exitCode int) {
    var successResults []string
    var errorMessages []string
    totalUpdates := 0

    for _, result := range results {
        switch result.Status {
        case "success":
            msg := fmt.Sprintf("%s: Found %d updates (%.2fs)",
                result.ScannerName, len(result.Updates), result.Duration.Seconds())
            successResults = append(successResults, msg)
            totalUpdates += len(result.Updates)

        case "failed":
            msg := fmt.Sprintf("%s: %v", result.ScannerName, result.Error)
            errorMessages = append(errorMessages, msg)

        case "disabled":
            successResults = append(successResults, fmt.Sprintf("%s: Disabled", result.ScannerName))

        case "unavailable":
            successResults = append(successResults, fmt.Sprintf("%s: Not available", result.ScannerName))
        }
    }

    // Build stdout
    if len(successResults) > 0 {
        stdout = "Scan Results:\n"
        for _, msg := range successResults {
            stdout += fmt.Sprintf(" - %s\n", msg)
        }
        stdout += fmt.Sprintf("\nTotal Updates Found: %d\n", totalUpdates)
    }

    // Build stderr
    if len(errorMessages) > 0 {
        stderr = "Scan Errors:\n"
        for _, msg := range errorMessages {
            stderr += fmt.Sprintf(" - %s\n", msg)
        }
    }

    // Exit code is 1 if any scanner failed; the named return defaults to 0.
    if len(errorMessages) > 0 {
        exitCode = 1
    }

    return stdout, stderr, exitCode
}
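For reference, the shape of the summary this produces, given one success and one timed-out scanner (the names and numbers are illustrative, the layout follows the format strings above):

stdout:
Scan Results:
 - apt: Found 12 updates (1.53s)

Total Updates Found: 12

stderr:
Scan Errors:
 - docker: scan timeout after 2m0s

exit code: 1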