feat: add resilience and reliability features for agent subsystems
Added circuit breakers with configurable timeouts for all subsystems (APT, DNF, Docker, Windows, Winget, Storage). Replaced the cron-based scheduler with a priority queue that should scale beyond 1000 agents, if your homelab is that big. The command acknowledgment system ensures results aren't lost on network failures or restarts. The agent tracks pending acknowledgments with persistent state and automatic retry.
- Circuit breakers: 3 failures in 1min opens circuit, 30s cooldown
- Per-subsystem timeouts: 30s-10min depending on scanner
- Priority queue scheduler: O(log n), worker pool, jitter, backpressure
- Acknowledgments: at-least-once delivery, max 10 retries over 24h
- All tests passing (26/26)
This commit is contained in:
55
aggregator-agent/test_disk.go
Normal file
55
aggregator-agent/test_disk.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"github.com/Fimeg/RedFlag/aggregator-agent/internal/system"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Test lightweight metrics (most common use case)
|
||||
fmt.Println("=== Enhanced Lightweight Metrics Test ===")
|
||||
metrics, err := system.GetLightweightMetrics()
|
||||
if err != nil {
|
||||
log.Printf("Error getting lightweight metrics: %v", err)
|
||||
} else {
|
||||
// Pretty print the JSON
|
||||
jsonData, _ := json.MarshalIndent(metrics, "", " ")
|
||||
fmt.Printf("LightweightMetrics:\n%s\n\n", jsonData)
|
||||
|
||||
// Show key findings
|
||||
fmt.Printf("Root Disk: %.1fGB used / %.1fGB total (%.1f%%)\n",
|
||||
metrics.DiskUsedGB, metrics.DiskTotalGB, metrics.DiskPercent)
|
||||
|
||||
if metrics.LargestDiskTotalGB > 0 {
|
||||
fmt.Printf("Largest Disk (%s): %.1fGB used / %.1fGB total (%.1f%%)\n",
|
||||
metrics.LargestDiskMount, metrics.LargestDiskUsedGB, metrics.LargestDiskTotalGB, metrics.LargestDiskPercent)
|
||||
} else {
|
||||
fmt.Printf("No largest disk detected (this might be the issue!)\n")
|
||||
}
|
||||
}
|
||||
|
||||
// Test full system info (detailed disk inventory)
|
||||
fmt.Println("\n=== Enhanced System Info Test ===")
|
||||
sysInfo, err := system.GetSystemInfo("test-v0.1.5")
|
||||
if err != nil {
|
||||
log.Printf("Error getting system info: %v", err)
|
||||
} else {
|
||||
fmt.Printf("Found %d disks:\n", len(sysInfo.DiskInfo))
|
||||
for i, disk := range sysInfo.DiskInfo {
|
||||
fmt.Printf(" Disk %d: %s (%s) - %s, %.1fGB used / %.1fGB total (%.1f%%)",
|
||||
i+1, disk.Mountpoint, disk.Filesystem, disk.DiskType,
|
||||
float64(disk.Used)/(1024*1024*1024), float64(disk.Total)/(1024*1024*1024), disk.UsedPercent)
|
||||
|
||||
if disk.IsRoot {
|
||||
fmt.Printf(" [ROOT]")
|
||||
}
|
||||
if disk.IsLargest {
|
||||
fmt.Printf(" [LARGEST]")
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user