Files
Redflag/aggregator-agent/internal/circuitbreaker/circuitbreaker_test.go
Fimeg bf4d46529f feat: add resilience and reliability features for agent subsystems
Added circuit breakers with configurable timeouts for all subsystems (APT, DNF, Docker, Windows, Winget, Storage). Replaces the cron-based scheduler with a priority queue that should scale beyond 1,000 agents, if your homelab is that big.

Command acknowledgment system ensures results aren't lost on network failures or restarts. Agent tracks pending acknowledgments with persistent state and automatic retry.

- Circuit breakers: 3 failures in 1min opens circuit, 30s cooldown
- Per-subsystem timeouts: 30s-10min depending on scanner
- Priority queue scheduler: O(log n), worker pool, jitter, backpressure
- Acknowledgments: at-least-once delivery, max 10 retries over 24h
- All tests passing (26/26)
2025-11-01 18:42:41 -04:00

139 lines
3.3 KiB
Go

package circuitbreaker
import (
"errors"
"testing"
"time"
)
func TestCircuitBreaker_NormalOperation(t *testing.T) {
cb := New("test", Config{
FailureThreshold: 3,
FailureWindow: 1 * time.Minute,
OpenDuration: 1 * time.Minute,
HalfOpenAttempts: 2,
})
// Should allow calls in closed state
err := cb.Call(func() error { return nil })
if err != nil {
t.Fatalf("expected no error, got %v", err)
}
if cb.State() != StateClosed {
t.Fatalf("expected state closed, got %v", cb.State())
}
}
// TestCircuitBreaker_OpensAfterFailures verifies that the breaker reports the
// open state once FailureThreshold calls have failed, and that a following
// call returns an error even though the supplied function would succeed.
func TestCircuitBreaker_OpensAfterFailures(t *testing.T) {
	// Single source of truth for the config, the loop bound and the message.
	const threshold = 3

	// This test never needs the circuit to leave the open state, so the open
	// period is deliberately long: a short one could lapse on a slow or heavily
	// loaded machine before the fail-fast assertion runs, making the test flaky.
	cb := New("test", Config{
		FailureThreshold: threshold,
		FailureWindow:    1 * time.Minute,
		OpenDuration:     1 * time.Minute,
		HalfOpenAttempts: 2,
	})

	testErr := errors.New("test error")

	// Record exactly `threshold` failures. The returned errors are ignored on
	// purpose: only the resulting state transition is under test here.
	for i := 0; i < threshold; i++ {
		_ = cb.Call(func() error { return testErr })
	}

	// Should now be open.
	if state := cb.State(); state != StateOpen {
		t.Fatalf("expected state open after %d failures, got %v", threshold, state)
	}

	// Next call should fail fast.
	if err := cb.Call(func() error { return nil }); err == nil {
		t.Fatal("expected circuit breaker to reject call, but it succeeded")
	}
}
// TestCircuitBreaker_HalfOpenRecovery walks the breaker through the
// open -> half-open -> closed sequence: two failures open it, a successful
// call made after the open period has elapsed leaves it half-open, and a
// second successful call closes it.
func TestCircuitBreaker_HalfOpenRecovery(t *testing.T) {
	breaker := New("test", Config{
		FailureThreshold: 2,
		FailureWindow:    time.Minute,
		OpenDuration:     50 * time.Millisecond,
		HalfOpenAttempts: 2,
	})

	errBoom := errors.New("test error")
	fail := func() error { return errBoom }
	succeed := func() error { return nil }

	// Two failures reach the configured threshold.
	for attempt := 0; attempt < 2; attempt++ {
		breaker.Call(fail)
	}
	if breaker.State() != StateOpen {
		t.Fatal("circuit should be open")
	}

	// Sleep slightly longer than the 50ms open period.
	time.Sleep(60 * time.Millisecond)

	// The first call after the wait is expected to be let through and to
	// leave the breaker half-open.
	if err := breaker.Call(succeed); err != nil {
		t.Fatalf("expected call to succeed in half-open state, got %v", err)
	}
	if state := breaker.State(); state != StateHalfOpen {
		t.Fatalf("expected half-open state, got %v", state)
	}

	// A second success matches HalfOpenAttempts and should close the circuit.
	breaker.Call(succeed)
	if state := breaker.State(); state != StateClosed {
		t.Fatalf("expected closed state after %d successes, got %v", 2, state)
	}
}
// TestCircuitBreaker_HalfOpenFailure verifies that a failing call made while
// the breaker is half-open sends it back to the open state.
//
// The intermediate states are asserted as well as the final one. Without those
// checks the test could pass vacuously: a breaker that never left the open
// state would still satisfy the final "expected open" assertion.
func TestCircuitBreaker_HalfOpenFailure(t *testing.T) {
	cb := New("test", Config{
		FailureThreshold: 2,
		FailureWindow:    1 * time.Minute,
		OpenDuration:     50 * time.Millisecond,
		HalfOpenAttempts: 2,
	})

	testErr := errors.New("test error")

	// Open the circuit. The returned errors are ignored on purpose; the state
	// check below is what confirms the setup worked.
	_ = cb.Call(func() error { return testErr })
	_ = cb.Call(func() error { return testErr })
	if state := cb.State(); state != StateOpen {
		t.Fatalf("expected open state after %d failures, got %v", 2, state)
	}

	// Wait out the open period, then probe with a successful call.
	time.Sleep(60 * time.Millisecond)
	if err := cb.Call(func() error { return nil }); err != nil {
		t.Fatalf("expected call to succeed in half-open state, got %v", err)
	}
	// Confirm the breaker really is half-open before exercising the failure.
	if state := cb.State(); state != StateHalfOpen {
		t.Fatalf("expected half-open state before failing call, got %v", state)
	}

	// Fail in half-open - should go back to open.
	_ = cb.Call(func() error { return testErr })
	if state := cb.State(); state != StateOpen {
		t.Fatalf("expected open state after half-open failure, got %v", state)
	}
}
func TestCircuitBreaker_Stats(t *testing.T) {
cb := New("test-subsystem", Config{
FailureThreshold: 3,
FailureWindow: 1 * time.Minute,
OpenDuration: 1 * time.Minute,
HalfOpenAttempts: 2,
})
stats := cb.GetStats()
if stats.Name != "test-subsystem" {
t.Fatalf("expected name 'test-subsystem', got %s", stats.Name)
}
if stats.State != "closed" {
t.Fatalf("expected state 'closed', got %s", stats.State)
}
if stats.RecentFailures != 0 {
t.Fatalf("expected 0 failures, got %d", stats.RecentFailures)
}
}