test(concurrency): B-2 pre-fix tests for data integrity and concurrency bugs
Pre-fix test suite documenting 7 data integrity and concurrency bugs. Tests FAIL where they assert correct post-fix behavior and PASS where they document the current buggy state.

Tests added:
- F-B2-1/8 HIGH: Registration not transactional (3 tests)
- F-B2-2 MEDIUM: Command delivery race condition (3 tests)
- F-B2-9 MEDIUM: Token renewal not transactional (2 tests)
- F-B2-4 MEDIUM: No rate limit on GetCommands (3 tests)
- F-B2-5 LOW: Jitter negates rapid mode (2 tests)
- F-B2-10 LOW: No max retry for stuck commands (2 tests)
- F-B2-7 MEDIUM: No exponential backoff on reconnection (2 tests)

Current state: 7 FAIL, 10 PASS. No A/B-1 regressions.
See docs/B2_PreFix_Tests.md for full inventory.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
107
aggregator-agent/internal/polling_jitter_test.go
Normal file
107
aggregator-agent/internal/polling_jitter_test.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package internal_test
|
||||
|
||||
// polling_jitter_test.go — Pre-fix tests for jitter negating rapid mode.
|
||||
//
|
||||
// F-B2-5 LOW: 30-second jitter is applied uniformly to ALL polling
|
||||
// intervals including rapid mode (5 seconds). Rapid mode becomes
|
||||
// effectively 5-35 seconds instead of 5 seconds.
|
||||
//
|
||||
// Run: cd aggregator-agent && go test ./internal/... -v -run TestJitter
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 5.1 — Documents jitter exceeding rapid mode interval (F-B2-5)
|
||||
//
|
||||
// Category: PASS-NOW (documents the bug)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// TestJitterExceedsRapidModeInterval documents finding F-B2-5 (LOW) by
// scanning the text of ../cmd/agent/main.go, resolved relative to the
// working directory of the test run.
//
// Category: PASS-NOW. The only failing condition is the absence of the
// literal "Intn(30)" in the scanned source; every other observation is
// emitted as a log line. The "confirmed" message is logged only when that
// literal was actually found, so it never follows a contradicting error.
func TestJitterExceedsRapidModeInterval(t *testing.T) {
	// F-B2-5 LOW: Startup jitter (0-30s) is applied uniformly to ALL
	// polling intervals including rapid mode. Rapid mode (5s) becomes
	// effectively 5-35s. The jitter should be capped at the polling
	// interval or not applied when interval < jitter range.
	mainPath := filepath.Join("..", "cmd", "agent", "main.go")
	content, err := os.ReadFile(mainPath)
	if err != nil {
		t.Fatalf("failed to read agent main.go: %v", err)
	}
	src := string(content)

	// "Intn(30)" also matches the package-qualified form "rand.Intn(30)",
	// so a single substring check covers both spellings.
	hasFixedJitter := strings.Contains(src, "Intn(30)")

	// Heuristic marker for the rapid-mode interval: a literal "return 5".
	hasRapidInterval := strings.Contains(src, "return 5")

	if !hasFixedJitter {
		t.Error("[ERROR] [agent] [polling] expected fixed 30-second jitter in main.go")
	}

	if !hasRapidInterval {
		t.Log("[WARNING] [agent] [polling] could not confirm rapid mode 5-second interval")
	}

	// Coarse, file-wide signal only: both a "RapidPolling" token and a
	// "jitter"/"Jitter" token appear somewhere in the source. It does not
	// prove that the jitter itself sits inside a rapid-mode conditional.
	hasConditionalJitter := strings.Contains(src, "RapidPolling") &&
		(strings.Contains(src, "jitter") || strings.Contains(src, "Jitter"))
	if hasConditionalJitter {
		t.Log("[INFO] [agent] [polling] jitter may already be conditional on rapid mode")
	}

	// Only claim confirmation when the fixed jitter was actually observed.
	if hasFixedJitter {
		t.Log("[INFO] [agent] [polling] F-B2-5 confirmed: 30s jitter applied to all intervals including 5s rapid mode")
	}
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 5.2 — Jitter must not exceed polling interval (assert fix)
|
||||
//
|
||||
// Category: FAIL-NOW / PASS-AFTER-FIX
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// TestJitterDoesNotExceedPollingInterval asserts the post-fix state for
// finding F-B2-5 by scanning the text of ../cmd/agent/main.go.
//
// Category: FAIL-NOW / PASS-AFTER-FIX. If the literal "rand.Intn(30)" is not
// present the test logs a note and passes. Otherwise it requires one of the
// rapid-polling identifiers to appear in the (at most) 400 bytes immediately
// preceding the first occurrence of that literal, and reports an error when
// none does.
func TestJitterDoesNotExceedPollingInterval(t *testing.T) {
	raw, readErr := os.ReadFile(filepath.Join("..", "cmd", "agent", "main.go"))
	if readErr != nil {
		t.Fatalf("failed to read agent main.go: %v", readErr)
	}
	source := string(raw)

	// Locate the first fixed-jitter call; nothing to check if it is gone.
	pos := strings.Index(source, "rand.Intn(30)")
	if pos < 0 {
		t.Log("[INFO] [agent] [polling] fixed 30s jitter not found (may be refactored)")
		return
	}

	// Inspect up to 400 bytes of source leading up to the jitter call.
	from := 0
	if pos > 400 {
		from = pos - 400
	}
	window := source[from:pos]

	// Any spelling of the rapid-polling identifier in that window counts
	// as a guard.
	for _, marker := range []string{"RapidPolling", "rapidPolling", "rapid_polling"} {
		if strings.Contains(window, marker) {
			return
		}
	}

	t.Errorf("[ERROR] [agent] [polling] jitter is not guarded for rapid mode.\n" +
		"F-B2-5: 30s fixed jitter on 5s rapid interval makes rapid mode ineffective.\n" +
		"After fix: cap jitter at pollingInterval/2 or skip in rapid mode.")
}
|
||||
106
aggregator-agent/internal/reconnect_stagger_test.go
Normal file
106
aggregator-agent/internal/reconnect_stagger_test.go
Normal file
@@ -0,0 +1,106 @@
|
||||
package internal_test
|
||||
|
||||
// reconnect_stagger_test.go — Pre-fix tests for thundering herd on reconnection.
|
||||
//
|
||||
// F-B2-7 MEDIUM: Agent reconnection uses only a fixed 30-second jitter.
|
||||
// After a server restart, all agents retry within 30 seconds of the
|
||||
// server becoming available, causing a traffic spike.
|
||||
//
|
||||
// Run: cd aggregator-agent && go test ./internal/... -v -run TestReconnect
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 7.1 — Documents fixed jitter only (F-B2-7)
|
||||
//
|
||||
// Category: PASS-NOW (documents the bug)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// TestReconnectionUsesFixedJitterOnly documents finding F-B2-7 (MEDIUM) by
// scanning the text of ../cmd/agent/main.go, resolved relative to the
// working directory of the test run.
//
// Category: PASS-NOW. The test reports an error when the literal
// "rand.Intn(30)" is missing, or when backoff wording is found in the
// window of source surrounding the GetCommands call. The "confirmed" log
// lines are emitted only when neither error was reported.
func TestReconnectionUsesFixedJitterOnly(t *testing.T) {
	// F-B2-7 MEDIUM: Agent reconnection uses only a fixed 30-second
	// jitter. After a server restart, all agents that were waiting
	// begin retrying within 30 seconds. True thundering herd mitigation
	// requires exponential backoff with full jitter.
	mainPath := filepath.Join("..", "cmd", "agent", "main.go")
	content, err := os.ReadFile(mainPath)
	if err != nil {
		t.Fatalf("failed to read agent main.go: %v", err)
	}
	src := string(content)

	// Check for the fixed jitter pattern.
	hasFixedJitter := strings.Contains(src, "rand.Intn(30)")

	// Anchor on the GetCommands call site, preferring the more specific
	// spelling and falling back to any "GetCommands(" occurrence.
	pollLoopIdx := strings.Index(src, "GetCommands(cfg.AgentID")
	if pollLoopIdx == -1 {
		pollLoopIdx = strings.Index(src, "GetCommands(")
	}

	hasExpBackoffInPollLoop := false
	if pollLoopIdx > 0 {
		// Examine up to 500 bytes on either side of the call. Both ends
		// are clamped to the source: without the upper clamp the slice
		// expression panics whenever the call is within 500 bytes of the
		// end of the file.
		windowStart := pollLoopIdx - 500
		if windowStart < 0 {
			windowStart = 0
		}
		windowEnd := pollLoopIdx + 500
		if windowEnd > len(src) {
			windowEnd = len(src)
		}
		window := strings.ToLower(src[windowStart:windowEnd])
		hasExpBackoffInPollLoop = strings.Contains(window, "exponential backoff") ||
			(strings.Contains(window, "backoff") && strings.Contains(window, "attempt"))
	}

	if !hasFixedJitter {
		t.Error("[ERROR] [agent] [polling] expected fixed jitter in main.go")
	}

	if hasExpBackoffInPollLoop {
		t.Error("[ERROR] [agent] [polling] F-B2-7 already fixed: exponential backoff in polling loop")
	}

	// Only claim confirmation when the evidence supports it.
	if hasFixedJitter && !hasExpBackoffInPollLoop {
		t.Log("[INFO] [agent] [polling] F-B2-7 confirmed: reconnection uses fixed 30s jitter only")
		t.Log("[INFO] [agent] [polling] all agents recovering from outage retry within a 30s window")
	}
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 7.2 — Must use exponential backoff with jitter (assert fix)
|
||||
//
|
||||
// Category: FAIL-NOW / PASS-AFTER-FIX
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// TestReconnectionUsesExponentialBackoffWithJitter asserts the post-fix state
// for finding F-B2-7 by scanning the lower-cased text of
// ../cmd/agent/main.go.
//
// Category: FAIL-NOW / PASS-AFTER-FIX. Starting at the first occurrence of
// "getcommands" (which must be at an offset greater than zero), the test
// looks at up to 2000 bytes of source and passes when that window contains
// "exponential", or both "backoff" and "attempt". In every other case it
// reports an error. The intended fix is exponential backoff with full
// jitter: delay = rand(0, min(cap, base * 2^attempt)).
func TestReconnectionUsesExponentialBackoffWithJitter(t *testing.T) {
	raw, readErr := os.ReadFile(filepath.Join("..", "cmd", "agent", "main.go"))
	if readErr != nil {
		t.Fatalf("failed to read agent main.go: %v", readErr)
	}
	lowered := strings.ToLower(string(raw))

	// Anchor on the first GetCommands mention, compared case-insensitively.
	anchor := strings.Index(lowered, "getcommands")
	if anchor > 0 {
		// Limit the search to the 2000 bytes that follow the anchor.
		end := anchor + 2000
		if end > len(lowered) {
			end = len(lowered)
		}
		window := lowered[anchor:end]

		if strings.Contains(window, "exponential") {
			return
		}
		if strings.Contains(window, "backoff") && strings.Contains(window, "attempt") {
			return
		}
	}

	t.Errorf("[ERROR] [agent] [polling] no exponential backoff found in reconnection logic.\n" +
		"F-B2-7: implement exponential backoff with full jitter for reconnection.\n" +
		"After fix: delay increases with each consecutive failure.")
}
|
||||
Reference in New Issue
Block a user