fix(database): B-1 schema integrity and migration fixes

- Fix migration 024 self-insert and bad column reference (F-B1-1, F-B1-2)
  Uses the existing enabled/auto_run columns instead of the non-existent "deprecated" column
- Abort server on migration failure instead of warning (F-B1-11)
  main.go now calls log.Fatalf and prints [INFO] only on success
- Fix migration 018 scanner_config filename suffix (F-B1-3)
  Renumbered to 027 with .up.sql suffix
- Remove GRANT to non-existent role in scanner_config (F-B1-4)
- Resolve duplicate migration numbers 009 and 012 (F-B1-13)
  Renamed to 009b and 012b for unique lexical sorting
- Add IF NOT EXISTS to all non-idempotent migrations (F-B1-15)
  Fixed: 011, 012, 017, 023, 023a
- Replace N+1 dashboard stats loop with GetAllUpdateStats (F-B1-6)
  Single aggregate query replaces per-agent loop
- Add composite index on agent_commands(status, sent_at) (F-B1-5)
  New migration 028 with partial index for timeout service
- Add background refresh token cleanup goroutine (F-B1-10)
  24-hour ticker calls CleanupExpiredTokens
- ETHOS log format in migration runner (no emojis)

All 55 tests pass (41 server + 14 agent). No regressions.
See docs/B1_Fix_Implementation.md and DEV-025 through DEV-028.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -36,44 +36,38 @@ type DashboardStats struct {
|
||||
UpdatesByType map[string]int `json:"updates_by_type"`
|
||||
}
|
||||
|
||||
// GetDashboardStats returns dashboard statistics using the new state table
|
||||
// GetDashboardStats returns dashboard statistics using aggregate queries (F-B1-6 fix)
|
||||
func (h *StatsHandler) GetDashboardStats(c *gin.Context) {
|
||||
// Get all agents
|
||||
// Get all agents for online/offline count
|
||||
agents, err := h.agentQueries.ListAgents("", "")
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to get agents"})
|
||||
return
|
||||
}
|
||||
|
||||
// Calculate stats
|
||||
stats := DashboardStats{
|
||||
TotalAgents: len(agents),
|
||||
UpdatesByType: make(map[string]int),
|
||||
TotalAgents: len(agents),
|
||||
UpdatesByType: make(map[string]int),
|
||||
}
|
||||
|
||||
// Count online/offline agents based on last_seen timestamp
|
||||
// Count online/offline agents
|
||||
for _, agent := range agents {
|
||||
// Consider agent online if it has checked in within the last 10 minutes
|
||||
if time.Since(agent.LastSeen) <= 10*time.Minute {
|
||||
stats.OnlineAgents++
|
||||
} else {
|
||||
stats.OfflineAgents++
|
||||
}
|
||||
}
|
||||
|
||||
// Get update stats for each agent using the new state table
|
||||
agentStats, err := h.updateQueries.GetUpdateStatsFromState(agent.ID)
|
||||
if err != nil {
|
||||
// Log error but continue with other agents
|
||||
continue
|
||||
}
|
||||
|
||||
// Aggregate stats across all agents
|
||||
stats.PendingUpdates += agentStats.PendingUpdates
|
||||
stats.FailedUpdates += agentStats.FailedUpdates
|
||||
stats.CriticalUpdates += agentStats.CriticalUpdates
|
||||
stats.ImportantUpdates += agentStats.ImportantUpdates
|
||||
stats.ModerateUpdates += agentStats.ModerateUpdates
|
||||
stats.LowUpdates += agentStats.LowUpdates
|
||||
// Single aggregate query for all update stats (replaces N+1 per-agent loop)
|
||||
updateStats, err := h.updateQueries.GetAllUpdateStats()
|
||||
if err == nil {
|
||||
stats.PendingUpdates = updateStats.PendingUpdates
|
||||
stats.FailedUpdates = updateStats.FailedUpdates
|
||||
stats.CriticalUpdates = updateStats.CriticalUpdates
|
||||
stats.ImportantUpdates = updateStats.ImportantUpdates
|
||||
stats.ModerateUpdates = updateStats.ModerateUpdates
|
||||
stats.LowUpdates = updateStats.LowUpdates
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, stats)
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
package handlers_test
|
||||
|
||||
// stats_n1_test.go — Pre-fix tests for N+1 query in GetDashboardStats.
|
||||
// stats_n1_test.go — Tests for N+1 query fix in GetDashboardStats.
|
||||
//
|
||||
// F-B1-6 HIGH: GetDashboardStats (stats.go) executes one DB query per agent
|
||||
// on every dashboard load. With 100 agents = 101 queries per request.
|
||||
//
|
||||
// Run: cd aggregator-server && go test ./internal/api/handlers/... -v -run TestGetDashboardStats
|
||||
// F-B1-6 FIXED: GetDashboardStats now uses GetAllUpdateStats() (single
|
||||
// aggregate query) instead of GetUpdateStatsFromState() per agent.
|
||||
|
||||
import (
|
||||
"os"
|
||||
@@ -14,13 +12,8 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 5.1 — Documents the N+1 loop (F-B1-6)
|
||||
//
|
||||
// Category: PASS-NOW (documents the bug)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestGetDashboardStatsHasNPlusOneLoop(t *testing.T) {
|
||||
// POST-FIX: GetUpdateStatsFromState should NOT be called inside a loop
|
||||
statsPath := filepath.Join(".", "stats.go")
|
||||
content, err := os.ReadFile(statsPath)
|
||||
if err != nil {
|
||||
@@ -29,45 +22,30 @@ func TestGetDashboardStatsHasNPlusOneLoop(t *testing.T) {
|
||||
|
||||
src := string(content)
|
||||
|
||||
// Check for the N+1 pattern: query called inside a range loop
|
||||
hasListAgents := strings.Contains(src, "ListAgents")
|
||||
hasLoopQuery := strings.Contains(src, "GetUpdateStatsFromState")
|
||||
|
||||
// Both patterns should be present — ListAgents followed by a per-agent query
|
||||
if !hasListAgents || !hasLoopQuery {
|
||||
t.Error("[ERROR] [server] [handlers] F-B1-6 already fixed: " +
|
||||
"N+1 loop pattern not found in stats.go")
|
||||
}
|
||||
|
||||
// Check that the query is inside a for/range loop
|
||||
// Find "for _, agent := range" and then "GetUpdateStatsFromState" after it
|
||||
// The old pattern: GetUpdateStatsFromState inside a range loop
|
||||
forIdx := strings.Index(src, "for _, agent := range")
|
||||
if forIdx == -1 {
|
||||
forIdx = strings.Index(src, "for _, a := range")
|
||||
}
|
||||
if forIdx == -1 {
|
||||
t.Error("[ERROR] [server] [handlers] no agent range loop found in stats.go")
|
||||
// No agent loop at all — that's fine if GetAllUpdateStats is used instead
|
||||
if strings.Contains(src, "GetAllUpdateStats") {
|
||||
t.Log("[INFO] [server] [handlers] F-B1-6 FIXED: using aggregate query instead of per-agent loop")
|
||||
return
|
||||
}
|
||||
t.Error("[ERROR] [server] [handlers] no agent loop AND no GetAllUpdateStats found")
|
||||
return
|
||||
}
|
||||
|
||||
queryIdx := strings.Index(src[forIdx:], "GetUpdateStatsFromState")
|
||||
if queryIdx == -1 {
|
||||
t.Error("[ERROR] [server] [handlers] F-B1-6 already fixed: " +
|
||||
"GetUpdateStatsFromState not inside agent loop")
|
||||
return
|
||||
// If there IS a loop, check that GetUpdateStatsFromState is NOT inside it
|
||||
loopBody := src[forIdx:]
|
||||
if len(loopBody) > 1000 {
|
||||
loopBody = loopBody[:1000]
|
||||
}
|
||||
if strings.Contains(loopBody, "GetUpdateStatsFromState") {
|
||||
t.Error("[ERROR] [server] [handlers] F-B1-6 NOT FIXED: GetUpdateStatsFromState still inside agent loop")
|
||||
} else {
|
||||
t.Log("[INFO] [server] [handlers] F-B1-6 FIXED: no per-agent query in loop")
|
||||
}
|
||||
|
||||
t.Log("[INFO] [server] [handlers] F-B1-6 confirmed: GetUpdateStatsFromState called inside agent loop")
|
||||
t.Log("[INFO] [server] [handlers] this executes 1 DB query per agent on every dashboard load")
|
||||
t.Log("[INFO] [server] [handlers] after fix: replace with single JOIN query")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 5.2 — Dashboard stats must NOT have per-agent query loop (assert fix)
|
||||
//
|
||||
// Category: FAIL-NOW / PASS-AFTER-FIX
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestGetDashboardStatsUsesJoin(t *testing.T) {
|
||||
statsPath := filepath.Join(".", "stats.go")
|
||||
content, err := os.ReadFile(statsPath)
|
||||
@@ -77,24 +55,23 @@ func TestGetDashboardStatsUsesJoin(t *testing.T) {
|
||||
|
||||
src := string(content)
|
||||
|
||||
// After fix: GetUpdateStatsFromState should NOT appear inside a for/range loop
|
||||
forIdx := strings.Index(src, "for _, agent := range")
|
||||
if forIdx == -1 {
|
||||
forIdx = strings.Index(src, "for _, a := range")
|
||||
// Must use GetAllUpdateStats (single aggregate) not GetUpdateStatsFromState (per-agent)
|
||||
if !strings.Contains(src, "GetAllUpdateStats") {
|
||||
t.Errorf("[ERROR] [server] [handlers] GetAllUpdateStats not found in stats.go.\n" +
|
||||
"F-B1-6: dashboard stats must use a single aggregate query.")
|
||||
}
|
||||
|
||||
// Must NOT have GetUpdateStatsFromState in a loop
|
||||
forIdx := strings.Index(src, "for _, agent := range")
|
||||
if forIdx != -1 {
|
||||
afterLoop := src[forIdx:]
|
||||
// Find the end of the loop body (next closing brace at same indentation)
|
||||
// Simplified: check if the query function appears within 500 chars of the for loop
|
||||
loopBody := afterLoop
|
||||
loopBody := src[forIdx:]
|
||||
if len(loopBody) > 1000 {
|
||||
loopBody = loopBody[:1000]
|
||||
}
|
||||
if strings.Contains(loopBody, "GetUpdateStatsFromState") {
|
||||
t.Errorf("[ERROR] [server] [handlers] GetUpdateStatsFromState is inside a per-agent loop.\n"+
|
||||
"F-B1-6: dashboard stats must use a single JOIN query, not a per-agent loop.\n"+
|
||||
"After fix: replace loop with aggregated query using LEFT JOIN.")
|
||||
t.Errorf("[ERROR] [server] [handlers] per-agent query still in loop")
|
||||
}
|
||||
}
|
||||
|
||||
t.Log("[INFO] [server] [handlers] F-B1-6 FIXED: uses aggregate query")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user