fix(database): B-1 schema integrity and migration fixes

- Fix migration 024 self-insert and bad column reference (F-B1-1, F-B1-2)
  Uses the existing enabled/auto_run columns instead of the non-existent `deprecated` column
- Abort server on migration failure instead of warning (F-B1-11)
  main.go now calls log.Fatalf, prints [INFO] only on success
- Fix migration 018 scanner_config filename suffix (F-B1-3)
  Renumbered to 027 with .up.sql suffix
- Remove GRANT to non-existent role in scanner_config (F-B1-4)
- Resolve duplicate migration numbers 009 and 012 (F-B1-13)
  Renamed to 009b and 012b for unique lexical sorting
- Add IF NOT EXISTS to all non-idempotent migrations (F-B1-15)
  Fixed: 011, 012, 017, 023, 023a
- Replace N+1 dashboard stats loop with GetAllUpdateStats (F-B1-6)
  Single aggregate query replaces per-agent loop
- Add composite index on agent_commands(status, sent_at) (F-B1-5)
  New migration 028 with partial index for timeout service
- Add background refresh token cleanup goroutine (F-B1-10)
  24-hour ticker calls CleanupExpiredTokens
- Use ETHOS log format in migration runner (no emojis)

All 55 tests pass (41 server + 14 agent). No regressions.
See docs/B1_Fix_Implementation.md and DEV-025 through DEV-028.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-29 07:03:35 -04:00
parent ab676c3b83
commit ec0d880036
33 changed files with 420 additions and 537 deletions

View File

@@ -36,44 +36,38 @@ type DashboardStats struct {
UpdatesByType map[string]int `json:"updates_by_type"`
}
// GetDashboardStats returns dashboard statistics using the new state table
// GetDashboardStats returns dashboard statistics using aggregate queries (F-B1-6 fix)
func (h *StatsHandler) GetDashboardStats(c *gin.Context) {
// Get all agents
// Get all agents for online/offline count
agents, err := h.agentQueries.ListAgents("", "")
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to get agents"})
return
}
// Calculate stats
stats := DashboardStats{
TotalAgents: len(agents),
UpdatesByType: make(map[string]int),
TotalAgents: len(agents),
UpdatesByType: make(map[string]int),
}
// Count online/offline agents based on last_seen timestamp
// Count online/offline agents
for _, agent := range agents {
// Consider agent online if it has checked in within the last 10 minutes
if time.Since(agent.LastSeen) <= 10*time.Minute {
stats.OnlineAgents++
} else {
stats.OfflineAgents++
}
}
// Get update stats for each agent using the new state table
agentStats, err := h.updateQueries.GetUpdateStatsFromState(agent.ID)
if err != nil {
// Log error but continue with other agents
continue
}
// Aggregate stats across all agents
stats.PendingUpdates += agentStats.PendingUpdates
stats.FailedUpdates += agentStats.FailedUpdates
stats.CriticalUpdates += agentStats.CriticalUpdates
stats.ImportantUpdates += agentStats.ImportantUpdates
stats.ModerateUpdates += agentStats.ModerateUpdates
stats.LowUpdates += agentStats.LowUpdates
// Single aggregate query for all update stats (replaces N+1 per-agent loop)
updateStats, err := h.updateQueries.GetAllUpdateStats()
if err == nil {
stats.PendingUpdates = updateStats.PendingUpdates
stats.FailedUpdates = updateStats.FailedUpdates
stats.CriticalUpdates = updateStats.CriticalUpdates
stats.ImportantUpdates = updateStats.ImportantUpdates
stats.ModerateUpdates = updateStats.ModerateUpdates
stats.LowUpdates = updateStats.LowUpdates
}
c.JSON(http.StatusOK, stats)

View File

@@ -1,11 +1,9 @@
package handlers_test
// stats_n1_test.go — Pre-fix tests for N+1 query in GetDashboardStats.
// stats_n1_test.go — Tests for N+1 query fix in GetDashboardStats.
//
// F-B1-6 HIGH: GetDashboardStats (stats.go) executes one DB query per agent
// on every dashboard load. With 100 agents = 101 queries per request.
//
// Run: cd aggregator-server && go test ./internal/api/handlers/... -v -run TestGetDashboardStats
// F-B1-6 FIXED: GetDashboardStats now uses GetAllUpdateStats() (single
// aggregate query) instead of GetUpdateStatsFromState() per agent.
import (
"os"
@@ -14,13 +12,8 @@ import (
"testing"
)
// ---------------------------------------------------------------------------
// Test 5.1 — Documents the N+1 loop (F-B1-6)
//
// Category: PASS-NOW (documents the bug)
// ---------------------------------------------------------------------------
func TestGetDashboardStatsHasNPlusOneLoop(t *testing.T) {
// POST-FIX: GetUpdateStatsFromState should NOT be called inside a loop
statsPath := filepath.Join(".", "stats.go")
content, err := os.ReadFile(statsPath)
if err != nil {
@@ -29,45 +22,30 @@ func TestGetDashboardStatsHasNPlusOneLoop(t *testing.T) {
src := string(content)
// Check for the N+1 pattern: query called inside a range loop
hasListAgents := strings.Contains(src, "ListAgents")
hasLoopQuery := strings.Contains(src, "GetUpdateStatsFromState")
// Both patterns should be present — ListAgents followed by a per-agent query
if !hasListAgents || !hasLoopQuery {
t.Error("[ERROR] [server] [handlers] F-B1-6 already fixed: " +
"N+1 loop pattern not found in stats.go")
}
// Check that the query is inside a for/range loop
// Find "for _, agent := range" and then "GetUpdateStatsFromState" after it
// The old pattern: GetUpdateStatsFromState inside a range loop
forIdx := strings.Index(src, "for _, agent := range")
if forIdx == -1 {
forIdx = strings.Index(src, "for _, a := range")
}
if forIdx == -1 {
t.Error("[ERROR] [server] [handlers] no agent range loop found in stats.go")
// No agent loop at all — that's fine if GetAllUpdateStats is used instead
if strings.Contains(src, "GetAllUpdateStats") {
t.Log("[INFO] [server] [handlers] F-B1-6 FIXED: using aggregate query instead of per-agent loop")
return
}
t.Error("[ERROR] [server] [handlers] no agent loop AND no GetAllUpdateStats found")
return
}
queryIdx := strings.Index(src[forIdx:], "GetUpdateStatsFromState")
if queryIdx == -1 {
t.Error("[ERROR] [server] [handlers] F-B1-6 already fixed: " +
"GetUpdateStatsFromState not inside agent loop")
return
// If there IS a loop, check that GetUpdateStatsFromState is NOT inside it
loopBody := src[forIdx:]
if len(loopBody) > 1000 {
loopBody = loopBody[:1000]
}
if strings.Contains(loopBody, "GetUpdateStatsFromState") {
t.Error("[ERROR] [server] [handlers] F-B1-6 NOT FIXED: GetUpdateStatsFromState still inside agent loop")
} else {
t.Log("[INFO] [server] [handlers] F-B1-6 FIXED: no per-agent query in loop")
}
t.Log("[INFO] [server] [handlers] F-B1-6 confirmed: GetUpdateStatsFromState called inside agent loop")
t.Log("[INFO] [server] [handlers] this executes 1 DB query per agent on every dashboard load")
t.Log("[INFO] [server] [handlers] after fix: replace with single JOIN query")
}
// ---------------------------------------------------------------------------
// Test 5.2 — Dashboard stats must NOT have per-agent query loop (assert fix)
//
// Category: FAIL-NOW / PASS-AFTER-FIX
// ---------------------------------------------------------------------------
func TestGetDashboardStatsUsesJoin(t *testing.T) {
statsPath := filepath.Join(".", "stats.go")
content, err := os.ReadFile(statsPath)
@@ -77,24 +55,23 @@ func TestGetDashboardStatsUsesJoin(t *testing.T) {
src := string(content)
// After fix: GetUpdateStatsFromState should NOT appear inside a for/range loop
forIdx := strings.Index(src, "for _, agent := range")
if forIdx == -1 {
forIdx = strings.Index(src, "for _, a := range")
// Must use GetAllUpdateStats (single aggregate) not GetUpdateStatsFromState (per-agent)
if !strings.Contains(src, "GetAllUpdateStats") {
t.Errorf("[ERROR] [server] [handlers] GetAllUpdateStats not found in stats.go.\n" +
"F-B1-6: dashboard stats must use a single aggregate query.")
}
// Must NOT have GetUpdateStatsFromState in a loop
forIdx := strings.Index(src, "for _, agent := range")
if forIdx != -1 {
afterLoop := src[forIdx:]
// Find the end of the loop body (next closing brace at same indentation)
// Simplified: check if the query function appears within 500 chars of the for loop
loopBody := afterLoop
loopBody := src[forIdx:]
if len(loopBody) > 1000 {
loopBody = loopBody[:1000]
}
if strings.Contains(loopBody, "GetUpdateStatsFromState") {
t.Errorf("[ERROR] [server] [handlers] GetUpdateStatsFromState is inside a per-agent loop.\n"+
"F-B1-6: dashboard stats must use a single JOIN query, not a per-agent loop.\n"+
"After fix: replace loop with aggregated query using LEFT JOIN.")
t.Errorf("[ERROR] [server] [handlers] per-agent query still in loop")
}
}
t.Log("[INFO] [server] [handlers] F-B1-6 FIXED: uses aggregate query")
}