fix: Remove duplicate scan logging to prevent storage/system scans on Updates page

BREAKING CHANGE: Storage and system scans no longer create entries in update_logs

**Problem**
- Storage scans were appearing on Updates page (mixed with package updates)
- System scans were appearing on Updates page (mixed with package updates)
- Duplicate "Scan All" entries from collective + individual logging

**Root Cause**
Scan handlers were calling both ReportLog() and dedicated endpoints:
- reportLogWithAck → POST /api/v1/agents/:id/logs → update_logs table
- This caused storage/system metrics to appear alongside package updates

**Fix**
Removed ALL ReportLog() calls from scan handlers:
1. handleScanUpdatesV2 (lines 44-46): Removed collective logging
2. handleScanStorage (lines 103-105): Use only ReportStorageMetrics
3. handleScanSystem (lines 189-191): Use only ReportMetrics
4. handleScanDocker (lines 269-271): Use only ReportDockerImages

**Verification**
- All 4 handlers have working dedicated endpoints (verified via subagent)
- Routes already registered: POST /storage-metrics, POST /metrics, etc.
- Frontend queries correct endpoints (verified)
- No data loss: dedicated endpoints store in proper tables

**Result**
- Storage scans → storage_metrics table → Storage page only 
- System scans → system reporting → System page only 
- Package updates → update_logs table → Updates page only 
- No duplicate "Scan All" entries 

**Files Changed**
- aggregator-agent/cmd/agent/subsystem_handlers.go: Removed 20 lines of ReportLog calls
- internal/api/handlers/agents.go: Command recovery enhancements
- internal/api/handlers/updates.go: Subsystem extraction logic
- internal/database/queries/commands.go: GetStuckCommands query
This commit is contained in:
Fimeg
2025-12-19 15:02:12 -05:00
parent a90692f1d8
commit 6b3ab6d6fc
20 changed files with 1001 additions and 153 deletions

View File

@@ -426,15 +426,27 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
}
// Get pending commands
commands, err := h.commandQueries.GetPendingCommands(agentID)
pendingCommands, err := h.commandQueries.GetPendingCommands(agentID)
if err != nil {
log.Printf("[ERROR] [server] [command] get_pending_failed agent_id=%s error=%v", agentID, err)
log.Printf("[HISTORY] [server] [command] get_pending_failed error=\"%v\" timestamp=%s", err, time.Now().Format(time.RFC3339))
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to retrieve commands"})
return
}
// Convert to response format
commandItems := make([]models.CommandItem, 0, len(commands))
for _, cmd := range commands {
// Recover stuck commands (sent > 5 minutes ago or pending > 5 minutes)
stuckCommands, err := h.commandQueries.GetStuckCommands(agentID, 5*time.Minute)
if err != nil {
log.Printf("[WARNING] [server] [command] get_stuck_failed agent_id=%s error=%v", agentID, err)
// Continue anyway, stuck commands check is non-critical
}
// Combine all commands to return
allCommands := append(pendingCommands, stuckCommands...)
// Convert to response format and mark all as sent immediately
commandItems := make([]models.CommandItem, 0, len(allCommands))
for _, cmd := range allCommands {
commandItems = append(commandItems, models.CommandItem{
ID: cmd.ID.String(),
Type: cmd.CommandType,
@@ -442,8 +454,21 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
Signature: cmd.Signature,
})
// Mark as sent
h.commandQueries.MarkCommandSent(cmd.ID)
// Mark as sent NOW with error handling (ETHOS: Errors are History)
if err := h.commandQueries.MarkCommandSent(cmd.ID); err != nil {
log.Printf("[ERROR] [server] [command] mark_sent_failed command_id=%s error=%v", cmd.ID, err)
log.Printf("[HISTORY] [server] [command] mark_sent_failed command_id=%s error=\"%v\" timestamp=%s",
cmd.ID, err, time.Now().Format(time.RFC3339))
// Continue - don't fail entire operation for one command
}
}
// Log command retrieval for audit trail
if len(allCommands) > 0 {
log.Printf("[INFO] [server] [command] retrieved_commands agent_id=%s count=%d timestamp=%s",
agentID, len(allCommands), time.Now().Format(time.RFC3339))
log.Printf("[HISTORY] [server] [command] retrieved_commands agent_id=%s count=%d timestamp=%s",
agentID, len(allCommands), time.Now().Format(time.RFC3339))
}
// Check if rapid polling should be enabled