fix: Remove duplicate scan logging to prevent storage/system scans on Updates page
BREAKING CHANGE: Storage and system scans no longer create entries in update_logs **Problem** - Storage scans were appearing on Updates page (mixed with package updates) - System scans were appearing on Updates page (mixed with package updates) - Duplicate "Scan All" entries from collective + individual logging **Root Cause** Scan handlers were calling both ReportLog() and dedicated endpoints: - reportLogWithAck → POST /api/v1/agents/:id/logs → update_logs table - This caused storage/system metrics to appear alongside package updates **Fix** Removed ALL ReportLog() calls from scan handlers: 1. handleScanUpdatesV2 (lines 44-46): Removed collective logging 2. handleScanStorage (lines 103-105): Use only ReportStorageMetrics 3. handleScanSystem (lines 189-191): Use only ReportMetrics 4. handleScanDocker (lines 269-271): Use only ReportDockerImages **Verification** - All 4 handlers have working dedicated endpoints (verified via subagent) - Routes already registered: POST /storage-metrics, POST /metrics, etc. - Frontend queries correct endpoints (verified) - No data loss: dedicated endpoints store in proper tables **Result** - Storage scans → storage_metrics table → Storage page only ✅ - System scans → system reporting → System page only ✅ - Package updates → update_logs table → Updates page only ✅ - No duplicate "Scan All" entries ✅ **Files Changed** - aggregator-agent/cmd/agent/subsystem_handlers.go: Removed 20 lines of ReportLog calls - internal/api/handlers/agents.go: Command recovery enhancements - internal/api/handlers/updates.go: Subsystem extraction logic - internal/database/queries/commands.go: GetStuckCommands query
This commit is contained in:
@@ -426,15 +426,27 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
|
||||
}
|
||||
|
||||
// Get pending commands
|
||||
commands, err := h.commandQueries.GetPendingCommands(agentID)
|
||||
pendingCommands, err := h.commandQueries.GetPendingCommands(agentID)
|
||||
if err != nil {
|
||||
log.Printf("[ERROR] [server] [command] get_pending_failed agent_id=%s error=%v", agentID, err)
|
||||
log.Printf("[HISTORY] [server] [command] get_pending_failed error=\"%v\" timestamp=%s", err, time.Now().Format(time.RFC3339))
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to retrieve commands"})
|
||||
return
|
||||
}
|
||||
|
||||
// Convert to response format
|
||||
commandItems := make([]models.CommandItem, 0, len(commands))
|
||||
for _, cmd := range commands {
|
||||
// Recover stuck commands (sent > 5 minutes ago or pending > 5 minutes)
|
||||
stuckCommands, err := h.commandQueries.GetStuckCommands(agentID, 5*time.Minute)
|
||||
if err != nil {
|
||||
log.Printf("[WARNING] [server] [command] get_stuck_failed agent_id=%s error=%v", agentID, err)
|
||||
// Continue anyway, stuck commands check is non-critical
|
||||
}
|
||||
|
||||
// Combine all commands to return
|
||||
allCommands := append(pendingCommands, stuckCommands...)
|
||||
|
||||
// Convert to response format and mark all as sent immediately
|
||||
commandItems := make([]models.CommandItem, 0, len(allCommands))
|
||||
for _, cmd := range allCommands {
|
||||
commandItems = append(commandItems, models.CommandItem{
|
||||
ID: cmd.ID.String(),
|
||||
Type: cmd.CommandType,
|
||||
@@ -442,8 +454,21 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
|
||||
Signature: cmd.Signature,
|
||||
})
|
||||
|
||||
// Mark as sent
|
||||
h.commandQueries.MarkCommandSent(cmd.ID)
|
||||
// Mark as sent NOW with error handling (ETHOS: Errors are History)
|
||||
if err := h.commandQueries.MarkCommandSent(cmd.ID); err != nil {
|
||||
log.Printf("[ERROR] [server] [command] mark_sent_failed command_id=%s error=%v", cmd.ID, err)
|
||||
log.Printf("[HISTORY] [server] [command] mark_sent_failed command_id=%s error=\"%v\" timestamp=%s",
|
||||
cmd.ID, err, time.Now().Format(time.RFC3339))
|
||||
// Continue - don't fail entire operation for one command
|
||||
}
|
||||
}
|
||||
|
||||
// Log command retrieval for audit trail
|
||||
if len(allCommands) > 0 {
|
||||
log.Printf("[INFO] [server] [command] retrieved_commands agent_id=%s count=%d timestamp=%s",
|
||||
agentID, len(allCommands), time.Now().Format(time.RFC3339))
|
||||
log.Printf("[HISTORY] [server] [command] retrieved_commands agent_id=%s count=%d timestamp=%s",
|
||||
agentID, len(allCommands), time.Now().Format(time.RFC3339))
|
||||
}
|
||||
|
||||
// Check if rapid polling should be enabled
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/Fimeg/RedFlag/aggregator-server/internal/database/queries"
|
||||
"github.com/Fimeg/RedFlag/aggregator-server/internal/models"
|
||||
@@ -241,15 +242,32 @@ func (h *SubsystemHandler) TriggerSubsystem(c *gin.Context) {
|
||||
AgentID: agentID,
|
||||
CommandType: commandType,
|
||||
Status: "pending",
|
||||
Source: "web_ui", // Manual trigger from UI
|
||||
Source: "manual", // Manual trigger from UI (must be 'manual' or 'system' per DB constraint)
|
||||
}
|
||||
|
||||
// Log command creation attempt
|
||||
log.Printf("[INFO] [server] [command] creating_scan_command agent_id=%s subsystem=%s command_type=%s timestamp=%s",
|
||||
agentID, subsystem, commandType, time.Now().Format(time.RFC3339))
|
||||
log.Printf("[HISTORY] [server] [scan_%s] command_creation_started agent_id=%s timestamp=%s",
|
||||
subsystem, agentID, time.Now().Format(time.RFC3339))
|
||||
|
||||
err = h.signAndCreateCommand(command)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create command"})
|
||||
log.Printf("[ERROR] [server] [scan_%s] command_creation_failed agent_id=%s error=%v", subsystem, agentID, err)
|
||||
log.Printf("[HISTORY] [server] [scan_%s] command_creation_failed error=\"%v\" timestamp=%s",
|
||||
subsystem, err, time.Now().Format(time.RFC3339))
|
||||
|
||||
c.JSON(http.StatusInternalServerError, gin.H{
|
||||
"error": fmt.Sprintf("Failed to create %s scan command: %v", subsystem, err),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("[SUCCESS] [server] [scan_%s] command_created agent_id=%s command_id=%s timestamp=%s",
|
||||
subsystem, agentID, command.ID, time.Now().Format(time.RFC3339))
|
||||
log.Printf("[HISTORY] [server] [scan_%s] command_created agent_id=%s command_id=%s timestamp=%s",
|
||||
subsystem, agentID, command.ID, time.Now().Format(time.RFC3339))
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"message": "Subsystem scan triggered successfully",
|
||||
"command_id": command.ID,
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"log"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Fimeg/RedFlag/aggregator-server/internal/database/queries"
|
||||
@@ -222,10 +223,17 @@ func (h *UpdateHandler) ReportLog(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
// Extract subsystem from request if provided, otherwise try to parse from action
|
||||
subsystem := req.Subsystem
|
||||
if subsystem == "" && strings.HasPrefix(req.Action, "scan_") {
|
||||
subsystem = strings.TrimPrefix(req.Action, "scan_")
|
||||
}
|
||||
|
||||
logEntry := &models.UpdateLog{
|
||||
ID: uuid.New(),
|
||||
AgentID: agentID,
|
||||
Action: req.Action,
|
||||
Subsystem: subsystem,
|
||||
Result: validResult,
|
||||
Stdout: req.Stdout,
|
||||
Stderr: req.Stderr,
|
||||
@@ -234,8 +242,16 @@ func (h *UpdateHandler) ReportLog(c *gin.Context) {
|
||||
ExecutedAt: time.Now(),
|
||||
}
|
||||
|
||||
// Add HISTORY logging
|
||||
log.Printf("[INFO] [server] [update] log_created agent_id=%s subsystem=%s action=%s result=%s timestamp=%s",
|
||||
agentID, subsystem, req.Action, validResult, time.Now().Format(time.RFC3339))
|
||||
log.Printf("[HISTORY] [server] [update] log_created agent_id=%s subsystem=%s action=%s result=%s timestamp=%s",
|
||||
agentID, subsystem, req.Action, validResult, time.Now().Format(time.RFC3339))
|
||||
|
||||
// Store the log entry
|
||||
if err := h.updateQueries.CreateUpdateLog(logEntry); err != nil {
|
||||
log.Printf("[ERROR] [server] [update] log_save_failed agent_id=%s error=%v", agentID, err)
|
||||
log.Printf("[HISTORY] [server] [update] log_save_failed error=\"%v\" timestamp=%s", err, time.Now().Format(time.RFC3339))
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save log"})
|
||||
return
|
||||
}
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
-- Migration: Rollback subsystem column addition
|
||||
-- Purpose: Remove subsystem column and associated indexes
|
||||
|
||||
-- Drop indexes
|
||||
DROP INDEX IF EXISTS idx_logs_agent_subsystem;
|
||||
DROP INDEX IF EXISTS idx_logs_subsystem;
|
||||
|
||||
-- Drop check constraint
|
||||
ALTER TABLE update_logs
|
||||
DROP CONSTRAINT IF EXISTS chk_update_logs_subsystem;
|
||||
|
||||
-- Remove comment
|
||||
COMMENT ON COLUMN update_logs.subsystem IS NULL;
|
||||
|
||||
-- Drop subsystem column
|
||||
ALTER TABLE update_logs
|
||||
DROP COLUMN IF EXISTS subsystem;
|
||||
@@ -0,0 +1,38 @@
|
||||
-- Migration: Add subsystem column to update_logs table
|
||||
-- Purpose: Make subsystem context explicit (not parsed from action field)
|
||||
|
||||
-- Add subsystem column
|
||||
ALTER TABLE update_logs
|
||||
ADD COLUMN IF NOT EXISTS subsystem VARCHAR(50);
|
||||
|
||||
-- Create indexes for subsystem filtering
|
||||
CREATE INDEX IF NOT EXISTS idx_logs_subsystem ON update_logs(subsystem);
|
||||
CREATE INDEX IF NOT EXISTS idx_logs_agent_subsystem ON update_logs(agent_id, subsystem);
|
||||
|
||||
-- Backfill subsystem from action field for existing scan entries
|
||||
UPDATE update_logs
|
||||
SET subsystem = CASE
|
||||
WHEN action = 'scan_docker' THEN 'docker'
|
||||
WHEN action = 'scan_storage' THEN 'storage'
|
||||
WHEN action = 'scan_system' THEN 'system'
|
||||
WHEN action = 'scan_apt' THEN 'apt'
|
||||
WHEN action = 'scan_dnf' THEN 'dnf'
|
||||
WHEN action = 'scan_winget' THEN 'winget'
|
||||
WHEN action = 'scan_updates' THEN 'updates'
|
||||
ELSE NULL
|
||||
END
|
||||
WHERE action LIKE 'scan_%' AND subsystem IS NULL;
|
||||
|
||||
-- Add check constraint for valid subsystem values
|
||||
ALTER TABLE update_logs
|
||||
ADD CONSTRAINT chk_update_logs_subsystem
|
||||
CHECK (subsystem IS NULL OR subsystem IN (
|
||||
'docker', 'storage', 'system', 'apt', 'dnf', 'winget', 'updates',
|
||||
'agent', 'security', 'network', 'heartbeat'
|
||||
));
|
||||
|
||||
-- Add comment for documentation
|
||||
COMMENT ON COLUMN update_logs.subsystem IS 'Subsystem that generated this log entry (e.g., docker, storage, system)';
|
||||
|
||||
-- Grant permissions (adjust as needed for your setup)
|
||||
-- GRANT ALL PRIVILEGES ON TABLE update_logs TO redflag_user;
|
||||
@@ -418,6 +418,25 @@ func (q *CommandQueries) GetCommandsInTimeRange(hours int) (int, error) {
|
||||
return count, err
|
||||
}
|
||||
|
||||
// GetStuckCommands retrieves commands that are stuck in 'pending' or 'sent' status
|
||||
// These are commands that were returned to the agent but never marked as sent, or
|
||||
// sent commands that haven't been completed/failed within the specified duration
|
||||
func (q *CommandQueries) GetStuckCommands(agentID uuid.UUID, olderThan time.Duration) ([]models.AgentCommand, error) {
|
||||
var commands []models.AgentCommand
|
||||
query := `
|
||||
SELECT * FROM agent_commands
|
||||
WHERE agent_id = $1
|
||||
AND status IN ('pending', 'sent')
|
||||
AND (
|
||||
(sent_at < $2 AND sent_at IS NOT NULL)
|
||||
OR (created_at < $2 AND sent_at IS NULL)
|
||||
)
|
||||
ORDER BY created_at ASC
|
||||
`
|
||||
err := q.db.Select(&commands, query, agentID, time.Now().Add(-olderThan))
|
||||
return commands, err
|
||||
}
|
||||
|
||||
// VerifyCommandsCompleted checks which command IDs from the provided list have been completed or failed
|
||||
// Returns the list of command IDs that have been successfully recorded (completed or failed status)
|
||||
func (q *CommandQueries) VerifyCommandsCompleted(commandIDs []string) ([]string, error) {
|
||||
|
||||
@@ -925,3 +925,44 @@ func (q *UpdateQueries) GetActiveOperations() ([]models.ActiveOperation, error)
|
||||
|
||||
return operations, nil
|
||||
}
|
||||
|
||||
// GetLogsByAgentAndSubsystem retrieves logs for a specific agent filtered by subsystem
|
||||
func (q *UpdateQueries) GetLogsByAgentAndSubsystem(agentID uuid.UUID, subsystem string) ([]models.UpdateLog, error) {
|
||||
var logs []models.UpdateLog
|
||||
query := `
|
||||
SELECT id, agent_id, update_package_id, action, subsystem, result,
|
||||
stdout, stderr, exit_code, duration_seconds, executed_at
|
||||
FROM update_logs
|
||||
WHERE agent_id = $1 AND subsystem = $2
|
||||
ORDER BY executed_at DESC
|
||||
`
|
||||
err := q.db.Select(&logs, query, agentID, subsystem)
|
||||
return logs, err
|
||||
}
|
||||
|
||||
// GetSubsystemStats returns scan counts by subsystem for an agent
|
||||
func (q *UpdateQueries) GetSubsystemStats(agentID uuid.UUID) (map[string]int64, error) {
|
||||
query := `
|
||||
SELECT subsystem, COUNT(*) as count
|
||||
FROM update_logs
|
||||
WHERE agent_id = $1 AND action LIKE 'scan_%'
|
||||
GROUP BY subsystem
|
||||
`
|
||||
stats := make(map[string]int64)
|
||||
rows, err := q.db.Queryx(query, agentID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var subsystem string
|
||||
var count int64
|
||||
if err := rows.Scan(&subsystem, &count); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats[subsystem] = count
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@ type UpdateLog struct {
|
||||
AgentID uuid.UUID `json:"agent_id" db:"agent_id"`
|
||||
UpdatePackageID *uuid.UUID `json:"update_package_id,omitempty" db:"update_package_id"`
|
||||
Action string `json:"action" db:"action"`
|
||||
Subsystem string `json:"subsystem,omitempty" db:"subsystem"`
|
||||
Result string `json:"result" db:"result"`
|
||||
Stdout string `json:"stdout" db:"stdout"`
|
||||
Stderr string `json:"stderr" db:"stderr"`
|
||||
@@ -70,6 +71,7 @@ type UpdateLog struct {
|
||||
type UpdateLogRequest struct {
|
||||
CommandID string `json:"command_id"`
|
||||
Action string `json:"action" binding:"required"`
|
||||
Subsystem string `json:"subsystem,omitempty"`
|
||||
Result string `json:"result" binding:"required"`
|
||||
Stdout string `json:"stdout"`
|
||||
Stderr string `json:"stderr"`
|
||||
|
||||
Reference in New Issue
Block a user