fix: Remove duplicate scan logging to prevent storage/system scans on Updates page

BREAKING CHANGE: Storage, system, and Docker scans (and the collective "Scan All" log) no longer create entries in update_logs

**Problem**
- Storage scans were appearing on Updates page (mixed with package updates)
- System scans were appearing on Updates page (mixed with package updates)
- Duplicate "Scan All" entries from collective + individual logging

**Root Cause**
Scan handlers were calling both ReportLog() and their dedicated endpoints, so every scan was recorded twice:
- reportLogWithAck → POST /api/v1/agents/:id/logs → update_logs table
- This caused storage/system metrics to appear alongside package updates

**Fix**
Removed ALL ReportLog() calls from scan handlers:
1. handleScanUpdatesV2 (lines 44-46): Removed collective logging
2. handleScanStorage (lines 103-105): Use only ReportStorageMetrics
3. handleScanSystem (lines 189-191): Use only ReportMetrics
4. handleScanDocker (lines 269-271): Use only ReportDockerImages

**Verification**
- All 4 handlers have working dedicated endpoints (verified via subagent)
- Routes already registered: POST /storage-metrics, POST /metrics, etc.
- Frontend queries correct endpoints (verified)
- No data loss: dedicated endpoints store in proper tables

**Result**
- Storage scans → storage_metrics table → Storage page only 
- System scans → system reporting → System page only 
- Package updates → update_logs table → Updates page only 
- No duplicate "Scan All" entries 

**Files Changed**
- aggregator-agent/cmd/agent/subsystem_handlers.go: Removed 20 lines of ReportLog calls
- internal/api/handlers/agents.go: Command recovery enhancements
- internal/api/handlers/updates.go: Subsystem extraction logic
- internal/database/queries/commands.go: GetStuckCommands query
This commit is contained in:
Fimeg
2025-12-19 15:02:12 -05:00
parent a90692f1d8
commit 6b3ab6d6fc
20 changed files with 1001 additions and 153 deletions

View File

@@ -0,0 +1,17 @@
-- Migration: Rollback subsystem column addition
-- Purpose: Remove subsystem column and associated indexes
-- Every statement below is guarded with IF EXISTS so the rollback can be
-- re-run safely after a partial or repeated execution.
-- Drop indexes
DROP INDEX IF EXISTS idx_logs_agent_subsystem;
DROP INDEX IF EXISTS idx_logs_subsystem;
-- Drop check constraint
ALTER TABLE update_logs
DROP CONSTRAINT IF EXISTS chk_update_logs_subsystem;
-- Column comment: no explicit removal is needed. PostgreSQL discards a
-- column's comment when the column is dropped, and COMMENT ON COLUMN has no
-- IF EXISTS form, so it would raise an error when the column is already gone.
-- Drop subsystem column
ALTER TABLE update_logs
DROP COLUMN IF EXISTS subsystem;

View File

@@ -0,0 +1,38 @@
-- Migration: Add subsystem column to update_logs table
-- Purpose: Make subsystem context explicit (not parsed from action field)
-- Every statement is written so the migration can be re-run safely.
-- Add subsystem column (nullable: rows without a known subsystem stay NULL)
ALTER TABLE update_logs
ADD COLUMN IF NOT EXISTS subsystem VARCHAR(50);
-- Backfill subsystem from action field for existing scan entries
-- Unrecognised scan_* actions keep subsystem = NULL.
-- This runs before the indexes are created so the bulk UPDATE does not have
-- to maintain them row by row.
UPDATE update_logs
SET subsystem = CASE
WHEN action = 'scan_docker' THEN 'docker'
WHEN action = 'scan_storage' THEN 'storage'
WHEN action = 'scan_system' THEN 'system'
WHEN action = 'scan_apt' THEN 'apt'
WHEN action = 'scan_dnf' THEN 'dnf'
WHEN action = 'scan_winget' THEN 'winget'
WHEN action = 'scan_updates' THEN 'updates'
ELSE NULL
END
WHERE action LIKE 'scan_%' AND subsystem IS NULL;
-- Create indexes for subsystem filtering
CREATE INDEX IF NOT EXISTS idx_logs_subsystem ON update_logs(subsystem);
CREATE INDEX IF NOT EXISTS idx_logs_agent_subsystem ON update_logs(agent_id, subsystem);
-- Add check constraint for valid subsystem values
-- ADD CONSTRAINT has no IF NOT EXISTS form, so drop any earlier copy first;
-- otherwise a re-run fails with "constraint already exists".
ALTER TABLE update_logs
DROP CONSTRAINT IF EXISTS chk_update_logs_subsystem;
ALTER TABLE update_logs
ADD CONSTRAINT chk_update_logs_subsystem
CHECK (subsystem IS NULL OR subsystem IN (
'docker', 'storage', 'system', 'apt', 'dnf', 'winget', 'updates',
'agent', 'security', 'network', 'heartbeat'
));
-- Add comment for documentation
COMMENT ON COLUMN update_logs.subsystem IS 'Subsystem that generated this log entry (e.g., docker, storage, system)';
-- Grant permissions (adjust as needed for your setup)
-- GRANT ALL PRIVILEGES ON TABLE update_logs TO redflag_user;

View File

@@ -418,6 +418,25 @@ func (q *CommandQueries) GetCommandsInTimeRange(hours int) (int, error) {
return count, err
}
// GetStuckCommands returns the commands of one agent that are still in
// 'pending' or 'sent' status and are older than olderThan.
//
// A command's age is measured from sent_at when that column is set, and from
// created_at when sent_at is NULL. The cutoff is computed from this process's
// clock (time.Now). Results are ordered by created_at ascending.
func (q *CommandQueries) GetStuckCommands(agentID uuid.UUID, olderThan time.Duration) ([]models.AgentCommand, error) {
	const stuckQuery = `
SELECT * FROM agent_commands
WHERE agent_id = $1
AND status IN ('pending', 'sent')
AND (
(sent_at < $2 AND sent_at IS NOT NULL)
OR (created_at < $2 AND sent_at IS NULL)
)
ORDER BY created_at ASC
`
	// Anything last touched before this instant counts as stuck.
	cutoff := time.Now().Add(-olderThan)

	var stuck []models.AgentCommand
	err := q.db.Select(&stuck, stuckQuery, agentID, cutoff)
	return stuck, err
}
// VerifyCommandsCompleted checks which command IDs from the provided list have been completed or failed
// Returns the list of command IDs that have been successfully recorded (completed or failed status)
func (q *CommandQueries) VerifyCommandsCompleted(commandIDs []string) ([]string, error) {

View File

@@ -925,3 +925,44 @@ func (q *UpdateQueries) GetActiveOperations() ([]models.ActiveOperation, error)
return operations, nil
}
// GetLogsByAgentAndSubsystem returns the update_logs rows recorded for agentID
// whose subsystem column equals subsystem, ordered by executed_at descending.
func (q *UpdateQueries) GetLogsByAgentAndSubsystem(agentID uuid.UUID, subsystem string) ([]models.UpdateLog, error) {
	const logsQuery = `
SELECT id, agent_id, update_package_id, action, subsystem, result,
stdout, stderr, exit_code, duration_seconds, executed_at
FROM update_logs
WHERE agent_id = $1 AND subsystem = $2
ORDER BY executed_at DESC
`
	var entries []models.UpdateLog
	err := q.db.Select(&entries, logsQuery, agentID, subsystem)
	return entries, err
}
// GetSubsystemStats returns, for one agent, the number of scan log entries
// (update_logs rows whose action matches 'scan_%') grouped by subsystem.
//
// Rows whose subsystem is NULL are excluded: the column is nullable, and a
// NULL group key cannot be scanned into a Go string, which would otherwise
// make the whole call fail.
//
// On a query, scan, or iteration error it returns a nil map and the error.
func (q *UpdateQueries) GetSubsystemStats(agentID uuid.UUID) (map[string]int64, error) {
	query := `
SELECT subsystem, COUNT(*) as count
FROM update_logs
WHERE agent_id = $1 AND action LIKE 'scan_%' AND subsystem IS NOT NULL
GROUP BY subsystem
`
	rows, err := q.db.Queryx(query, agentID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	stats := make(map[string]int64)
	for rows.Next() {
		var subsystem string
		var count int64
		if err := rows.Scan(&subsystem, &count); err != nil {
			return nil, err
		}
		stats[subsystem] = count
	}
	// rows.Next also returns false when iteration fails part-way; without this
	// check a truncated result would be reported as success.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return stats, nil
}