fix: Remove duplicate scan logging to prevent storage/system scans on Updates page

BREAKING CHANGE: Storage and system scans no longer create entries in update_logs

**Problem**
- Storage scans were appearing on Updates page (mixed with package updates)
- System scans were appearing on Updates page (mixed with package updates)
- Duplicate "Scan All" entries from collective + individual logging

**Root Cause**
Scan handlers were calling both ReportLog() and dedicated endpoints:
- reportLogWithAck → POST /api/v1/agents/:id/logs → update_logs table
- This caused storage/system metrics to appear alongside package updates

**Fix**
Removed ALL ReportLog() calls from scan handlers:
1. handleScanUpdatesV2 (lines 44-46): Removed collective logging
2. handleScanStorage (lines 103-105): Use only ReportStorageMetrics
3. handleScanSystem (lines 189-191): Use only ReportMetrics
4. handleScanDocker (lines 269-271): Use only ReportDockerImages

**Verification**
- All 4 handlers have working dedicated endpoints (verified via subagent)
- Routes already registered: POST /storage-metrics, POST /metrics, etc.
- Frontend queries correct endpoints (verified)
- No data loss: dedicated endpoints store in proper tables

**Result**
- Storage scans → storage_metrics table → Storage page only 
- System scans → system reporting → System page only 
- Package updates → update_logs table → Updates page only 
- No duplicate "Scan All" entries 

**Files Changed**
- aggregator-agent/cmd/agent/subsystem_handlers.go: Removed 20 lines of ReportLog calls
- internal/api/handlers/agents.go: Command recovery enhancements
- internal/api/handlers/updates.go: Subsystem extraction logic
- internal/database/queries/commands.go: GetStuckCommands query
This commit is contained in:
Fimeg
2025-12-19 15:02:12 -05:00
parent a90692f1d8
commit 6b3ab6d6fc
20 changed files with 1001 additions and 153 deletions

Binary file not shown.

View File

@@ -23,9 +23,11 @@ import (
"github.com/Fimeg/RedFlag/aggregator-agent/internal/installer"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/migration"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/orchestrator"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/guardian"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/scanner"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/service"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/system"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/validator"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/version"
"github.com/google/uuid"
)
@@ -524,87 +526,143 @@ func getCurrentSubsystemEnabled(cfg *config.Config, subsystemName string) bool {
}
}
// syncServerConfig checks for and applies server configuration updates
func syncServerConfig(apiClient *client.Client, cfg *config.Config) error {
// Get current config from server
// syncServerConfigProper checks for and applies server configuration updates with validation and protection
func syncServerConfigProper(apiClient *client.Client, cfg *config.Config) error {
serverConfig, err := apiClient.GetConfig(cfg.AgentID)
if err != nil {
log.Printf("[HISTORY] [agent] [config] sync_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return fmt.Errorf("failed to get server config: %w", err)
}
// Check if config version is newer
if serverConfig.Version <= lastConfigVersion {
return nil // No update needed
}
log.Printf("📡 Server config update detected (version: %d)", serverConfig.Version)
log.Printf("[INFO] [agent] [config] server config update detected (version: %d)", serverConfig.Version)
changes := false
// Track potential check-in interval changes separately to avoid inflation
newCheckInInterval := cfg.CheckInInterval
// Create validator for interval bounds checking
intervalValidator := validator.NewIntervalValidator()
// Apply subsystem configuration from server
// Create guardian to protect against check-in interval override attempts
intervalGuardian := guardian.NewIntervalGuardian()
intervalGuardian.SetBaseline(cfg.CheckInInterval)
// Process subsystem configurations
for subsystemName, subsystemConfig := range serverConfig.Subsystems {
if configMap, ok := subsystemConfig.(map[string]interface{}); ok {
enabled := false
intervalMinutes := 0
autoRun := false
if e, exists := configMap["enabled"]; exists {
if eVal, ok := e.(bool); ok {
enabled = eVal
}
// Parse interval from server config
intervalFloat := 0.0
if rawInterval, ok := configMap["interval_minutes"].(float64); ok {
intervalFloat = rawInterval
}
intervalMinutes := int(intervalFloat)
if i, exists := configMap["interval_minutes"]; exists {
if iVal, ok := i.(float64); ok {
intervalMinutes = int(iVal)
// Validate scanner interval
if intervalMinutes > 0 {
if err := intervalValidator.ValidateScannerInterval(intervalMinutes); err != nil {
log.Printf("[ERROR] [agent] [config] [%s] scanner interval validation failed: %v",
subsystemName, err)
log.Printf("[HISTORY] [agent] [config] [%s] interval_rejected interval=%d reason=\"%v\" timestamp=%s",
subsystemName, intervalMinutes, err, time.Now().Format(time.RFC3339))
continue // Skip invalid interval but don't fail entire sync
}
}
if a, exists := configMap["auto_run"]; exists {
if aVal, ok := a.(bool); ok {
autoRun = aVal
}
}
// Get current subsystem enabled state dynamically
currentEnabled := getCurrentSubsystemEnabled(cfg, subsystemName)
if enabled != currentEnabled {
log.Printf(" → %s: enabled=%v (changed)", subsystemName, enabled)
log.Printf("[INFO] [agent] [config] [%s] interval=%d minutes", subsystemName, intervalMinutes)
changes = true
}
// Check if interval actually changed, but don't modify cfg.CheckInInterval yet
if intervalMinutes > 0 && intervalMinutes != newCheckInInterval {
log.Printf(" → %s: interval=%d minutes (changed)", subsystemName, intervalMinutes)
changes = true
newCheckInInterval = intervalMinutes // Update temp variable, not the config
}
// Apply validated interval to the appropriate subsystem
switch subsystemName {
case "system":
cfg.Subsystems.System.IntervalMinutes = intervalMinutes
case "apt":
cfg.Subsystems.APT.IntervalMinutes = intervalMinutes
case "dnf":
cfg.Subsystems.DNF.IntervalMinutes = intervalMinutes
case "storage":
cfg.Subsystems.Storage.IntervalMinutes = intervalMinutes
case "winget":
cfg.Subsystems.Winget.IntervalMinutes = intervalMinutes
default:
log.Printf("[WARNING] [agent] [config] unknown subsystem: %s", subsystemName)
}
if autoRun {
log.Printf(" → %s: auto_run=%v (server-side scheduling)", subsystemName, autoRun)
// Log to history table
log.Printf("[HISTORY] [agent] [config] [%s] interval_updated minutes=%d timestamp=%s",
subsystemName, intervalMinutes, time.Now().Format(time.RFC3339))
}
}
}
// Apply the check-in interval change only once after all subsystems processed
if newCheckInInterval != cfg.CheckInInterval {
cfg.CheckInInterval = newCheckInInterval
// Verification: Ensure no scanner interval is interfering with check-in frequency
// This guards against regressions where scanner settings might affect agent polling
if intervalGuardian.GetViolationCount() > 0 {
log.Printf("[WARNING] [agent] [config] guardian detected %d previous interval violations",
intervalGuardian.GetViolationCount())
}
if err := cfg.Save(constants.GetAgentConfigPath()); err != nil {
log.Printf("[HISTORY] [agent] [config] save_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return fmt.Errorf("failed to save config: %w", err)
}
if changes {
log.Printf("✅ Server configuration applied successfully")
} else {
log.Printf(" Server config received but no changes detected")
log.Printf("[INFO] [agent] [config] scanner interval updates applied")
}
// Update last config version
lastConfigVersion = serverConfig.Version
log.Printf("[SUCCESS] [agent] [config] config saved successfully")
return nil
}
// syncServerConfigWithRetry wraps syncServerConfigProper with retry logic
func syncServerConfigWithRetry(apiClient *client.Client, cfg *config.Config, maxRetries int) error {
var lastErr error
for attempt := 1; attempt <= maxRetries; attempt++ {
if err := syncServerConfigProper(apiClient, cfg); err != nil {
lastErr = err
log.Printf("[ERROR] [agent] [config] sync attempt %d/%d failed: %v",
attempt, maxRetries, err)
// Log to history table
log.Printf("[HISTORY] [agent] [config] sync_failed attempt=%d/%d error=\"%v\" timestamp=%s",
attempt, maxRetries, err, time.Now().Format(time.RFC3339))
if attempt < maxRetries {
// Exponential backoff: 1s, 2s, 4s, 8s...
backoff := time.Duration(1<<uint(attempt-1)) * time.Second
log.Printf("[INFO] [agent] [config] retrying in %v...", backoff)
time.Sleep(backoff)
}
continue
}
log.Printf("[SUCCESS] [agent] [config] synced after %d attempts", attempt)
return nil
}
// After maxRetries, degrade gracefully
if err := cfg.SetDegradedMode(true); err != nil {
log.Printf("[ERROR] [agent] [config] failed to enter degraded mode: %v", err)
log.Printf("[HISTORY] [agent] [config] degraded_mode_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
} else {
log.Printf("[WARNING] [agent] [config] entering degraded mode after %d failed attempts", maxRetries)
}
// Log degraded mode entry to history
log.Printf("[HISTORY] [agent] [config] degraded_mode_entered failures=%d timestamp=%s",
maxRetries, time.Now().Format(time.RFC3339))
return lastErr
}
func runAgent(cfg *config.Config) error {
log.Printf("🚩 RedFlag Agent v%s starting...\n", version.Version)
log.Printf("==================================================================")
@@ -656,17 +714,42 @@ func runAgent(cfg *config.Config) error {
// Initialize scanner orchestrator for parallel execution and granular subsystem management
scanOrchestrator := orchestrator.NewOrchestrator()
// Register update scanners ONLY - package management systems
// Initialize scanners for storage, system, and docker (used by individual subsystem handlers)
storageScanner := orchestrator.NewStorageScanner(version.Version)
systemScanner := orchestrator.NewSystemScanner(version.Version)
dockerScanner, _ := scanner.NewDockerScanner()
// Initialize circuit breakers for all subsystems
storageCB := circuitbreaker.New("Storage", circuitbreaker.Config{
FailureThreshold: cfg.Subsystems.Storage.CircuitBreaker.FailureThreshold,
FailureWindow: cfg.Subsystems.Storage.CircuitBreaker.FailureWindow,
OpenDuration: cfg.Subsystems.Storage.CircuitBreaker.OpenDuration,
HalfOpenAttempts: cfg.Subsystems.Storage.CircuitBreaker.HalfOpenAttempts,
})
systemCB := circuitbreaker.New("System", circuitbreaker.Config{
FailureThreshold: cfg.Subsystems.System.CircuitBreaker.FailureThreshold,
FailureWindow: cfg.Subsystems.System.CircuitBreaker.FailureWindow,
OpenDuration: cfg.Subsystems.System.CircuitBreaker.OpenDuration,
HalfOpenAttempts: cfg.Subsystems.System.CircuitBreaker.HalfOpenAttempts,
})
dockerCB := circuitbreaker.New("Docker", circuitbreaker.Config{
FailureThreshold: cfg.Subsystems.Docker.CircuitBreaker.FailureThreshold,
FailureWindow: cfg.Subsystems.Docker.CircuitBreaker.FailureWindow,
OpenDuration: cfg.Subsystems.Docker.CircuitBreaker.OpenDuration,
HalfOpenAttempts: cfg.Subsystems.Docker.CircuitBreaker.HalfOpenAttempts,
})
// Register ALL scanners with the orchestrator
// Update scanners (package management)
scanOrchestrator.RegisterScanner("apt", orchestrator.NewAPTScannerWrapper(aptScanner), aptCB, cfg.Subsystems.APT.Timeout, cfg.Subsystems.APT.Enabled)
scanOrchestrator.RegisterScanner("dnf", orchestrator.NewDNFScannerWrapper(dnfScanner), dnfCB, cfg.Subsystems.DNF.Timeout, cfg.Subsystems.DNF.Enabled)
scanOrchestrator.RegisterScanner("windows", orchestrator.NewWindowsUpdateScannerWrapper(windowsUpdateScanner), windowsCB, cfg.Subsystems.Windows.Timeout, cfg.Subsystems.Windows.Enabled)
scanOrchestrator.RegisterScanner("winget", orchestrator.NewWingetScannerWrapper(wingetScanner), wingetCB, cfg.Subsystems.Winget.Timeout, cfg.Subsystems.Winget.Enabled)
// NOTE: Docker, Storage, and System scanners are NOT registered with the update orchestrator
// They have their own dedicated handlers and endpoints:
// - Docker: handleScanDocker → ReportDockerImages()
// - Storage: handleScanStorage → ReportMetrics()
// - System: handleScanSystem → ReportMetrics()
// System scanners (metrics and monitoring)
scanOrchestrator.RegisterScanner("storage", orchestrator.NewStorageScannerWrapper(storageScanner), storageCB, cfg.Subsystems.Storage.Timeout, cfg.Subsystems.Storage.Enabled)
scanOrchestrator.RegisterScanner("system", orchestrator.NewSystemScannerWrapper(systemScanner), systemCB, cfg.Subsystems.System.Timeout, cfg.Subsystems.System.Enabled)
scanOrchestrator.RegisterScanner("docker", orchestrator.NewDockerScannerWrapper(dockerScanner), dockerCB, cfg.Subsystems.Docker.Timeout, cfg.Subsystems.Docker.Enabled)
// Initialize acknowledgment tracker for command result reliability
ackTracker := acknowledgment.NewTracker(constants.GetAgentStateDir())
@@ -804,10 +887,10 @@ func runAgent(cfg *config.Config) error {
}
}
// Sync configuration from server (non-blocking)
// Sync configuration from server (non-blocking) with retry logic
go func() {
if err := syncServerConfig(apiClient, cfg); err != nil {
log.Printf("Warning: Failed to sync server config: %v", err)
if err := syncServerConfigWithRetry(apiClient, cfg, 5); err != nil {
log.Printf("Warning: Failed to sync server config after retries: %v", err)
}
}()

View File

@@ -35,32 +35,15 @@ func handleScanUpdatesV2(apiClient *client.Client, cfg *config.Config, ackTracke
results, allUpdates := orch.ScanAll(ctx)
// Format results
stdout, stderr, exitCode := orchestrator.FormatScanSummary(results)
stdout, _, _ := orchestrator.FormatScanSummary(results)
// Add timing information
duration := time.Since(startTime)
stdout += fmt.Sprintf("\nScan completed in %.2f seconds\n", duration.Seconds())
// Create scan log entry with subsystem metadata
logReport := client.LogReport{
CommandID: commandID,
Action: "scan_updates",
Result: map[bool]string{true: "success", false: "failure"}[exitCode == 0],
Stdout: stdout,
Stderr: stderr,
ExitCode: exitCode,
DurationSeconds: int(duration.Seconds()),
Metadata: map[string]string{
"subsystem_label": "Package Updates",
"subsystem": "updates",
},
}
// Report the scan log
if err := reportLogWithAck(apiClient, cfg, ackTracker, logReport); err != nil {
log.Printf("Failed to report scan log: %v\n", err)
// Continue anyway - updates are more important
}
// [REMOVED] Collective logging disabled - individual subsystems log separately
// logReport := client.LogReport{...}
// if err := reportLogWithAck(...); err != nil {...}
// Report updates to server if any were found
if len(allUpdates) > 0 {
@@ -97,30 +80,14 @@ func handleScanStorage(apiClient *client.Client, cfg *config.Config, ackTracker
// Format results
results := []orchestrator.ScanResult{result}
stdout, stderr, exitCode := orchestrator.FormatScanSummary(results)
stdout, _, _ := orchestrator.FormatScanSummary(results)
// [REMOVED] stderr, exitCode unused after ReportLog removal
duration := time.Since(startTime)
stdout += fmt.Sprintf("\nStorage scan completed in %.2f seconds\n", duration.Seconds())
// Create scan log entry
logReport := client.LogReport{
CommandID: commandID,
Action: "scan_storage",
Result: map[bool]string{true: "success", false: "failure"}[exitCode == 0],
Stdout: stdout,
Stderr: stderr,
ExitCode: exitCode,
DurationSeconds: int(duration.Seconds()),
Metadata: map[string]string{
"subsystem_label": "Disk Usage",
"subsystem": "storage",
},
}
// Report the scan log
if err := reportLogWithAck(apiClient, cfg, ackTracker, logReport); err != nil {
log.Printf("Failed to report scan log: %v\n", err)
}
// [REMOVED logReport after ReportLog removal - unused]
// logReport := client.LogReport{...}
// Report storage metrics to server using dedicated endpoint
// Use proper StorageMetricReport with clean field names
@@ -185,30 +152,14 @@ func handleScanSystem(apiClient *client.Client, cfg *config.Config, ackTracker *
// Format results
results := []orchestrator.ScanResult{result}
stdout, stderr, exitCode := orchestrator.FormatScanSummary(results)
stdout, _, _ := orchestrator.FormatScanSummary(results)
// [REMOVED] stderr, exitCode unused after ReportLog removal
duration := time.Since(startTime)
stdout += fmt.Sprintf("\nSystem scan completed in %.2f seconds\n", duration.Seconds())
// Create scan log entry
logReport := client.LogReport{
CommandID: commandID,
Action: "scan_system",
Result: map[bool]string{true: "success", false: "failure"}[exitCode == 0],
Stdout: stdout,
Stderr: stderr,
ExitCode: exitCode,
DurationSeconds: int(duration.Seconds()),
Metadata: map[string]string{
"subsystem_label": "System Metrics",
"subsystem": "system",
},
}
// Report the scan log
if err := reportLogWithAck(apiClient, cfg, ackTracker, logReport); err != nil {
log.Printf("Failed to report scan log: %v\n", err)
}
// [REMOVED logReport after ReportLog removal - unused]
// logReport := client.LogReport{...}
// Report system metrics to server using dedicated endpoint
// Get system scanner and use proper interface
@@ -267,30 +218,14 @@ func handleScanDocker(apiClient *client.Client, cfg *config.Config, ackTracker *
// Format results
results := []orchestrator.ScanResult{result}
stdout, stderr, exitCode := orchestrator.FormatScanSummary(results)
stdout, _, _ := orchestrator.FormatScanSummary(results)
// [REMOVED] stderr, exitCode unused after ReportLog removal
duration := time.Since(startTime)
stdout += fmt.Sprintf("\nDocker scan completed in %.2f seconds\n", duration.Seconds())
// Create scan log entry
logReport := client.LogReport{
CommandID: commandID,
Action: "scan_docker",
Result: map[bool]string{true: "success", false: "failure"}[exitCode == 0],
Stdout: stdout,
Stderr: stderr,
ExitCode: exitCode,
DurationSeconds: int(duration.Seconds()),
Metadata: map[string]string{
"subsystem_label": "Docker Images",
"subsystem": "docker",
},
}
// Report the scan log
if err := reportLogWithAck(apiClient, cfg, ackTracker, logReport); err != nil {
log.Printf("Failed to report scan log: %v\n", err)
}
// [REMOVED logReport after ReportLog removal - unused]
// logReport := client.LogReport{...}
// Report Docker images to server using dedicated endpoint
// Get Docker scanner and use proper interface

View File

@@ -623,7 +623,34 @@ type LogReport struct {
func (c *Client) ReportLog(agentID uuid.UUID, report LogReport) error {
url := fmt.Sprintf("%s/api/v1/agents/%s/logs", c.baseURL, agentID)
body, err := json.Marshal(report)
// Extract subsystem from metadata if present
subsystem := ""
if report.Metadata != nil {
subsystem = report.Metadata["subsystem"]
}
// Create UpdateLogRequest with subsystem extracted from metadata
logRequest := struct {
CommandID string `json:"command_id"`
Action string `json:"action"`
Subsystem string `json:"subsystem,omitempty"`
Result string `json:"result"`
Stdout string `json:"stdout"`
Stderr string `json:"stderr"`
ExitCode int `json:"exit_code"`
DurationSeconds int `json:"duration_seconds"`
}{
CommandID: report.CommandID,
Action: report.Action,
Subsystem: subsystem,
Result: report.Result,
Stdout: report.Stdout,
Stderr: report.Stderr,
ExitCode: report.ExitCode,
DurationSeconds: report.DurationSeconds,
}
body, err := json.Marshal(logRequest)
if err != nil {
return err
}

View File

@@ -8,6 +8,7 @@ import (
"strings"
"time"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/constants"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/version"
"github.com/google/uuid"
)
@@ -98,6 +99,9 @@ type Config struct {
RapidPollingEnabled bool `json:"rapid_polling_enabled"`
RapidPollingUntil time.Time `json:"rapid_polling_until"`
// Degraded mode for operation after repeated failures
DegradedMode bool `json:"degraded_mode"`
// Network Configuration
Network NetworkConfig `json:"network,omitempty"`
@@ -216,6 +220,7 @@ func getDefaultConfig() *Config {
// Agent Behavior
RapidPollingEnabled: false,
RapidPollingUntil: time.Time{},
DegradedMode: false,
// Network Security
Proxy: ProxyConfig{},
@@ -567,6 +572,12 @@ func (c *Config) Save(configPath string) error {
return nil
}
// SetDegradedMode sets the degraded mode flag and saves the config
func (c *Config) SetDegradedMode(enabled bool) error {
c.DegradedMode = enabled
return c.Save(constants.GetAgentConfigPath())
}
// IsRegistered checks if the agent is registered
func (c *Config) IsRegistered() bool {
return c.AgentID != uuid.Nil && c.Token != ""

View File

@@ -4,8 +4,8 @@
package constants
import (
"runtime"
"path/filepath"
"runtime"
)
// Base directories
@@ -80,6 +80,14 @@ func GetAgentConfigDir() string {
return filepath.Join(LinuxConfigBase, AgentDir)
}
// GetServerPublicKeyPath returns /etc/redflag/server/server_public_key
func GetServerPublicKeyPath() string {
if runtime.GOOS == "windows" {
return filepath.Join(WindowsConfigBase, ServerDir, "server_public_key")
}
return filepath.Join(LinuxConfigBase, ServerDir, "server_public_key")
}
// GetAgentLogDir returns /var/log/redflag/agent
func GetAgentLogDir() string {
return filepath.Join(LinuxLogBase, AgentDir)

View File

@@ -0,0 +1,63 @@
package guardian
import (
"fmt"
"sync"
)
// IntervalGuardian protects against accidental check-in interval overrides
type IntervalGuardian struct {
mu sync.Mutex
lastCheckInValue int
violationCount int
}
// NewIntervalGuardian creates a new guardian with zero violations
func NewIntervalGuardian() *IntervalGuardian {
return &IntervalGuardian{
lastCheckInValue: 0,
violationCount: 0,
}
}
// SetBaseline records the expected check-in interval
func (g *IntervalGuardian) SetBaseline(interval int) {
g.mu.Lock()
defer g.mu.Unlock()
g.lastCheckInValue = interval
}
// CheckForOverrideAttempt validates that proposed interval matches baseline
// Returns error if mismatch detected (indicating a regression)
func (g *IntervalGuardian) CheckForOverrideAttempt(currentBaseline, proposedValue int) error {
g.mu.Lock()
defer g.mu.Unlock()
if currentBaseline != proposedValue {
g.violationCount++
return fmt.Errorf("INTERVAL_OVERRIDE_DETECTED: baseline=%d, proposed=%d, violations=%d",
currentBaseline, proposedValue, g.violationCount)
}
return nil
}
// GetViolationCount returns total number of violations detected
func (g *IntervalGuardian) GetViolationCount() int {
g.mu.Lock()
defer g.mu.Unlock()
return g.violationCount
}
// Reset clears violation count (use after legitimate config change)
func (g *IntervalGuardian) Reset() {
g.mu.Lock()
defer g.mu.Unlock()
g.violationCount = 0
}
// GetBaseline returns current baseline value
func (g *IntervalGuardian) GetBaseline() int {
g.mu.Lock()
defer g.mu.Unlock()
return g.lastCheckInValue
}

View File

@@ -1,10 +1,124 @@
package orchestrator
import (
"fmt"
"log"
"time"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/client"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/scanner"
)
// === Type Conversion Functions ===
// These functions convert scanner-specific metrics to the generic UpdateReportItem format
// This maintains compatibility with the existing Scanner interface while preserving data
// convertStorageToUpdates converts StorageMetric slices to UpdateReportItem format
func convertStorageToUpdates(metrics []StorageMetric) []client.UpdateReportItem {
log.Printf("[HISTORY] [agent] [storage] converting %d storage metrics to update items timestamp=%s",
len(metrics), time.Now().Format(time.RFC3339))
updates := make([]client.UpdateReportItem, 0, len(metrics))
for _, metric := range metrics {
update := client.UpdateReportItem{
// Map storage metrics to package-like structure for compatibility
PackageType: "storage",
PackageName: metric.Mountpoint,
PackageDescription: fmt.Sprintf("Storage metrics for %s (%s)", metric.Mountpoint, metric.Filesystem),
CurrentVersion: fmt.Sprintf("%.1f%% used", metric.UsedPercent),
AvailableVersion: fmt.Sprintf("%.1f GB free", float64(metric.AvailableBytes)/1024/1024/1024),
Severity: metric.Severity,
RepositorySource: metric.Device,
SizeBytes: metric.TotalBytes,
Metadata: map[string]interface{}{
"mountpoint": metric.Mountpoint,
"filesystem": metric.Filesystem,
"device": metric.Device,
"disk_type": metric.DiskType,
"total_bytes": metric.TotalBytes,
"used_bytes": metric.UsedBytes,
"available_bytes": metric.AvailableBytes,
"used_percent": metric.UsedPercent,
"is_root": metric.IsRoot,
"is_largest": metric.IsLargest,
},
}
updates = append(updates, update)
}
log.Printf("[HISTORY] [agent] [storage] Converted %d storage metrics to update items timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates
}
// convertSystemToUpdates converts SystemMetric slices to UpdateReportItem format
func convertSystemToUpdates(metrics []SystemMetric) []client.UpdateReportItem {
log.Printf("[HISTORY] [agent] [system] converting %d system metrics to update items timestamp=%s",
len(metrics), time.Now().Format(time.RFC3339))
updates := make([]client.UpdateReportItem, 0, len(metrics))
for _, metric := range metrics {
update := client.UpdateReportItem{
// Map system metrics to package-like structure for compatibility
PackageType: "system",
PackageName: metric.MetricName,
PackageDescription: fmt.Sprintf("System metric %s (%s)", metric.MetricName, metric.MetricType),
CurrentVersion: metric.CurrentValue,
AvailableVersion: metric.AvailableValue,
Severity: metric.Severity,
RepositorySource: metric.MetricType,
Metadata: map[string]interface{}{
"metric_name": metric.MetricName,
"metric_type": metric.MetricType,
"current_value": metric.CurrentValue,
"available_value": metric.AvailableValue,
},
}
updates = append(updates, update)
}
log.Printf("[HISTORY] [agent] [system] Converted %d system metrics to update items timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates
}
// convertDockerToUpdates converts DockerImage slices to UpdateReportItem format
func convertDockerToUpdates(images []DockerImage) []client.UpdateReportItem {
log.Printf("[HISTORY] [agent] [docker] converting %d docker images to update items timestamp=%s",
len(images), time.Now().Format(time.RFC3339))
updates := make([]client.UpdateReportItem, 0, len(images))
for _, image := range images {
update := client.UpdateReportItem{
// Map Docker images to package structure
PackageType: "docker",
PackageName: image.ImageName,
PackageDescription: fmt.Sprintf("Docker image %s:%s", image.ImageName, image.ImageTag),
CurrentVersion: image.ImageTag,
AvailableVersion: "latest",
Severity: image.Severity,
RepositorySource: image.RepositorySource,
SizeBytes: image.SizeBytes,
Metadata: map[string]interface{}{
"image_name": image.ImageName,
"image_tag": image.ImageTag,
"image_id": image.ImageID,
"repository": image.RepositorySource,
"size_bytes": image.SizeBytes,
"created_at": image.CreatedAt,
"has_update": image.HasUpdate,
"latest_image_id": image.LatestImageID,
"labels": image.Labels,
},
}
updates = append(updates, update)
}
log.Printf("[HISTORY] [agent] [docker] Converted %d docker images to update items timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates
}
// APTScannerWrapper wraps the APT scanner to implement the Scanner interface
type APTScannerWrapper struct {
scanner *scanner.APTScanner
@@ -19,7 +133,26 @@ func (w *APTScannerWrapper) IsAvailable() bool {
}
func (w *APTScannerWrapper) Scan() ([]client.UpdateReportItem, error) {
return w.scanner.Scan()
log.Printf("[HISTORY] [agent] [apt] starting scan via wrapper timestamp=%s",
time.Now().Format(time.RFC3339))
if w.scanner == nil {
err := fmt.Errorf("apt scanner is nil")
log.Printf("[ERROR] [agent] [apt] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
updates, err := w.scanner.Scan()
if err != nil {
log.Printf("[ERROR] [agent] [apt] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
log.Printf("[HISTORY] [agent] [apt] scan_completed items=%d timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates, nil
}
func (w *APTScannerWrapper) Name() string {
@@ -40,7 +173,26 @@ func (w *DNFScannerWrapper) IsAvailable() bool {
}
func (w *DNFScannerWrapper) Scan() ([]client.UpdateReportItem, error) {
return w.scanner.Scan()
log.Printf("[HISTORY] [agent] [dnf] starting scan via wrapper timestamp=%s",
time.Now().Format(time.RFC3339))
if w.scanner == nil {
err := fmt.Errorf("dnf scanner is nil")
log.Printf("[ERROR] [agent] [dnf] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
updates, err := w.scanner.Scan()
if err != nil {
log.Printf("[ERROR] [agent] [dnf] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
log.Printf("[HISTORY] [agent] [dnf] scan_completed items=%d timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates, nil
}
func (w *DNFScannerWrapper) Name() string {
@@ -64,7 +216,26 @@ func (w *DockerScannerWrapper) IsAvailable() bool {
}
func (w *DockerScannerWrapper) Scan() ([]client.UpdateReportItem, error) {
return w.scanner.Scan()
log.Printf("[HISTORY] [agent] [docker] starting scan via wrapper timestamp=%s",
time.Now().Format(time.RFC3339))
if w.scanner == nil {
err := fmt.Errorf("docker scanner is nil")
log.Printf("[ERROR] [agent] [docker] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
updates, err := w.scanner.Scan()
if err != nil {
log.Printf("[ERROR] [agent] [docker] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
log.Printf("[HISTORY] [agent] [docker] scan_completed items=%d timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates, nil
}
func (w *DockerScannerWrapper) Name() string {
@@ -85,7 +256,26 @@ func (w *WindowsUpdateScannerWrapper) IsAvailable() bool {
}
func (w *WindowsUpdateScannerWrapper) Scan() ([]client.UpdateReportItem, error) {
return w.scanner.Scan()
log.Printf("[HISTORY] [agent] [windows] starting scan via wrapper timestamp=%s",
time.Now().Format(time.RFC3339))
if w.scanner == nil {
err := fmt.Errorf("windows update scanner is nil")
log.Printf("[ERROR] [agent] [windows] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
updates, err := w.scanner.Scan()
if err != nil {
log.Printf("[ERROR] [agent] [windows] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
log.Printf("[HISTORY] [agent] [windows] scan_completed items=%d timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates, nil
}
func (w *WindowsUpdateScannerWrapper) Name() string {
@@ -106,9 +296,112 @@ func (w *WingetScannerWrapper) IsAvailable() bool {
}
func (w *WingetScannerWrapper) Scan() ([]client.UpdateReportItem, error) {
return w.scanner.Scan()
log.Printf("[HISTORY] [agent] [winget] starting scan via wrapper timestamp=%s",
time.Now().Format(time.RFC3339))
if w.scanner == nil {
err := fmt.Errorf("winget scanner is nil")
log.Printf("[ERROR] [agent] [winget] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
updates, err := w.scanner.Scan()
if err != nil {
log.Printf("[ERROR] [agent] [winget] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
log.Printf("[HISTORY] [agent] [winget] scan_completed items=%d timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates, nil
}
func (w *WingetScannerWrapper) Name() string {
return "Winget Package Update Scanner"
}
// StorageScannerWrapper wraps the Storage scanner to implement the Scanner interface
type StorageScannerWrapper struct {
scanner *StorageScanner
}
func NewStorageScannerWrapper(s *StorageScanner) *StorageScannerWrapper {
return &StorageScannerWrapper{scanner: s}
}
func (w *StorageScannerWrapper) IsAvailable() bool {
return w.scanner.IsAvailable()
}
func (w *StorageScannerWrapper) Scan() ([]client.UpdateReportItem, error) {
log.Printf("[HISTORY] [agent] [storage] starting scan via wrapper timestamp=%s",
time.Now().Format(time.RFC3339))
if w.scanner == nil {
err := fmt.Errorf("storage scanner is nil")
log.Printf("[ERROR] [agent] [storage] scan failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
metrics, err := w.scanner.ScanStorage()
if err != nil {
log.Printf("[ERROR] [agent] [storage] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
updates := convertStorageToUpdates(metrics)
log.Printf("[HISTORY] [agent] [storage] scan_completed items=%d timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates, nil
}
func (w *StorageScannerWrapper) Name() string {
return w.scanner.Name()
}
// SystemScannerWrapper wraps the System scanner to implement the Scanner interface
type SystemScannerWrapper struct {
scanner *SystemScanner
}
func NewSystemScannerWrapper(s *SystemScanner) *SystemScannerWrapper {
return &SystemScannerWrapper{scanner: s}
}
func (w *SystemScannerWrapper) IsAvailable() bool {
return w.scanner.IsAvailable()
}
func (w *SystemScannerWrapper) Scan() ([]client.UpdateReportItem, error) {
log.Printf("[HISTORY] [agent] [system] starting scan via wrapper timestamp=%s",
time.Now().Format(time.RFC3339))
if w.scanner == nil {
err := fmt.Errorf("system scanner is nil")
log.Printf("[ERROR] [agent] [system] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
metrics, err := w.scanner.ScanSystem()
if err != nil {
log.Printf("[ERROR] [agent] [system] scan_failed error=\"%v\" timestamp=%s",
err, time.Now().Format(time.RFC3339))
return nil, err
}
updates := convertSystemToUpdates(metrics)
log.Printf("[HISTORY] [agent] [system] scan_completed items=%d timestamp=%s",
len(updates), time.Now().Format(time.RFC3339))
return updates, nil
}
func (w *SystemScannerWrapper) Name() string {
return w.scanner.Name()
}

View File

@@ -0,0 +1,55 @@
package validator
import (
"fmt"
)
// IntervalValidator provides bounds checking for agent and scanner intervals
type IntervalValidator struct {
minCheckInSeconds int // 60 seconds (1 minute)
maxCheckInSeconds int // 3600 seconds (1 hour)
minScannerMinutes int // 1 minute
maxScannerMinutes int // 1440 minutes (24 hours)
}
// NewIntervalValidator creates a validator with default bounds
func NewIntervalValidator() *IntervalValidator {
return &IntervalValidator{
minCheckInSeconds: 60, // 1 minute minimum
maxCheckInSeconds: 3600, // 1 hour maximum
minScannerMinutes: 1, // 1 minute minimum
maxScannerMinutes: 1440, // 24 hours maximum
}
}
// ValidateCheckInInterval checks if agent check-in interval is within bounds
func (v *IntervalValidator) ValidateCheckInInterval(seconds int) error {
if seconds < v.minCheckInSeconds {
return fmt.Errorf("check-in interval %d seconds below minimum %d seconds (1 minute)",
seconds, v.minCheckInSeconds)
}
if seconds > v.maxCheckInSeconds {
return fmt.Errorf("check-in interval %d seconds above maximum %d seconds (1 hour)",
seconds, v.maxCheckInSeconds)
}
return nil
}
// ValidateScannerInterval checks if scanner interval is within bounds
func (v *IntervalValidator) ValidateScannerInterval(minutes int) error {
if minutes < v.minScannerMinutes {
return fmt.Errorf("scanner interval %d minutes below minimum %d minutes",
minutes, v.minScannerMinutes)
}
if minutes > v.maxScannerMinutes {
return fmt.Errorf("scanner interval %d minutes above maximum %d minutes (24 hours)",
minutes, v.maxScannerMinutes)
}
return nil
}
// GetBounds returns the current validation bounds (for testing/monitoring)
func (v *IntervalValidator) GetBounds() (minCheckIn, maxCheckIn, minScanner, maxScanner int) {
return v.minCheckInSeconds, v.maxCheckInSeconds,
v.minScannerMinutes, v.maxScannerMinutes
}

View File

@@ -426,15 +426,27 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
}
// Get pending commands
commands, err := h.commandQueries.GetPendingCommands(agentID)
pendingCommands, err := h.commandQueries.GetPendingCommands(agentID)
if err != nil {
log.Printf("[ERROR] [server] [command] get_pending_failed agent_id=%s error=%v", agentID, err)
log.Printf("[HISTORY] [server] [command] get_pending_failed error=\"%v\" timestamp=%s", err, time.Now().Format(time.RFC3339))
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to retrieve commands"})
return
}
// Convert to response format
commandItems := make([]models.CommandItem, 0, len(commands))
for _, cmd := range commands {
// Recover stuck commands (sent > 5 minutes ago or pending > 5 minutes)
stuckCommands, err := h.commandQueries.GetStuckCommands(agentID, 5*time.Minute)
if err != nil {
log.Printf("[WARNING] [server] [command] get_stuck_failed agent_id=%s error=%v", agentID, err)
// Continue anyway, stuck commands check is non-critical
}
// Combine all commands to return
allCommands := append(pendingCommands, stuckCommands...)
// Convert to response format and mark all as sent immediately
commandItems := make([]models.CommandItem, 0, len(allCommands))
for _, cmd := range allCommands {
commandItems = append(commandItems, models.CommandItem{
ID: cmd.ID.String(),
Type: cmd.CommandType,
@@ -442,8 +454,21 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
Signature: cmd.Signature,
})
// Mark as sent
h.commandQueries.MarkCommandSent(cmd.ID)
// Mark as sent NOW with error handling (ETHOS: Errors are History)
if err := h.commandQueries.MarkCommandSent(cmd.ID); err != nil {
log.Printf("[ERROR] [server] [command] mark_sent_failed command_id=%s error=%v", cmd.ID, err)
log.Printf("[HISTORY] [server] [command] mark_sent_failed command_id=%s error=\"%v\" timestamp=%s",
cmd.ID, err, time.Now().Format(time.RFC3339))
// Continue - don't fail entire operation for one command
}
}
// Log command retrieval for audit trail
if len(allCommands) > 0 {
log.Printf("[INFO] [server] [command] retrieved_commands agent_id=%s count=%d timestamp=%s",
agentID, len(allCommands), time.Now().Format(time.RFC3339))
log.Printf("[HISTORY] [server] [command] retrieved_commands agent_id=%s count=%d timestamp=%s",
agentID, len(allCommands), time.Now().Format(time.RFC3339))
}
// Check if rapid polling should be enabled

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"log"
"net/http"
"time"
"github.com/Fimeg/RedFlag/aggregator-server/internal/database/queries"
"github.com/Fimeg/RedFlag/aggregator-server/internal/models"
@@ -241,15 +242,32 @@ func (h *SubsystemHandler) TriggerSubsystem(c *gin.Context) {
AgentID: agentID,
CommandType: commandType,
Status: "pending",
Source: "web_ui", // Manual trigger from UI
Source: "manual", // Manual trigger from UI (must be 'manual' or 'system' per DB constraint)
}
// Log command creation attempt
log.Printf("[INFO] [server] [command] creating_scan_command agent_id=%s subsystem=%s command_type=%s timestamp=%s",
agentID, subsystem, commandType, time.Now().Format(time.RFC3339))
log.Printf("[HISTORY] [server] [scan_%s] command_creation_started agent_id=%s timestamp=%s",
subsystem, agentID, time.Now().Format(time.RFC3339))
err = h.signAndCreateCommand(command)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create command"})
log.Printf("[ERROR] [server] [scan_%s] command_creation_failed agent_id=%s error=%v", subsystem, agentID, err)
log.Printf("[HISTORY] [server] [scan_%s] command_creation_failed error=\"%v\" timestamp=%s",
subsystem, err, time.Now().Format(time.RFC3339))
c.JSON(http.StatusInternalServerError, gin.H{
"error": fmt.Sprintf("Failed to create %s scan command: %v", subsystem, err),
})
return
}
log.Printf("[SUCCESS] [server] [scan_%s] command_created agent_id=%s command_id=%s timestamp=%s",
subsystem, agentID, command.ID, time.Now().Format(time.RFC3339))
log.Printf("[HISTORY] [server] [scan_%s] command_created agent_id=%s command_id=%s timestamp=%s",
subsystem, agentID, command.ID, time.Now().Format(time.RFC3339))
c.JSON(http.StatusOK, gin.H{
"message": "Subsystem scan triggered successfully",
"command_id": command.ID,

View File

@@ -5,6 +5,7 @@ import (
"log"
"net/http"
"strconv"
"strings"
"time"
"github.com/Fimeg/RedFlag/aggregator-server/internal/database/queries"
@@ -222,10 +223,17 @@ func (h *UpdateHandler) ReportLog(c *gin.Context) {
}
}
// Extract subsystem from request if provided, otherwise try to parse from action
subsystem := req.Subsystem
if subsystem == "" && strings.HasPrefix(req.Action, "scan_") {
subsystem = strings.TrimPrefix(req.Action, "scan_")
}
logEntry := &models.UpdateLog{
ID: uuid.New(),
AgentID: agentID,
Action: req.Action,
Subsystem: subsystem,
Result: validResult,
Stdout: req.Stdout,
Stderr: req.Stderr,
@@ -234,8 +242,16 @@ func (h *UpdateHandler) ReportLog(c *gin.Context) {
ExecutedAt: time.Now(),
}
// Add HISTORY logging
log.Printf("[INFO] [server] [update] log_created agent_id=%s subsystem=%s action=%s result=%s timestamp=%s",
agentID, subsystem, req.Action, validResult, time.Now().Format(time.RFC3339))
log.Printf("[HISTORY] [server] [update] log_created agent_id=%s subsystem=%s action=%s result=%s timestamp=%s",
agentID, subsystem, req.Action, validResult, time.Now().Format(time.RFC3339))
// Store the log entry
if err := h.updateQueries.CreateUpdateLog(logEntry); err != nil {
log.Printf("[ERROR] [server] [update] log_save_failed agent_id=%s error=%v", agentID, err)
log.Printf("[HISTORY] [server] [update] log_save_failed error=\"%v\" timestamp=%s", err, time.Now().Format(time.RFC3339))
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save log"})
return
}

View File

@@ -0,0 +1,17 @@
-- Migration: Rollback subsystem column addition
-- Purpose: Remove subsystem column and associated indexes
-- Drop indexes
DROP INDEX IF EXISTS idx_logs_agent_subsystem;
DROP INDEX IF EXISTS idx_logs_subsystem;
-- Drop check constraint
ALTER TABLE update_logs
DROP CONSTRAINT IF EXISTS chk_update_logs_subsystem;
-- Remove comment
COMMENT ON COLUMN update_logs.subsystem IS NULL;
-- Drop subsystem column
ALTER TABLE update_logs
DROP COLUMN IF EXISTS subsystem;

View File

@@ -0,0 +1,38 @@
-- Migration: Add subsystem column to update_logs table
-- Purpose: Make subsystem context explicit (not parsed from action field)
-- Add subsystem column
ALTER TABLE update_logs
ADD COLUMN IF NOT EXISTS subsystem VARCHAR(50);
-- Create indexes for subsystem filtering
CREATE INDEX IF NOT EXISTS idx_logs_subsystem ON update_logs(subsystem);
CREATE INDEX IF NOT EXISTS idx_logs_agent_subsystem ON update_logs(agent_id, subsystem);
-- Backfill subsystem from action field for existing scan entries
UPDATE update_logs
SET subsystem = CASE
WHEN action = 'scan_docker' THEN 'docker'
WHEN action = 'scan_storage' THEN 'storage'
WHEN action = 'scan_system' THEN 'system'
WHEN action = 'scan_apt' THEN 'apt'
WHEN action = 'scan_dnf' THEN 'dnf'
WHEN action = 'scan_winget' THEN 'winget'
WHEN action = 'scan_updates' THEN 'updates'
ELSE NULL
END
WHERE action LIKE 'scan_%' AND subsystem IS NULL;
-- Add check constraint for valid subsystem values
ALTER TABLE update_logs
ADD CONSTRAINT chk_update_logs_subsystem
CHECK (subsystem IS NULL OR subsystem IN (
'docker', 'storage', 'system', 'apt', 'dnf', 'winget', 'updates',
'agent', 'security', 'network', 'heartbeat'
));
-- Add comment for documentation
COMMENT ON COLUMN update_logs.subsystem IS 'Subsystem that generated this log entry (e.g., docker, storage, system)';
-- Grant permissions (adjust as needed for your setup)
-- GRANT ALL PRIVILEGES ON TABLE update_logs TO redflag_user;

View File

@@ -418,6 +418,25 @@ func (q *CommandQueries) GetCommandsInTimeRange(hours int) (int, error) {
return count, err
}
// GetStuckCommands retrieves commands that are stuck in 'pending' or 'sent' status
// These are commands that were returned to the agent but never marked as sent, or
// sent commands that haven't been completed/failed within the specified duration
func (q *CommandQueries) GetStuckCommands(agentID uuid.UUID, olderThan time.Duration) ([]models.AgentCommand, error) {
var commands []models.AgentCommand
query := `
SELECT * FROM agent_commands
WHERE agent_id = $1
AND status IN ('pending', 'sent')
AND (
(sent_at < $2 AND sent_at IS NOT NULL)
OR (created_at < $2 AND sent_at IS NULL)
)
ORDER BY created_at ASC
`
err := q.db.Select(&commands, query, agentID, time.Now().Add(-olderThan))
return commands, err
}
// VerifyCommandsCompleted checks which command IDs from the provided list have been completed or failed
// Returns the list of command IDs that have been successfully recorded (completed or failed status)
func (q *CommandQueries) VerifyCommandsCompleted(commandIDs []string) ([]string, error) {

View File

@@ -925,3 +925,44 @@ func (q *UpdateQueries) GetActiveOperations() ([]models.ActiveOperation, error)
return operations, nil
}
// GetLogsByAgentAndSubsystem retrieves logs for a specific agent filtered by subsystem
func (q *UpdateQueries) GetLogsByAgentAndSubsystem(agentID uuid.UUID, subsystem string) ([]models.UpdateLog, error) {
var logs []models.UpdateLog
query := `
SELECT id, agent_id, update_package_id, action, subsystem, result,
stdout, stderr, exit_code, duration_seconds, executed_at
FROM update_logs
WHERE agent_id = $1 AND subsystem = $2
ORDER BY executed_at DESC
`
err := q.db.Select(&logs, query, agentID, subsystem)
return logs, err
}
// GetSubsystemStats returns scan counts by subsystem for an agent
func (q *UpdateQueries) GetSubsystemStats(agentID uuid.UUID) (map[string]int64, error) {
query := `
SELECT subsystem, COUNT(*) as count
FROM update_logs
WHERE agent_id = $1 AND action LIKE 'scan_%'
GROUP BY subsystem
`
stats := make(map[string]int64)
rows, err := q.db.Queryx(query, agentID)
if err != nil {
return nil, err
}
defer rows.Close()
for rows.Next() {
var subsystem string
var count int64
if err := rows.Scan(&subsystem, &count); err != nil {
return nil, err
}
stats[subsystem] = count
}
return stats, nil
}

View File

@@ -58,6 +58,7 @@ type UpdateLog struct {
AgentID uuid.UUID `json:"agent_id" db:"agent_id"`
UpdatePackageID *uuid.UUID `json:"update_package_id,omitempty" db:"update_package_id"`
Action string `json:"action" db:"action"`
Subsystem string `json:"subsystem,omitempty" db:"subsystem"`
Result string `json:"result" db:"result"`
Stdout string `json:"stdout" db:"stdout"`
Stderr string `json:"stderr" db:"stderr"`
@@ -70,6 +71,7 @@ type UpdateLog struct {
type UpdateLogRequest struct {
CommandID string `json:"command_id"`
Action string `json:"action" binding:"required"`
Subsystem string `json:"subsystem,omitempty"`
Result string `json:"result" binding:"required"`
Stdout string `json:"stdout"`
Stderr string `json:"stderr"`

View File

@@ -1,4 +1,4 @@
import React, { useState } from 'react';
import React, { useState, useMemo } from 'react';
import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
import {
RefreshCw,

50
scanning_ux_summary.txt Normal file
View File

@@ -0,0 +1,50 @@
## Summary: Why History Shows "SCAN" Generically
**The Confusion You See**:
- Each subsystem has its own "Scan" button (✅ correct)
- But history only shows generic "SCAN" (❌ confusing)
**The Implementation Flow**:
```
You click: "Scan Storage" button
→ UI passes: subsystem="storage" ✅
→ Backend creates: command_type="scan_storage" ✅
→ Agent runs: handleScanStorage() ✅
→ Results stored: updates=[4 items] ✅
→ History logged: action="scan" ❌ (should be "storage scan" or similar)
```
**Root Cause**:
The history table's `action` field stores only generic "scan" instead of including the subsystem context. Even though:
- Backend knows it's "scan_storage"
- UI sends subsystem parameter
- Results are subsystem-specific
**The Result**:
```
History shows unhelpful entries like:
[14:20] SCAN → Success → 4 updates found
[14:19] SCAN → Success → 461 updates found
Which subsystem found which updates? Unknown from history.
```
**This is a UX Issue, NOT a Bug**:
- ✅ Scans run for correct subsystems
- ✅ Results are accurate
- ✅ Backend distinguishes types ("scan_storage", "scan_system", "scan_docker")
- ❌ History display is generic "SCAN" instead of "Storage Scan", "System Scan", "Docker Scan"
**Why It Happened**:
- Early design had simple action types ("scan", "install", "upgrade")
- Later added docker/storage/system scans
- Database schema never evolved to include subsystem context
- History display just shows action field directly
**Files Involved**:
- ✅ Working: AgentHealth.tsx (per-subsystem scan buttons)
- ✅ Working: Backend API (creates "scan_storage", "scan_system", etc.)
- ❌ Broken: History logging (stores only "scan", not subsystem)
- ❌ Broken: History display (shows generic text, no subsystem parsing)
**Full Analysis**: `/home/casey/Projects/RedFlag/UX_ISSUE_ANALYSIS_scan_history.md`

151
sudo
View File

@@ -1,2 +1,149 @@
# Error: registration token is required
# Please include token in URL: ?token=YOUR_TOKEN
#!/bin/bash
set -e
# Parse command line arguments
TARGET="$1" # Optional target parameter
# Validate target if provided
if [[ -n "$TARGET" ]] && [[ ! "$TARGET" =~ ^(stable|latest|[0-9]+\.[0-9]+\.[0-9]+(-[^[:space:]]+)?)$ ]]; then
echo "Usage: $0 [stable|latest|VERSION]" >&2
exit 1
fi
GCS_BUCKET="https://storage.googleapis.com/claude-code-dist-86c565f3-f756-42ad-8dfa-d59b1c096819/claude-code-releases"
DOWNLOAD_DIR="$HOME/.claude/downloads"
# Check for required dependencies
DOWNLOADER=""
if command -v curl >/dev/null 2>&1; then
DOWNLOADER="curl"
elif command -v wget >/dev/null 2>&1; then
DOWNLOADER="wget"
else
echo "Either curl or wget is required but neither is installed" >&2
exit 1
fi
# Check if jq is available (optional)
HAS_JQ=false
if command -v jq >/dev/null 2>&1; then
HAS_JQ=true
fi
# Download function that works with both curl and wget
download_file() {
local url="$1"
local output="$2"
if [ "$DOWNLOADER" = "curl" ]; then
if [ -n "$output" ]; then
curl -fsSL -o "$output" "$url"
else
curl -fsSL "$url"
fi
elif [ "$DOWNLOADER" = "wget" ]; then
if [ -n "$output" ]; then
wget -q -O "$output" "$url"
else
wget -q -O - "$url"
fi
else
return 1
fi
}
# Simple JSON parser for extracting checksum when jq is not available
get_checksum_from_manifest() {
local json="$1"
local platform="$2"
# Normalize JSON to single line and extract checksum
json=$(echo "$json" | tr -d '\n\r\t' | sed 's/ \+/ /g')
# Extract checksum for platform using bash regex
if [[ $json =~ \"$platform\"[^}]*\"checksum\"[[:space:]]*:[[:space:]]*\"([a-f0-9]{64})\" ]]; then
echo "${BASH_REMATCH[1]}"
return 0
fi
return 1
}
# Detect platform
case "$(uname -s)" in
Darwin) os="darwin" ;;
Linux) os="linux" ;;
*) echo "Windows is not supported" >&2; exit 1 ;;
esac
case "$(uname -m)" in
x86_64|amd64) arch="x64" ;;
arm64|aarch64) arch="arm64" ;;
*) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;;
esac
# Check for musl on Linux and adjust platform accordingly
if [ "$os" = "linux" ]; then
if [ -f /lib/libc.musl-x86_64.so.1 ] || [ -f /lib/libc.musl-aarch64.so.1 ] || ldd /bin/ls 2>&1 | grep -q musl; then
platform="linux-${arch}-musl"
else
platform="linux-${arch}"
fi
else
platform="${os}-${arch}"
fi
mkdir -p "$DOWNLOAD_DIR"
# Always download stable version (which has the most up-to-date installer)
version=$(download_file "$GCS_BUCKET/stable")
# Download manifest and extract checksum
manifest_json=$(download_file "$GCS_BUCKET/$version/manifest.json")
# Use jq if available, otherwise fall back to pure bash parsing
if [ "$HAS_JQ" = true ]; then
checksum=$(echo "$manifest_json" | jq -r ".platforms[\"$platform\"].checksum // empty")
else
checksum=$(get_checksum_from_manifest "$manifest_json" "$platform")
fi
# Validate checksum format (SHA256 = 64 hex characters)
if [ -z "$checksum" ] || [[ ! "$checksum" =~ ^[a-f0-9]{64}$ ]]; then
echo "Platform $platform not found in manifest" >&2
exit 1
fi
# Download and verify
binary_path="$DOWNLOAD_DIR/claude-$version-$platform"
if ! download_file "$GCS_BUCKET/$version/$platform/claude" "$binary_path"; then
echo "Download failed" >&2
rm -f "$binary_path"
exit 1
fi
# Pick the right checksum tool
if [ "$os" = "darwin" ]; then
actual=$(shasum -a 256 "$binary_path" | cut -d' ' -f1)
else
actual=$(sha256sum "$binary_path" | cut -d' ' -f1)
fi
if [ "$actual" != "$checksum" ]; then
echo "Checksum verification failed" >&2
rm -f "$binary_path"
exit 1
fi
chmod +x "$binary_path"
# Run claude install to set up launcher and shell integration
echo "Setting up Claude Code..."
"$binary_path" install ${TARGET:+"$TARGET"}
# Clean up downloaded file
rm -f "$binary_path"
echo ""
echo "✅ Installation complete!"
echo ""