fix: agent acknowledgment recursion and subsystem UI improvements

- Fix recursive call in reportLogWithAck that caused infinite loop
- Add machine binding and security API endpoints
- Enhance AgentScanners component with security status display
- Improve scheduler and timeout service reliability
- Remove deprecated install.sh script
- Add subsystem configuration and logging improvements
This commit is contained in:
Fimeg
2025-11-03 21:02:57 -05:00
parent d0f13e5da7
commit 57be3754c6
19 changed files with 665 additions and 409 deletions

View File

@@ -57,8 +57,8 @@ func reportLogWithAck(apiClient *client.Client, cfg *config.Config, ackTracker *
log.Printf("Warning: Failed to save acknowledgment for command %s: %v", logReport.CommandID, err)
}
// Report the log to the server
if err := reportLogWithAck(apiClient, cfg, ackTracker, logReport); err != nil {
// Report the log to the server (FIX: was calling itself recursively!)
if err := apiClient.ReportLog(cfg.AgentID, logReport); err != nil {
// If reporting failed, increment retry count but don't remove from pending
ackTracker.IncrementRetry(logReport.CommandID)
return err
@@ -623,7 +623,12 @@ func runAgent(cfg *config.Config) error {
pendingAcks := ackTracker.GetPending()
if len(pendingAcks) > 0 {
metrics.PendingAcknowledgments = pendingAcks
log.Printf("Including %d pending acknowledgments in check-in: %v", len(pendingAcks), pendingAcks)
} else {
log.Printf("No pending acknowledgments to send")
}
} else {
log.Printf("Metrics is nil - not sending system information or acknowledgments")
}
// Get commands from server (with optional metrics)

View File

@@ -307,10 +307,12 @@ func handleUpdateAgent(apiClient *client.Client, cfg *config.Config, ackTracker
// Validate nonce for replay protection
log.Printf("[tunturi_ed25519] Validating nonce...")
log.Printf("[SECURITY] Nonce validation - UUID: %s, Timestamp: %s", nonceUUIDStr, nonceTimestampStr)
if err := validateNonce(nonceUUIDStr, nonceTimestampStr, nonceSignature); err != nil {
log.Printf("[SECURITY] ✗ Nonce validation FAILED: %v", err)
return fmt.Errorf("[tunturi_ed25519] nonce validation failed: %w", err)
}
log.Printf("[tunturi_ed25519] ✓ Nonce validated")
log.Printf("[SECURITY] ✓ Nonce validated successfully")
// Record start time for duration calculation
updateStartTime := time.Now()

View File

@@ -1,257 +0,0 @@
#!/bin/bash
# Abort the installation immediately if any command exits non-zero.
set -e
# RedFlag Agent Installation Script
# This script installs the RedFlag agent as a systemd service with proper permissions
# Resolve the directory holding this script so relative paths work from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Installation constants: dedicated service account, its home, the installed
# binary path, and the sudoers drop-in / systemd unit file locations.
AGENT_USER="redflag-agent"
AGENT_HOME="/var/lib/redflag-agent"
AGENT_BINARY="/usr/local/bin/redflag-agent"
SUDOERS_FILE="/etc/sudoers.d/redflag-agent"
SERVICE_FILE="/etc/systemd/system/redflag-agent.service"
echo "=== RedFlag Agent Installation ==="
echo ""
# Check if running as root; everything below writes system paths.
if [ "$EUID" -ne 0 ]; then
echo "ERROR: This script must be run as root (use sudo)"
exit 1
fi
# Create the dedicated system user if it does not already exist, and add it to
# the docker group (when present) so the agent can scan Docker images.
create_user() {
if id "$AGENT_USER" &>/dev/null; then
echo "✓ User $AGENT_USER already exists"
else
echo "Creating system user $AGENT_USER..."
# -r: system account; -s /bin/false: no interactive login; -m: create home.
useradd -r -s /bin/false -d "$AGENT_HOME" -m "$AGENT_USER"
echo "✓ User $AGENT_USER created"
fi
# Add user to docker group for Docker update scanning
if getent group docker &>/dev/null; then
echo "Adding $AGENT_USER to docker group..."
usermod -aG docker "$AGENT_USER"
echo "✓ User $AGENT_USER added to docker group"
else
echo "⚠ Docker group not found - Docker updates will not be available"
echo " (Install Docker first, then reinstall the agent to enable Docker support)"
fi
}
# Build the agent binary from ./cmd/agent in the script's directory.
# Requires a Go toolchain on PATH; set -e aborts the install if the build fails.
build_agent() {
echo "Building agent binary..."
cd "$SCRIPT_DIR"
go build -o redflag-agent ./cmd/agent
echo "✓ Agent binary built"
}
# Copy the built binary to $AGENT_BINARY, root-owned and world-executable,
# then restore its SELinux file context when SELinux is active.
install_binary() {
echo "Installing agent binary to $AGENT_BINARY..."
cp "$SCRIPT_DIR/redflag-agent" "$AGENT_BINARY"
chmod 755 "$AGENT_BINARY"
chown root:root "$AGENT_BINARY"
echo "✓ Agent binary installed"
# Set SELinux context for binary if SELinux is enabled
if command -v getenforce >/dev/null 2>&1 && [ "$(getenforce)" != "Disabled" ]; then
echo "SELinux detected, setting file context for binary..."
# Best-effort: suppress errors so a missing context does not abort (set -e).
restorecon -v "$AGENT_BINARY" 2>/dev/null || true
echo "✓ SELinux context set for binary"
fi
}
# Write a minimal sudoers drop-in granting the agent user only the apt/dnf and
# Docker commands it needs, then validate with visudo before keeping the file.
install_sudoers() {
echo "Installing sudoers configuration..."
# Quoted 'EOF': no shell expansion inside the sudoers content below.
cat > "$SUDOERS_FILE" <<'EOF'
# RedFlag Agent minimal sudo permissions
# This file is generated automatically during RedFlag agent installation
# APT package management commands
redflag-agent ALL=(root) NOPASSWD: /usr/bin/apt-get update
redflag-agent ALL=(root) NOPASSWD: /usr/bin/apt-get install -y *
redflag-agent ALL=(root) NOPASSWD: /usr/bin/apt-get upgrade -y *
redflag-agent ALL=(root) NOPASSWD: /usr/bin/apt-get install --dry-run --yes *
# DNF package management commands
redflag-agent ALL=(root) NOPASSWD: /usr/bin/dnf makecache
redflag-agent ALL=(root) NOPASSWD: /usr/bin/dnf install -y *
redflag-agent ALL=(root) NOPASSWD: /usr/bin/dnf upgrade -y *
redflag-agent ALL=(root) NOPASSWD: /usr/bin/dnf install --assumeno --downloadonly *
# Docker operations
redflag-agent ALL=(root) NOPASSWD: /usr/bin/docker pull *
redflag-agent ALL=(root) NOPASSWD: /usr/bin/docker image inspect *
redflag-agent ALL=(root) NOPASSWD: /usr/bin/docker manifest inspect *
EOF
# 440: read-only mode conventional for sudoers drop-in files.
chmod 440 "$SUDOERS_FILE"
# Validate sudoers file; an invalid drop-in could break sudo system-wide,
# so remove it and abort on failure.
if visudo -c -f "$SUDOERS_FILE"; then
echo "✓ Sudoers configuration installed and validated"
else
echo "ERROR: Sudoers configuration is invalid"
rm -f "$SUDOERS_FILE"
exit 1
fi
}
# Write the agent's systemd unit (with hardening options) and make it
# world-readable. Unquoted EOF: $AGENT_* variables expand into the unit below.
install_service() {
echo "Installing systemd service..."
cat > "$SERVICE_FILE" <<EOF
[Unit]
Description=RedFlag Update Agent
After=network.target
[Service]
Type=simple
User=$AGENT_USER
Group=$AGENT_USER
WorkingDirectory=$AGENT_HOME
ExecStart=$AGENT_BINARY
Restart=always
RestartSec=30
# Security hardening
# NoNewPrivileges=true - DISABLED: Prevents sudo from working
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=$AGENT_HOME /var/log /etc/aggregator
PrivateTmp=true
[Install]
WantedBy=multi-user.target
EOF
chmod 644 "$SERVICE_FILE"
echo "✓ Systemd service installed"
}
# Reload systemd, stop any already-running instance, then enable the service
# at boot and start it now.
start_service() {
echo "Reloading systemd daemon..."
systemctl daemon-reload
# Stop service if running, so the freshly installed binary is the one started.
if systemctl is-active --quiet redflag-agent; then
echo "Stopping existing service..."
systemctl stop redflag-agent
fi
echo "Enabling and starting redflag-agent service..."
systemctl enable redflag-agent
systemctl start redflag-agent
# Wait a moment for service to start before the caller inspects status.
sleep 2
echo "✓ Service started"
}
# Print the service's current systemd status and its 20 most recent journal
# lines, for a quick post-install health check.
show_status() {
echo ""
echo "=== Service Status ==="
systemctl status redflag-agent --no-pager -l
echo ""
echo "=== Recent Logs ==="
journalctl -u redflag-agent -n 20 --no-pager
}
# Register the agent with the server (arg 1, default http://localhost:8080).
# Removes stale config files first so reinstalls/upgrades start clean, and
# exits non-zero if registration fails.
register_agent() {
local server_url="${1:-http://localhost:8080}"
echo "Registering agent with server at $server_url..."
# Create config directory
mkdir -p /etc/aggregator
# Clean up old configuration files (prevents conflicts during reinstall/upgrade)
if [ -f /etc/aggregator/.env ]; then
echo "Removing old .env file..."
rm -f /etc/aggregator/.env
echo "✓ Old .env file removed"
fi
if [ -f /etc/aggregator/config.json ]; then
echo "Removing old config.json file..."
rm -f /etc/aggregator/config.json
echo "✓ Old config.json file removed"
fi
# Set SELinux context for config directory if SELinux is enabled
if command -v getenforce >/dev/null 2>&1 && [ "$(getenforce)" != "Disabled" ]; then
echo "Setting SELinux context for config directory..."
# Best-effort: ignore restorecon failures so they don't trip set -e.
restorecon -Rv /etc/aggregator 2>/dev/null || true
echo "✓ SELinux context set for config directory"
fi
# Register agent (run as regular binary, not as service)
if "$AGENT_BINARY" -register -server "$server_url"; then
echo "✓ Agent registered successfully"
else
echo "ERROR: Agent registration failed"
echo "Please ensure the RedFlag server is running at $server_url"
exit 1
fi
}
# Main installation flow
# Optional first argument overrides the server URL used during registration.
# Order matters: registration (Step 4) runs before the service exists, and the
# config file it writes is locked down to the agent user in Step 5.
SERVER_URL="${1:-http://localhost:8080}"
echo "Step 1: Creating system user..."
create_user
echo ""
echo "Step 2: Building agent binary..."
build_agent
echo ""
echo "Step 3: Installing agent binary..."
install_binary
echo ""
echo "Step 4: Registering agent with server..."
register_agent "$SERVER_URL"
echo ""
echo "Step 5: Setting config file permissions..."
# 600 + agent ownership: the config may hold credentials, keep it private.
chown redflag-agent:redflag-agent /etc/aggregator/config.json
chmod 600 /etc/aggregator/config.json
echo ""
echo "Step 6: Installing sudoers configuration..."
install_sudoers
echo ""
echo "Step 7: Installing systemd service..."
install_service
echo ""
echo "Step 8: Starting service..."
start_service
echo ""
echo "=== Installation Complete ==="
echo ""
echo "The RedFlag agent is now installed and running as a systemd service."
echo "Server URL: $SERVER_URL"
echo ""
echo "Useful commands:"
echo " - Check status: sudo systemctl status redflag-agent"
echo " - View logs: sudo journalctl -u redflag-agent -f"
echo " - Restart: sudo systemctl restart redflag-agent"
echo " - Stop: sudo systemctl stop redflag-agent"
echo " - Disable: sudo systemctl disable redflag-agent"
echo ""
echo "Note: To re-register with a different server, edit /etc/aggregator/config.json"
echo ""
show_status

View File

@@ -68,7 +68,7 @@ func GetDefaultSubsystemsConfig() SubsystemsConfig {
},
DNF: SubsystemConfig{
Enabled: true,
Timeout: 45 * time.Second, // DNF can be slower
Timeout: 15 * time.Minute, // TODO: Make scanner timeouts user-adjustable via settings. DNF operations can take a long time on large systems
CircuitBreaker: defaultCB,
},
Docker: SubsystemConfig{

View File

@@ -188,6 +188,9 @@ func main() {
// Initialize system handler
systemHandler := handlers.NewSystemHandler(signingService)
// Initialize security handler
securityHandler := handlers.NewSecurityHandler(signingService, agentQueries, commandQueries)
// Setup router
router := gin.Default()
@@ -242,17 +245,6 @@ func main() {
verificationHandler.VerifySignature(c)
})
agents.DELETE("/:id", agentHandler.UnregisterAgent)
// Subsystem routes
agents.GET("/:id/subsystems", subsystemHandler.GetSubsystems)
agents.GET("/:id/subsystems/:subsystem", subsystemHandler.GetSubsystem)
agents.PATCH("/:id/subsystems/:subsystem", subsystemHandler.UpdateSubsystem)
agents.POST("/:id/subsystems/:subsystem/enable", subsystemHandler.EnableSubsystem)
agents.POST("/:id/subsystems/:subsystem/disable", subsystemHandler.DisableSubsystem)
agents.POST("/:id/subsystems/:subsystem/trigger", subsystemHandler.TriggerSubsystem)
agents.GET("/:id/subsystems/:subsystem/stats", subsystemHandler.GetSubsystemStats)
agents.POST("/:id/subsystems/:subsystem/auto-run", subsystemHandler.SetAutoRun)
agents.POST("/:id/subsystems/:subsystem/interval", subsystemHandler.SetInterval)
}
// Dashboard/Web routes (protected by web auth)
@@ -263,10 +255,21 @@ func main() {
dashboard.GET("/agents", agentHandler.ListAgents)
dashboard.GET("/agents/:id", agentHandler.GetAgent)
dashboard.POST("/agents/:id/scan", agentHandler.TriggerScan)
dashboard.POST("/agents/:id/update", agentHandler.TriggerUpdate)
dashboard.POST("/agents/:id/heartbeat", agentHandler.TriggerHeartbeat)
dashboard.GET("/agents/:id/heartbeat", agentHandler.GetHeartbeatStatus)
dashboard.POST("/agents/:id/reboot", agentHandler.TriggerReboot)
// Subsystem routes for web dashboard
dashboard.GET("/agents/:id/subsystems", subsystemHandler.GetSubsystems)
dashboard.GET("/agents/:id/subsystems/:subsystem", subsystemHandler.GetSubsystem)
dashboard.PATCH("/agents/:id/subsystems/:subsystem", subsystemHandler.UpdateSubsystem)
dashboard.POST("/agents/:id/subsystems/:subsystem/enable", subsystemHandler.EnableSubsystem)
dashboard.POST("/agents/:id/subsystems/:subsystem/disable", subsystemHandler.DisableSubsystem)
dashboard.POST("/agents/:id/subsystems/:subsystem/trigger", subsystemHandler.TriggerSubsystem)
dashboard.GET("/agents/:id/subsystems/:subsystem/stats", subsystemHandler.GetSubsystemStats)
dashboard.POST("/agents/:id/subsystems/:subsystem/auto-run", subsystemHandler.SetAutoRun)
dashboard.POST("/agents/:id/subsystems/:subsystem/interval", subsystemHandler.SetInterval)
dashboard.GET("/updates", updateHandler.ListUpdates)
dashboard.GET("/updates/:id", updateHandler.GetUpdate)
dashboard.GET("/updates/:id/logs", updateHandler.GetUpdateLogs)
@@ -326,6 +329,14 @@ func main() {
admin.GET("/rate-limits/stats", rateLimiter.RateLimit("admin_operations", middleware.KeyByUserID), rateLimitHandler.GetRateLimitStats)
admin.POST("/rate-limits/cleanup", rateLimiter.RateLimit("admin_operations", middleware.KeyByUserID), rateLimitHandler.CleanupRateLimitEntries)
}
// Security Health Check endpoints
dashboard.GET("/security/overview", securityHandler.SecurityOverview)
dashboard.GET("/security/signing", securityHandler.SigningStatus)
dashboard.GET("/security/nonce", securityHandler.NonceValidationStatus)
dashboard.GET("/security/commands", securityHandler.CommandValidationStatus)
dashboard.GET("/security/machine-binding", securityHandler.MachineBindingStatus)
dashboard.GET("/security/metrics", securityHandler.SecurityMetrics)
}
}
@@ -355,7 +366,7 @@ func main() {
// Initialize and start scheduler
schedulerConfig := scheduler.DefaultConfig()
subsystemScheduler := scheduler.NewScheduler(schedulerConfig, agentQueries, commandQueries)
subsystemScheduler := scheduler.NewScheduler(schedulerConfig, agentQueries, commandQueries, subsystemQueries)
// Load subsystems into queue
ctx := context.Background()

View File

@@ -106,7 +106,8 @@ func (h *AgentHandler) RegisterAgent(c *gin.Context) {
// Save to database
if err := h.agentQueries.CreateAgent(agent); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to register agent"})
log.Printf("ERROR: Failed to create agent in database: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to register agent - database error"})
return
}
@@ -163,16 +164,17 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
// Try to parse optional system metrics from request body
var metrics struct {
CPUPercent float64 `json:"cpu_percent,omitempty"`
MemoryPercent float64 `json:"memory_percent,omitempty"`
MemoryUsedGB float64 `json:"memory_used_gb,omitempty"`
MemoryTotalGB float64 `json:"memory_total_gb,omitempty"`
DiskUsedGB float64 `json:"disk_used_gb,omitempty"`
DiskTotalGB float64 `json:"disk_total_gb,omitempty"`
DiskPercent float64 `json:"disk_percent,omitempty"`
Uptime string `json:"uptime,omitempty"`
Version string `json:"version,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
CPUPercent float64 `json:"cpu_percent,omitempty"`
MemoryPercent float64 `json:"memory_percent,omitempty"`
MemoryUsedGB float64 `json:"memory_used_gb,omitempty"`
MemoryTotalGB float64 `json:"memory_total_gb,omitempty"`
DiskUsedGB float64 `json:"disk_used_gb,omitempty"`
DiskTotalGB float64 `json:"disk_total_gb,omitempty"`
DiskPercent float64 `json:"disk_percent,omitempty"`
Uptime string `json:"uptime,omitempty"`
Version string `json:"version,omitempty"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
PendingAcknowledgments []string `json:"pending_acknowledgments,omitempty"`
}
// Parse metrics if provided (optional, won't fail if empty)
@@ -449,10 +451,27 @@ func (h *AgentHandler) GetCommands(c *gin.Context) {
}
}
// Process command acknowledgments from agent
var acknowledgedIDs []string
if len(metrics.PendingAcknowledgments) > 0 {
log.Printf("DEBUG: Processing %d pending acknowledgments for agent %s: %v", len(metrics.PendingAcknowledgments), agentID, metrics.PendingAcknowledgments)
// Verify which commands from agent's pending list have been recorded
verified, err := h.commandQueries.VerifyCommandsCompleted(metrics.PendingAcknowledgments)
if err != nil {
log.Printf("Warning: Failed to verify command acknowledgments for agent %s: %v", agentID, err)
} else {
acknowledgedIDs = verified
log.Printf("DEBUG: Verified %d completed commands out of %d pending for agent %s", len(acknowledgedIDs), len(metrics.PendingAcknowledgments), agentID)
if len(acknowledgedIDs) > 0 {
log.Printf("Acknowledged %d command results for agent %s", len(acknowledgedIDs), agentID)
}
}
}
response := models.CommandsResponse{
Commands: commandItems,
RapidPolling: rapidPolling,
AcknowledgedIDs: []string{}, // No acknowledgments in current implementation
AcknowledgedIDs: acknowledgedIDs,
}
c.JSON(http.StatusOK, response)
@@ -465,7 +484,8 @@ func (h *AgentHandler) ListAgents(c *gin.Context) {
agents, err := h.agentQueries.ListAgentsWithLastScan(status, osType)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list agents"})
log.Printf("ERROR: Failed to list agents: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list agents - database error"})
return
}

View File

@@ -31,28 +31,24 @@ func (h *DownloadHandler) getServerURL(c *gin.Context) string {
return h.config.Server.PublicURL
}
// Priority 2: Detect from request with TLS/proxy awareness
// Priority 2: Construct API server URL from configuration
scheme := "http"
host := h.config.Server.Host
port := h.config.Server.Port
// Check if TLS is enabled in config
// Use HTTPS if TLS is enabled in config
if h.config.Server.TLS.Enabled {
scheme = "https"
}
// Check if request came through HTTPS (direct or via proxy)
if c.Request.TLS != nil {
scheme = "https"
// For default host (0.0.0.0), use localhost for client connections
if host == "0.0.0.0" {
host = "localhost"
}
// Check X-Forwarded-Proto for reverse proxy setups
if forwardedProto := c.GetHeader("X-Forwarded-Proto"); forwardedProto == "https" {
scheme = "https"
}
// Use the Host header exactly as received (includes port if present)
host := c.GetHeader("X-Forwarded-Host")
if host == "" {
host = c.Request.Host
// Only include port if it's not the default for the protocol
if (scheme == "http" && port != 80) || (scheme == "https" && port != 443) {
return fmt.Sprintf("%s://%s:%d", scheme, host, port)
}
return fmt.Sprintf("%s://%s", scheme, host)
@@ -155,6 +151,7 @@ AGENT_BINARY="/usr/local/bin/redflag-agent"
SUDOERS_FILE="/etc/sudoers.d/redflag-agent"
SERVICE_FILE="/etc/systemd/system/redflag-agent.service"
CONFIG_DIR="/etc/aggregator"
STATE_DIR="/var/lib/aggregator"
echo "=== RedFlag Agent Installation ==="
echo ""
@@ -301,19 +298,24 @@ else
exit 1
fi
# Step 4: Create configuration directory
# Step 4: Create configuration and state directories
echo ""
echo "Step 4: Creating configuration directory..."
echo "Step 4: Creating configuration and state directories..."
mkdir -p "$CONFIG_DIR"
chown "$AGENT_USER:$AGENT_USER" "$CONFIG_DIR"
chmod 755 "$CONFIG_DIR"
echo "✓ Configuration directory created"
# Set SELinux context for config directory if SELinux is enabled
# Create state directory for acknowledgment tracking (v0.1.19+)
mkdir -p "$STATE_DIR"
chown "$AGENT_USER:$AGENT_USER" "$STATE_DIR"
chmod 755 "$STATE_DIR"
echo "✓ Configuration and state directories created"
# Set SELinux context for directories if SELinux is enabled
if command -v getenforce >/dev/null 2>&1 && [ "$(getenforce)" != "Disabled" ]; then
echo "Setting SELinux context for config directory..."
restorecon -Rv "$CONFIG_DIR" 2>/dev/null || true
echo "✓ SELinux context set for config directory"
echo "Setting SELinux context for directories..."
restorecon -Rv "$CONFIG_DIR" "$STATE_DIR" 2>/dev/null || true
echo "✓ SELinux context set for directories"
fi
# Step 5: Install systemd service
@@ -338,7 +340,7 @@ RestartSec=30
# NoNewPrivileges=true - DISABLED: Prevents sudo from working, which agent needs for package management
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=$AGENT_HOME /var/log $CONFIG_DIR
ReadWritePaths=$AGENT_HOME /var/log $CONFIG_DIR $STATE_DIR
PrivateTmp=true
# Logging

View File

@@ -387,9 +387,15 @@ func (h *SetupHandler) ConfigureServer(c *gin.Context) {
fmt.Println("Updating PostgreSQL password from bootstrap to user-provided password...")
bootstrapPassword := "redflag_bootstrap" // This matches our bootstrap .env
if err := updatePostgresPassword(req.DBHost, req.DBPort, req.DBUser, bootstrapPassword, req.DBPassword); err != nil {
fmt.Printf("Warning: Failed to update PostgreSQL password: %v\n", err)
fmt.Println("Will proceed with configuration anyway...")
fmt.Printf("CRITICAL ERROR: Failed to update PostgreSQL password: %v\n", err)
c.JSON(http.StatusInternalServerError, gin.H{
"error": "Failed to update database password. Setup cannot continue.",
"details": err.Error(),
"help": "Ensure PostgreSQL is accessible and the bootstrap password is correct. Check Docker logs for details.",
})
return
}
fmt.Println("PostgreSQL password successfully updated from bootstrap to user-provided password")
// Step 2: Generate configuration content for manual update
fmt.Println("Generating configuration content for manual .env file update...")
@@ -414,6 +420,11 @@ func (h *SetupHandler) ConfigureServer(c *gin.Context) {
// GenerateSigningKeys generates Ed25519 keypair for agent update signing
func (h *SetupHandler) GenerateSigningKeys(c *gin.Context) {
// Prevent caching of generated keys (security critical)
c.Header("Cache-Control", "no-store, no-cache, must-revalidate, private")
c.Header("Pragma", "no-cache")
c.Header("Expires", "0")
// Generate Ed25519 keypair
publicKey, privateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
@@ -428,6 +439,9 @@ func (h *SetupHandler) GenerateSigningKeys(c *gin.Context) {
// Generate fingerprint (first 16 chars)
fingerprint := publicKeyHex[:16]
// Log key generation for security audit trail (only fingerprint, not full key)
fmt.Printf("Generated new Ed25519 keypair - Fingerprint: %s\n", fingerprint)
c.JSON(http.StatusOK, gin.H{
"public_key": publicKeyHex,
"private_key": privateKeyHex,

View File

@@ -13,6 +13,16 @@ import (
"github.com/google/uuid"
)
// isValidResult reports whether result is one of the values permitted by the
// database constraint on the update-log result column: "success", "failed",
// or "partial". Any other value (e.g. "timed_out") is rejected so callers can
// map it to a valid value before insertion.
func isValidResult(result string) bool {
	// A switch over the few constant values avoids allocating a fresh map on
	// every call, which the previous map-literal implementation did.
	switch result {
	case "success", "failed", "partial":
		return true
	default:
		return false
	}
}
type UpdateHandler struct {
updateQueries *queries.UpdateQueries
agentQueries *queries.AgentQueries
@@ -199,11 +209,22 @@ func (h *UpdateHandler) ReportLog(c *gin.Context) {
return
}
// Validate and map result to comply with database constraint
validResult := req.Result
if !isValidResult(validResult) {
// Map invalid results to valid ones (e.g., "timed_out" -> "failed")
if validResult == "timed_out" || validResult == "timeout" || validResult == "cancelled" {
validResult = "failed"
} else {
validResult = "failed" // Default to failed for any unknown status
}
}
logEntry := &models.UpdateLog{
ID: uuid.New(),
AgentID: agentID,
Action: req.Action,
Result: req.Result,
Result: validResult,
Stdout: req.Stdout,
Stderr: req.Stderr,
ExitCode: req.ExitCode,
@@ -831,8 +852,8 @@ func (h *UpdateHandler) ClearFailedCommands(c *gin.Context) {
// Build the appropriate cleanup query based on parameters
if allFailed {
// Clear ALL failed commands (most aggressive)
count, err = h.commandQueries.ClearAllFailedCommands(olderThanDays)
// Clear ALL failed commands regardless of age (most aggressive)
count, err = h.commandQueries.ClearAllFailedCommandsRegardlessOfAge()
} else if onlyRetried {
// Clear only failed commands that have been retried
count, err = h.commandQueries.ClearRetriedFailedCommands(olderThanDays)

View File

@@ -88,6 +88,14 @@ func Load() (*Config, error) {
cfg.Timezone = getEnv("TIMEZONE", "UTC")
cfg.LatestAgentVersion = getEnv("LATEST_AGENT_VERSION", "0.1.22")
cfg.MinAgentVersion = getEnv("MIN_AGENT_VERSION", "0.1.22")
cfg.SigningPrivateKey = getEnv("REDFLAG_SIGNING_PRIVATE_KEY", "")
// Debug: Log signing key status
if cfg.SigningPrivateKey != "" {
fmt.Printf("[CONFIG] ✅ Ed25519 signing private key configured (%d characters)\n", len(cfg.SigningPrivateKey))
} else {
fmt.Printf("[CONFIG] ❌ No Ed25519 signing private key found in REDFLAG_SIGNING_PRIVATE_KEY\n")
}
// Handle missing secrets
if cfg.Admin.Password == "" || cfg.Admin.JWTSecret == "" || cfg.Database.Password == "" {

View File

@@ -23,14 +23,19 @@ func (q *AgentQueries) CreateAgent(agent *models.Agent) error {
query := `
INSERT INTO agents (
id, hostname, os_type, os_version, os_architecture,
agent_version, last_seen, status, metadata
agent_version, current_version, machine_id, public_key_fingerprint,
last_seen, status, metadata
) VALUES (
:id, :hostname, :os_type, :os_version, :os_architecture,
:agent_version, :last_seen, :status, :metadata
:agent_version, :current_version, :machine_id, :public_key_fingerprint,
:last_seen, :status, :metadata
)
`
_, err := q.db.NamedExec(query, agent)
return err
if err != nil {
return fmt.Errorf("failed to create agent %s (version %s): %w", agent.Hostname, agent.CurrentVersion, err)
}
return nil
}
// GetAgentByID retrieves an agent by ID

View File

@@ -2,6 +2,7 @@ package queries
import (
"fmt"
"strings"
"time"
"github.com/Fimeg/RedFlag/aggregator-server/internal/models"
@@ -31,13 +32,14 @@ func (q *CommandQueries) CreateCommand(cmd *models.AgentCommand) error {
}
// GetPendingCommands retrieves pending commands for an agent
// Only returns 'pending' status - 'sent' commands are handled by timeout service
func (q *CommandQueries) GetPendingCommands(agentID uuid.UUID) ([]models.AgentCommand, error) {
var commands []models.AgentCommand
query := `
SELECT * FROM agent_commands
WHERE agent_id = $1 AND status = 'pending'
ORDER BY created_at ASC
LIMIT 10
LIMIT 100
`
err := q.db.Select(&commands, query, agentID)
return commands, err
@@ -338,6 +340,23 @@ func (q *CommandQueries) ClearAllFailedCommands(days int) (int64, error) {
return result.RowsAffected()
}
// ClearAllFailedCommandsRegardlessOfAge moves every command currently in the
// 'failed' or 'timed_out' state to 'archived_failed', ignoring command age.
// It backs the all_failed=true cleanup path and returns the number of rows
// archived.
func (q *CommandQueries) ClearAllFailedCommandsRegardlessOfAge() (int64, error) {
	const archiveAll = `
UPDATE agent_commands
SET status = 'archived_failed'
WHERE status IN ('failed', 'timed_out')
`
	res, err := q.db.Exec(archiveAll)
	if err != nil {
		return 0, fmt.Errorf("failed to archive all failed commands regardless of age: %w", err)
	}
	return res.RowsAffected()
}
// CountPendingCommandsForAgent returns the number of pending commands for a specific agent
// Used by scheduler for backpressure detection
func (q *CommandQueries) CountPendingCommandsForAgent(agentID uuid.UUID) (int, error) {
@@ -373,16 +392,30 @@ func (q *CommandQueries) VerifyCommandsCompleted(commandIDs []string) ([]string,
return []string{}, nil
}
// Convert UUIDs back to strings for SQL query
uuidStrs := make([]string, len(uuidIDs))
for i, id := range uuidIDs {
uuidStrs[i] = id.String()
}
// Query for commands that are completed or failed
query := `
// Use ANY with proper array literal for PostgreSQL
placeholders := make([]string, len(uuidStrs))
args := make([]interface{}, len(uuidStrs))
for i, id := range uuidStrs {
placeholders[i] = fmt.Sprintf("$%d", i+1)
args[i] = id
}
query := fmt.Sprintf(`
SELECT id
FROM agent_commands
WHERE id = ANY($1)
AND status IN ('completed', 'failed')
`
WHERE id::text = ANY(%s)
AND status IN ('completed', 'failed', 'timed_out')
`, fmt.Sprintf("ARRAY[%s]", strings.Join(placeholders, ",")))
var completedUUIDs []uuid.UUID
err := q.db.Select(&completedUUIDs, query, uuidIDs)
err := q.db.Select(&completedUUIDs, query, args...)
if err != nil {
return nil, fmt.Errorf("failed to verify command completion: %w", err)
}

View File

@@ -45,6 +45,11 @@ type AgentWithLastScan struct {
CurrentVersion string `json:"current_version" db:"current_version"` // Current running version
UpdateAvailable bool `json:"update_available" db:"update_available"` // Whether update is available
LastVersionCheck time.Time `json:"last_version_check" db:"last_version_check"` // Last time version was checked
MachineID *string `json:"machine_id,omitempty" db:"machine_id"` // Unique machine identifier
PublicKeyFingerprint *string `json:"public_key_fingerprint,omitempty" db:"public_key_fingerprint"` // Public key fingerprint
IsUpdating bool `json:"is_updating" db:"is_updating"` // Whether agent is currently updating
UpdatingToVersion *string `json:"updating_to_version,omitempty" db:"updating_to_version"` // Target version for ongoing update
UpdateInitiatedAt *time.Time `json:"update_initiated_at,omitempty" db:"update_initiated_at"` // When update process started
LastSeen time.Time `json:"last_seen" db:"last_seen"`
Status string `json:"status" db:"status"`
Metadata JSONB `json:"metadata" db:"metadata"`

View File

@@ -53,8 +53,9 @@ type Scheduler struct {
queue *PriorityQueue
// Database queries
agentQueries *queries.AgentQueries
commandQueries *queries.CommandQueries
agentQueries *queries.AgentQueries
commandQueries *queries.CommandQueries
subsystemQueries *queries.SubsystemQueries
// Worker pool
jobChan chan *SubsystemJob
@@ -88,19 +89,20 @@ type Stats struct {
}
// NewScheduler creates a new scheduler instance
func NewScheduler(config Config, agentQueries *queries.AgentQueries, commandQueries *queries.CommandQueries) *Scheduler {
func NewScheduler(config Config, agentQueries *queries.AgentQueries, commandQueries *queries.CommandQueries, subsystemQueries *queries.SubsystemQueries) *Scheduler {
ctx, cancel := context.WithCancel(context.Background())
s := &Scheduler{
config: config,
queue: NewPriorityQueue(),
agentQueries: agentQueries,
commandQueries: commandQueries,
jobChan: make(chan *SubsystemJob, 1000), // Buffer 1000 jobs
workers: make([]*worker, config.NumWorkers),
shutdown: make(chan struct{}),
ctx: ctx,
cancel: cancel,
config: config,
queue: NewPriorityQueue(),
agentQueries: agentQueries,
commandQueries: commandQueries,
subsystemQueries: subsystemQueries,
jobChan: make(chan *SubsystemJob, 1000), // Buffer 1000 jobs
workers: make([]*worker, config.NumWorkers),
shutdown: make(chan struct{}),
ctx: ctx,
cancel: cancel,
}
// Initialize rate limiter if configured
@@ -130,16 +132,6 @@ func (s *Scheduler) LoadSubsystems(ctx context.Context) error {
return fmt.Errorf("failed to get agents: %w", err)
}
// For now, we'll create default subsystems for each agent
// In full implementation, this would read from agent_subsystems table
subsystems := []string{"updates", "storage", "system", "docker"}
intervals := map[string]int{
"updates": 15, // 15 minutes
"storage": 15,
"system": 30,
"docker": 15,
}
loaded := 0
for _, agent := range agents {
// Skip offline agents (haven't checked in for 10+ minutes)
@@ -147,28 +139,70 @@ func (s *Scheduler) LoadSubsystems(ctx context.Context) error {
continue
}
for _, subsystem := range subsystems {
// TODO: Check agent metadata for subsystem enablement
// For now, assume all subsystems are enabled
// Get subsystems from database (respect user settings)
dbSubsystems, err := s.subsystemQueries.GetSubsystems(agent.ID)
if err != nil {
log.Printf("[Scheduler] Failed to get subsystems for agent %s: %v", agent.Hostname, err)
continue
}
job := &SubsystemJob{
AgentID: agent.ID,
AgentHostname: agent.Hostname,
Subsystem: subsystem,
IntervalMinutes: intervals[subsystem],
NextRunAt: time.Now().Add(time.Duration(intervals[subsystem]) * time.Minute),
Enabled: true,
// Create jobs only for enabled subsystems with auto_run=true
for _, dbSub := range dbSubsystems {
if dbSub.Enabled && dbSub.AutoRun {
// Use database interval, fallback to default
intervalMinutes := dbSub.IntervalMinutes
if intervalMinutes <= 0 {
intervalMinutes = s.getDefaultInterval(dbSub.Subsystem)
}
var nextRun time.Time
if dbSub.NextRunAt != nil {
nextRun = *dbSub.NextRunAt
} else {
// If no next run is set, schedule it with default interval
nextRun = time.Now().Add(time.Duration(intervalMinutes) * time.Minute)
}
job := &SubsystemJob{
AgentID: agent.ID,
AgentHostname: agent.Hostname,
Subsystem: dbSub.Subsystem,
IntervalMinutes: intervalMinutes,
NextRunAt: nextRun,
Enabled: dbSub.Enabled,
}
s.queue.Push(job)
loaded++
}
s.queue.Push(job)
loaded++
}
}
log.Printf("[Scheduler] Loaded %d subsystem jobs for %d agents\n", loaded, len(agents))
log.Printf("[Scheduler] Loaded %d subsystem jobs for %d agents (respecting database settings)\n", loaded, len(agents))
return nil
}
// getDefaultInterval returns default interval minutes for a subsystem
// TODO: These intervals need to correlate with agent health scanning settings
// Each subsystem should be variable based on user-configurable agent health policies
func (s *Scheduler) getDefaultInterval(subsystem string) int {
	switch subsystem {
	case "updates":
		return 15 // 15 minutes
	case "apt", "system":
		return 30 // 30 minutes
	case "docker":
		return 120 // 2 hours
	case "dnf":
		return 240 // 4 hours
	case "storage", "winget":
		return 360 // 6 hours
	case "windows":
		return 480 // 8 hours
	default:
		return 30 // Default fallback
	}
}
// Start begins the scheduler main loop and workers
func (s *Scheduler) Start() error {
log.Printf("[Scheduler] Starting with %d workers, check interval %v\n",

View File

@@ -9,7 +9,7 @@ import (
func TestScheduler_NewScheduler(t *testing.T) {
config := DefaultConfig()
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
if s == nil {
t.Fatal("NewScheduler returned nil")
@@ -58,7 +58,7 @@ func TestScheduler_DefaultConfig(t *testing.T) {
func TestScheduler_QueueIntegration(t *testing.T) {
config := DefaultConfig()
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
// Add jobs to queue
agent1 := uuid.New()
@@ -96,7 +96,7 @@ func TestScheduler_QueueIntegration(t *testing.T) {
func TestScheduler_GetStats(t *testing.T) {
config := DefaultConfig()
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
// Initial stats should be zero
stats := s.GetStats()
@@ -145,7 +145,7 @@ func TestScheduler_StartStop(t *testing.T) {
RateLimitPerSecond: 0, // Disable rate limiting for test
}
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
// Start scheduler
err := s.Start()
@@ -167,7 +167,7 @@ func TestScheduler_StartStop(t *testing.T) {
func TestScheduler_ProcessQueueEmpty(t *testing.T) {
config := DefaultConfig()
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
// Process empty queue should not panic
s.processQueue()
@@ -188,7 +188,7 @@ func TestScheduler_ProcessQueueWithJobs(t *testing.T) {
RateLimitPerSecond: 0, // Disable for test
}
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
// Add jobs that are due now
for i := 0; i < 5; i++ {
@@ -229,7 +229,7 @@ func TestScheduler_RateLimiterRefill(t *testing.T) {
RateLimitPerSecond: 10, // 10 tokens per second
}
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
if s.rateLimiter == nil {
t.Fatal("rate limiter not initialized")
@@ -264,7 +264,7 @@ func TestScheduler_RateLimiterRefill(t *testing.T) {
func TestScheduler_ConcurrentQueueAccess(t *testing.T) {
config := DefaultConfig()
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
done := make(chan bool)
@@ -303,7 +303,7 @@ func TestScheduler_ConcurrentQueueAccess(t *testing.T) {
func BenchmarkScheduler_ProcessQueue(b *testing.B) {
config := DefaultConfig()
s := NewScheduler(config, nil, nil)
s := NewScheduler(config, nil, nil, nil)
// Pre-fill queue with jobs
for i := 0; i < 1000; i++ {

View File

@@ -12,11 +12,12 @@ import (
// TimeoutService handles timeout management for long-running operations
type TimeoutService struct {
commandQueries *queries.CommandQueries
updateQueries *queries.UpdateQueries
ticker *time.Ticker
stopChan chan bool
timeoutDuration time.Duration
commandQueries *queries.CommandQueries
updateQueries *queries.UpdateQueries
ticker *time.Ticker
stopChan chan bool
sentTimeout time.Duration // For commands already sent to agents
pendingTimeout time.Duration // For commands stuck in queue
}
// NewTimeoutService creates a new timeout service
@@ -24,14 +25,16 @@ func NewTimeoutService(cq *queries.CommandQueries, uq *queries.UpdateQueries) *T
return &TimeoutService{
commandQueries: cq,
updateQueries: uq,
timeoutDuration: 2 * time.Hour, // 2 hours timeout - allows for system upgrades and large operations
sentTimeout: 2 * time.Hour, // 2 hours for commands already sent to agents
pendingTimeout: 30 * time.Minute, // 30 minutes for commands stuck in queue
// TODO: Make these timeout durations user-adjustable in settings
stopChan: make(chan bool),
}
}
// Start begins the timeout monitoring service
func (ts *TimeoutService) Start() {
log.Printf("Starting timeout service with %v timeout duration", ts.timeoutDuration)
log.Printf("Starting timeout service with %v sent timeout, %v pending timeout", ts.sentTimeout, ts.pendingTimeout)
// Create a ticker that runs every 5 minutes
ts.ticker = time.NewTicker(5 * time.Minute)
@@ -59,25 +62,41 @@ func (ts *TimeoutService) Stop() {
func (ts *TimeoutService) checkForTimeouts() {
log.Println("Checking for timed out operations...")
// Get all commands that are in 'sent' status
commands, err := ts.commandQueries.GetCommandsByStatus(models.CommandStatusSent)
if err != nil {
log.Printf("Error getting sent commands: %v", err)
return
}
timeoutThreshold := time.Now().Add(-ts.timeoutDuration)
sentTimeoutThreshold := time.Now().Add(-ts.sentTimeout)
pendingTimeoutThreshold := time.Now().Add(-ts.pendingTimeout)
timedOutCommands := make([]models.AgentCommand, 0)
for _, command := range commands {
// Check if command has been sent and is older than timeout threshold
if command.SentAt != nil && command.SentAt.Before(timeoutThreshold) {
timedOutCommands = append(timedOutCommands, command)
// Check 'sent' commands (traditional timeout - 2 hours)
sentCommands, err := ts.commandQueries.GetCommandsByStatus(models.CommandStatusSent)
if err != nil {
log.Printf("Error getting sent commands: %v", err)
} else {
for _, command := range sentCommands {
// Check if command has been sent and is older than sent timeout threshold
if command.SentAt != nil && command.SentAt.Before(sentTimeoutThreshold) {
timedOutCommands = append(timedOutCommands, command)
}
}
}
// Check 'pending' commands (stuck in queue timeout - 30 minutes)
pendingCommands, err := ts.commandQueries.GetCommandsByStatus(models.CommandStatusPending)
if err != nil {
log.Printf("Error getting pending commands: %v", err)
} else {
for _, command := range pendingCommands {
// Check if command has been pending longer than pending timeout threshold
if command.CreatedAt.Before(pendingTimeoutThreshold) {
timedOutCommands = append(timedOutCommands, command)
log.Printf("Found stuck pending command %s (type: %s, created: %s, age: %v)",
command.ID, command.CommandType, command.CreatedAt.Format(time.RFC3339), time.Since(command.CreatedAt))
}
}
}
if len(timedOutCommands) > 0 {
log.Printf("Found %d timed out commands", len(timedOutCommands))
log.Printf("Found %d timed out commands (%d sent >2h, %d stuck pending >30m)",
len(timedOutCommands), len(sentCommands), len(pendingCommands))
for _, command := range timedOutCommands {
if err := ts.timeoutCommand(&command); err != nil {
@@ -91,6 +110,14 @@ func (ts *TimeoutService) checkForTimeouts() {
// timeoutCommand marks a specific command as timed out and updates related entities
func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
// Determine which timeout duration was applied
var appliedTimeout time.Duration
if command.Status == models.CommandStatusSent {
appliedTimeout = ts.sentTimeout
} else {
appliedTimeout = ts.pendingTimeout
}
log.Printf("Timing out command %s (type: %s, agent: %s)",
command.ID, command.CommandType, command.AgentID)
@@ -103,7 +130,7 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
result := models.JSONB{
"error": "operation timed out",
"timeout_at": time.Now(),
"duration": ts.timeoutDuration.String(),
"duration": appliedTimeout.String(),
"command_id": command.ID.String(),
}
@@ -112,7 +139,7 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
}
// Update related update package status if applicable
if err := ts.updateRelatedPackageStatus(command); err != nil {
if err := ts.updateRelatedPackageStatus(command, appliedTimeout); err != nil {
log.Printf("Warning: failed to update related package status: %v", err)
// Don't return error here as the main timeout operation succeeded
}
@@ -123,11 +150,11 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
AgentID: command.AgentID,
UpdatePackageID: ts.extractUpdatePackageID(command),
Action: command.CommandType,
Result: "timed_out",
Result: "failed", // Use 'failed' to comply with database constraint
Stdout: "",
Stderr: fmt.Sprintf("Command %s timed out after %v", command.CommandType, ts.timeoutDuration),
Stderr: fmt.Sprintf("Command %s timed out after %v (timeout_id: %s)", command.CommandType, appliedTimeout, command.ID),
ExitCode: 124, // Standard timeout exit code
DurationSeconds: int(ts.timeoutDuration.Seconds()),
DurationSeconds: int(appliedTimeout.Seconds()),
ExecutedAt: time.Now(),
}
@@ -141,7 +168,7 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
}
// updateRelatedPackageStatus updates the status of related update packages when a command times out
func (ts *TimeoutService) updateRelatedPackageStatus(command *models.AgentCommand) error {
func (ts *TimeoutService) updateRelatedPackageStatus(command *models.AgentCommand, appliedTimeout time.Duration) error {
// Extract update_id from command params if it exists
_, ok := command.Params["update_id"].(string)
if !ok {
@@ -153,7 +180,7 @@ func (ts *TimeoutService) updateRelatedPackageStatus(command *models.AgentComman
metadata := models.JSONB{
"timeout": true,
"timeout_at": time.Now(),
"timeout_duration": ts.timeoutDuration.String(),
"timeout_duration": appliedTimeout.String(),
"command_id": command.ID.String(),
"failure_reason": "operation timed out",
}
@@ -196,7 +223,7 @@ func (ts *TimeoutService) GetTimeoutStatus() (map[string]interface{}, error) {
}
// Count commands approaching timeout (within 5 minutes of timeout)
timeoutThreshold := time.Now().Add(-ts.timeoutDuration + 5*time.Minute)
timeoutThreshold := time.Now().Add(-ts.sentTimeout + 5*time.Minute)
approachingTimeout := 0
for _, command := range activeCommands {
if command.SentAt != nil && command.SentAt.Before(timeoutThreshold) {
@@ -205,16 +232,30 @@ func (ts *TimeoutService) GetTimeoutStatus() (map[string]interface{}, error) {
}
return map[string]interface{}{
"total_timed_out": len(timedOutCommands),
"total_active": len(activeCommands),
"approaching_timeout": approachingTimeout,
"timeout_duration": ts.timeoutDuration.String(),
"last_check": time.Now(),
"total_timed_out": len(timedOutCommands),
"total_active": len(activeCommands),
"approaching_timeout": approachingTimeout,
"sent_timeout_duration": ts.sentTimeout.String(),
"pending_timeout_duration": ts.pendingTimeout.String(),
"last_check": time.Now(),
}, nil
}
// SetTimeoutDuration allows changing the timeout duration for sent commands.
// Kept for backward compatibility: it now forwards to the sent-command timeout,
// since the single timeoutDuration field was split into sentTimeout/pendingTimeout.
// TODO: This should be deprecated in favor of SetSentTimeout and SetPendingTimeout
func (ts *TimeoutService) SetTimeoutDuration(duration time.Duration) {
	// FIX: drop the stale diff-residue assignment to the removed
	// timeoutDuration field; only sentTimeout exists on the struct now.
	ts.sentTimeout = duration
	log.Printf("Sent timeout duration updated to %v", duration)
}
// SetSentTimeout allows changing the timeout duration for sent commands
func (ts *TimeoutService) SetSentTimeout(d time.Duration) {
	ts.sentTimeout = d
	log.Printf("Sent timeout duration updated to %v", d)
}
// SetPendingTimeout allows changing the timeout duration for pending commands
func (ts *TimeoutService) SetPendingTimeout(d time.Duration) {
	ts.pendingTimeout = d
	log.Printf("Pending timeout duration updated to %v", d)
}

View File

@@ -8,9 +8,14 @@ import {
Cpu,
Container,
Package,
Shield,
Fingerprint,
CheckCircle,
AlertCircle,
XCircle,
} from 'lucide-react';
import { formatRelativeTime } from '@/lib/utils';
import { agentApi } from '@/lib/api';
import { agentApi, securityApi } from '@/lib/api';
import toast from 'react-hot-toast';
import { cn } from '@/lib/utils';
import { AgentSubsystem } from '@/types';
@@ -60,6 +65,77 @@ export function AgentScanners({ agentId }: AgentScannersProps) {
refetchInterval: 30000, // Refresh every 30 seconds
});
// Fetch security health status
const { data: securityOverview, isLoading: securityLoading } = useQuery({
queryKey: ['security-overview'],
queryFn: async () => {
const data = await securityApi.getOverview();
return data;
},
refetchInterval: 60000, // Refresh every minute
});
// Map a security subsystem status string to its badge color classes and icon.
const getSecurityStatusDisplay = (status: string) => {
  // "Good" states share the green treatment.
  if (status === 'healthy' || status === 'enforced' || status === 'operational') {
    return {
      color: 'text-green-600 bg-green-100 border-green-200',
      icon: <CheckCircle className="h-4 w-4 text-green-600" />,
    };
  }
  if (status === 'degraded') {
    return {
      color: 'text-amber-600 bg-amber-100 border-amber-200',
      icon: <AlertCircle className="h-4 w-4 text-amber-600" />,
    };
  }
  if (status === 'unhealthy' || status === 'unavailable') {
    return {
      color: 'text-red-600 bg-red-100 border-red-200',
      icon: <XCircle className="h-4 w-4 text-red-600" />,
    };
  }
  // Unknown statuses fall back to a neutral gray badge.
  return {
    color: 'text-gray-600 bg-gray-100 border-gray-200',
    icon: <AlertCircle className="h-4 w-4 text-gray-600" />,
  };
};
// Pick the icon for a security subsystem type.
const getSecurityIcon = (type: string) => {
  if (type === 'nonce_validation') return <RefreshCw className="h-4 w-4" />;
  if (type === 'machine_binding') return <Fingerprint className="h-4 w-4" />;
  if (type === 'command_validation') return <CheckCircle className="h-4 w-4" />;
  // ed25519_signing and any unrecognized type share the shield icon.
  return <Shield className="h-4 w-4" />;
};
// Human-readable label for a security subsystem type; unknown types pass through.
const getSecurityDisplayName = (type: string) => {
  const names: Record<string, string> = {
    ed25519_signing: 'Ed25519 Signing',
    nonce_validation: 'Nonce Protection',
    machine_binding: 'Machine Binding',
    command_validation: 'Command Validation',
  };
  return names[type] ?? type;
};
// Toggle subsystem enabled/disabled
const toggleSubsystemMutation = useMutation({
mutationFn: async ({ subsystem, enabled }: { subsystem: string; enabled: boolean }) => {
@@ -176,6 +252,124 @@ export function AgentScanners({ agentId }: AgentScannersProps) {
</div>
</div>
{/* Security Health Section */}
<div className="card">
<div className="flex items-center justify-between mb-4">
<div className="flex items-center space-x-2">
<Shield className="h-5 w-5 text-gray-600" />
<h3 className="text-sm font-medium text-gray-900">Security Health</h3>
</div>
<button
onClick={() => queryClient.invalidateQueries({ queryKey: ['security-overview'] })}
disabled={securityLoading}
className="flex items-center space-x-1 px-3 py-1 text-xs text-gray-600 hover:text-gray-800 hover:bg-gray-100 rounded transition-colors"
>
<RefreshCw className={cn('h-3 w-3', securityLoading && 'animate-spin')} />
<span>Refresh</span>
</button>
</div>
{securityLoading ? (
<div className="flex items-center justify-center py-8">
<RefreshCw className="h-5 w-5 animate-spin text-gray-400" />
<span className="ml-2 text-sm text-gray-600">Loading security status...</span>
</div>
) : securityOverview ? (
<div className="space-y-4">
{/* Overall Security Status */}
<div className="flex items-center justify-between p-3 bg-gray-50 rounded-lg border border-gray-200">
<div className="flex items-center space-x-3">
<div className={cn(
'w-3 h-3 rounded-full',
securityOverview.overall_status === 'healthy' ? 'bg-green-500' :
securityOverview.overall_status === 'degraded' ? 'bg-amber-500' : 'bg-red-500'
)}></div>
<div>
<p className="text-sm font-medium text-gray-900">Overall Security Status</p>
<p className="text-xs text-gray-500 capitalize">{securityOverview.overall_status}</p>
</div>
</div>
<div className={cn(
'px-2 py-1 rounded-full text-xs font-medium border',
getSecurityStatusDisplay(securityOverview.overall_status).color
)}>
{securityOverview.overall_status.toUpperCase()}
</div>
</div>
{/* Individual Security Subsystems */}
<div className="grid grid-cols-1 md:grid-cols-2 gap-3">
{Object.entries(securityOverview.subsystems).map(([key, subsystem]) => {
const display = getSecurityStatusDisplay(subsystem.status);
return (
<div key={key} className="flex items-center justify-between p-3 bg-white border border-gray-200 rounded-lg">
<div className="flex items-center space-x-3">
<div className="text-gray-600">
{getSecurityIcon(key)}
</div>
<div>
<p className="text-sm font-medium text-gray-900">
{getSecurityDisplayName(key)}
</p>
<p className="text-xs text-gray-500 capitalize">
{subsystem.enabled ? 'Enabled' : 'Disabled'}
</p>
</div>
</div>
<div className={cn(
'px-2 py-1 rounded-full text-xs font-medium border flex items-center space-x-1',
display.color
)}>
{display.icon}
<span>{subsystem.status.toUpperCase()}</span>
</div>
</div>
);
})}
</div>
{/* Security Alerts and Recommendations */}
{(securityOverview.alerts.length > 0 || securityOverview.recommendations.length > 0) && (
<div className="space-y-3">
{securityOverview.alerts.length > 0 && (
<div className="p-3 bg-red-50 border border-red-200 rounded-lg">
<p className="text-sm font-medium text-red-800 mb-2">Security Alerts</p>
<ul className="text-xs text-red-700 space-y-1">
{securityOverview.alerts.map((alert, index) => (
<li key={index} className="flex items-start space-x-2">
<XCircle className="h-3 w-3 text-red-500 mt-0.5 flex-shrink-0" />
<span>{alert}</span>
</li>
))}
</ul>
</div>
)}
{securityOverview.recommendations.length > 0 && (
<div className="p-3 bg-amber-50 border border-amber-200 rounded-lg">
<p className="text-sm font-medium text-amber-800 mb-2">Recommendations</p>
<ul className="text-xs text-amber-700 space-y-1">
{securityOverview.recommendations.map((recommendation, index) => (
<li key={index} className="flex items-start space-x-2">
<AlertCircle className="h-3 w-3 text-amber-500 mt-0.5 flex-shrink-0" />
<span>{recommendation}</span>
</li>
))}
</ul>
</div>
)}
</div>
)}
</div>
) : (
<div className="text-center py-8">
<Shield className="mx-auto h-8 w-8 text-gray-400" />
<p className="mt-2 text-sm text-gray-600">Unable to load security status</p>
<p className="text-xs text-gray-500">Security monitoring may be unavailable</p>
</div>
)}
</div>
{/* Subsystem Configuration Table */}
<div className="card">
<div className="flex items-center justify-between mb-3">

View File

@@ -2,13 +2,9 @@ import { useState } from 'react';
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import {
Search,
Package,
Download,
Upload,
CheckCircle,
RefreshCw,
Terminal,
Filter,
ChevronDown,
ChevronRight,
Check,

View File

@@ -692,4 +692,126 @@ export const adminApi = {
},
};
// Security API endpoints — thin typed wrappers over the shared axios client.
export const securityApi = {
  // Aggregate security posture: overall status, per-subsystem state,
  // plus any active alerts and operator recommendations.
  getOverview: async (): Promise<{
    timestamp: string;
    overall_status: 'healthy' | 'degraded' | 'unhealthy';
    subsystems: {
      ed25519_signing: { status: string; enabled: boolean };
      nonce_validation: { status: string; enabled: boolean };
      machine_binding: { status: string; enabled: boolean };
      command_validation: { status: string; enabled: boolean };
    };
    alerts: string[];
    recommendations: string[];
  }> => (await api.get('/security/overview')).data,

  // Health of the Ed25519 signing service, including key availability.
  getSigningStatus: async (): Promise<{
    status: string;
    timestamp: string;
    checks: {
      service_initialized: boolean;
      public_key_available: boolean;
      signing_operational: boolean;
    };
    public_key_fingerprint?: string;
    algorithm?: string;
  }> => (await api.get('/security/signing')).data,

  // Nonce (replay-protection) validation health and recent counters.
  getNonceStatus: async (): Promise<{
    status: string;
    timestamp: string;
    checks: {
      validation_enabled: boolean;
      max_age_minutes: number;
      recent_validations: number;
      validation_failures: number;
    };
    details: {
      nonce_format: string;
      signature_algorithm: string;
      replay_protection: string;
    };
  }> => (await api.get('/security/nonce')).data,

  // Machine-binding enforcement status and recent violation counts.
  getMachineBindingStatus: async (): Promise<{
    status: string;
    timestamp: string;
    checks: {
      binding_enforced: boolean;
      min_agent_version: string;
      fingerprint_required: boolean;
      recent_violations: number;
    };
    details: {
      enforcement_method: string;
      binding_scope: string;
      violation_action: string;
    };
  }> => (await api.get('/security/machine-binding')).data,

  // Command-pipeline validation health: queue depth and throughput checks.
  getCommandValidationStatus: async (): Promise<{
    status: string;
    timestamp: string;
    metrics: {
      total_pending_commands: number;
      agents_with_pending: number;
      commands_last_hour: number;
      commands_last_24h: number;
    };
    checks: {
      command_processing: string;
      backpressure_active: boolean;
      agent_responsive: string;
    };
  }> => (await api.get('/security/commands')).data,

  // Low-level security configuration metrics (key, nonce, binding, limits).
  getMetrics: async (): Promise<{
    timestamp: string;
    signing: {
      public_key_fingerprint: string;
      algorithm: string;
      key_size: number;
      configured: boolean;
    };
    nonce: {
      max_age_seconds: number;
      format: string;
    };
    machine_binding: {
      min_version: string;
      enforcement: string;
    };
    command_processing: {
      backpressure_threshold: number;
      rate_limit_per_second: number;
    };
  }> => (await api.get('/security/metrics')).data,
};
export default api;