package orchestrator import ( "crypto/ed25519" "fmt" "log" "sync" "time" "github.com/Fimeg/RedFlag/aggregator-agent/internal/client" "github.com/Fimeg/RedFlag/aggregator-agent/internal/config" "github.com/Fimeg/RedFlag/aggregator-agent/internal/crypto" "github.com/Fimeg/RedFlag/aggregator-agent/internal/logging" "github.com/google/uuid" ) const ( // keyRefreshInterval is how often the agent proactively re-checks the server's primary key keyRefreshInterval = 6 * time.Hour // commandMaxAge is the maximum age of a signed command (F-4 fix: reduced from 24h to 4h) commandMaxAge = 4 * time.Hour // commandClockSkew is the allowed future clock skew for signed commands commandClockSkew = 5 * time.Minute ) // CommandHandler handles command processing with signature verification type CommandHandler struct { verifier *crypto.CommandVerifier securityLogger *logging.SecurityLogger keyCache map[string]ed25519.PublicKey // key_id -> public key keyCacheMu sync.RWMutex executedIDs map[string]time.Time // cmd UUID -> execution time (F-2 fix: dedup) executedIDsMu sync.Mutex lastKeyRefresh time.Time logger *log.Logger } // CommandSigningConfig holds configuration for command signing type CommandSigningConfig struct { Enabled bool `json:"enabled" env:"REDFLAG_AGENT_COMMAND_SIGNING_ENABLED" default:"true"` EnforcementMode string `json:"enforcement_mode" env:"REDFLAG_AGENT_COMMAND_ENFORCEMENT_MODE" default:"strict"` } // NewCommandHandler creates a new command handler func NewCommandHandler(cfg *config.Config, securityLogger *logging.SecurityLogger, logger *log.Logger) (*CommandHandler, error) { handler := &CommandHandler{ securityLogger: securityLogger, logger: logger, verifier: crypto.NewCommandVerifier(), keyCache: make(map[string]ed25519.PublicKey), executedIDs: make(map[string]time.Time), } // Pre-load cached public key if command signing is enabled if cfg.CommandSigning.Enabled { if pubKey, err := crypto.LoadCachedPublicKey(); err == nil { // Store under empty key_id for backward-compat lookup handler.keyCacheMu.Lock() handler.keyCache[""] = pubKey handler.keyCacheMu.Unlock() logger.Printf("[INFO] [agent] [cmd_handler] primary_public_key_loaded") } else { logger.Printf("[WARNING] [agent] [cmd_handler] primary_key_not_cached error=\"%v\"", err) } } return handler, nil } // getKeyForCommand returns the appropriate public key for verifying a command. // Uses key_id-aware lookup with lazy fetch for unknown keys. func (h *CommandHandler) getKeyForCommand(cmd client.Command, serverURL string) (ed25519.PublicKey, error) { keyID := cmd.KeyID // Check in-memory cache first h.keyCacheMu.RLock() if key, ok := h.keyCache[keyID]; ok { h.keyCacheMu.RUnlock() return key, nil } h.keyCacheMu.RUnlock() // Not in memory — check disk cache via CheckKeyRotation key, isNew, err := h.verifier.CheckKeyRotation(keyID, serverURL) if err != nil { return nil, fmt.Errorf("failed to resolve key %q: %w", keyID, err) } if isNew { h.logger.Printf("[INFO] [agent] [cmd_handler] new_signing_key_cached key_id=%q", keyID) if h.securityLogger != nil { h.securityLogger.LogKeyRotationDetected(keyID) } } // Store in memory cache h.keyCacheMu.Lock() h.keyCache[keyID] = key h.keyCacheMu.Unlock() return key, nil } // ProcessCommand processes a command with signature verification func (h *CommandHandler) ProcessCommand(cmd client.Command, cfg *config.Config, agentID uuid.UUID) error { // F-2 fix: Check deduplication BEFORE verification // TODO: persist executedIDs to disk (path: getPublicKeyDir()+ // "/executed_commands.json") to survive restarts. // Current in-memory implementation allows replay of commands // issued within commandMaxAge if the agent restarts. h.executedIDsMu.Lock() if execTime, found := h.executedIDs[cmd.ID]; found { h.executedIDsMu.Unlock() h.logger.Printf("[WARNING] [agent] [cmd_handler] duplicate_command_rejected command_id=%q already_executed_at=%v", cmd.ID, execTime) if h.securityLogger != nil { h.securityLogger.LogCommandVerificationFailure(cmd.ID, fmt.Sprintf("duplicate command rejected, already executed at %v", execTime)) } return fmt.Errorf("duplicate command %s rejected, already executed at %v", cmd.ID, execTime) } h.executedIDsMu.Unlock() signingCfg := cfg.CommandSigning if !signingCfg.Enabled { if cmd.Signature != "" { h.logger.Printf("[INFO] [agent] [cmd_handler] command_has_signature_but_signing_disabled command_id=%q", cmd.ID) } h.markExecuted(cmd.ID) return nil } // Resolve the correct public key for this command pubKey, err := h.getKeyForCommand(cmd, cfg.ServerURL) if err != nil { h.logger.Printf("[ERROR] [agent] [cmd_handler] key_resolution_failed command_id=%q error=%q", cmd.ID, err) if h.securityLogger != nil { h.securityLogger.LogCommandVerificationFailure(cmd.ID, "key resolution failed: "+err.Error()) } if signingCfg.EnforcementMode == "strict" { return fmt.Errorf("command verification failed: %w", err) } return nil } verifyFunc := func() error { if cmd.SignedAt != nil { // New format: timestamp-aware verification return h.verifier.VerifyCommandWithTimestamp(cmd, pubKey, commandMaxAge, commandClockSkew) } // Old format: no timestamp (backward compat) return h.verifier.VerifyCommand(cmd, pubKey) } switch signingCfg.EnforcementMode { case "strict": if cmd.Signature == "" { h.logger.Printf("[ERROR] [agent] [cmd_handler] command_not_signed command_id=%q", cmd.ID) if h.securityLogger != nil { h.securityLogger.LogCommandVerificationFailure(cmd.ID, "missing signature") } return fmt.Errorf("command verification failed: strict enforcement requires signed commands") } if err := verifyFunc(); err != nil { h.logger.Printf("[ERROR] [agent] [cmd_handler] command_verification_failed command_id=%q error=%q", cmd.ID, err) if h.securityLogger != nil { h.securityLogger.LogCommandVerificationFailure(cmd.ID, err.Error()) } return fmt.Errorf("command verification failed: %w", err) } h.logger.Printf("[INFO] [agent] [cmd_handler] command_verified command_id=%q", cmd.ID) if h.securityLogger != nil { h.securityLogger.LogCommandVerificationSuccess(cmd.ID) } h.markExecuted(cmd.ID) case "warning": if cmd.Signature != "" { if err := verifyFunc(); err != nil { h.logger.Printf("[WARNING] [agent] [cmd_handler] verification_failed_warning_mode command_id=%q error=%q", cmd.ID, err) if h.securityLogger != nil { h.securityLogger.LogCommandVerificationFailure(cmd.ID, err.Error()) } } else { if h.securityLogger != nil { h.securityLogger.LogCommandVerificationSuccess(cmd.ID) } } } else { h.logger.Printf("[WARNING] [agent] [cmd_handler] unsigned_command_warning_mode command_id=%q", cmd.ID) } h.markExecuted(cmd.ID) // "disabled" or any other value: skip verification default: h.markExecuted(cmd.ID) } return nil } // markExecuted records a command ID in the deduplication set (F-2 fix) func (h *CommandHandler) markExecuted(cmdID string) { h.executedIDsMu.Lock() h.executedIDs[cmdID] = time.Now() h.executedIDsMu.Unlock() } // CleanupExecutedIDs evicts entries older than commandMaxAge from the dedup set. // Should be called when ShouldRefreshKey() fires (every 6h). func (h *CommandHandler) CleanupExecutedIDs() { h.executedIDsMu.Lock() defer h.executedIDsMu.Unlock() cutoff := time.Now().Add(-commandMaxAge) evicted := 0 for id, execTime := range h.executedIDs { if execTime.Before(cutoff) { delete(h.executedIDs, id) evicted++ } } if evicted > 0 { h.logger.Printf("[INFO] [agent] [cmd_handler] cleanup_executed_ids evicted=%d remaining=%d", evicted, len(h.executedIDs)) } } // RefreshPrimaryKey proactively re-fetches the server's primary key. // Should be called every keyRefreshInterval to detect rotations early. func (h *CommandHandler) RefreshPrimaryKey(serverURL string) error { h.logger.Printf("[INFO] [agent] [cmd_handler] refreshing_primary_key") pubKey, err := crypto.FetchAndCacheServerPublicKey(serverURL) if err != nil { return fmt.Errorf("failed to refresh primary key: %w", err) } h.keyCacheMu.Lock() h.keyCache[""] = pubKey h.keyCacheMu.Unlock() h.lastKeyRefresh = time.Now() h.logger.Printf("[INFO] [agent] [cmd_handler] primary_key_refreshed") return nil } // ShouldRefreshKey returns true if enough time has passed to warrant a proactive key refresh func (h *CommandHandler) ShouldRefreshKey() bool { return time.Since(h.lastKeyRefresh) >= keyRefreshInterval } // UpdateServerPublicKey updates the primary cached public key (kept for backward compat) func (h *CommandHandler) UpdateServerPublicKey(serverURL string) error { return h.RefreshPrimaryKey(serverURL) }