Complete RedFlag codebase with two major security audit implementations.
== A-1: Ed25519 Key Rotation Support ==
Server:
- SignCommand sets SignedAt timestamp and KeyID on every signature
- signing_keys database table (migration 020) for multi-key rotation
- InitializePrimaryKey registers active key at startup
- /api/v1/public-keys endpoint for rotation-aware agents
- SigningKeyQueries for key lifecycle management
Agent:
- Key-ID-aware verification via CheckKeyRotation
- FetchAndCacheAllActiveKeys for rotation pre-caching
- Cache metadata with TTL and staleness fallback
- SecurityLogger events for key rotation and command signing
== A-2: Replay Attack Fixes (F-1 through F-7) ==
F-5 CRITICAL - RetryCommand now signs via signAndCreateCommand
F-1 HIGH - v3 format: "{agent_id}:{cmd_id}:{type}:{hash}:{ts}"
F-7 HIGH - Migration 026: expires_at column with partial index
F-6 HIGH - GetPendingCommands/GetStuckCommands filter by expires_at
F-2 HIGH - Agent-side executedIDs dedup map with cleanup
F-4 HIGH - commandMaxAge reduced from 24h to 4h
F-3 CRITICAL - Old-format commands rejected after 48h via CreatedAt
Verification fixes: migration idempotency (ETHOS #4), log format
compliance (ETHOS #1), stale comments updated.
All 24 tests passing. Docker --no-cache build verified.
See docs/ for full audit reports and deviation log (DEV-001 to DEV-019).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
252 lines
8.7 KiB
Go
252 lines
8.7 KiB
Go
package orchestrator
|
|
|
|
import (
|
|
"crypto/ed25519"
|
|
"fmt"
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/Fimeg/RedFlag/aggregator-agent/internal/client"
|
|
"github.com/Fimeg/RedFlag/aggregator-agent/internal/config"
|
|
"github.com/Fimeg/RedFlag/aggregator-agent/internal/crypto"
|
|
"github.com/Fimeg/RedFlag/aggregator-agent/internal/logging"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// Timing parameters for key refresh and signed-command freshness checks.
const (
	// keyRefreshInterval is the period between proactive re-checks of the
	// server's primary signing key.
	keyRefreshInterval = 6 * time.Hour

	// commandMaxAge bounds how old a signed command may be when it is
	// verified (F-4 fix: lowered from 24h to 4h). It also sizes the
	// retention window of the executed-command dedup set.
	commandMaxAge = 4 * time.Hour

	// commandClockSkew is how far into the future a command's signing
	// timestamp is allowed to lie.
	commandClockSkew = 5 * time.Minute
)
|
|
|
|
// CommandHandler handles command processing with signature verification
|
|
type CommandHandler struct {
|
|
verifier *crypto.CommandVerifier
|
|
securityLogger *logging.SecurityLogger
|
|
keyCache map[string]ed25519.PublicKey // key_id -> public key
|
|
keyCacheMu sync.RWMutex
|
|
executedIDs map[string]time.Time // cmd UUID -> execution time (F-2 fix: dedup)
|
|
executedIDsMu sync.Mutex
|
|
lastKeyRefresh time.Time
|
|
logger *log.Logger
|
|
}
|
|
|
|
// CommandSigningConfig carries the agent-side switches for command signature
// verification.
type CommandSigningConfig struct {
	// Enabled turns signature verification on or off as a whole.
	Enabled bool `json:"enabled" env:"REDFLAG_AGENT_COMMAND_SIGNING_ENABLED" default:"true"`
	// EnforcementMode selects how verification failures are treated; the
	// handler in this file distinguishes "strict", "warning", and everything
	// else (treated as no verification).
	EnforcementMode string `json:"enforcement_mode" env:"REDFLAG_AGENT_COMMAND_ENFORCEMENT_MODE" default:"strict"`
}
|
|
|
|
// NewCommandHandler creates a new command handler
|
|
func NewCommandHandler(cfg *config.Config, securityLogger *logging.SecurityLogger, logger *log.Logger) (*CommandHandler, error) {
|
|
handler := &CommandHandler{
|
|
securityLogger: securityLogger,
|
|
logger: logger,
|
|
verifier: crypto.NewCommandVerifier(),
|
|
keyCache: make(map[string]ed25519.PublicKey),
|
|
executedIDs: make(map[string]time.Time),
|
|
}
|
|
|
|
// Pre-load cached public key if command signing is enabled
|
|
if cfg.CommandSigning.Enabled {
|
|
if pubKey, err := crypto.LoadCachedPublicKey(); err == nil {
|
|
// Store under empty key_id for backward-compat lookup
|
|
handler.keyCacheMu.Lock()
|
|
handler.keyCache[""] = pubKey
|
|
handler.keyCacheMu.Unlock()
|
|
logger.Printf("[INFO] [agent] [cmd_handler] primary_public_key_loaded")
|
|
} else {
|
|
logger.Printf("[WARNING] [agent] [cmd_handler] primary_key_not_cached error=\"%v\"", err)
|
|
}
|
|
}
|
|
|
|
return handler, nil
|
|
}
|
|
|
|
// getKeyForCommand returns the appropriate public key for verifying a command.
|
|
// Uses key_id-aware lookup with lazy fetch for unknown keys.
|
|
func (h *CommandHandler) getKeyForCommand(cmd client.Command, serverURL string) (ed25519.PublicKey, error) {
|
|
keyID := cmd.KeyID
|
|
|
|
// Check in-memory cache first
|
|
h.keyCacheMu.RLock()
|
|
if key, ok := h.keyCache[keyID]; ok {
|
|
h.keyCacheMu.RUnlock()
|
|
return key, nil
|
|
}
|
|
h.keyCacheMu.RUnlock()
|
|
|
|
// Not in memory — check disk cache via CheckKeyRotation
|
|
key, isNew, err := h.verifier.CheckKeyRotation(keyID, serverURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to resolve key %q: %w", keyID, err)
|
|
}
|
|
|
|
if isNew {
|
|
h.logger.Printf("[INFO] [agent] [cmd_handler] new_signing_key_cached key_id=%q", keyID)
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogKeyRotationDetected(keyID)
|
|
}
|
|
}
|
|
|
|
// Store in memory cache
|
|
h.keyCacheMu.Lock()
|
|
h.keyCache[keyID] = key
|
|
h.keyCacheMu.Unlock()
|
|
|
|
return key, nil
|
|
}
|
|
|
|
// ProcessCommand processes a command with signature verification
|
|
func (h *CommandHandler) ProcessCommand(cmd client.Command, cfg *config.Config, agentID uuid.UUID) error {
|
|
// F-2 fix: Check deduplication BEFORE verification
|
|
// TODO: persist executedIDs to disk (path: getPublicKeyDir()+
|
|
// "/executed_commands.json") to survive restarts.
|
|
// Current in-memory implementation allows replay of commands
|
|
// issued within commandMaxAge if the agent restarts.
|
|
h.executedIDsMu.Lock()
|
|
if execTime, found := h.executedIDs[cmd.ID]; found {
|
|
h.executedIDsMu.Unlock()
|
|
h.logger.Printf("[WARNING] [agent] [cmd_handler] duplicate_command_rejected command_id=%q already_executed_at=%v", cmd.ID, execTime)
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogCommandVerificationFailure(cmd.ID, fmt.Sprintf("duplicate command rejected, already executed at %v", execTime))
|
|
}
|
|
return fmt.Errorf("duplicate command %s rejected, already executed at %v", cmd.ID, execTime)
|
|
}
|
|
h.executedIDsMu.Unlock()
|
|
|
|
signingCfg := cfg.CommandSigning
|
|
|
|
if !signingCfg.Enabled {
|
|
if cmd.Signature != "" {
|
|
h.logger.Printf("[INFO] [agent] [cmd_handler] command_has_signature_but_signing_disabled command_id=%q", cmd.ID)
|
|
}
|
|
h.markExecuted(cmd.ID)
|
|
return nil
|
|
}
|
|
|
|
// Resolve the correct public key for this command
|
|
pubKey, err := h.getKeyForCommand(cmd, cfg.ServerURL)
|
|
if err != nil {
|
|
h.logger.Printf("[ERROR] [agent] [cmd_handler] key_resolution_failed command_id=%q error=%q", cmd.ID, err)
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogCommandVerificationFailure(cmd.ID, "key resolution failed: "+err.Error())
|
|
}
|
|
if signingCfg.EnforcementMode == "strict" {
|
|
return fmt.Errorf("command verification failed: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
verifyFunc := func() error {
|
|
if cmd.SignedAt != nil {
|
|
// New format: timestamp-aware verification
|
|
return h.verifier.VerifyCommandWithTimestamp(cmd, pubKey, commandMaxAge, commandClockSkew)
|
|
}
|
|
// Old format: no timestamp (backward compat)
|
|
return h.verifier.VerifyCommand(cmd, pubKey)
|
|
}
|
|
|
|
switch signingCfg.EnforcementMode {
|
|
case "strict":
|
|
if cmd.Signature == "" {
|
|
h.logger.Printf("[ERROR] [agent] [cmd_handler] command_not_signed command_id=%q", cmd.ID)
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogCommandVerificationFailure(cmd.ID, "missing signature")
|
|
}
|
|
return fmt.Errorf("command verification failed: strict enforcement requires signed commands")
|
|
}
|
|
if err := verifyFunc(); err != nil {
|
|
h.logger.Printf("[ERROR] [agent] [cmd_handler] command_verification_failed command_id=%q error=%q", cmd.ID, err)
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogCommandVerificationFailure(cmd.ID, err.Error())
|
|
}
|
|
return fmt.Errorf("command verification failed: %w", err)
|
|
}
|
|
h.logger.Printf("[INFO] [agent] [cmd_handler] command_verified command_id=%q", cmd.ID)
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogCommandVerificationSuccess(cmd.ID)
|
|
}
|
|
h.markExecuted(cmd.ID)
|
|
case "warning":
|
|
if cmd.Signature != "" {
|
|
if err := verifyFunc(); err != nil {
|
|
h.logger.Printf("[WARNING] [agent] [cmd_handler] verification_failed_warning_mode command_id=%q error=%q", cmd.ID, err)
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogCommandVerificationFailure(cmd.ID, err.Error())
|
|
}
|
|
} else {
|
|
if h.securityLogger != nil {
|
|
h.securityLogger.LogCommandVerificationSuccess(cmd.ID)
|
|
}
|
|
}
|
|
} else {
|
|
h.logger.Printf("[WARNING] [agent] [cmd_handler] unsigned_command_warning_mode command_id=%q", cmd.ID)
|
|
}
|
|
h.markExecuted(cmd.ID)
|
|
// "disabled" or any other value: skip verification
|
|
default:
|
|
h.markExecuted(cmd.ID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// markExecuted records a command ID in the deduplication set (F-2 fix)
|
|
func (h *CommandHandler) markExecuted(cmdID string) {
|
|
h.executedIDsMu.Lock()
|
|
h.executedIDs[cmdID] = time.Now()
|
|
h.executedIDsMu.Unlock()
|
|
}
|
|
|
|
// CleanupExecutedIDs evicts entries older than commandMaxAge from the dedup set.
|
|
// Should be called when ShouldRefreshKey() fires (every 6h).
|
|
func (h *CommandHandler) CleanupExecutedIDs() {
|
|
h.executedIDsMu.Lock()
|
|
defer h.executedIDsMu.Unlock()
|
|
|
|
cutoff := time.Now().Add(-commandMaxAge)
|
|
evicted := 0
|
|
for id, execTime := range h.executedIDs {
|
|
if execTime.Before(cutoff) {
|
|
delete(h.executedIDs, id)
|
|
evicted++
|
|
}
|
|
}
|
|
if evicted > 0 {
|
|
h.logger.Printf("[INFO] [agent] [cmd_handler] cleanup_executed_ids evicted=%d remaining=%d", evicted, len(h.executedIDs))
|
|
}
|
|
}
|
|
|
|
// RefreshPrimaryKey proactively re-fetches the server's primary key.
|
|
// Should be called every keyRefreshInterval to detect rotations early.
|
|
func (h *CommandHandler) RefreshPrimaryKey(serverURL string) error {
|
|
h.logger.Printf("[INFO] [agent] [cmd_handler] refreshing_primary_key")
|
|
pubKey, err := crypto.FetchAndCacheServerPublicKey(serverURL)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to refresh primary key: %w", err)
|
|
}
|
|
|
|
h.keyCacheMu.Lock()
|
|
h.keyCache[""] = pubKey
|
|
h.keyCacheMu.Unlock()
|
|
h.lastKeyRefresh = time.Now()
|
|
|
|
h.logger.Printf("[INFO] [agent] [cmd_handler] primary_key_refreshed")
|
|
return nil
|
|
}
|
|
|
|
// ShouldRefreshKey returns true if enough time has passed to warrant a proactive key refresh
|
|
func (h *CommandHandler) ShouldRefreshKey() bool {
|
|
return time.Since(h.lastKeyRefresh) >= keyRefreshInterval
|
|
}
|
|
|
|
// UpdateServerPublicKey updates the primary cached public key (kept for backward compat)
|
|
func (h *CommandHandler) UpdateServerPublicKey(serverURL string) error {
|
|
return h.RefreshPrimaryKey(serverURL)
|
|
}
|