Files
Redflag/aggregator-agent/internal/logging/security_logger.go
jpetree331 f97d4845af feat(security): A-1 Ed25519 key rotation + A-2 replay attack fixes
Complete RedFlag codebase with two major security audit implementations.

== A-1: Ed25519 Key Rotation Support ==

Server:
- SignCommand sets SignedAt timestamp and KeyID on every signature
- signing_keys database table (migration 020) for multi-key rotation
- InitializePrimaryKey registers active key at startup
- /api/v1/public-keys endpoint for rotation-aware agents
- SigningKeyQueries for key lifecycle management

Agent:
- Key-ID-aware verification via CheckKeyRotation
- FetchAndCacheAllActiveKeys for rotation pre-caching
- Cache metadata with TTL and staleness fallback
- SecurityLogger events for key rotation and command signing

== A-2: Replay Attack Fixes (F-1 through F-7) ==

F-5 CRITICAL - RetryCommand now signs via signAndCreateCommand
F-1 HIGH     - v3 format: "{agent_id}:{cmd_id}:{type}:{hash}:{ts}"
F-7 HIGH     - Migration 026: expires_at column with partial index
F-6 HIGH     - GetPendingCommands/GetStuckCommands filter by expires_at
F-2 HIGH     - Agent-side executedIDs dedup map with cleanup
F-4 HIGH     - commandMaxAge reduced from 24h to 4h
F-3 CRITICAL - Old-format commands rejected after 48h via CreatedAt

Verification fixes: migration idempotency (ETHOS #4), log format
compliance (ETHOS #1), stale comments updated.

All 24 tests passing. Docker --no-cache build verified.
See docs/ for full audit reports and deviation log (DEV-001 to DEV-019).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 21:25:47 -04:00

467 lines
12 KiB
Go

package logging
import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/Fimeg/RedFlag/aggregator-agent/internal/config"
)
// SecurityEvent is a single agent-side security log record.
//
// It is a simplified version of the server model, kept separate to avoid
// circular dependencies. Events are serialized with encoding/json (see
// writeEvent), so the field tags below define the on-disk layout.
type SecurityEvent struct {
// Timestamp is when the event was created; the Log* helpers in this file
// set it to time.Now().UTC().
Timestamp time.Time `json:"timestamp"`
// Level is the severity label. This file emits "CRITICAL", "WARNING" and "INFO".
Level string `json:"level"` // CRITICAL, WARNING, INFO, DEBUG
// EventType is a machine-readable identifier such as the values in SecurityEventTypes.
EventType string `json:"event_type"`
// Message is the human-readable description. Log prepends "SECURITY: " to it.
Message string `json:"message"`
// Details carries event-specific key/value pairs; omitted from JSON when empty.
Details map[string]interface{} `json:"details,omitempty"`
}
// SecurityLogConfig holds configuration for security logging on the agent.
//
// NewSecurityLogger in this file fills the struct from hard-coded literals.
// NOTE(review): the `env` and `default` tags are not consumed by any code
// visible here — confirm where (or whether) they are applied.
type SecurityLogConfig struct {
// Enabled turns security logging on; Log drops every event when it is false.
Enabled bool `json:"enabled" env:"REDFLAG_AGENT_SECURITY_LOG_ENABLED" default:"true"`
// Level is the verbosity threshold; Log drops every event when it equals "none".
// NOTE(review): this comment lists "warn" while the default tag (and
// NewSecurityLogger) use "warning" — confirm the accepted spellings against shouldLogLevel.
Level string `json:"level" env:"REDFLAG_AGENT_SECURITY_LOG_LEVEL" default:"warning"` // none, error, warn, info, debug
// LogSuccesses controls whether CMD_SIGNATURE_VERIFICATION_SUCCESS events are kept by Log.
LogSuccesses bool `json:"log_successes" env:"REDFLAG_AGENT_SECURITY_LOG_SUCCESSES" default:"false"`
// FilePath is the log file name; NewSecurityLogger joins it onto its logDir argument.
FilePath string `json:"file_path" env:"REDFLAG_AGENT_SECURITY_LOG_PATH"` // Relative to agent data directory
// MaxSizeMB is not read by the code visible in this file — presumably a rotation limit; verify.
MaxSizeMB int `json:"max_size_mb" env:"REDFLAG_AGENT_SECURITY_LOG_MAX_SIZE" default:"50"`
// MaxFiles is not read by the code visible in this file — presumably a rotation limit; verify.
MaxFiles int `json:"max_files" env:"REDFLAG_AGENT_SECURITY_LOG_MAX_FILES" default:"5"`
// BatchSize is the buffer length at which Log flushes buffered events to the file.
BatchSize int `json:"batch_size" env:"REDFLAG_AGENT_SECURITY_LOG_BATCH_SIZE" default:"10"`
// SendToServer is not read by the code visible in this file.
SendToServer bool `json:"send_to_server" env:"REDFLAG_AGENT_SECURITY_LOG_SEND" default:"true"`
}
// SecurityLogger handles security event logging on the agent.
//
// Events are written as JSON lines through logger into file. Non-critical
// events are first collected in buffer and written in batches.
// The struct embeds a sync.Mutex by value and must therefore not be copied;
// all methods use pointer receivers.
type SecurityLogger struct {
// config is set once by NewSecurityLogger and only read afterwards in this file.
config SecurityLogConfig
// logger writes to file with the "[SECURITY] " prefix (see NewSecurityLogger).
logger *log.Logger
// file is the open log file; Close closes it.
file *os.File
// mu is held by the methods of this file whenever they touch buffer,
// flushTimer, lastFlush or closed.
mu sync.Mutex
// buffer holds events that have been accepted but not yet written.
buffer []*SecurityEvent
// flushTimer triggers flushBuffer; it is re-armed for 30 seconds after every flush.
flushTimer *time.Timer
// lastFlush is the time of the most recent flush (or of construction).
lastFlush time.Time
// closed is set by Close; Log rejects events once it is true.
closed bool
}
// SecurityEventTypes lists the event type identifiers used on the agent.
// Each field holds the string stored in SecurityEvent.EventType.
//
// NOTE(review): LogCommandSkipped in this file uses the literal
// "COMMAND_SKIPPED", which has no entry here.
var SecurityEventTypes = struct {
CmdSignatureVerificationFailed string
CmdSignatureVerificationSuccess string
UpdateNonceInvalid string
UpdateSignatureVerificationFailed string
MachineIDChangeDetected string
ConfigTamperingWarning string
UnauthorizedCommandAttempt string
KeyRotationDetected string
}{
CmdSignatureVerificationFailed: "CMD_SIGNATURE_VERIFICATION_FAILED",
// Success events are dropped by Log unless SecurityLogConfig.LogSuccesses is true.
CmdSignatureVerificationSuccess: "CMD_SIGNATURE_VERIFICATION_SUCCESS",
UpdateNonceInvalid: "UPDATE_NONCE_INVALID",
UpdateSignatureVerificationFailed: "UPDATE_SIGNATURE_VERIFICATION_FAILED",
MachineIDChangeDetected: "MACHINE_ID_CHANGE_DETECTED",
ConfigTamperingWarning: "CONFIG_TAMPERING_WARNING",
UnauthorizedCommandAttempt: "UNAUTHORIZED_COMMAND_ATTEMPT",
KeyRotationDetected: "KEY_ROTATION_DETECTED",
}
// NewSecurityLogger builds a SecurityLogger that appends to "security.log"
// inside logDir.
//
// logDir is created (mode 0755) if it does not exist and the log file is
// opened in append mode with permissions 0600. The logger starts with a fixed
// default configuration and a 30-second flush timer already running.
//
// agentConfig is accepted but not consulted by this function.
//
// An error is returned when the directory cannot be created or the file
// cannot be opened.
func NewSecurityLogger(agentConfig *config.Config, logDir string) (*SecurityLogger, error) {
	// The directory has to exist before the file can be opened inside it.
	if err := os.MkdirAll(logDir, 0755); err != nil {
		return nil, fmt.Errorf("failed to create security log directory: %w", err)
	}

	// Built-in defaults for agent-side security logging.
	defaults := SecurityLogConfig{
		Enabled:      true,
		Level:        "warning",
		LogSuccesses: false,
		FilePath:     "security.log",
		MaxSizeMB:    50,
		MaxFiles:     5,
		BatchSize:    10,
		SendToServer: true,
	}

	const openFlags = os.O_CREATE | os.O_WRONLY | os.O_APPEND
	f, err := os.OpenFile(filepath.Join(logDir, defaults.FilePath), openFlags, 0600)
	if err != nil {
		return nil, fmt.Errorf("failed to open security log file: %w", err)
	}

	sl := new(SecurityLogger)
	sl.config = defaults
	sl.logger = log.New(f, "[SECURITY] ", log.LstdFlags|log.LUTC)
	sl.file = f
	sl.buffer = make([]*SecurityEvent, 0, defaults.BatchSize)
	sl.lastFlush = time.Now()

	// Periodic flush; flushBufferUnsafe re-arms the timer after each run.
	sl.flushTimer = time.AfterFunc(30*time.Second, sl.flushBuffer)
	return sl, nil
}
// Log records a security event, subject to the logger's configuration.
//
// The event is silently dropped (nil is returned) when logging is disabled,
// when the configured level is "none", when it is a command-verification
// success and LogSuccesses is off, or when shouldLogLevel rejects its level.
//
// Accepted events get "SECURITY: " prepended to Message — the caller's event
// is modified in place. Events with Level "CRITICAL" are written immediately;
// all others are appended to the buffer, which is flushed as soon as it holds
// BatchSize events.
//
// A nil receiver is a no-op, matching the Log* helper methods. An error is
// returned for a nil event, for a logger that has been closed, and when
// writing a CRITICAL event fails.
func (sl *SecurityLogger) Log(event *SecurityEvent) error {
	if sl == nil {
		return nil
	}
	if !sl.config.Enabled || sl.config.Level == "none" {
		return nil
	}
	// Guard before the first dereference below; a nil event used to panic here.
	if event == nil {
		return fmt.Errorf("security event is nil")
	}

	// Skip successes unless configured to log them.
	if !sl.config.LogSuccesses && event.EventType == SecurityEventTypes.CmdSignatureVerificationSuccess {
		return nil
	}

	// Filter by log level.
	if !sl.shouldLogLevel(event.Level) {
		return nil
	}

	sl.mu.Lock()
	defer sl.mu.Unlock()

	if sl.closed {
		return fmt.Errorf("security logger is closed")
	}

	// Add prefix to distinguish security events.
	event.Message = "SECURITY: " + event.Message

	// Critical events bypass the buffer so they reach the file right away.
	if event.Level == "CRITICAL" {
		return sl.writeEvent(event)
	}

	sl.buffer = append(sl.buffer, event)

	// The lock is already held, so use the non-locking flush variant.
	if len(sl.buffer) >= sl.config.BatchSize {
		sl.flushBufferUnsafe()
	}
	return nil
}
// LogCommandVerificationFailure records, at CRITICAL level, that the signature
// of a command could not be verified.
//
// commandID and reason are stored in the event details under "command_id" and
// "reason". Calling it on a nil logger does nothing.
func (sl *SecurityLogger) LogCommandVerificationFailure(commandID string, reason string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{
		"command_id": commandID,
		"reason":     reason,
	}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "CRITICAL",
		EventType: SecurityEventTypes.CmdSignatureVerificationFailed,
		Message:   "Command signature verification failed",
		Details:   details,
	})
}
// LogNonceValidationFailure records, at WARNING level, that an update nonce
// failed validation.
//
// Only the first 16 bytes of nonce (followed by "...") are stored, so the full
// value never reaches the log. reason is stored under "reason". Calling it on
// a nil logger does nothing.
func (sl *SecurityLogger) LogNonceValidationFailure(nonce string, reason string) {
	if sl == nil {
		return
	}
	// Truncate for security; shorter nonces are kept whole.
	shown := nonce[:min(len(nonce), 16)] + "..."
	details := map[string]interface{}{
		"nonce":  shown,
		"reason": reason,
	}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "WARNING",
		EventType: SecurityEventTypes.UpdateNonceInvalid,
		Message:   "Update nonce validation failed",
		Details:   details,
	})
}
// LogUpdateSignatureVerificationFailure records, at CRITICAL level, that the
// signature of an update could not be verified.
//
// updateID and reason are stored in the event details under "update_id" and
// "reason". Calling it on a nil logger does nothing.
func (sl *SecurityLogger) LogUpdateSignatureVerificationFailure(updateID string, reason string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{
		"update_id": updateID,
		"reason":    reason,
	}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "CRITICAL",
		EventType: SecurityEventTypes.UpdateSignatureVerificationFailed,
		Message:   "Update signature verification failed",
		Details:   details,
	})
}
// LogMachineIDChangeDetected records, at WARNING level, that the machine ID
// differs from the previously known one.
//
// oldID and newID are stored in the event details under "old_machine_id" and
// "new_machine_id". Calling it on a nil logger does nothing.
func (sl *SecurityLogger) LogMachineIDChangeDetected(oldID, newID string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{
		"old_machine_id": oldID,
		"new_machine_id": newID,
	}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "WARNING",
		EventType: SecurityEventTypes.MachineIDChangeDetected,
		Message:   "Machine ID change detected",
		Details:   details,
	})
}
// LogConfigTamperingWarning records, at WARNING level, that a configuration
// file looks tampered with.
//
// configPath and reason are stored in the event details under "config_file"
// and "reason". Calling it on a nil logger does nothing.
func (sl *SecurityLogger) LogConfigTamperingWarning(configPath string, reason string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{
		"config_file": configPath,
		"reason":      reason,
	}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "WARNING",
		EventType: SecurityEventTypes.ConfigTamperingWarning,
		Message:   "Configuration file tampering detected",
		Details:   details,
	})
}
// LogUnauthorizedCommandAttempt records, at WARNING level, an attempt to run a
// command that is not authorized.
//
// command and reason are stored in the event details under "command" and
// "reason". Calling it on a nil logger does nothing.
func (sl *SecurityLogger) LogUnauthorizedCommandAttempt(command string, reason string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{
		"command": command,
		"reason":  reason,
	}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "WARNING",
		EventType: SecurityEventTypes.UnauthorizedCommandAttempt,
		Message:   "Unauthorized command execution attempt",
		Details:   details,
	})
}
// LogCommandVerificationSuccess records, at INFO level, that a command's
// signature was verified.
//
// commandID is stored in the event details under "command_id". Log discards
// this event type unless LogSuccesses is enabled in the configuration.
// Calling it on a nil logger does nothing.
func (sl *SecurityLogger) LogCommandVerificationSuccess(commandID string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{"command_id": commandID}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "INFO",
		EventType: SecurityEventTypes.CmdSignatureVerificationSuccess,
		Message:   "Command signature verified successfully",
		Details:   details,
	})
}
// LogCommandVerificationFailed logs a failed command signature verification.
//
// It is an alias for LogCommandVerificationFailure: both emit the same
// CRITICAL CMD_SIGNATURE_VERIFICATION_FAILED event with commandID and reason
// in the details. Delegating keeps the two entry points from drifting apart.
// Calling it on a nil logger does nothing.
func (sl *SecurityLogger) LogCommandVerificationFailed(commandID, reason string) {
	if sl == nil {
		return
	}
	sl.LogCommandVerificationFailure(commandID, reason)
}
// LogKeyRotationDetected records, at INFO level, that a new signing key was
// detected and cached.
//
// This occurs when a command arrives with a key_id the agent had not cached
// before, indicating that the server has rotated its signing key. keyID is
// stored in the event details under "key_id". Calling it on a nil logger does
// nothing.
func (sl *SecurityLogger) LogKeyRotationDetected(keyID string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{"key_id": keyID}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "INFO",
		EventType: SecurityEventTypes.KeyRotationDetected,
		Message:   "New signing key detected and cached",
		Details:   details,
	})
}
// LogCommandSkipped records, at INFO level, that a command was skipped because
// of the signing configuration.
//
// commandID and reason are stored in the event details under "command_id" and
// "reason". The event type is the literal "COMMAND_SKIPPED". Calling it on a
// nil logger does nothing.
func (sl *SecurityLogger) LogCommandSkipped(commandID, reason string) {
	if sl == nil {
		return
	}
	details := map[string]interface{}{
		"command_id": commandID,
		"reason":     reason,
	}
	// Best effort: the error returned by Log is deliberately discarded.
	_ = sl.Log(&SecurityEvent{
		Timestamp: time.Now().UTC(),
		Level:     "INFO",
		EventType: "COMMAND_SKIPPED",
		Message:   "Command skipped due to signing configuration",
		Details:   details,
	})
}
// GetBatch hands out the currently buffered events, e.g. for sending to the
// server, and empties the buffer.
//
// The returned slice is a copy that does not share storage with the internal
// buffer. nil is returned when nothing is buffered.
func (sl *SecurityLogger) GetBatch() []*SecurityEvent {
	sl.mu.Lock()
	defer sl.mu.Unlock()

	pending := len(sl.buffer)
	if pending == 0 {
		return nil
	}

	// Detach the batch from the buffer's backing array before reusing it.
	out := append(make([]*SecurityEvent, 0, pending), sl.buffer...)
	sl.buffer = sl.buffer[:0]
	return out
}
// ClearBatch empties the event buffer; it is meant to be called after a batch
// was sent to the server successfully.
//
// NOTE(review): GetBatch already empties the buffer, so anything logged
// between GetBatch and ClearBatch is discarded here — confirm callers' intent.
func (sl *SecurityLogger) ClearBatch() {
	sl.mu.Lock()
	sl.buffer = sl.buffer[:0] // keep the backing array for reuse
	sl.mu.Unlock()
}
// writeEvent serializes event as JSON and appends it as one line to the log
// file through sl.logger.
//
// It returns an error when the event cannot be marshalled or when the
// underlying write fails. Callers in this file invoke it with sl.mu held.
func (sl *SecurityLogger) writeEvent(event *SecurityEvent) error {
	payload, err := json.Marshal(event)
	if err != nil {
		return fmt.Errorf("failed to marshal security event: %w", err)
	}

	// Output produces the same line as Println (prefix, flags, trailing
	// newline) but, unlike Println, reports the writer's error, so a full disk
	// or an already-closed file is no longer silently ignored.
	if err := sl.logger.Output(2, string(payload)); err != nil {
		return fmt.Errorf("failed to write security event: %w", err)
	}
	return nil
}
// flushBuffer flushes all buffered events to file.
//
// It acquires sl.mu and delegates to flushBufferUnsafe. NewSecurityLogger
// registers it as the callback of the flush timer, so it runs on the timer's
// goroutine.
func (sl *SecurityLogger) flushBuffer() {
sl.mu.Lock()
defer sl.mu.Unlock()
sl.flushBufferUnsafe()
}
// flushBufferUnsafe writes every buffered event to the log file and empties
// the buffer. It does not lock: the caller must already hold sl.mu.
//
// Events that fail to write are reported through the standard logger and then
// dropped with the rest of the buffer. Unless the logger is closed, the flush
// timer is re-armed for another 30 seconds.
func (sl *SecurityLogger) flushBufferUnsafe() {
	for i := range sl.buffer {
		if writeErr := sl.writeEvent(sl.buffer[i]); writeErr != nil {
			log.Printf("[ERROR] Failed to write security event: %v", writeErr)
		}
	}
	sl.buffer = sl.buffer[:0]
	sl.lastFlush = time.Now()

	// Nothing to re-arm once closed, or when no timer was ever created.
	if sl.closed || sl.flushTimer == nil {
		return
	}
	sl.flushTimer.Stop()
	sl.flushTimer.Reset(30 * time.Second)
}
// shouldLogLevel reports whether an event of severity eventLevel passes the
// configured verbosity threshold (sl.config.Level).
//
// Level names are matched case-insensitively and surrounding whitespace is
// ignored, so the lower-case configuration values ("warning", "info", ...)
// and the upper-case event levels ("WARNING", "INFO", ...) compare correctly.
// "WARN" is accepted as a synonym for "WARNING", and "CRITICAL" ranks with
// "ERROR" so critical events survive an "error" threshold. Unrecognized names
// on either side are treated as WARNING.
//
// An event is logged when its rank is less than or equal to the threshold's rank.
func (sl *SecurityLogger) shouldLogLevel(eventLevel string) bool {
	const warningRank = 2

	// rank converts a level name to its position on the verbosity scale;
	// a lower rank means a more severe level.
	rank := func(level string) int {
		switch strings.ToUpper(strings.TrimSpace(level)) {
		case "NONE":
			return 0
		case "CRITICAL", "ERROR":
			return 1
		case "WARNING", "WARN":
			return warningRank
		case "INFO":
			return 3
		case "DEBUG":
			return 4
		default:
			return warningRank
		}
	}

	return rank(eventLevel) <= rank(sl.config.Level)
}
// Close flushes any buffered events, stops the flush timer and closes the log
// file. Calling it again after the first call is a no-op that returns nil.
//
// It returns the error from closing the file, if any.
func (sl *SecurityLogger) Close() error {
	sl.mu.Lock()
	defer sl.mu.Unlock()

	if sl.closed {
		return nil
	}

	// Mark the logger closed before the final flush: flushBufferUnsafe re-arms
	// the timer whenever closed is false, which would undo the Stop below and
	// let the timer fire after the file has been closed.
	sl.closed = true

	if sl.flushTimer != nil {
		sl.flushTimer.Stop()
	}

	// Write out whatever is still buffered while the file is open.
	sl.flushBufferUnsafe()

	if sl.file != nil {
		return sl.file.Close()
	}
	return nil
}
// min returns the smaller of the two integers a and b.
//
// Note: on Go 1.21 and later this package-level function shadows the built-in
// min within the package.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}