Complete RedFlag codebase with two major security audit implementations.
== A-1: Ed25519 Key Rotation Support ==
Server:
- SignCommand sets SignedAt timestamp and KeyID on every signature
- signing_keys database table (migration 020) for multi-key rotation
- InitializePrimaryKey registers active key at startup
- /api/v1/public-keys endpoint for rotation-aware agents
- SigningKeyQueries for key lifecycle management
Agent:
- Key-ID-aware verification via CheckKeyRotation
- FetchAndCacheAllActiveKeys for rotation pre-caching
- Cache metadata with TTL and staleness fallback
- SecurityLogger events for key rotation and command signing
== A-2: Replay Attack Fixes (F-1 through F-7) ==
F-5 CRITICAL - RetryCommand now signs via signAndCreateCommand
F-1 HIGH - v3 format: "{agent_id}:{cmd_id}:{type}:{hash}:{ts}"
F-7 HIGH - Migration 026: expires_at column with partial index
F-6 HIGH - GetPendingCommands/GetStuckCommands filter by expires_at
F-2 HIGH - Agent-side executedIDs dedup map with cleanup
F-4 HIGH - commandMaxAge reduced from 24h to 4h
F-3 CRITICAL - Old-format commands rejected after 48h via CreatedAt
Verification fixes: migration idempotency (ETHOS #4), log format
compliance (ETHOS #1), stale comments updated.
All 24 tests passing. Docker --no-cache build verified.
See docs/ for full audit reports and deviation log (DEV-001 to DEV-019).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
467 lines
12 KiB
Go
467 lines
12 KiB
Go
package logging
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/Fimeg/RedFlag/aggregator-agent/internal/config"
|
|
)
|
|
|
|
// SecurityEvent is one agent-side security log record.
//
// It is a simplified version of the server model, kept separate to avoid
// circular dependencies. Events are serialized with the JSON tags below.
type SecurityEvent struct {
	// Timestamp is when the event was created.
	Timestamp time.Time `json:"timestamp"`
	// Level is the severity: CRITICAL, WARNING, INFO or DEBUG.
	Level string `json:"level"`
	// EventType is a machine-readable identifier (see SecurityEventTypes).
	EventType string `json:"event_type"`
	// Message is the human-readable description.
	Message string `json:"message"`
	// Details carries optional structured context; omitted from JSON when empty.
	Details map[string]interface{} `json:"details,omitempty"`
}
|
|
|
|
// SecurityLogConfig describes how the agent records security events.
//
// Each field carries a JSON name, an environment-variable name and, where
// present, a default value in its struct tags.
type SecurityLogConfig struct {
	// Enabled switches security logging on or off.
	Enabled bool `json:"enabled" env:"REDFLAG_AGENT_SECURITY_LOG_ENABLED" default:"true"`
	// Level is the verbosity threshold: none, error, warn, info, debug.
	// NOTE(review): the default tag spells the level "warning", not "warn".
	Level string `json:"level" env:"REDFLAG_AGENT_SECURITY_LOG_LEVEL" default:"warning"`
	// LogSuccesses controls whether successful verifications are recorded.
	LogSuccesses bool `json:"log_successes" env:"REDFLAG_AGENT_SECURITY_LOG_SUCCESSES" default:"false"`
	// FilePath is the log file location, relative to the agent data directory.
	FilePath string `json:"file_path" env:"REDFLAG_AGENT_SECURITY_LOG_PATH"`
	// MaxSizeMB is the size limit for a single log file, in megabytes.
	MaxSizeMB int `json:"max_size_mb" env:"REDFLAG_AGENT_SECURITY_LOG_MAX_SIZE" default:"50"`
	// MaxFiles is the number of log files to keep.
	MaxFiles int `json:"max_files" env:"REDFLAG_AGENT_SECURITY_LOG_MAX_FILES" default:"5"`
	// BatchSize is the number of buffered events that triggers a flush.
	BatchSize int `json:"batch_size" env:"REDFLAG_AGENT_SECURITY_LOG_BATCH_SIZE" default:"10"`
	// SendToServer indicates whether events should also be sent to the server.
	SendToServer bool `json:"send_to_server" env:"REDFLAG_AGENT_SECURITY_LOG_SEND" default:"true"`
}
|
|
|
|
// SecurityLogger handles security event logging on the agent
|
|
type SecurityLogger struct {
|
|
config SecurityLogConfig
|
|
logger *log.Logger
|
|
file *os.File
|
|
mu sync.Mutex
|
|
buffer []*SecurityEvent
|
|
flushTimer *time.Timer
|
|
lastFlush time.Time
|
|
closed bool
|
|
}
|
|
|
|
// SecurityEventTypes enumerates the event-type identifiers emitted by the
// agent. Each field holds the string stored in SecurityEvent.EventType.
var SecurityEventTypes = struct {
	// Command signature verification outcomes.
	CmdSignatureVerificationFailed  string
	CmdSignatureVerificationSuccess string

	// Update validation failures.
	UpdateNonceInvalid                string
	UpdateSignatureVerificationFailed string

	// Host and configuration integrity.
	MachineIDChangeDetected string
	ConfigTamperingWarning  string

	// Command authorization and signing-key lifecycle.
	UnauthorizedCommandAttempt string
	KeyRotationDetected        string
}{
	CmdSignatureVerificationFailed:    "CMD_SIGNATURE_VERIFICATION_FAILED",
	CmdSignatureVerificationSuccess:   "CMD_SIGNATURE_VERIFICATION_SUCCESS",
	UpdateNonceInvalid:                "UPDATE_NONCE_INVALID",
	UpdateSignatureVerificationFailed: "UPDATE_SIGNATURE_VERIFICATION_FAILED",
	MachineIDChangeDetected:           "MACHINE_ID_CHANGE_DETECTED",
	ConfigTamperingWarning:            "CONFIG_TAMPERING_WARNING",
	UnauthorizedCommandAttempt:        "UNAUTHORIZED_COMMAND_ATTEMPT",
	KeyRotationDetected:               "KEY_ROTATION_DETECTED",
}
|
|
|
|
// NewSecurityLogger creates a new agent security logger
|
|
func NewSecurityLogger(agentConfig *config.Config, logDir string) (*SecurityLogger, error) {
|
|
// Create default security log config
|
|
secConfig := SecurityLogConfig{
|
|
Enabled: true,
|
|
Level: "warning",
|
|
LogSuccesses: false,
|
|
FilePath: "security.log",
|
|
MaxSizeMB: 50,
|
|
MaxFiles: 5,
|
|
BatchSize: 10,
|
|
SendToServer: true,
|
|
}
|
|
|
|
// Ensure log directory exists
|
|
if err := os.MkdirAll(logDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("failed to create security log directory: %w", err)
|
|
}
|
|
|
|
// Open log file
|
|
logPath := filepath.Join(logDir, secConfig.FilePath)
|
|
file, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open security log file: %w", err)
|
|
}
|
|
|
|
logger := &SecurityLogger{
|
|
config: secConfig,
|
|
logger: log.New(file, "[SECURITY] ", log.LstdFlags|log.LUTC),
|
|
file: file,
|
|
buffer: make([]*SecurityEvent, 0, secConfig.BatchSize),
|
|
lastFlush: time.Now(),
|
|
}
|
|
|
|
// Start flush timer
|
|
logger.flushTimer = time.AfterFunc(30*time.Second, logger.flushBuffer)
|
|
|
|
return logger, nil
|
|
}
|
|
|
|
// Log writes a security event
|
|
func (sl *SecurityLogger) Log(event *SecurityEvent) error {
|
|
if !sl.config.Enabled || sl.config.Level == "none" {
|
|
return nil
|
|
}
|
|
|
|
// Skip successes unless configured to log them
|
|
if !sl.config.LogSuccesses && event.EventType == SecurityEventTypes.CmdSignatureVerificationSuccess {
|
|
return nil
|
|
}
|
|
|
|
// Filter by log level
|
|
if !sl.shouldLogLevel(event.Level) {
|
|
return nil
|
|
}
|
|
|
|
sl.mu.Lock()
|
|
defer sl.mu.Unlock()
|
|
|
|
if sl.closed {
|
|
return fmt.Errorf("security logger is closed")
|
|
}
|
|
|
|
// Add prefix to distinguish security events
|
|
event.Message = "SECURITY: " + event.Message
|
|
|
|
// Write immediately for critical events
|
|
if event.Level == "CRITICAL" {
|
|
return sl.writeEvent(event)
|
|
}
|
|
|
|
// Add to buffer
|
|
sl.buffer = append(sl.buffer, event)
|
|
|
|
// Flush if buffer is full
|
|
if len(sl.buffer) >= sl.config.BatchSize {
|
|
sl.flushBufferUnsafe()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// LogCommandVerificationFailure logs a command signature verification failure
|
|
func (sl *SecurityLogger) LogCommandVerificationFailure(commandID string, reason string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "CRITICAL",
|
|
EventType: SecurityEventTypes.CmdSignatureVerificationFailed,
|
|
Message: "Command signature verification failed",
|
|
Details: map[string]interface{}{
|
|
"command_id": commandID,
|
|
"reason": reason,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogNonceValidationFailure logs a nonce validation failure
|
|
func (sl *SecurityLogger) LogNonceValidationFailure(nonce string, reason string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "WARNING",
|
|
EventType: SecurityEventTypes.UpdateNonceInvalid,
|
|
Message: "Update nonce validation failed",
|
|
Details: map[string]interface{}{
|
|
"nonce": nonce[:min(len(nonce), 16)] + "...", // Truncate for security
|
|
"reason": reason,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogUpdateSignatureVerificationFailure logs an update signature verification failure
|
|
func (sl *SecurityLogger) LogUpdateSignatureVerificationFailure(updateID string, reason string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "CRITICAL",
|
|
EventType: SecurityEventTypes.UpdateSignatureVerificationFailed,
|
|
Message: "Update signature verification failed",
|
|
Details: map[string]interface{}{
|
|
"update_id": updateID,
|
|
"reason": reason,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogMachineIDChangeDetected logs when machine ID changes
|
|
func (sl *SecurityLogger) LogMachineIDChangeDetected(oldID, newID string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "WARNING",
|
|
EventType: SecurityEventTypes.MachineIDChangeDetected,
|
|
Message: "Machine ID change detected",
|
|
Details: map[string]interface{}{
|
|
"old_machine_id": oldID,
|
|
"new_machine_id": newID,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogConfigTamperingWarning logs when configuration tampering is suspected
|
|
func (sl *SecurityLogger) LogConfigTamperingWarning(configPath string, reason string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "WARNING",
|
|
EventType: SecurityEventTypes.ConfigTamperingWarning,
|
|
Message: "Configuration file tampering detected",
|
|
Details: map[string]interface{}{
|
|
"config_file": configPath,
|
|
"reason": reason,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogUnauthorizedCommandAttempt logs an attempt to run an unauthorized command
|
|
func (sl *SecurityLogger) LogUnauthorizedCommandAttempt(command string, reason string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "WARNING",
|
|
EventType: SecurityEventTypes.UnauthorizedCommandAttempt,
|
|
Message: "Unauthorized command execution attempt",
|
|
Details: map[string]interface{}{
|
|
"command": command,
|
|
"reason": reason,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogCommandVerificationSuccess logs a successful command signature verification
|
|
func (sl *SecurityLogger) LogCommandVerificationSuccess(commandID string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "INFO",
|
|
EventType: SecurityEventTypes.CmdSignatureVerificationSuccess,
|
|
Message: "Command signature verified successfully",
|
|
Details: map[string]interface{}{
|
|
"command_id": commandID,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogCommandVerificationFailed logs a failed command signature verification
|
|
func (sl *SecurityLogger) LogCommandVerificationFailed(commandID, reason string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "CRITICAL",
|
|
EventType: SecurityEventTypes.CmdSignatureVerificationFailed,
|
|
Message: "Command signature verification failed",
|
|
Details: map[string]interface{}{
|
|
"command_id": commandID,
|
|
"reason": reason,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogKeyRotationDetected logs when a new signing key is detected and cached.
|
|
// This occurs when a command arrives with a key_id not previously cached by the agent,
|
|
// indicating the server has rotated its signing key.
|
|
func (sl *SecurityLogger) LogKeyRotationDetected(keyID string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "INFO",
|
|
EventType: SecurityEventTypes.KeyRotationDetected,
|
|
Message: "New signing key detected and cached",
|
|
Details: map[string]interface{}{
|
|
"key_id": keyID,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// LogCommandSkipped logs when a command is skipped due to signing configuration
|
|
func (sl *SecurityLogger) LogCommandSkipped(commandID, reason string) {
|
|
if sl == nil {
|
|
return
|
|
}
|
|
|
|
event := &SecurityEvent{
|
|
Timestamp: time.Now().UTC(),
|
|
Level: "INFO",
|
|
EventType: "COMMAND_SKIPPED",
|
|
Message: "Command skipped due to signing configuration",
|
|
Details: map[string]interface{}{
|
|
"command_id": commandID,
|
|
"reason": reason,
|
|
},
|
|
}
|
|
|
|
_ = sl.Log(event)
|
|
}
|
|
|
|
// GetBatch returns a batch of events for sending to server
|
|
func (sl *SecurityLogger) GetBatch() []*SecurityEvent {
|
|
sl.mu.Lock()
|
|
defer sl.mu.Unlock()
|
|
|
|
if len(sl.buffer) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Copy buffer
|
|
batch := make([]*SecurityEvent, len(sl.buffer))
|
|
copy(batch, sl.buffer)
|
|
|
|
// Clear buffer
|
|
sl.buffer = sl.buffer[:0]
|
|
|
|
return batch
|
|
}
|
|
|
|
// ClearBatch clears the buffer after successful send to server
|
|
func (sl *SecurityLogger) ClearBatch() {
|
|
sl.mu.Lock()
|
|
defer sl.mu.Unlock()
|
|
sl.buffer = sl.buffer[:0]
|
|
}
|
|
|
|
// writeEvent writes an event to the log file
|
|
func (sl *SecurityLogger) writeEvent(event *SecurityEvent) error {
|
|
jsonData, err := json.Marshal(event)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal security event: %w", err)
|
|
}
|
|
|
|
sl.logger.Println(string(jsonData))
|
|
return nil
|
|
}
|
|
|
|
// flushBuffer flushes all buffered events to file
|
|
func (sl *SecurityLogger) flushBuffer() {
|
|
sl.mu.Lock()
|
|
defer sl.mu.Unlock()
|
|
sl.flushBufferUnsafe()
|
|
}
|
|
|
|
// flushBufferUnsafe flushes buffer without acquiring lock (must be called with lock held)
|
|
func (sl *SecurityLogger) flushBufferUnsafe() {
|
|
for _, event := range sl.buffer {
|
|
if err := sl.writeEvent(event); err != nil {
|
|
log.Printf("[ERROR] Failed to write security event: %v", err)
|
|
}
|
|
}
|
|
|
|
sl.buffer = sl.buffer[:0]
|
|
sl.lastFlush = time.Now()
|
|
|
|
// Reset timer if not closed
|
|
if !sl.closed && sl.flushTimer != nil {
|
|
sl.flushTimer.Stop()
|
|
sl.flushTimer.Reset(30 * time.Second)
|
|
}
|
|
}
|
|
|
|
// shouldLogLevel checks if the event should be logged based on the configured level
|
|
func (sl *SecurityLogger) shouldLogLevel(eventLevel string) bool {
|
|
levels := map[string]int{
|
|
"NONE": 0,
|
|
"ERROR": 1,
|
|
"WARNING": 2,
|
|
"INFO": 3,
|
|
"DEBUG": 4,
|
|
}
|
|
|
|
configLevel := levels[sl.config.Level]
|
|
eventLvl, exists := levels[eventLevel]
|
|
if !exists {
|
|
eventLvl = 2 // Default to WARNING
|
|
}
|
|
|
|
return eventLvl <= configLevel
|
|
}
|
|
|
|
// Close closes the security logger
|
|
func (sl *SecurityLogger) Close() error {
|
|
sl.mu.Lock()
|
|
defer sl.mu.Unlock()
|
|
|
|
if sl.closed {
|
|
return nil
|
|
}
|
|
|
|
// Stop flush timer
|
|
if sl.flushTimer != nil {
|
|
sl.flushTimer.Stop()
|
|
}
|
|
|
|
// Flush remaining events
|
|
sl.flushBufferUnsafe()
|
|
|
|
// Close file
|
|
if sl.file != nil {
|
|
err := sl.file.Close()
|
|
sl.closed = true
|
|
return err
|
|
}
|
|
|
|
sl.closed = true
|
|
return nil
|
|
}
|
|
|
|
// min returns the smaller of a and b; when they are equal, that value is
// returned.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}