Files
Redflag/aggregator-agent/internal/crypto/pubkey.go
jpetree331 f97d4845af feat(security): A-1 Ed25519 key rotation + A-2 replay attack fixes
Complete RedFlag codebase with two major security audit implementations.

== A-1: Ed25519 Key Rotation Support ==

Server:
- SignCommand sets SignedAt timestamp and KeyID on every signature
- signing_keys database table (migration 020) for multi-key rotation
- InitializePrimaryKey registers active key at startup
- /api/v1/public-keys endpoint for rotation-aware agents
- SigningKeyQueries for key lifecycle management

Agent:
- Key-ID-aware verification via CheckKeyRotation
- FetchAndCacheAllActiveKeys for rotation pre-caching
- Cache metadata with TTL and staleness fallback
- SecurityLogger events for key rotation and command signing

== A-2: Replay Attack Fixes (F-1 through F-7) ==

F-5 CRITICAL - RetryCommand now signs via signAndCreateCommand
F-1 HIGH     - v3 format: "{agent_id}:{cmd_id}:{type}:{hash}:{ts}"
F-7 HIGH     - Migration 026: expires_at column with partial index
F-6 HIGH     - GetPendingCommands/GetStuckCommands filter by expires_at
F-2 HIGH     - Agent-side executedIDs dedup map with cleanup
F-4 HIGH     - commandMaxAge reduced from 24h to 4h
F-3 CRITICAL - Old-format commands rejected after 48h via CreatedAt

Verification fixes: migration idempotency (ETHOS #4), log format
compliance (ETHOS #1), stale comments updated.

All 24 tests passing. Docker --no-cache build verified.
See docs/ for full audit reports and deviation log (DEV-001 to DEV-019).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 21:25:47 -04:00

281 lines
8.9 KiB
Go

package crypto
import (
"crypto/ed25519"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"runtime"
"time"
)
// defaultCacheTTLHours is the cache lifetime, in hours, written into new
// metadata sidecars and used by CacheMetadata.IsExpired when the stored TTL
// is zero or negative.
const defaultCacheTTLHours = 24
// getPublicKeyDir reports the directory that holds the cached server keys and
// their metadata: a ProgramData folder on Windows, /etc/redflag everywhere else.
func getPublicKeyDir() string {
	switch runtime.GOOS {
	case "windows":
		return "C:\\ProgramData\\RedFlag"
	default:
		return "/etc/redflag"
	}
}
// getPrimaryKeyPath builds the location of the primary cached public key.
// The un-suffixed file name is kept for backward compatibility.
func getPrimaryKeyPath() string {
	dir := getPublicKeyDir()
	return filepath.Join(dir, "server_public_key")
}
// getKeyPathByID returns the cache file path for the key identified by keyID.
//
// keyID is taken from server responses, so it is treated as untrusted input:
// every byte outside [A-Za-z0-9._-] is replaced with '_' before the value is
// embedded in the file name. Without this, an ID containing path separators
// or ".." segments would let filepath.Join resolve to a location outside the
// key directory. IDs made only of the allowed characters map to exactly the
// same path as before.
func getKeyPathByID(keyID string) string {
	safe := make([]byte, len(keyID))
	for i := 0; i < len(keyID); i++ {
		c := keyID[i]
		switch {
		case c >= 'a' && c <= 'z',
			c >= 'A' && c <= 'Z',
			c >= '0' && c <= '9',
			c == '.', c == '-', c == '_':
			safe[i] = c
		default:
			// Covers '/', '\\', ':', NUL and every non-ASCII byte.
			safe[i] = '_'
		}
	}
	// The fixed prefix guarantees the final element is never "." or "..".
	return filepath.Join(getPublicKeyDir(), "server_public_key_"+string(safe))
}
// getPrimaryMetaPath builds the location of the JSON metadata sidecar that
// accompanies the primary cached key.
func getPrimaryMetaPath() string {
	dir := getPublicKeyDir()
	return filepath.Join(dir, "server_public_key.meta")
}
// CacheMetadata is the JSON sidecar stored next to the primary cached public
// key. It records which key was cached and when, so the agent can decide
// whether the cache may be used without contacting the server.
type CacheMetadata struct {
// KeyID is the identifier of the cached key: the server's key_id, or the
// fingerprint when the server did not send a key_id.
KeyID string `json:"key_id"`
// Version is the key version number copied from the server response.
Version int `json:"version"`
// CachedAt is the time the key was fetched; it is written in UTC.
CachedAt time.Time `json:"cached_at"`
// TTLHours is the cache lifetime in hours. Zero or negative values are
// replaced by defaultCacheTTLHours when expiry is evaluated.
TTLHours int `json:"ttl_hours"`
}
// IsExpired reports whether the cached key should no longer be trusted
// without a refresh.
//
// The lifetime is TTLHours, or defaultCacheTTLHours when TTLHours is zero or
// negative. The cache is also considered expired when CachedAt lies in the
// future: that only happens when the clock was wrong at cache time or the
// sidecar was altered, and would otherwise keep the entry "fresh" until the
// clock caught up with the bogus timestamp.
func (m *CacheMetadata) IsExpired() bool {
	ttl := time.Duration(m.TTLHours) * time.Hour
	if ttl <= 0 {
		ttl = defaultCacheTTLHours * time.Hour
	}
	age := time.Since(m.CachedAt)
	return age < 0 || age > ttl
}
// PublicKeyResponse is the JSON body returned by GET /api/v1/public-key,
// describing the server's primary signing key.
type PublicKeyResponse struct {
// PublicKey is the hex-encoded public key.
PublicKey string `json:"public_key"`
// Fingerprint is used as the key identifier when KeyID is empty.
Fingerprint string `json:"fingerprint"`
// Algorithm names the signature scheme; only "ed25519" is accepted.
Algorithm string `json:"algorithm"`
// KeySize is decoded from the response but not read in this file.
KeySize int `json:"key_size"`
// KeyID is the server-assigned key identifier; may be empty.
KeyID string `json:"key_id"`
// Version is the key version number reported by the server.
Version int `json:"version"`
}
// ActivePublicKeyEntry is one element of the JSON array returned by
// GET /api/v1/public-keys.
type ActivePublicKeyEntry struct {
// KeyID is the identifier under which the key is cached locally.
KeyID string `json:"key_id"`
// PublicKey is the hex-encoded public key.
PublicKey string `json:"public_key"`
// IsPrimary is decoded from the response but not read in this file.
IsPrimary bool `json:"is_primary"`
// Version is the key version number reported by the server.
Version int `json:"version"`
// Algorithm names the signature scheme; entries other than "ed25519" are
// not cached.
Algorithm string `json:"algorithm"`
}
// loadCacheMetadata reads the primary key's metadata sidecar from disk and
// decodes it. Read and JSON errors are returned unchanged.
func loadCacheMetadata() (*CacheMetadata, error) {
	raw, readErr := os.ReadFile(getPrimaryMetaPath())
	if readErr != nil {
		return nil, readErr
	}

	meta := new(CacheMetadata)
	if decodeErr := json.Unmarshal(raw, meta); decodeErr != nil {
		return nil, decodeErr
	}
	return meta, nil
}
// saveCacheMetadata serialises meta as indented JSON and writes it to the
// primary key's sidecar file, creating the key directory first if needed.
func saveCacheMetadata(meta *CacheMetadata) error {
	if mkErr := os.MkdirAll(getPublicKeyDir(), 0755); mkErr != nil {
		return fmt.Errorf("failed to create key dir: %w", mkErr)
	}

	encoded, encErr := json.MarshalIndent(meta, "", " ")
	if encErr != nil {
		return fmt.Errorf("failed to marshal metadata: %w", encErr)
	}

	return os.WriteFile(getPrimaryMetaPath(), encoded, 0644)
}
// FetchAndCacheServerPublicKey returns the server's primary Ed25519 public
// key, using the on-disk cache when it is fresh and fetching
// GET {serverURL}/api/v1/public-key otherwise.
//
// Cache policy:
//   - The cached key is returned without any network traffic when the metadata
//     sidecar loads, carries a non-empty key_id, and is within its TTL.
//   - If the HTTP request itself fails (including a timeout), whatever primary
//     key is on disk is returned as a stale fallback.
//   - A non-200 status, an undecodable body, a non-ed25519 algorithm, or a
//     malformed key is returned as an error; the stale cache is not used.
//
// After a successful fetch the key is written to the primary cache file, to
// the per-key_id cache file, and fresh metadata is recorded. Cache write
// failures are only logged: the fetched key is still returned.
//
// NOTE(review): a newly fetched key replaces the cached one without being
// compared against it, so key pinning is not enforced here — confirm that the
// TOFU guarantee is provided elsewhere.
func FetchAndCacheServerPublicKey(serverURL string) (ed25519.PublicKey, error) {
	const (
		fetchTimeout    = 30 * time.Second
		maxErrorBody    = 4 << 10 // enough for a diagnostic message
		maxResponseBody = 1 << 20 // a key response is a few hundred bytes
	)

	// Fast path: fresh metadata plus a readable key file.
	if meta, err := loadCacheMetadata(); err == nil && meta.KeyID != "" && !meta.IsExpired() {
		if cachedKey, err := LoadCachedPublicKey(); err == nil && cachedKey != nil {
			return cachedKey, nil
		}
		// Key file missing or corrupt despite valid metadata — re-fetch.
	}

	// The default http client has no timeout; without one a stalled server
	// would block forever and the stale-cache fallback would never run.
	client := &http.Client{Timeout: fetchTimeout}
	resp, err := client.Get(serverURL + "/api/v1/public-key")
	if err != nil {
		// Network failed — fall back to stale cache if available.
		if cachedKey, loadErr := LoadCachedPublicKey(); loadErr == nil {
			fmt.Printf("Warning: Failed to fetch public key (network error), using stale cache: %v\n", err)
			return cachedKey, nil
		}
		return nil, fmt.Errorf("failed to fetch public key from server: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("server returned status %d: %s", resp.StatusCode, string(body))
	}

	var keyResp PublicKeyResponse
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxResponseBody)).Decode(&keyResp); err != nil {
		return nil, fmt.Errorf("failed to parse public key response: %w", err)
	}
	if keyResp.Algorithm != "ed25519" {
		return nil, fmt.Errorf("unsupported signature algorithm: %s (expected ed25519)", keyResp.Algorithm)
	}

	pubKeyBytes, err := hex.DecodeString(keyResp.PublicKey)
	if err != nil {
		return nil, fmt.Errorf("invalid public key format: %w", err)
	}
	if len(pubKeyBytes) != ed25519.PublicKeySize {
		return nil, fmt.Errorf("invalid public key size: expected %d bytes, got %d", ed25519.PublicKeySize, len(pubKeyBytes))
	}
	publicKey := ed25519.PublicKey(pubKeyBytes)

	// Cache the primary key.
	cacheErr := cachePublicKey(publicKey)
	if cacheErr != nil {
		fmt.Printf("Warning: Failed to cache primary public key: %v\n", cacheErr)
	}

	// Use key_id from response (fall back to fingerprint for old servers).
	keyID := keyResp.KeyID
	if keyID == "" {
		keyID = keyResp.Fingerprint
	}

	// Only record fresh metadata when the key file was actually updated.
	// Otherwise an older key left on disk would be treated as fresh for a
	// whole TTL even though it is not the key the server just returned.
	if cacheErr == nil {
		meta := &CacheMetadata{
			KeyID:    keyID,
			Version:  keyResp.Version,
			CachedAt: time.Now().UTC(),
			TTLHours: defaultCacheTTLHours,
		}
		if err := saveCacheMetadata(meta); err != nil {
			fmt.Printf("Warning: Failed to save key cache metadata: %v\n", err)
		}
	}

	// Also cache by key_id for multi-key lookup.
	if keyID != "" {
		if err := CachePublicKeyByID(keyID, publicKey); err != nil {
			fmt.Printf("Warning: Failed to cache key by ID %s: %v\n", keyID, err)
		}
	}

	fmt.Printf("Server public key fetched and cached (key_id: %s, version: %d)\n", keyID, keyResp.Version)
	return publicKey, nil
}
// FetchAndCacheAllActiveKeys fetches the list of active public keys from
// GET {serverURL}/api/v1/public-keys and caches every usable entry under its
// key_id. It is intended for key rotation transition windows.
//
// Entries whose algorithm is not "ed25519", whose key is not valid hex, or
// whose decoded key has the wrong length are skipped silently. A failure to
// write one key to disk is logged and does not abort the loop. The returned
// slice is the full decoded list, including entries that were not cached.
//
// An error is returned when the request fails (including a timeout), the
// server answers with a non-200 status, or the body cannot be decoded.
func FetchAndCacheAllActiveKeys(serverURL string) ([]ActivePublicKeyEntry, error) {
	const (
		fetchTimeout    = 30 * time.Second
		maxErrorBody    = 4 << 10 // enough for a diagnostic message
		maxResponseBody = 1 << 20 // far more than any realistic key list
	)

	// The default http client has no timeout; bound the whole exchange so a
	// stalled server cannot block the caller indefinitely.
	client := &http.Client{Timeout: fetchTimeout}
	resp, err := client.Get(serverURL + "/api/v1/public-keys")
	if err != nil {
		return nil, fmt.Errorf("failed to fetch active public keys: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("server returned %d: %s", resp.StatusCode, string(body))
	}

	var entries []ActivePublicKeyEntry
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxResponseBody)).Decode(&entries); err != nil {
		return nil, fmt.Errorf("failed to decode public keys list: %w", err)
	}

	for _, entry := range entries {
		if entry.Algorithm != "ed25519" {
			continue
		}
		pubKeyBytes, err := hex.DecodeString(entry.PublicKey)
		if err != nil || len(pubKeyBytes) != ed25519.PublicKeySize {
			continue
		}
		if err := CachePublicKeyByID(entry.KeyID, ed25519.PublicKey(pubKeyBytes)); err != nil {
			fmt.Printf("Warning: Failed to cache key %s: %v\n", entry.KeyID, err)
		}
	}
	return entries, nil
}
// LoadCachedPublicKey reads the primary cached public key from its
// backward-compatible location. It fails when the file cannot be read or when
// its length is not that of an Ed25519 public key.
func LoadCachedPublicKey() (ed25519.PublicKey, error) {
	raw, err := os.ReadFile(getPrimaryKeyPath())
	switch {
	case err != nil:
		return nil, err
	case len(raw) != ed25519.PublicKeySize:
		return nil, fmt.Errorf("cached public key has invalid size: %d bytes", len(raw))
	}
	return ed25519.PublicKey(raw), nil
}
// LoadCachedPublicKeyByID loads a cached public key by its key_id.
// Falls back to the primary key if the key_id-specific file does not exist.
func LoadCachedPublicKeyByID(keyID string) (ed25519.PublicKey, error) {
if keyID == "" {
return LoadCachedPublicKey()
}
data, err := os.ReadFile(getKeyPathByID(keyID))
if err == nil && len(data) == ed25519.PublicKeySize {
return ed25519.PublicKey(data), nil
}
// Fall back to primary
return LoadCachedPublicKey()
}
// IsKeyIDCached reports whether a cache file exists for keyID and has exactly
// the size of an Ed25519 public key. An empty keyID is never cached.
func IsKeyIDCached(keyID string) bool {
	if keyID == "" {
		return false
	}
	info, err := os.Stat(getKeyPathByID(keyID))
	if err != nil {
		return false
	}
	return info.Size() == ed25519.PublicKeySize
}
// cachePublicKey writes publicKey to the primary (backward-compatible) cache
// file, creating the key directory first when it does not exist.
func cachePublicKey(publicKey ed25519.PublicKey) error {
	if mkErr := os.MkdirAll(getPublicKeyDir(), 0755); mkErr != nil {
		return fmt.Errorf("failed to create directory: %w", mkErr)
	}
	return os.WriteFile(getPrimaryKeyPath(), publicKey, 0644)
}
// CachePublicKeyByID writes publicKey to the cache file named after keyID,
// creating the key directory first when it does not exist. An empty keyID is
// rejected with an error.
func CachePublicKeyByID(keyID string, publicKey ed25519.PublicKey) error {
	if keyID == "" {
		return fmt.Errorf("keyID cannot be empty")
	}
	if mkErr := os.MkdirAll(getPublicKeyDir(), 0755); mkErr != nil {
		return fmt.Errorf("failed to create directory: %w", mkErr)
	}
	target := getKeyPathByID(keyID)
	return os.WriteFile(target, publicKey, 0644)
}
// GetPublicKey returns the server's primary public key. It delegates to
// FetchAndCacheServerPublicKey, which serves the cached key while it is valid
// and contacts the server otherwise.
func GetPublicKey(serverURL string) (ed25519.PublicKey, error) {
	key, err := FetchAndCacheServerPublicKey(serverURL)
	return key, err
}