Complete RedFlag codebase with two major security audit implementations.
== A-1: Ed25519 Key Rotation Support ==
Server:
- SignCommand sets SignedAt timestamp and KeyID on every signature
- signing_keys database table (migration 020) for multi-key rotation
- InitializePrimaryKey registers active key at startup
- /api/v1/public-keys endpoint for rotation-aware agents
- SigningKeyQueries for key lifecycle management
Agent:
- Key-ID-aware verification via CheckKeyRotation
- FetchAndCacheAllActiveKeys for rotation pre-caching
- Cache metadata with TTL and staleness fallback
- SecurityLogger events for key rotation and command signing
== A-2: Replay Attack Fixes (F-1 through F-7) ==
F-5 CRITICAL - RetryCommand now signs via signAndCreateCommand
F-1 HIGH - v3 format: "{agent_id}:{cmd_id}:{type}:{hash}:{ts}"
F-7 HIGH - Migration 026: expires_at column with partial index
F-6 HIGH - GetPendingCommands/GetStuckCommands filter by expires_at
F-2 HIGH - Agent-side executedIDs dedup map with cleanup
F-4 HIGH - commandMaxAge reduced from 24h to 4h
F-3 CRITICAL - Old-format commands rejected after 48h via CreatedAt
Verification fixes: migration idempotency (ETHOS #4), log format
compliance (ETHOS #1), stale comments updated.
All 24 tests passing. Docker --no-cache build verified.
See docs/ for full audit reports and deviation log (DEV-001 to DEV-019).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
316 lines
8.7 KiB
Go
316 lines
8.7 KiB
Go
package services
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"time"
|
|
|
|
"github.com/Fimeg/RedFlag/aggregator-server/internal/config"
|
|
"github.com/Fimeg/RedFlag/aggregator-server/internal/database/queries"
|
|
"github.com/Fimeg/RedFlag/aggregator-server/internal/models"
|
|
"github.com/google/uuid"
|
|
"github.com/jmoiron/sqlx"
|
|
)
|
|
|
|
// LifecycleOperation names the kind of lifecycle action requested for an agent.
type LifecycleOperation string

// Lifecycle operations accepted by AgentLifecycleService.Process.
const (
	// OperationNew creates a brand-new agent record.
	OperationNew = LifecycleOperation("new")
	// OperationUpgrade acts on an agent that must already exist.
	OperationUpgrade = LifecycleOperation("upgrade")
	// OperationRebuild acts on an agent that must already exist.
	OperationRebuild = LifecycleOperation("rebuild")
)
|
|
|
|
// AgentConfig carries the caller-supplied parameters for one lifecycle
// operation on an agent.
type AgentConfig struct {
	// AgentID, Version and Platform are required for every operation.
	AgentID, Version, Platform string

	// Architecture is optional; the setup response falls back to "amd64"
	// when it is empty.
	Architecture string

	// MachineID is required when registering a new agent.
	MachineID string

	// AgentType and ServerURL are not read by the lifecycle service itself.
	// NOTE(review): presumably consumed by config generation - confirm.
	AgentType, ServerURL string

	// Hostname is stored on the agent record when a new agent is created.
	Hostname string
}
|
|
|
|
// AgentLifecycleService manages all agent lifecycle operations
|
|
type AgentLifecycleService struct {
|
|
db *sqlx.DB
|
|
config *config.Config
|
|
buildService *BuildService
|
|
configService *ConfigService
|
|
artifactService *ArtifactService
|
|
subsystemQueries *queries.SubsystemQueries
|
|
logger *log.Logger
|
|
}
|
|
|
|
// NewAgentLifecycleService creates a new lifecycle service
|
|
func NewAgentLifecycleService(
|
|
db *sqlx.DB,
|
|
cfg *config.Config,
|
|
logger *log.Logger,
|
|
) *AgentLifecycleService {
|
|
return &AgentLifecycleService{
|
|
db: db,
|
|
config: cfg,
|
|
buildService: NewBuildService(db, cfg, logger),
|
|
configService: NewConfigService(db, cfg, logger),
|
|
artifactService: NewArtifactService(db, cfg, logger),
|
|
subsystemQueries: queries.NewSubsystemQueries(db),
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
// Process handles all agent lifecycle operations (new, upgrade, rebuild)
|
|
func (s *AgentLifecycleService) Process(
|
|
ctx context.Context,
|
|
op LifecycleOperation,
|
|
agentCfg *AgentConfig,
|
|
) (*AgentSetupResponse, error) {
|
|
|
|
// Step 1: Validate operation
|
|
if err := s.validateOperation(op, agentCfg); err != nil {
|
|
return nil, fmt.Errorf("validation failed: %w", err)
|
|
}
|
|
|
|
// Step 2: Check if agent exists (for upgrade/rebuild)
|
|
_, err := s.getAgent(ctx, agentCfg.AgentID)
|
|
if err != nil && op != OperationNew {
|
|
return nil, fmt.Errorf("agent not found: %w", err)
|
|
}
|
|
|
|
// Step 3: Generate or load configuration
|
|
var configJSON []byte
|
|
if op == OperationNew {
|
|
configJSON, err = s.configService.GenerateNewConfig(agentCfg)
|
|
} else {
|
|
configJSON, err = s.configService.LoadExistingConfig(agentCfg.AgentID)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("config generation failed: %w", err)
|
|
}
|
|
|
|
// Step 4: Check if build is needed
|
|
needBuild, err := s.buildService.IsBuildRequired(agentCfg)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("build check failed: %w", err)
|
|
}
|
|
|
|
var artifacts *BuildArtifacts
|
|
if needBuild {
|
|
// Step 5: Build artifacts
|
|
artifacts, err = s.buildService.BuildArtifacts(ctx, agentCfg)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("build failed: %w", err)
|
|
}
|
|
|
|
// Step 6: Store artifacts
|
|
if err := s.artifactService.Store(ctx, artifacts); err != nil {
|
|
return nil, fmt.Errorf("artifact storage failed: %w", err)
|
|
}
|
|
} else {
|
|
// Step 7: Use existing artifacts
|
|
artifacts, err = s.artifactService.Get(ctx, agentCfg.Platform, agentCfg.Version)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("existing artifacts not found: %w", err)
|
|
}
|
|
}
|
|
|
|
// Step 8: Create or update agent record
|
|
if op == OperationNew {
|
|
err = s.createAgent(ctx, agentCfg, configJSON)
|
|
} else {
|
|
err = s.updateAgent(ctx, agentCfg, configJSON)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("agent record update failed: %w", err)
|
|
}
|
|
|
|
// Step 9: Return response
|
|
return s.buildResponse(agentCfg, artifacts), nil
|
|
}
|
|
|
|
// validateOperation validates the lifecycle operation
|
|
func (s *AgentLifecycleService) validateOperation(
|
|
op LifecycleOperation,
|
|
cfg *AgentConfig,
|
|
) error {
|
|
if cfg.AgentID == "" {
|
|
return fmt.Errorf("agent_id is required")
|
|
}
|
|
if cfg.Version == "" {
|
|
return fmt.Errorf("version is required")
|
|
}
|
|
if cfg.Platform == "" {
|
|
return fmt.Errorf("platform is required")
|
|
}
|
|
|
|
// Operation-specific validation
|
|
switch op {
|
|
case OperationNew:
|
|
// New agents need machine_id
|
|
if cfg.MachineID == "" {
|
|
return fmt.Errorf("machine_id is required for new agents")
|
|
}
|
|
case OperationUpgrade, OperationRebuild:
|
|
// Upgrade/rebuild need existing agent
|
|
// Validation done in getAgent()
|
|
default:
|
|
return fmt.Errorf("unknown operation: %s", op)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// getAgent retrieves agent from database
|
|
func (s *AgentLifecycleService) getAgent(ctx context.Context, agentID string) (*models.Agent, error) {
|
|
var agent models.Agent
|
|
query := `SELECT * FROM agents WHERE id = $1`
|
|
err := s.db.GetContext(ctx, &agent, query, agentID)
|
|
return &agent, err
|
|
}
|
|
|
|
// createAgent creates new agent record
|
|
func (s *AgentLifecycleService) createAgent(
|
|
ctx context.Context,
|
|
cfg *AgentConfig,
|
|
configJSON []byte,
|
|
) error {
|
|
machineID := cfg.MachineID
|
|
agent := &models.Agent{
|
|
ID: uuid.MustParse(cfg.AgentID),
|
|
Hostname: cfg.Hostname,
|
|
OSType: cfg.Platform,
|
|
AgentVersion: cfg.Version,
|
|
MachineID: &machineID,
|
|
CreatedAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
}
|
|
|
|
query := `
|
|
INSERT INTO agents (id, hostname, os_type, agent_version, machine_id, created_at, updated_at)
|
|
VALUES (:id, :hostname, :os_type, :agent_version, :machine_id, :created_at, :updated_at)
|
|
`
|
|
_, err := s.db.NamedExecContext(ctx, query, agent)
|
|
if err != nil {
|
|
return fmt.Errorf("agent record creation failed: %w", err)
|
|
}
|
|
|
|
// Create default subsystems for new agent
|
|
if err := s.subsystemQueries.CreateDefaultSubsystems(agent.ID); err != nil {
|
|
s.logger.Printf("Warning: failed to create default subsystems: %v", err)
|
|
// Non-fatal error - agent still created
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// updateAgent updates existing agent record
|
|
func (s *AgentLifecycleService) updateAgent(
|
|
ctx context.Context,
|
|
cfg *AgentConfig,
|
|
configJSON []byte,
|
|
) error {
|
|
query := `
|
|
UPDATE agents
|
|
SET agent_version = $1, updated_at = $2
|
|
WHERE id = $3
|
|
`
|
|
_, err := s.db.ExecContext(ctx, query, cfg.Version, time.Now(), cfg.AgentID)
|
|
return err
|
|
}
|
|
|
|
// buildResponse constructs the API response
|
|
func (s *AgentLifecycleService) buildResponse(
|
|
cfg *AgentConfig,
|
|
artifacts *BuildArtifacts,
|
|
) *AgentSetupResponse {
|
|
// Default to amd64 if architecture not specified
|
|
arch := cfg.Architecture
|
|
if arch == "" {
|
|
arch = "amd64"
|
|
}
|
|
|
|
return &AgentSetupResponse{
|
|
AgentID: cfg.AgentID,
|
|
ConfigURL: fmt.Sprintf("/api/v1/config/%s", cfg.AgentID),
|
|
BinaryURL: fmt.Sprintf("/api/v1/downloads/%s-%s?version=%s", cfg.Platform, arch, cfg.Version),
|
|
Signature: artifacts.Signature,
|
|
Version: cfg.Version,
|
|
Platform: cfg.Platform,
|
|
NextSteps: s.generateNextSteps(cfg),
|
|
CreatedAt: time.Now(),
|
|
}
|
|
}
|
|
|
|
// generateNextSteps creates installation instructions
|
|
func (s *AgentLifecycleService) generateNextSteps(cfg *AgentConfig) []string {
|
|
return []string{
|
|
fmt.Sprintf("1. Download binary: %s/redflag-agent", cfg.Platform),
|
|
fmt.Sprintf("2. Download config: %s/config.json", cfg.AgentID),
|
|
"3. Install binary to: /usr/local/bin/redflag-agent",
|
|
"4. Install config to: /etc/redflag/config.json",
|
|
"5. Run: systemctl enable --now redflag-agent",
|
|
}
|
|
}
|
|
|
|
// AgentSetupResponse is the single response shape returned for every agent
// lifecycle operation (new, upgrade and rebuild).
type AgentSetupResponse struct {
	// AgentID echoes the agent the operation was performed for.
	AgentID string `json:"agent_id"`

	// ConfigURL and BinaryURL are server-relative download paths.
	ConfigURL string `json:"config_url"`
	BinaryURL string `json:"binary_url"`

	// Signature is copied from the build artifacts.
	Signature string `json:"signature"`

	// Version and Platform echo the requested agent version and platform.
	Version  string `json:"version"`
	Platform string `json:"platform"`

	// NextSteps lists the installation instructions for the operator.
	NextSteps []string `json:"next_steps"`

	// CreatedAt is the time the response was assembled.
	CreatedAt time.Time `json:"created_at"`
}
|
|
|
|
// BuildService placeholder (to be implemented)
|
|
type BuildService struct {
|
|
db *sqlx.DB
|
|
config *config.Config
|
|
logger *log.Logger
|
|
}
|
|
|
|
func NewBuildService(db *sqlx.DB, cfg *config.Config, logger *log.Logger) *BuildService {
|
|
return &BuildService{db: db, config: cfg, logger: logger}
|
|
}
|
|
|
|
func (s *BuildService) IsBuildRequired(cfg *AgentConfig) (bool, error) {
|
|
// Placeholder: Always return false for now (use existing builds)
|
|
return false, nil
|
|
}
|
|
|
|
func (s *BuildService) BuildArtifacts(ctx context.Context, cfg *AgentConfig) (*BuildArtifacts, error) {
|
|
// Placeholder: Return empty artifacts
|
|
return &BuildArtifacts{}, nil
|
|
}
|
|
|
|
// ArtifactService placeholder (to be implemented)
|
|
type ArtifactService struct {
|
|
db *sqlx.DB
|
|
config *config.Config
|
|
logger *log.Logger
|
|
}
|
|
|
|
func NewArtifactService(db *sqlx.DB, cfg *config.Config, logger *log.Logger) *ArtifactService {
|
|
return &ArtifactService{db: db, config: cfg, logger: logger}
|
|
}
|
|
|
|
func (s *ArtifactService) Store(ctx context.Context, artifacts *BuildArtifacts) error {
|
|
// Placeholder: Do nothing for now
|
|
return nil
|
|
}
|
|
|
|
func (s *ArtifactService) Get(ctx context.Context, platform, version string) (*BuildArtifacts, error) {
|
|
// Placeholder: Return empty artifacts
|
|
return &BuildArtifacts{}, nil
|
|
}
|
|
|
|
// BuildArtifacts describes the output of an agent build.
type BuildArtifacts struct {
	// Signature is copied verbatim into the setup response.
	Signature string
}
|