v0.1.16: Security overhaul and systematic deployment preparation

Breaking changes for clean alpha releases:
- JWT authentication with user-provided secrets (no more development defaults)
- Registration token system for secure agent enrollment
- Rate limiting with user-adjustable settings
- Enhanced agent configuration with proxy support
- Interactive server setup wizard (--setup flag)
- Heartbeat architecture separation for better UX
- Package status synchronization fixes
- Accurate timestamp tracking for RMM features

Setup process for new installations:
1. docker-compose up -d postgres
2. ./redflag-server --setup
3. ./redflag-server --migrate
4. ./redflag-server
5. Generate tokens via admin UI
6. Deploy agents with registration tokens
This commit is contained in:
Fimeg
2025-10-29 10:38:18 -04:00
parent b3e1b9e52f
commit 03fee29760
50 changed files with 5807 additions and 466 deletions

View File

@@ -0,0 +1,9 @@
-- Migration: add retry tracking to the agent_commands table.
-- A retried command stores the id of the command it was retried from, which lets
-- retry chains be followed and retry indicators be displayed in the UI.
-- Add retried_from_id column to link retries to their original commands.
-- The self-reference uses ON DELETE SET NULL, so deleting the original command
-- keeps the retry row and just clears its link.
ALTER TABLE agent_commands
ADD COLUMN retried_from_id UUID REFERENCES agent_commands(id) ON DELETE SET NULL;
-- Partial index for efficient retry chain lookups; rows with a NULL link
-- (commands that are not retries) are left out of the index.
CREATE INDEX idx_commands_retried_from ON agent_commands(retried_from_id) WHERE retried_from_id IS NOT NULL;

View File

@@ -0,0 +1,9 @@
-- Migration: add 'archived_failed' to the set of statuses accepted by agent_commands.status.
-- This allows failed/timed_out commands to be archived so they no longer clutter the active list.
-- A CHECK constraint is replaced here by dropping it and adding it again under the same name.
-- Drop the existing constraint (IF EXISTS keeps this step from failing when it is absent).
ALTER TABLE agent_commands DROP CONSTRAINT IF EXISTS agent_commands_status_check;
-- Add the new constraint with 'archived_failed' included.
-- Existing rows must already satisfy this list or the ADD CONSTRAINT will fail.
ALTER TABLE agent_commands ADD CONSTRAINT agent_commands_status_check
CHECK (status IN ('pending', 'sent', 'running', 'completed', 'failed', 'timed_out', 'cancelled', 'archived_failed'));

View File

@@ -0,0 +1,85 @@
-- Registration tokens for secure agent enrollment.
-- Tokens are one-time use and have configurable expiration.
-- Lifecycle is tracked in the status column: 'active' -> 'used' | 'expired' | 'revoked'.
CREATE TABLE registration_tokens (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
token VARCHAR(64) UNIQUE NOT NULL, -- One-time use token (UNIQUE also backs lookups by token)
label VARCHAR(255), -- Optional label for token identification
expires_at TIMESTAMP NOT NULL, -- Token expiration time (NOTE(review): TIMESTAMP without time zone, compared against NOW() elsewhere - confirm server/session time zone handling)
created_at TIMESTAMP DEFAULT NOW(), -- When token was created
used_at TIMESTAMP NULL, -- When token was used (NULL if unused)
used_by_agent_id UUID NULL, -- Which agent used this token (foreign key, added below)
revoked BOOLEAN DEFAULT FALSE, -- Manual revocation flag
revoked_at TIMESTAMP NULL, -- When token was revoked
revoked_reason VARCHAR(255) NULL, -- Reason for revocation
-- Token status tracking; the CHECK restricts status to the four lifecycle values
status VARCHAR(20) DEFAULT 'active'
CHECK (status IN ('active', 'used', 'expired', 'revoked')),
-- Additional metadata
created_by VARCHAR(100) DEFAULT 'setup_wizard', -- Who created the token
metadata JSONB DEFAULT '{}'::jsonb -- Additional token metadata (free-form JSON)
);
-- Indexes for performance.
-- NOTE(review): the UNIQUE constraint on token already creates a unique index on that
-- column, so idx_registration_tokens_token looks redundant - confirm before removing.
CREATE INDEX idx_registration_tokens_token ON registration_tokens(token);
CREATE INDEX idx_registration_tokens_expires_at ON registration_tokens(expires_at);
CREATE INDEX idx_registration_tokens_status ON registration_tokens(status);
-- Partial index: only tokens that have been consumed by an agent are indexed.
CREATE INDEX idx_registration_tokens_used_by_agent ON registration_tokens(used_by_agent_id) WHERE used_by_agent_id IS NOT NULL;
-- Foreign key constraint for used_by_agent_id.
-- ON DELETE SET NULL: deleting an agent keeps the token row and clears the reference.
ALTER TABLE registration_tokens
ADD CONSTRAINT fk_registration_tokens_agent
FOREIGN KEY (used_by_agent_id) REFERENCES agents(id) ON DELETE SET NULL;
-- Expires stale registration tokens (intended for a periodic cleanup job).
-- Every token that is still 'active', has never been used, and whose expires_at
-- lies in the past is switched to 'expired'; used_at is stamped with the time of
-- the cleanup. Rows are updated, never deleted.
-- Returns the number of tokens that were switched to 'expired'.
CREATE OR REPLACE FUNCTION cleanup_expired_registration_tokens()
RETURNS INTEGER AS $$
DECLARE
    expired_count INTEGER;
BEGIN
    UPDATE registration_tokens
       SET used_at = NOW(),
           status = 'expired'
     WHERE used_at IS NULL
       AND status = 'active'
       AND expires_at < NOW();

    GET DIAGNOSTICS expired_count = ROW_COUNT;
    RETURN expired_count;
END;
$$ LANGUAGE plpgsql;
-- Reports whether the given token can currently be used for enrollment.
-- A token is valid when a row with that token value exists, its status is
-- 'active', and its expires_at is still in the future. Unknown tokens yield FALSE.
CREATE OR REPLACE FUNCTION is_registration_token_valid(token_input VARCHAR)
RETURNS BOOLEAN AS $$
BEGIN
    RETURN EXISTS (
        SELECT 1
          FROM registration_tokens
         WHERE token = token_input
           AND status = 'active'
           AND expires_at > NOW()
    );
END;
$$ LANGUAGE plpgsql;
-- Marks a registration token as used by the given agent.
-- Only a token that is still 'active' and not yet expired is consumed; the row's
-- status becomes 'used', used_at is set to the current time and used_by_agent_id
-- records the agent.
-- Parameters:
--   token_input  the token value presented by the agent
--   agent_id     id of the agent that consumed the token
-- Returns TRUE when a token row was updated, FALSE when no usable token matched.
CREATE OR REPLACE FUNCTION mark_registration_token_used(token_input VARCHAR, agent_id UUID)
RETURNS BOOLEAN AS $$
DECLARE
    -- ROW_COUNT is numeric, so it must be captured in an integer variable;
    -- a BOOLEAN variable cannot be compared with "> 0" afterwards.
    updated_rows INTEGER;
BEGIN
    UPDATE registration_tokens
    SET status = 'used',
        used_at = NOW(),
        used_by_agent_id = agent_id
    WHERE token = token_input
      AND status = 'active'
      AND expires_at > NOW();

    GET DIAGNOSTICS updated_rows = ROW_COUNT;
    RETURN updated_rows > 0;
END;
$$ LANGUAGE plpgsql;

View File

@@ -196,3 +196,11 @@ func (q *AgentQueries) DeleteAgent(id uuid.UUID) error {
// Commit the transaction
return tx.Commit()
}
// GetActiveAgentCount reports how many agents currently have status 'online'.
// The count is returned together with any error raised by the query.
func (q *AgentQueries) GetActiveAgentCount() (int, error) {
	const onlineCountSQL = `SELECT COUNT(*) FROM agents WHERE status = 'online'`

	var online int
	if err := q.db.Get(&online, onlineCountSQL); err != nil {
		return online, err
	}
	return online, nil
}

View File

@@ -21,9 +21,9 @@ func NewCommandQueries(db *sqlx.DB) *CommandQueries {
func (q *CommandQueries) CreateCommand(cmd *models.AgentCommand) error {
query := `
INSERT INTO agent_commands (
id, agent_id, command_type, params, status
id, agent_id, command_type, params, status, retried_from_id
) VALUES (
:id, :agent_id, :command_type, :params, :status
:id, :agent_id, :command_type, :params, :status, :retried_from_id
)
`
_, err := q.db.NamedExec(query, cmd)
@@ -152,14 +152,15 @@ func (q *CommandQueries) RetryCommand(originalID uuid.UUID) (*models.AgentComman
return nil, fmt.Errorf("command must be failed, timed_out, or cancelled to retry")
}
// Create new command with same parameters
// Create new command with same parameters, linking it to the original
newCommand := &models.AgentCommand{
ID: uuid.New(),
AgentID: original.AgentID,
CommandType: original.CommandType,
Params: original.Params,
Status: models.CommandStatusPending,
CreatedAt: time.Now(),
ID: uuid.New(),
AgentID: original.AgentID,
CommandType: original.CommandType,
Params: original.Params,
Status: models.CommandStatusPending,
CreatedAt: time.Now(),
RetriedFromID: &originalID,
}
// Store the new command
@@ -180,20 +181,44 @@ func (q *CommandQueries) GetActiveCommands() ([]models.ActiveCommandInfo, error)
c.id,
c.agent_id,
c.command_type,
c.params,
c.status,
c.created_at,
c.sent_at,
c.result,
c.retried_from_id,
a.hostname as agent_hostname,
COALESCE(ups.package_name, 'N/A') as package_name,
COALESCE(ups.package_type, 'N/A') as package_type
COALESCE(ups.package_type, 'N/A') as package_type,
(c.retried_from_id IS NOT NULL) as is_retry,
EXISTS(SELECT 1 FROM agent_commands WHERE retried_from_id = c.id) as has_been_retried,
COALESCE((
WITH RECURSIVE retry_chain AS (
SELECT id, retried_from_id, 1 as depth
FROM agent_commands
WHERE id = c.id
UNION ALL
SELECT ac.id, ac.retried_from_id, rc.depth + 1
FROM agent_commands ac
JOIN retry_chain rc ON ac.id = rc.retried_from_id
)
SELECT MAX(depth) FROM retry_chain
), 1) - 1 as retry_count
FROM agent_commands c
LEFT JOIN agents a ON c.agent_id = a.id
LEFT JOIN current_package_state ups ON (
c.params->>'update_id' = ups.id::text OR
(c.params->>'package_name' = ups.package_name AND c.params->>'package_type' = ups.package_type)
)
WHERE c.status NOT IN ('completed', 'cancelled')
WHERE c.status NOT IN ('completed', 'cancelled', 'archived_failed')
AND NOT (
c.status IN ('failed', 'timed_out')
AND EXISTS (
SELECT 1 FROM agent_commands retry
WHERE retry.retried_from_id = c.id
AND retry.status = 'completed'
)
)
ORDER BY c.created_at DESC
`
@@ -223,9 +248,24 @@ func (q *CommandQueries) GetRecentCommands(limit int) ([]models.ActiveCommandInf
c.sent_at,
c.completed_at,
c.result,
c.retried_from_id,
a.hostname as agent_hostname,
COALESCE(ups.package_name, 'N/A') as package_name,
COALESCE(ups.package_type, 'N/A') as package_type
COALESCE(ups.package_type, 'N/A') as package_type,
(c.retried_from_id IS NOT NULL) as is_retry,
EXISTS(SELECT 1 FROM agent_commands WHERE retried_from_id = c.id) as has_been_retried,
COALESCE((
WITH RECURSIVE retry_chain AS (
SELECT id, retried_from_id, 1 as depth
FROM agent_commands
WHERE id = c.id
UNION ALL
SELECT ac.id, ac.retried_from_id, rc.depth + 1
FROM agent_commands ac
JOIN retry_chain rc ON ac.id = rc.retried_from_id
)
SELECT MAX(depth) FROM retry_chain
), 1) - 1 as retry_count
FROM agent_commands c
LEFT JOIN agents a ON c.agent_id = a.id
LEFT JOIN current_package_state ups ON (
@@ -243,3 +283,55 @@ func (q *CommandQueries) GetRecentCommands(limit int) ([]models.ActiveCommandInf
return commands, nil
}
// ClearOldFailedCommands archives failed and timed-out commands that were
// created more than the given number of days ago by changing their status to
// 'archived_failed'.
//
// days is the minimum age, in days, a command must have to be archived.
// It returns the number of commands that were archived.
func (q *CommandQueries) ClearOldFailedCommands(days int) (int64, error) {
	// The age threshold is passed as a bind parameter instead of being
	// formatted into the SQL text, so the statement text stays constant.
	const query = `
		UPDATE agent_commands
		SET status = 'archived_failed'
		WHERE status IN ('failed', 'timed_out')
		  AND created_at < NOW() - make_interval(days => $1)
	`

	result, err := q.db.Exec(query, days)
	if err != nil {
		return 0, fmt.Errorf("failed to archive old failed commands: %w", err)
	}
	return result.RowsAffected()
}
// ClearRetriedFailedCommands archives failed and timed-out commands that have
// been retried (another command references them through retried_from_id) and
// that were created more than the given number of days ago. Archiving means
// changing the status to 'archived_failed'.
//
// days is the minimum age, in days, a command must have to be archived.
// It returns the number of commands that were archived.
func (q *CommandQueries) ClearRetriedFailedCommands(days int) (int64, error) {
	// Both references to agent_commands are aliased. Without aliases the name
	// inside the subquery resolves to the subquery's own table, which turns the
	// check into "a row that was retried from itself" and matches nothing.
	const query = `
		UPDATE agent_commands AS failed_cmd
		SET status = 'archived_failed'
		WHERE failed_cmd.status IN ('failed', 'timed_out')
		  AND EXISTS (
			SELECT 1
			FROM agent_commands AS retry
			WHERE retry.retried_from_id = failed_cmd.id
		  )
		  AND failed_cmd.created_at < NOW() - make_interval(days => $1)
	`

	result, err := q.db.Exec(query, days)
	if err != nil {
		return 0, fmt.Errorf("failed to archive retried failed commands: %w", err)
	}
	return result.RowsAffected()
}
// ClearAllFailedCommands archives all failed and timed-out commands that were
// created more than the given number of days ago (most aggressive) by changing
// their status to 'archived_failed'.
//
// days is the minimum age, in days, a command must have to be archived.
// It returns the number of commands that were archived.
//
// NOTE(review): the filter is the same one ClearOldFailedCommands applies;
// confirm whether "most aggressive" was meant to select a wider set of rows.
func (q *CommandQueries) ClearAllFailedCommands(days int) (int64, error) {
	// The age threshold is passed as a bind parameter instead of being
	// formatted into the SQL text, so the statement text stays constant.
	const query = `
		UPDATE agent_commands
		SET status = 'archived_failed'
		WHERE status IN ('failed', 'timed_out')
		  AND created_at < NOW() - make_interval(days => $1)
	`

	result, err := q.db.Exec(query, days)
	if err != nil {
		return 0, fmt.Errorf("failed to archive all failed commands: %w", err)
	}
	return result.RowsAffected()
}

View File

@@ -0,0 +1,232 @@
package queries
import (
"database/sql"
"encoding/json"
"fmt"
"time"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
)
// RegistrationTokenQueries groups the database operations on the
// registration_tokens table. Create one with NewRegistrationTokenQueries.
type RegistrationTokenQueries struct {
// db is the sqlx handle every query in this type runs against.
db *sqlx.DB
}
// RegistrationTokenMetadata is the free-form JSON object kept in the metadata
// column of registration_tokens. Its underlying type is
// map[string]interface{}, so plain map values stay assignable to and from it.
// It implements sql.Scanner because database/sql cannot scan a JSON column
// into a bare map.
type RegistrationTokenMetadata map[string]interface{}

// Scan implements sql.Scanner. It accepts the JSON document as []byte or
// string and decodes it into the map; SQL NULL and empty input yield a nil map.
func (m *RegistrationTokenMetadata) Scan(src interface{}) error {
	var raw []byte
	switch v := src.(type) {
	case nil:
		*m = nil
		return nil
	case []byte:
		raw = v
	case string:
		raw = []byte(v)
	default:
		return fmt.Errorf("unsupported registration token metadata type %T", src)
	}
	if len(raw) == 0 {
		*m = nil
		return nil
	}

	var decoded map[string]interface{}
	if err := json.Unmarshal(raw, &decoded); err != nil {
		return fmt.Errorf("failed to decode registration token metadata: %w", err)
	}
	*m = decoded
	return nil
}

// RegistrationToken mirrors one row of the registration_tokens table.
// Pointer fields correspond to columns that may be NULL.
type RegistrationToken struct {
	ID            uuid.UUID                 `json:"id" db:"id"`
	Token         string                    `json:"token" db:"token"`
	Label         *string                   `json:"label" db:"label"`
	ExpiresAt     time.Time                 `json:"expires_at" db:"expires_at"`
	CreatedAt     time.Time                 `json:"created_at" db:"created_at"`
	UsedAt        *time.Time                `json:"used_at" db:"used_at"`
	UsedByAgentID *uuid.UUID                `json:"used_by_agent_id" db:"used_by_agent_id"`
	Revoked       bool                      `json:"revoked" db:"revoked"`
	RevokedAt     *time.Time                `json:"revoked_at" db:"revoked_at"`
	RevokedReason *string                   `json:"revoked_reason" db:"revoked_reason"`
	Status        string                    `json:"status" db:"status"`
	CreatedBy     string                    `json:"created_by" db:"created_by"`
	Metadata      RegistrationTokenMetadata `json:"metadata" db:"metadata"`
}
// TokenRequest is the JSON payload describing a registration token to create.
type TokenRequest struct {
// Label is a human-readable name for the token.
Label string `json:"label"`
// ExpiresIn is the requested lifetime as text.
// NOTE(review): "7d" is not a unit time.ParseDuration accepts - confirm how the caller parses this value.
ExpiresIn string `json:"expires_in"` // e.g., "24h", "7d"
// Metadata carries arbitrary extra key/value data for the token.
Metadata map[string]interface{} `json:"metadata"`
}
// TokenResponse is the JSON payload describing a registration token to the caller.
type TokenResponse struct {
// Token is the token value.
Token string `json:"token"`
// Label is the human-readable name given to the token.
Label string `json:"label"`
// ExpiresAt is the point in time after which the token is no longer accepted.
ExpiresAt time.Time `json:"expires_at"`
// InstallCommand is serialized as "install_command"; presumably a ready-to-run agent install command - verify against the handler that fills it.
InstallCommand string `json:"install_command"`
}
// NewRegistrationTokenQueries returns a RegistrationTokenQueries that runs its
// statements against the given database handle.
func NewRegistrationTokenQueries(db *sqlx.DB) *RegistrationTokenQueries {
	queries := new(RegistrationTokenQueries)
	queries.db = db
	return queries
}
// CreateRegistrationToken creates a new one-time use registration token.
//
// token is the token value, label a human-readable name, expiresAt the moment
// the token stops being valid, and metadata arbitrary extra data stored as
// JSON. All other columns take their table defaults.
//
// It returns an error when the metadata cannot be encoded or the insert fails.
func (q *RegistrationTokenQueries) CreateRegistrationToken(token, label string, expiresAt time.Time, metadata map[string]interface{}) error {
	// A nil map would be encoded as the JSON value null and override the
	// column's '{}' default, so it is stored as an empty object instead.
	if metadata == nil {
		metadata = map[string]interface{}{}
	}

	metadataJSON, err := json.Marshal(metadata)
	if err != nil {
		return fmt.Errorf("failed to marshal metadata: %w", err)
	}

	const query = `
		INSERT INTO registration_tokens (token, label, expires_at, metadata)
		VALUES ($1, $2, $3, $4)
	`
	if _, err := q.db.Exec(query, token, label, expiresAt, metadataJSON); err != nil {
		return fmt.Errorf("failed to create registration token: %w", err)
	}
	return nil
}
// ValidateRegistrationToken looks up the token and returns its row when the
// token is usable, meaning its status is 'active' and it has not yet expired.
//
// When no such row exists the error reads "invalid or expired token"; any
// other database error is wrapped and returned.
func (q *RegistrationTokenQueries) ValidateRegistrationToken(token string) (*RegistrationToken, error) {
	const lookupSQL = `
		SELECT id, token, label, expires_at, created_at, used_at, used_by_agent_id,
		revoked, revoked_at, revoked_reason, status, created_by, metadata
		FROM registration_tokens
		WHERE token = $1 AND status = 'active' AND expires_at > NOW()
	`

	var found RegistrationToken
	switch err := q.db.Get(&found, lookupSQL, token); {
	case err == nil:
		return &found, nil
	case err == sql.ErrNoRows:
		return nil, fmt.Errorf("invalid or expired token")
	default:
		return nil, fmt.Errorf("failed to validate token: %w", err)
	}
}
// MarkTokenUsed consumes a token on behalf of an agent: the row's status
// becomes 'used', used_at is set to the current time and used_by_agent_id is
// set to agentID. Only a token that is still 'active' and unexpired matches.
//
// An error is returned when the statement fails or when no row was updated.
func (q *RegistrationTokenQueries) MarkTokenUsed(token string, agentID uuid.UUID) error {
	const consumeSQL = `
		UPDATE registration_tokens
		SET status = 'used',
		used_at = NOW(),
		used_by_agent_id = $1
		WHERE token = $2 AND status = 'active' AND expires_at > NOW()
	`

	res, execErr := q.db.Exec(consumeSQL, agentID, token)
	if execErr != nil {
		return fmt.Errorf("failed to mark token as used: %w", execErr)
	}

	affected, countErr := res.RowsAffected()
	switch {
	case countErr != nil:
		return fmt.Errorf("failed to get rows affected: %w", countErr)
	case affected == 0:
		return fmt.Errorf("token not found or already used")
	}
	return nil
}
// GetActiveRegistrationTokens lists every token whose status is 'active',
// newest first. The filter is on status only; expires_at is not consulted.
func (q *RegistrationTokenQueries) GetActiveRegistrationTokens() ([]RegistrationToken, error) {
	const listActiveSQL = `
		SELECT id, token, label, expires_at, created_at, used_at, used_by_agent_id,
		revoked, revoked_at, revoked_reason, status, created_by, metadata
		FROM registration_tokens
		WHERE status = 'active'
		ORDER BY created_at DESC
	`

	var active []RegistrationToken
	if selectErr := q.db.Select(&active, listActiveSQL); selectErr != nil {
		return nil, fmt.Errorf("failed to get active tokens: %w", selectErr)
	}
	return active, nil
}
// GetAllRegistrationTokens returns one page of tokens regardless of status,
// newest first. limit caps the page size and offset skips that many rows.
func (q *RegistrationTokenQueries) GetAllRegistrationTokens(limit, offset int) ([]RegistrationToken, error) {
	const pageSQL = `
		SELECT id, token, label, expires_at, created_at, used_at, used_by_agent_id,
		revoked, revoked_at, revoked_reason, status, created_by, metadata
		FROM registration_tokens
		ORDER BY created_at DESC
		LIMIT $1 OFFSET $2
	`

	var page []RegistrationToken
	if selectErr := q.db.Select(&page, pageSQL, limit, offset); selectErr != nil {
		return nil, fmt.Errorf("failed to get all tokens: %w", selectErr)
	}
	return page, nil
}
// RevokeRegistrationToken revokes an active token: status becomes 'revoked',
// the revoked flag is set, revoked_at is set to the current time and the
// given reason is recorded.
//
// An error is returned when the statement fails or when no active token with
// that value exists.
func (q *RegistrationTokenQueries) RevokeRegistrationToken(token, reason string) error {
	const revokeSQL = `
		UPDATE registration_tokens
		SET status = 'revoked',
		revoked = true,
		revoked_at = NOW(),
		revoked_reason = $1
		WHERE token = $2 AND status = 'active'
	`

	res, execErr := q.db.Exec(revokeSQL, reason, token)
	if execErr != nil {
		return fmt.Errorf("failed to revoke token: %w", execErr)
	}

	affected, countErr := res.RowsAffected()
	switch {
	case countErr != nil:
		return fmt.Errorf("failed to get rows affected: %w", countErr)
	case affected == 0:
		return fmt.Errorf("token not found or already used/revoked")
	}
	return nil
}
// CleanupExpiredTokens switches every unused, still-active token whose
// expires_at lies in the past to status 'expired' and stamps used_at with the
// current time. It returns how many tokens were changed.
func (q *RegistrationTokenQueries) CleanupExpiredTokens() (int, error) {
	const expireSQL = `
		UPDATE registration_tokens
		SET status = 'expired',
		used_at = NOW()
		WHERE status = 'active' AND expires_at < NOW() AND used_at IS NULL
	`

	res, execErr := q.db.Exec(expireSQL)
	if execErr != nil {
		return 0, fmt.Errorf("failed to cleanup expired tokens: %w", execErr)
	}

	expired, countErr := res.RowsAffected()
	if countErr != nil {
		return 0, fmt.Errorf("failed to get rows affected: %w", countErr)
	}
	return int(expired), nil
}
// GetTokenUsageStats returns statistics about token usage: a map from each
// status value present in registration_tokens to the number of tokens with
// that status. Statuses with no rows are absent from the map.
//
// An error is returned when the query fails, a row cannot be scanned, or the
// result set ends with an iteration error.
func (q *RegistrationTokenQueries) GetTokenUsageStats() (map[string]int, error) {
	stats := make(map[string]int)

	const query = `
		SELECT status, COUNT(*) as count
		FROM registration_tokens
		GROUP BY status
	`
	rows, err := q.db.Query(query)
	if err != nil {
		return nil, fmt.Errorf("failed to get token stats: %w", err)
	}
	defer rows.Close()

	for rows.Next() {
		var status string
		var count int
		if err := rows.Scan(&status, &count); err != nil {
			return nil, fmt.Errorf("failed to scan token stats row: %w", err)
		}
		stats[status] = count
	}
	// rows.Next also returns false when iteration aborts on an error; without
	// this check a partial result would be reported as complete.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("failed to read token stats rows: %w", err)
	}
	return stats, nil
}

View File

@@ -527,7 +527,8 @@ func (q *UpdateQueries) GetPackageHistory(agentID uuid.UUID, packageType, packag
}
// UpdatePackageStatus updates the status of a package and records history
func (q *UpdateQueries) UpdatePackageStatus(agentID uuid.UUID, packageType, packageName, status string, metadata models.JSONB) error {
// completedAt is optional - if nil, uses time.Now(). Pass actual completion time for accurate audit trails.
func (q *UpdateQueries) UpdatePackageStatus(agentID uuid.UUID, packageType, packageName, status string, metadata models.JSONB, completedAt *time.Time) error {
tx, err := q.db.Beginx()
if err != nil {
return fmt.Errorf("failed to begin transaction: %w", err)
@@ -542,13 +543,19 @@ func (q *UpdateQueries) UpdatePackageStatus(agentID uuid.UUID, packageType, pack
return fmt.Errorf("failed to get current state: %w", err)
}
// Use provided timestamp or fall back to server time
timestamp := time.Now()
if completedAt != nil {
timestamp = *completedAt
}
// Update status
updateQuery := `
UPDATE current_package_state
SET status = $1, last_updated_at = $2
WHERE agent_id = $3 AND package_type = $4 AND package_name = $5
`
_, err = tx.Exec(updateQuery, status, time.Now(), agentID, packageType, packageName)
_, err = tx.Exec(updateQuery, status, timestamp, agentID, packageType, packageName)
if err != nil {
return fmt.Errorf("failed to update package status: %w", err)
}
@@ -564,7 +571,7 @@ func (q *UpdateQueries) UpdatePackageStatus(agentID uuid.UUID, packageType, pack
_, err = tx.Exec(historyQuery,
agentID, packageType, packageName, currentState.CurrentVersion,
currentState.AvailableVersion, currentState.Severity,
currentState.RepositorySource, metadata, time.Now(), status)
currentState.RepositorySource, metadata, timestamp, status)
if err != nil {
return fmt.Errorf("failed to record version history: %w", err)
}