v0.1.27 release: Complete implementation

Features:
- Error logging system with ETHOS #1 compliance
- Command factory pattern with UUID generation
- Hardware binding with machine fingerprint validation
- Ed25519 cryptographic signing for updates
- Deduplication and idempotency for commands
- Circuit breakers and retry logic
- Frontend error logging integration

Bug Fixes:
- Version display using compile-time injection
- Migration 017 CONCURRENTLY issue resolved
- Docker build context fixes
- Rate limiting implementation verified

Documentation:
- README updated to reflect actual implementation
- v0.1.27 inventory analysis added
This commit is contained in:
Fimeg
2025-12-20 13:47:36 -05:00
parent 54c554ac7c
commit 62697df112
19 changed files with 1405 additions and 18 deletions

View File

@@ -1248,7 +1248,7 @@ func (h *AgentHandler) EnableRapidPollingMode(agentID uuid.UUID, durationMinutes
}
// SetRapidPollingMode enables rapid polling mode for an agent
// Rate limiting for the rapid polling endpoints is enforced at router level in cmd/server/main.go
func (h *AgentHandler) SetRapidPollingMode(c *gin.Context) {
idStr := c.Param("id")
agentID, err := uuid.Parse(idStr)

View File

@@ -0,0 +1,223 @@
package handlers
import (
"encoding/json"
"fmt"
"log"
"net/http"
"time"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
)
// ClientErrorHandler serves the frontend error-logging endpoints (ETHOS #1).
// Its methods read from and write to the client_errors table through db.
type ClientErrorHandler struct {
	db *sqlx.DB
}
// NewClientErrorHandler returns a ClientErrorHandler that issues all of its
// queries through db.
func NewClientErrorHandler(db *sqlx.DB) *ClientErrorHandler {
	handler := new(ClientErrorHandler)
	handler.db = db
	return handler
}
// GetErrorsResponse is the JSON envelope produced by GetErrors: one page of
// client errors plus the pagination bookkeeping for that page.
type GetErrorsResponse struct {
	Errors     []ClientErrorResponse `json:"errors"`      // rows on the requested page
	Total      int64                 `json:"total"`       // rows matching the filters
	Page       int                   `json:"page"`        // requested page number (first page is 1)
	PageSize   int                   `json:"page_size"`   // rows per page
	TotalPages int                   `json:"total_pages"` // Total divided by PageSize, rounded up
}
// ClientErrorResponse is the JSON shape of one stored client error.
// AgentID and Metadata are left out of the encoded JSON when they are empty.
type ClientErrorResponse struct {
	ID        string                 `json:"id"`
	AgentID   string                 `json:"agent_id,omitempty"`
	Subsystem string                 `json:"subsystem"`
	ErrorType string                 `json:"error_type"`
	Message   string                 `json:"message"`
	Metadata  map[string]interface{} `json:"metadata,omitempty"`
	URL       string                 `json:"url"`
	CreatedAt time.Time              `json:"created_at"`
}
// GetErrors returns one page of stored client errors as JSON (admin only).
//
// Query parameters:
//   - page: page number starting at 1; falls back to 1 when missing,
//     unparsable or smaller than 1.
//   - page_size: rows per page; falls back to 50 when missing, unparsable or
//     smaller than 1, and is capped at 100.
//   - subsystem, error_type, agent_id: optional exact-match filters.
//
// It responds with 200 and a GetErrorsResponse, or with 500 and an "error"
// field when the page query or the count query fails.
func (h *ClientErrorHandler) GetErrors(c *gin.Context) {
	const (
		defaultPage     = 1
		defaultPageSize = 50
		maxPageSize     = 100
	)

	// clientErrorRow is the scan target for one client_errors row. The db tags
	// map the snake_case columns explicitly, and nullable columns use
	// pointer/byte-slice types so a NULL does not abort the scan.
	type clientErrorRow struct {
		ID        string     `db:"id"`
		AgentID   *string    `db:"agent_id"`
		Subsystem string     `db:"subsystem"`
		ErrorType string     `db:"error_type"`
		Message   string     `db:"message"`
		Metadata  []byte     `db:"metadata"`
		URL       string     `db:"url"`
		CreatedAt *time.Time `db:"created_at"`
	}

	// Non-positive or unparsable pagination values fall back to the defaults,
	// so OFFSET/LIMIT are never negative and the page count below never
	// divides by zero.
	page := defaultPage
	if raw, ok := c.GetQuery("page"); ok {
		if _, err := fmt.Sscanf(raw, "%d", &page); err != nil || page < 1 {
			page = defaultPage
		}
	}
	pageSize := defaultPageSize
	if raw, ok := c.GetQuery("page_size"); ok {
		if _, err := fmt.Sscanf(raw, "%d", &pageSize); err != nil || pageSize < 1 {
			pageSize = defaultPageSize
		}
	}
	if pageSize > maxPageSize {
		pageSize = maxPageSize
	}

	// Build the WHERE clause once; the page query and the count query share it.
	// Column names are fixed literals, only the values come from the request.
	params := map[string]interface{}{
		"limit":  pageSize,
		"offset": (page - 1) * pageSize,
	}
	where := " WHERE 1=1"
	filters := []struct{ column, value string }{
		{"subsystem", c.Query("subsystem")},
		{"error_type", c.Query("error_type")},
		{"agent_id", c.Query("agent_id")},
	}
	for _, f := range filters {
		if f.value == "" {
			continue
		}
		where += " AND " + f.column + " = :" + f.column
		params[f.column] = f.value
	}

	fail := func(tag, msg string, err error) {
		log.Printf("[ERROR] [server] [client_error] %s error=\"%v\"", tag, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
	}

	// Named placeholders must be bound to positional ones before Select/Get
	// can run them; BindNamed uses the bindvar style of the connected driver.
	pageSQL, pageArgs, err := h.db.BindNamed(
		"SELECT id, agent_id, subsystem, error_type, message, metadata, url, created_at"+
			" FROM client_errors"+where+
			" ORDER BY created_at DESC LIMIT :limit OFFSET :offset",
		params,
	)
	if err != nil {
		fail("query_failed", "query failed", err)
		return
	}
	var rows []clientErrorRow
	if err := h.db.Select(&rows, pageSQL, pageArgs...); err != nil {
		fail("query_failed", "query failed", err)
		return
	}

	// Total count over the same filters (limit/offset entries are ignored
	// because the count query does not reference them).
	countSQL, countArgs, err := h.db.BindNamed("SELECT COUNT(*) FROM client_errors"+where, params)
	if err != nil {
		fail("count_failed", "count failed", err)
		return
	}
	var total int64
	if err := h.db.Get(&total, countSQL, countArgs...); err != nil {
		fail("count_failed", "count failed", err)
		return
	}

	// Start from an empty, non-nil slice so an empty page encodes as [] rather
	// than null.
	results := make([]ClientErrorResponse, 0, len(rows))
	for i := range rows {
		row := &rows[i]
		item := ClientErrorResponse{
			ID:        row.ID,
			Subsystem: row.Subsystem,
			ErrorType: row.ErrorType,
			Message:   row.Message,
			URL:       row.URL,
		}
		if row.AgentID != nil {
			item.AgentID = *row.AgentID
		}
		if row.CreatedAt != nil {
			item.CreatedAt = *row.CreatedAt
		}
		if len(row.Metadata) > 0 {
			// Best effort: a row with undecodable metadata is still returned.
			if err := json.Unmarshal(row.Metadata, &item.Metadata); err != nil {
				log.Printf("[ERROR] [server] [client_error] metadata_decode_failed id=%s error=\"%v\"", row.ID, err)
			}
		}
		results = append(results, item)
	}

	totalPages := int((total + int64(pageSize) - 1) / int64(pageSize))
	c.JSON(http.StatusOK, GetErrorsResponse{
		Errors:     results,
		Total:      total,
		Page:       page,
		PageSize:   pageSize,
		TotalPages: totalPages,
	})
}
// LogErrorRequest is the JSON body accepted by LogError.
//
// The binding tags are enforced when the request is bound: subsystem,
// error_type, message and url are required; error_type must be one of the
// listed categories; message is limited to 10000 characters. Subsystem is
// limited to 50 characters to match the VARCHAR(50) column it is stored in,
// so an oversize value is rejected as a bad request instead of failing on
// insert.
type LogErrorRequest struct {
	Subsystem  string                 `json:"subsystem" binding:"required,max=50"`
	ErrorType  string                 `json:"error_type" binding:"required,oneof=javascript_error api_error ui_error validation_error"`
	Message    string                 `json:"message" binding:"required,max=10000"`
	StackTrace string                 `json:"stack_trace,omitempty"`
	Metadata   map[string]interface{} `json:"metadata,omitempty"`
	URL        string                 `json:"url" binding:"required"`
}
// LogError validates a frontend error report, writes it to the server log and
// stores it in the database.
//
// It responds with 400 when the body fails binding/validation, 500 when the
// row could not be stored, and 200 with {"logged": true} otherwise.
func (h *ClientErrorHandler) LogError(c *gin.Context) {
	var req LogErrorRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		log.Printf("[ERROR] [server] [client_error] validation_failed error=\"%v\"", err)
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request data"})
		return
	}

	// Extract agent ID from auth middleware if available; it stays nil when the
	// context value is absent or is not a uuid.UUID.
	var agentID interface{}
	if raw, exists := c.Get("agentID"); exists {
		if id, ok := raw.(uuid.UUID); ok {
			agentID = id
		}
	}

	// Log to console with HISTORY prefix. The subsystem, url and message come
	// straight from the client, so they are written with %q: embedded quotes
	// and newlines are escaped and cannot break out of the field or start a
	// forged log line.
	log.Printf("[ERROR] [server] [client] [%s] agent_id=%v subsystem=%q message=%q",
		req.ErrorType, agentID, req.Subsystem, truncate(req.Message, 200))
	log.Printf("[HISTORY] [server] [client_error] agent_id=%v subsystem=%q type=%s url=%q message=%q timestamp=%s",
		agentID, req.Subsystem, req.ErrorType, req.URL, req.Message, time.Now().Format(time.RFC3339))

	// Store in database with retry logic
	if err := h.storeError(agentID, req, c); err != nil {
		log.Printf("[ERROR] [server] [client_error] store_failed error=\"%v\"", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store error"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"logged": true})
}
// storeError inserts one client error row, retrying on failure.
//
// Up to three attempts are made, sleeping 1s and then 2s between them. The
// returned error wraps the error of the last attempt. agentID may be nil, in
// which case a nil value is passed for agent_id. The User-Agent header of the
// request behind c is stored alongside the report.
func (h *ClientErrorHandler) storeError(agentID interface{}, req LogErrorRequest, c *gin.Context) error {
	const maxRetries = 3
	const query = `INSERT INTO client_errors (agent_id, subsystem, error_type, message, stack_trace, metadata, url, user_agent)
VALUES (:agent_id, :subsystem, :error_type, :message, :stack_trace, :metadata, :url, :user_agent)`

	// Encode the metadata once, outside the retry loop. It stays an untyped nil
	// when there is nothing to store (or encoding failed), so the driver is
	// handed an unambiguous NULL rather than a typed nil byte slice.
	var metadata interface{}
	if len(req.Metadata) > 0 {
		encoded, err := json.Marshal(req.Metadata)
		if err != nil {
			log.Printf("[ERROR] [server] [client_error] metadata_marshal_failed error=\"%v\"", err)
		} else {
			metadata = json.RawMessage(encoded)
		}
	}

	args := map[string]interface{}{
		"agent_id":    agentID,
		"subsystem":   req.Subsystem,
		"error_type":  req.ErrorType,
		"message":     req.Message,
		"stack_trace": req.StackTrace,
		"metadata":    metadata,
		"url":         req.URL,
		"user_agent":  c.GetHeader("User-Agent"),
	}

	var lastErr error
	for attempt := 1; attempt <= maxRetries; attempt++ {
		_, err := h.db.NamedExec(query, args)
		if err == nil {
			return nil
		}
		lastErr = err
		// Linear backoff; no sleep after the final attempt.
		if attempt < maxRetries {
			time.Sleep(time.Duration(attempt) * time.Second)
		}
	}
	return fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
}
// truncate returns s unchanged when it is at most maxLen bytes long;
// otherwise it returns a prefix of at most maxLen bytes followed by "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: it is moved back to
// the start of the rune it would split, so valid input stays valid. A negative
// maxLen is treated as 0.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	cut := maxLen
	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back until
	// the byte at the cut position starts a rune.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}

View File

@@ -0,0 +1,66 @@
package command
import (
"errors"
"fmt"
"time"
"github.com/Fimeg/RedFlag/aggregator-server/internal/models"
"github.com/google/uuid"
)
// Factory builds AgentCommand values and runs them through its validator
// before handing them out.
type Factory struct {
	validator *Validator
}
// NewFactory returns a Factory wired to a validator from NewValidator.
func NewFactory() *Factory {
	factory := new(Factory)
	factory.validator = NewValidator()
	return factory
}
// Create builds a new AgentCommand for agentID with a freshly generated ID,
// status "pending" and a source derived from commandType, then validates it.
//
// CreatedAt and UpdatedAt are set from a single clock reading so a new
// command always starts with identical timestamps.
//
// It returns an error when the factory is nil or when validation rejects the
// command; in both cases the returned command is nil.
func (f *Factory) Create(agentID uuid.UUID, commandType string, params map[string]interface{}) (*models.AgentCommand, error) {
	if f == nil {
		return nil, errors.New("command factory is nil")
	}
	// A zero-value Factory has no validator yet; fall back to the default one.
	validator := f.validator
	if validator == nil {
		validator = NewValidator()
	}

	now := time.Now()
	cmd := &models.AgentCommand{
		ID:          uuid.New(),
		AgentID:     agentID,
		CommandType: commandType,
		Status:      "pending",
		Source:      determineSource(commandType),
		Params:      params,
		CreatedAt:   now,
		UpdatedAt:   now,
	}
	if err := validator.Validate(cmd); err != nil {
		return nil, fmt.Errorf("command validation failed: %w", err)
	}
	return cmd, nil
}
// determineSource maps a command type to its source label: "system" when
// isSystemCommand recognises the type, "manual" for everything else.
func determineSource(commandType string) string {
	source := "manual"
	if isSystemCommand(commandType) {
		source = "system"
	}
	return source
}
// isSystemCommand reports whether commandType is one of the command types
// classified as system-originated.
func isSystemCommand(commandType string) bool {
	switch commandType {
	case "enable_heartbeat",
		"disable_heartbeat",
		"update_check",
		"cleanup_old_logs":
		return true
	default:
		return false
	}
}

View File

@@ -0,0 +1,123 @@
package command
import (
"errors"
"fmt"
"github.com/google/uuid"
"github.com/Fimeg/RedFlag/aggregator-server/internal/models"
)
// Validator checks agent commands and subsystem settings against fixed limits.
//
// The scanner bounds are the inclusive range accepted by ValidateInterval.
// The check-in bounds are populated by NewValidator; no method in this file
// reads them.
type Validator struct {
	minCheckInSeconds int
	maxCheckInSeconds int
	minScannerMinutes int
	maxScannerMinutes int
}
// NewValidator returns a Validator with the default limits: check-in
// intervals of 60 to 3600 seconds and scanner intervals of 1 to 1440 minutes.
func NewValidator() *Validator {
	v := new(Validator)
	v.minCheckInSeconds = 60   // 1 minute minimum
	v.maxCheckInSeconds = 3600 // 1 hour maximum
	v.minScannerMinutes = 1    // 1 minute minimum
	v.maxScannerMinutes = 1440 // 24 hours maximum
	return v
}
// Validate checks cmd field by field and returns the first problem found:
// a nil command, a zero command or agent ID, an empty command type or status,
// a status outside pending/running/completed/failed/cancelled, a source other
// than "manual" or "system", or a command type that validateCommandType
// rejects. It returns nil when every check passes.
func (v *Validator) Validate(cmd *models.AgentCommand) error {
	if cmd == nil {
		return errors.New("command cannot be nil")
	}

	// Required fields, checked in a fixed order.
	switch {
	case cmd.ID == uuid.Nil:
		return errors.New("command ID cannot be zero UUID")
	case cmd.AgentID == uuid.Nil:
		return errors.New("agent ID is required")
	case cmd.CommandType == "":
		return errors.New("command type is required")
	case cmd.Status == "":
		return errors.New("status is required")
	}

	switch cmd.Status {
	case "pending", "running", "completed", "failed", "cancelled":
		// known status
	default:
		return fmt.Errorf("invalid status: %s", cmd.Status)
	}

	switch cmd.Source {
	case "manual", "system":
		// known source
	default:
		return fmt.Errorf("source must be 'manual' or 'system', got: %s", cmd.Source)
	}

	// Validate command type format
	return v.validateCommandType(cmd.CommandType)
}
// ValidateSubsystemAction checks that subsystem is one of storage, system,
// docker or updates, and that action is one of trigger, enable, disable or
// set_interval. It returns an error naming the unknown subsystem or the
// invalid action, and nil when both are accepted.
func (v *Validator) ValidateSubsystemAction(subsystem string, action string) error {
	switch subsystem {
	case "storage", "system", "docker", "updates":
		// known subsystem; every subsystem accepts the same action set
	default:
		return fmt.Errorf("unknown subsystem: %s", subsystem)
	}

	for _, allowed := range [...]string{"trigger", "enable", "disable", "set_interval"} {
		if allowed == action {
			return nil
		}
	}
	return fmt.Errorf("invalid action '%s' for subsystem '%s'", action, subsystem)
}
// ValidateInterval checks that minutes lies within the validator's scanner
// interval bounds (inclusive). subsystem is only used in the error message.
func (v *Validator) ValidateInterval(subsystem string, minutes int) error {
	switch {
	case minutes < v.minScannerMinutes:
		return fmt.Errorf("interval %d minutes below minimum %d for subsystem %s",
			minutes, v.minScannerMinutes, subsystem)
	case minutes > v.maxScannerMinutes:
		return fmt.Errorf("interval %d minutes above maximum %d for subsystem %s",
			minutes, v.maxScannerMinutes, subsystem)
	default:
		return nil
	}
}
// validateCommandType accepts a command type when it starts with one of the
// known prefixes and returns an error otherwise.
//
// "cleanup_" is included so that the cleanup_old_logs command, which the
// command factory classifies as a system command, passes validation.
func (v *Validator) validateCommandType(commandType string) error {
	validPrefixes := []string{"scan_", "install_", "update_", "enable_", "disable_", "cleanup_", "reboot"}
	for _, prefix := range validPrefixes {
		if len(commandType) >= len(prefix) && commandType[:len(prefix)] == prefix {
			return nil
		}
	}
	return fmt.Errorf("invalid command type format: %s", commandType)
}
// contains reports whether item is an element of slice.
func contains(slice []string, item string) bool {
	for i := 0; i < len(slice); i++ {
		if slice[i] != item {
			continue
		}
		return true
	}
	return false
}

View File

@@ -6,7 +6,8 @@
DROP INDEX IF EXISTS idx_agents_machine_id;
-- Create unique index to prevent duplicate machine IDs (allows multiple NULLs).
-- The index is built without CONCURRENTLY because CREATE INDEX CONCURRENTLY
-- cannot run inside a transaction block, and this migration is applied
-- transactionally.
CREATE UNIQUE INDEX idx_agents_machine_id_unique ON agents(machine_id) WHERE machine_id IS NOT NULL;
-- Add comment for documentation
COMMENT ON COLUMN agents.machine_id IS 'SHA-256 hash of hardware fingerprint (prevents agent impersonation via config copying)';

View File

@@ -0,0 +1,3 @@
-- Rollback migration 023: Client Error Logging Schema
-- Removes the client_errors table; its indexes are dropped together with it.
-- IF EXISTS lets the rollback run even when the table is already gone.
DROP TABLE IF EXISTS client_errors;

View File

@@ -0,0 +1,28 @@
-- Migration 023: Client Error Logging Schema
-- Implements ETHOS #1: Errors are History, Not /dev/null
CREATE TABLE client_errors (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Kept as history when the agent is deleted: the reference is nulled, the row stays
agent_id UUID REFERENCES agents(id) ON DELETE SET NULL,
subsystem VARCHAR(50) NOT NULL,
error_type VARCHAR(50) NOT NULL,
message TEXT NOT NULL,
stack_trace TEXT,
metadata JSONB,
url TEXT NOT NULL,
user_agent TEXT,
-- NOT NULL: every listing orders by and returns created_at as a plain timestamp
created_at TIMESTAMP NOT NULL DEFAULT NOW()
);
-- Indexes for efficient querying (each filter column paired with newest-first ordering)
CREATE INDEX idx_client_errors_agent_time ON client_errors(agent_id, created_at DESC);
CREATE INDEX idx_client_errors_subsystem_time ON client_errors(subsystem, created_at DESC);
CREATE INDEX idx_client_errors_error_type_time ON client_errors(error_type, created_at DESC);
CREATE INDEX idx_client_errors_created_at ON client_errors(created_at DESC);
-- Comments for documentation
COMMENT ON TABLE client_errors IS 'Frontend error logs for debugging and auditing. Implements ETHOS #1.';
COMMENT ON COLUMN client_errors.agent_id IS 'Agent active when error occurred (NULL for pre-auth errors)';
COMMENT ON COLUMN client_errors.subsystem IS 'RedFlag subsystem being used (storage, system, docker, etc.)';
COMMENT ON COLUMN client_errors.error_type IS 'Error category: javascript_error, api_error, ui_error, validation_error';
COMMENT ON COLUMN client_errors.metadata IS 'Additional context (component name, API response, user actions)';

View File

@@ -0,0 +1,5 @@
-- Rollback migration 023a: Command Deduplication Schema
-- Remove the partial unique index that limits pending commands per agent and command type.
DROP INDEX IF EXISTS idx_agent_pending_subsystem;
-- Remove the idempotency key column. PostgreSQL drops indexes and constraints
-- that involve a dropped column along with it.
ALTER TABLE agent_commands DROP COLUMN IF EXISTS idempotency_key;
-- Safeguard: normally a no-op after the column drop above; IF EXISTS keeps it harmless.
DROP INDEX IF EXISTS idx_agent_commands_idempotency_key;

View File

@@ -0,0 +1,16 @@
-- Migration 023a: Command Deduplication Schema
-- Prevents multiple pending commands of the same type per agent
-- Partial unique index: only rows with status = 'pending' are covered, so at
-- most one pending command per (agent_id, command_type) can exist at a time
CREATE UNIQUE INDEX idx_agent_pending_subsystem
ON agent_commands(agent_id, command_type, status)
WHERE status = 'pending';
-- Add idempotency key support for retry scenarios.
-- Uniqueness is enforced by the named unique index below instead of an inline
-- UNIQUE constraint, so the column is backed by one index rather than two.
-- NULL keys are allowed and do not collide with each other.
ALTER TABLE agent_commands ADD COLUMN idempotency_key VARCHAR(64) NULL;
CREATE UNIQUE INDEX idx_agent_commands_idempotency_key ON agent_commands(idempotency_key);
-- Comments for documentation
COMMENT ON TABLE agent_commands IS 'Commands sent to agents for execution';
COMMENT ON COLUMN agent_commands.idempotency_key IS
'Prevents duplicate command creation from retry logic. Based on agent_id + subsystem + timestamp window.';

View File

@@ -1,6 +1,7 @@
package models
import (
"errors"
"time"
"github.com/google/uuid"
@@ -16,12 +17,52 @@ type AgentCommand struct {
Source string `json:"source" db:"source"`
Signature string `json:"signature,omitempty" db:"signature"`
CreatedAt time.Time `json:"created_at" db:"created_at"`
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
SentAt *time.Time `json:"sent_at,omitempty" db:"sent_at"`
CompletedAt *time.Time `json:"completed_at,omitempty" db:"completed_at"`
Result JSONB `json:"result,omitempty" db:"result"`
RetriedFromID *uuid.UUID `json:"retried_from_id,omitempty" db:"retried_from_id"`
}
// Validate reports the first missing or invalid required field of the
// command as one of the predefined Err* values, or nil when the ID, agent ID,
// command type, status and source are all acceptable.
func (c *AgentCommand) Validate() error {
	switch {
	case c.ID == uuid.Nil:
		return ErrCommandIDRequired
	case c.AgentID == uuid.Nil:
		return ErrAgentIDRequired
	case c.CommandType == "":
		return ErrCommandTypeRequired
	case c.Status == "":
		return ErrStatusRequired
	}

	switch c.Source {
	case "manual", "system":
		return nil
	default:
		return ErrInvalidSource
	}
}
// IsTerminal reports whether the command's status is completed, failed or
// cancelled.
func (c *AgentCommand) IsTerminal() bool {
	switch c.Status {
	case "completed", "failed", "cancelled":
		return true
	default:
		return false
	}
}
// CanRetry reports whether the command is eligible for a retry: its status
// must be "failed" and it must not itself have been retried from another
// command (RetriedFromID is nil).
func (c *AgentCommand) CanRetry() bool {
	if c.Status != "failed" {
		return false
	}
	return c.RetriedFromID == nil
}
// Sentinel errors returned by AgentCommand.Validate.

// ErrCommandIDRequired is returned when the command ID is the zero UUID.
var ErrCommandIDRequired = errors.New("command ID cannot be zero UUID")

// ErrAgentIDRequired is returned when the agent ID is the zero UUID.
var ErrAgentIDRequired = errors.New("agent ID is required")

// ErrCommandTypeRequired is returned when the command type is empty.
var ErrCommandTypeRequired = errors.New("command type is required")

// ErrStatusRequired is returned when the status is empty.
var ErrStatusRequired = errors.New("status is required")

// ErrInvalidSource is returned when the source is neither "manual" nor "system".
var ErrInvalidSource = errors.New("source must be 'manual' or 'system'")
// CommandsResponse is returned when an agent checks in for commands
type CommandsResponse struct {
Commands []CommandItem `json:"commands"`

View File

@@ -7,24 +7,37 @@ import (
// Version coordination for Server Authority model
// The server is the single source of truth for all version information
//
// Version Sources:
// - Agent versions: Compiled into agent via ldflags during build (see agent/internal/version)
// - Server versions: Compiled into server via ldflags during build (injected below)
// - Database: agents table stores agent_version at registration
// Build-time injected version information (SERVER AUTHORITY).
// Each value defaults to "dev" and is meant to be overwritten by the build
// script through ldflags when the server is compiled.

// AgentVersion is the server's agent version (format: 0.1.27).
var AgentVersion = "dev"

// ConfigVersion is the config schema version (format: 3).
var ConfigVersion = "dev"

// MinAgentVersion is the minimum supported agent version.
var MinAgentVersion = "dev"
// CurrentVersions holds the authoritative version information for API
// responses. Each field is declared exactly once; the examples show the
// expected value formats.
type CurrentVersions struct {
	AgentVersion    string    `json:"agent_version"`     // e.g., "0.1.27"
	ConfigVersion   string    `json:"config_version"`    // e.g., "3"
	MinAgentVersion string    `json:"min_agent_version"` // e.g., "0.1.22"
	BuildTime       time.Time `json:"build_time"`
}
// GetCurrentVersions returns the current version information.
//
// The agent, config and minimum-agent versions come from the package-level
// variables that are compiled into the server binary at build time via
// ldflags, so versions can change without code changes.
// See Dockerfile for injection via: -ldflags "-X .../version.AgentVersion=0.1.27"
func GetCurrentVersions() CurrentVersions {
	return CurrentVersions{
		AgentVersion:    AgentVersion,
		ConfigVersion:   ConfigVersion,
		MinAgentVersion: MinAgentVersion,
		// NOTE(review): this is the time of the call, not a build timestamp —
		// confirm whether a build-time value should be injected instead.
		BuildTime: time.Now(),
	}
}