Files
Redflag/aggregator-server/internal/api/handlers/client_errors.go
jpetree331 f97d4845af feat(security): A-1 Ed25519 key rotation + A-2 replay attack fixes
Complete RedFlag codebase with two major security audit implementations.

== A-1: Ed25519 Key Rotation Support ==

Server:
- SignCommand sets SignedAt timestamp and KeyID on every signature
- signing_keys database table (migration 020) for multi-key rotation
- InitializePrimaryKey registers active key at startup
- /api/v1/public-keys endpoint for rotation-aware agents
- SigningKeyQueries for key lifecycle management

Agent:
- Key-ID-aware verification via CheckKeyRotation
- FetchAndCacheAllActiveKeys for rotation pre-caching
- Cache metadata with TTL and staleness fallback
- SecurityLogger events for key rotation and command signing

== A-2: Replay Attack Fixes (F-1 through F-7) ==

F-5 CRITICAL - RetryCommand now signs via signAndCreateCommand
F-1 HIGH     - v3 format: "{agent_id}:{cmd_id}:{type}:{hash}:{ts}"
F-7 HIGH     - Migration 026: expires_at column with partial index
F-6 HIGH     - GetPendingCommands/GetStuckCommands filter by expires_at
F-2 HIGH     - Agent-side executedIDs dedup map with cleanup
F-4 HIGH     - commandMaxAge reduced from 24h to 4h
F-3 CRITICAL - Old-format commands rejected after 48h via CreatedAt

Verification fixes: migration idempotency (ETHOS #4), log format
compliance (ETHOS #1), stale comments updated.

All 24 tests passing. Docker --no-cache build verified.
See docs/ for full audit reports and deviation log (DEV-001 to DEV-019).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 21:25:47 -04:00

224 lines
6.8 KiB
Go

package handlers
import (
"encoding/json"
"fmt"
"log"
"net/http"
"time"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/jmoiron/sqlx"
)
// ClientErrorHandler serves the HTTP endpoints that record and list errors
// reported by the frontend (ETHOS #1).
type ClientErrorHandler struct {
	// db is the database handle used for every client_errors query.
	db *sqlx.DB
}
// NewClientErrorHandler builds a ClientErrorHandler that runs its queries
// against db.
func NewClientErrorHandler(db *sqlx.DB) *ClientErrorHandler {
	handler := new(ClientErrorHandler)
	handler.db = db
	return handler
}
// GetErrorsResponse is the JSON body returned by GetErrors: one page of
// client errors plus the pagination bookkeeping for it.
type GetErrorsResponse struct {
	// Errors holds the rows of the requested page.
	Errors []ClientErrorResponse `json:"errors"`
	// Total is the number of rows matching the filters across all pages.
	Total int64 `json:"total"`
	// Page is the page number that was served.
	Page int `json:"page"`
	// PageSize is the page size that was applied.
	PageSize int `json:"page_size"`
	// TotalPages is Total divided by PageSize, rounded up.
	TotalPages int `json:"total_pages"`
}
// ClientErrorResponse is the JSON shape of one stored client error as it is
// returned to API consumers.
type ClientErrorResponse struct {
	ID string `json:"id"`
	// AgentID is left out of the JSON output when empty.
	AgentID   string `json:"agent_id,omitempty"`
	Subsystem string `json:"subsystem"`
	ErrorType string `json:"error_type"`
	Message   string `json:"message"`
	// Metadata is left out of the JSON output when empty.
	Metadata  map[string]interface{} `json:"metadata,omitempty"`
	URL       string                 `json:"url"`
	CreatedAt time.Time              `json:"created_at"`
}
// GetErrors returns a paginated, optionally filtered list of stored client
// errors, newest first (admin only).
//
// Query parameters:
//   - page: 1-based page number; missing, malformed, or < 1 falls back to 1.
//   - page_size: rows per page; missing, malformed, or < 1 falls back to 50,
//     values above 100 are capped at 100.
//   - subsystem, error_type, agent_id: optional exact-match filters.
//
// Responds with 200 and a GetErrorsResponse, 400 when agent_id is not a valid
// UUID, or 500 when a database query fails.
func (h *ClientErrorHandler) GetErrors(c *gin.Context) {
	const (
		defaultPageSize = 50
		maxPageSize     = 100
	)

	// Parse pagination params. Values below 1 are rejected because they would
	// yield a negative OFFSET/LIMIT and a division by zero when computing the
	// page count.
	page := 1
	if raw, ok := c.GetQuery("page"); ok {
		if _, err := fmt.Sscanf(raw, "%d", &page); err != nil || page < 1 {
			page = 1
		}
	}
	pageSize := defaultPageSize
	if raw, ok := c.GetQuery("page_size"); ok {
		if _, err := fmt.Sscanf(raw, "%d", &pageSize); err != nil || pageSize < 1 {
			pageSize = defaultPageSize
		}
	}
	if pageSize > maxPageSize {
		pageSize = maxPageSize
	}

	// Build the filter clause once so the list and count queries cannot drift
	// apart.
	filters := ""
	params := map[string]interface{}{}
	if subsystem := c.Query("subsystem"); subsystem != "" {
		filters += " AND subsystem = :subsystem"
		params["subsystem"] = subsystem
	}
	if errorType := c.Query("error_type"); errorType != "" {
		filters += " AND error_type = :error_type"
		params["error_type"] = errorType
	}
	if agentIDStr := c.Query("agent_id"); agentIDStr != "" {
		agentID, err := uuid.Parse(agentIDStr)
		if err != nil {
			c.JSON(http.StatusBadRequest, gin.H{"error": "invalid agent_id"})
			return
		}
		filters += " AND agent_id = :agent_id"
		params["agent_id"] = agentID.String()
	}

	listQuery := `SELECT id, agent_id, subsystem, error_type, message, metadata, url, created_at
		FROM client_errors
		WHERE 1=1` + filters + " ORDER BY created_at DESC LIMIT :limit OFFSET :offset"
	countQuery := `SELECT COUNT(*) FROM client_errors WHERE 1=1` + filters

	// errorRow is the scan target for one client_errors row. It carries
	// explicit db tags and types that tolerate a NULL agent_id / metadata,
	// which the JSON response type does not.
	// NOTE(review): assumes the remaining selected columns are NOT NULL — confirm against the migration.
	type errorRow struct {
		ID        string    `db:"id"`
		AgentID   *string   `db:"agent_id"`
		Subsystem string    `db:"subsystem"`
		ErrorType string    `db:"error_type"`
		Message   string    `db:"message"`
		Metadata  []byte    `db:"metadata"`
		URL       string    `db:"url"`
		CreatedAt time.Time `db:"created_at"`
	}

	// Select/Get only understand positional bindvars, so the named
	// placeholders are resolved with BindNamed first.
	params["limit"] = pageSize
	params["offset"] = (page - 1) * pageSize
	boundList, listArgs, err := h.db.BindNamed(listQuery, params)
	if err != nil {
		log.Printf("[ERROR] [server] [client_error] query_failed error=\"%v\"", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}
	var rows []errorRow
	if err := h.db.Select(&rows, boundList, listArgs...); err != nil {
		log.Printf("[ERROR] [server] [client_error] query_failed error=\"%v\"", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}

	// Convert rows to the response shape. The slice is non-nil so an empty
	// page serializes as [] rather than null.
	results := make([]ClientErrorResponse, 0, len(rows))
	for i := range rows {
		row := &rows[i]
		item := ClientErrorResponse{
			ID:        row.ID,
			Subsystem: row.Subsystem,
			ErrorType: row.ErrorType,
			Message:   row.Message,
			URL:       row.URL,
			CreatedAt: row.CreatedAt,
		}
		if row.AgentID != nil {
			item.AgentID = *row.AgentID
		}
		if len(row.Metadata) > 0 {
			// A row with undecodable metadata is still returned, just
			// without its metadata.
			if err := json.Unmarshal(row.Metadata, &item.Metadata); err != nil {
				log.Printf("[ERROR] [server] [client_error] metadata_unmarshal_failed id=%s error=\"%v\"", row.ID, err)
				item.Metadata = nil
			}
		}
		results = append(results, item)
	}

	// Get total count for the same filters.
	boundCount, countArgs, err := h.db.BindNamed(countQuery, params)
	if err != nil {
		log.Printf("[ERROR] [server] [client_error] count_failed error=\"%v\"", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "count failed"})
		return
	}
	var total int64
	if err := h.db.Get(&total, boundCount, countArgs...); err != nil {
		log.Printf("[ERROR] [server] [client_error] count_failed error=\"%v\"", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "count failed"})
		return
	}

	// Ceiling division; pageSize is guaranteed to be >= 1 here.
	totalPages := int((total + int64(pageSize) - 1) / int64(pageSize))

	c.JSON(http.StatusOK, GetErrorsResponse{
		Errors:     results,
		Total:      total,
		Page:       page,
		PageSize:   pageSize,
		TotalPages: totalPages,
	})
}
// LogErrorRequest is the JSON payload the frontend sends to report one error.
// The binding tags declare the validation rules applied when the request is
// bound.
type LogErrorRequest struct {
	// Subsystem is required.
	Subsystem string `json:"subsystem" binding:"required"`
	// ErrorType is required and must be one of the values listed in the tag.
	ErrorType string `json:"error_type" binding:"required,oneof=javascript_error api_error ui_error validation_error"`
	// Message is required and limited to 10000 by the max rule.
	Message string `json:"message" binding:"required,max=10000"`
	// StackTrace is optional.
	StackTrace string `json:"stack_trace,omitempty"`
	// Metadata is optional free-form context.
	Metadata map[string]interface{} `json:"metadata,omitempty"`
	// URL is required.
	URL string `json:"url" binding:"required"`
}
// LogError validates a frontend error report, writes it to the server log,
// and persists it via storeError.
//
// Responds with 400 when the body fails validation, 500 when the row cannot
// be stored, and 200 {"logged": true} otherwise.
func (h *ClientErrorHandler) LogError(c *gin.Context) {
	var req LogErrorRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		log.Printf("[ERROR] [server] [client_error] validation_failed error=\"%v\"", err)
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request data"})
		return
	}

	// Extract agent ID from auth middleware if available; it stays nil when
	// the context value is absent or not a uuid.UUID.
	var agentID interface{}
	if agentIDValue, exists := c.Get("agentID"); exists {
		if id, ok := agentIDValue.(uuid.UUID); ok {
			agentID = id
		}
	}

	// The request fields are client-controlled, so they are escaped before
	// being logged: an embedded newline or quote could otherwise forge
	// additional log entries. %q leaves ordinary printable text unchanged.
	// For the unquoted subsystem field the surrounding quotes that %q adds
	// are stripped again to keep the key=value shape of the line.
	subsystem := fmt.Sprintf("%q", req.Subsystem)
	subsystem = subsystem[1 : len(subsystem)-1]

	// Log to console: a short ERROR line plus a full HISTORY line.
	log.Printf("[ERROR] [server] [client] [%s] agent_id=%v subsystem=%s message=%q",
		req.ErrorType, agentID, subsystem, truncate(req.Message, 200))
	log.Printf("[HISTORY] [server] [client_error] agent_id=%v subsystem=%s type=%s url=%q message=%q timestamp=%s",
		agentID, subsystem, req.ErrorType, req.URL, req.Message, time.Now().Format(time.RFC3339))

	// Store in database with retry logic.
	if err := h.storeError(agentID, req, c); err != nil {
		log.Printf("[ERROR] [server] [client_error] store_failed error=\"%v\"", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store error"})
		return
	}

	c.JSON(http.StatusOK, gin.H{"logged": true})
}
// storeError inserts one client error row into client_errors, retrying on
// failure.
//
// agentID is bound as-is to the agent_id column (nil when the request carried
// no agent ID). The User-Agent header is read from c. Up to three attempts are
// made, sleeping 1s and then 2s between them; if all fail, the last error is
// returned wrapped.
func (h *ClientErrorHandler) storeError(agentID interface{}, req LogErrorRequest, c *gin.Context) error {
	const maxRetries = 3
	const query = `INSERT INTO client_errors (agent_id, subsystem, error_type, message, stack_trace, metadata, url, user_agent)
		VALUES (:agent_id, :subsystem, :error_type, :message, :stack_trace, :metadata, :url, :user_agent)`

	// Convert metadata map to JSON for the PostgreSQL JSONB column. The value
	// stays an untyped nil when there is no (usable) metadata so that it is
	// bound as SQL NULL; a nil byte slice may instead be sent as an empty
	// string, which is not valid JSON.
	var metadata interface{}
	if len(req.Metadata) > 0 {
		encoded, err := json.Marshal(req.Metadata)
		if err != nil {
			log.Printf("[ERROR] [server] [client_error] metadata_marshal_failed error=\"%v\"", err)
		} else {
			metadata = json.RawMessage(encoded)
		}
	}

	// The bound arguments do not change between attempts, so build them once.
	args := map[string]interface{}{
		"agent_id":    agentID,
		"subsystem":   req.Subsystem,
		"error_type":  req.ErrorType,
		"message":     req.Message,
		"stack_trace": req.StackTrace,
		"metadata":    metadata,
		"url":         req.URL,
		"user_agent":  c.GetHeader("User-Agent"),
	}

	var lastErr error
	for attempt := 1; attempt <= maxRetries; attempt++ {
		if _, lastErr = h.db.NamedExec(query, args); lastErr == nil {
			return nil
		}
		// Linear backoff; no sleep after the final attempt.
		if attempt < maxRetries {
			time.Sleep(time.Duration(attempt) * time.Second)
		}
	}
	return fmt.Errorf("failed after %d attempts: %w", maxRetries, lastErr)
}
// truncate shortens s to at most maxLen bytes and appends "..." when anything
// was cut off. Strings that already fit are returned unchanged.
//
// The cut is moved back to the nearest rune boundary so a multi-byte UTF-8
// character is never split in half. A negative maxLen is treated as 0.
func truncate(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed maxLen.
	cut := 0
	for i := range s {
		if i > maxLen {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}