Fix zero UUID and storage metrics issues

- Add ID generation in signAndCreateCommand to prevent zero UUIDs
- Fix storage metrics metadata field (remove pq.Array)
- Add IdempotencyKey field to AgentCommand model

These issues were causing duplicate key violations and storage page errors.

2025-12-20 14:26:55 -05:00
parent e7a8cc90dd
commit 642eec5d44
8 changed files with 15 additions and 289 deletions
--- a/aggregator-server/internal/api/handlers/subsystems.go
+++ b/aggregator-server/internal/api/handlers/subsystems.go
@@ -32,6 +32,19 @@ func NewSubsystemHandler(sq *queries.SubsystemQueries, cq *queries.CommandQuerie
 // signAndCreateCommand signs a command if signing service is enabled, then stores it in the database
 func (h *SubsystemHandler) signAndCreateCommand(cmd *models.AgentCommand) error {
 	// Generate ID if not set (prevents zero UUID issues)
 	if cmd.ID == uuid.Nil {
 		cmd.ID = uuid.New()
 	}
 	// Set timestamps if not set
 	if cmd.CreatedAt.IsZero() {
 		cmd.CreatedAt = time.Now()
 	}
 	if cmd.UpdatedAt.IsZero() {
 		cmd.UpdatedAt = time.Now()
 	}
 	// Sign the command before storing
 	if h.signingService != nil && h.signingService.IsEnabled() {
 		signature, err := h.signingService.SignCommand(cmd)
--- a/aggregator-server/internal/database/queries/storage_metrics.go
+++ b/aggregator-server/internal/database/queries/storage_metrics.go
@@ -34,7 +34,7 @@ func (q *StorageMetricsQueries) InsertStorageMetric(ctx context.Context, metric
 		metric.ID, metric.AgentID, metric.Mountpoint, metric.Device,
 		metric.DiskType, metric.Filesystem, metric.TotalBytes,
 		metric.UsedBytes, metric.AvailableBytes, metric.UsedPercent,
-		metric.Severity, pq.Array(metric.Metadata), metric.CreatedAt,
+		metric.Severity, metric.Metadata, metric.CreatedAt,
 	)
 	if err != nil {
--- a/aggregator-server/internal/models/command.go
+++ b/aggregator-server/internal/models/command.go
@@ -16,6 +16,7 @@ type AgentCommand struct {
 	Status         string     `json:"status" db:"status"`
 	Source         string     `json:"source" db:"source"`
 	Signature      string     `json:"signature,omitempty" db:"signature"`
 	IdempotencyKey *string    `json:"idempotency_key,omitempty" db:"idempotency_key"`
 	CreatedAt      time.Time  `json:"created_at" db:"created_at"`
 	UpdatedAt      time.Time  `json:"updated_at" db:"updated_at"`
 	SentAt         *time.Time `json:"sent_at,omitempty" db:"sent_at"`
--- a/restart_and_fix.sh
+++ b/restart_and_fix.sh
@@ -1,35 +0,0 @@
 #!/bin/bash
 # RedFlag database recovery helper.
 #
 # Stops the docker-compose stack, removes the postgres data volume
 # (this DELETES ALL DATA), starts the stack again, waits for postgres to
 # accept connections, and prints the tail of the server log so the
 # operator can confirm that startup completed.
 echo "=== RedFlag Database Recovery Script ==="
 echo
 echo "1. Stopping containers..."
 docker-compose down
 echo
 echo "2. Removing postgres volume to reset database (WARNING: This will delete all data)..."
 # Surface a failed removal instead of hiding it: if the volume is still
 # present the database is NOT reset and the rest of the output is misleading.
 if ! docker volume rm redflag_postgres-data 2>/dev/null; then
    echo "   Note: volume redflag_postgres-data was not removed (it may not exist or may still be in use)" >&2
 fi
 echo
 echo "3. Starting containers with fixed configuration..."
 docker-compose up -d
 echo
 echo "4. Waiting for database to be ready..."
 # Poll for readiness (up to ~30s) instead of sleeping a fixed 10s and
 # probing once; a slow start no longer produces a false "not ready".
 db_ready=false
 for ((attempt = 0; attempt < 30; attempt++)); do
    if docker exec redflag-postgres pg_isready -U redflag >/dev/null 2>&1; then
        db_ready=true
        break
    fi
    sleep 1
 done
 # Print the final status line exactly once, as the original script did.
 docker exec redflag-postgres pg_isready -U redflag
 if [ "$db_ready" != true ]; then
    echo "   Warning: database did not report ready within 30 seconds" >&2
 fi
 echo
 echo "5. Checking server logs (tail):"
 echo "=========================="
 docker logs redflag-server --tail 50
 echo
 echo "=========================="
 echo "If migrations ran successfully, you should see:"
 echo "- Database migrations completed message"
 echo "- Default security settings initialized"
 echo "- Admin user ensured"
 echo
 echo "The server should now be accessible at http://localhost:8080"
 # Do not print credentials: a password hard-coded here ends up in version
 # control, terminal scrollback, and CI logs.
 echo "Login with the admin account and the password configured for this deployment"
--- a/scanning_ux_summary.txt
+++ b/scanning_ux_summary.txt
@@ -1,50 +0,0 @@
 ## Summary: Why History Shows "SCAN" Generically
 **The Confusion You See**:
 - Each subsystem has its own "Scan" button (✅ correct)
 - But history only shows generic "SCAN" (❌ confusing)
 **The Implementation Flow**:
 ```
 You click: "Scan Storage" button
  → UI passes: subsystem="storage"  ✅
  → Backend creates: command_type="scan_storage"  ✅
  → Agent runs: handleScanStorage()  ✅
  → Results stored: updates=[4 items]  ✅
  → History logged: action="scan"  ❌ (should be "storage scan" or similar)
 ```
 **Root Cause**: 
 The history table's `action` field stores only the generic value "scan" and omits the subsystem context, even though:
 - Backend knows it's "scan_storage"
 - UI sends subsystem parameter
 - Results are subsystem-specific
 **The Result**:
 ```
 History shows unhelpful entries like:
  [14:20] SCAN → Success → 4 updates found
  [14:19] SCAN → Success → 461 updates found
 Which subsystem found which updates? Unknown from history.
 ```
 **This is a UX Issue, NOT a Bug**:
 - ✅ Scans run for correct subsystems
 - ✅ Results are accurate
 - ✅ Backend distinguishes types ("scan_storage", "scan_system", "scan_docker")
 - ❌ History display is generic "SCAN" instead of "Storage Scan", "System Scan", "Docker Scan"
 **Why It Happened**:
 - Early design had simple action types ("scan", "install", "upgrade")
 - Later added docker/storage/system scans
 - Database schema never evolved to include subsystem context
 - History display just shows action field directly
 **Files Involved**:
 - ✅ Working: AgentHealth.tsx (per-subsystem scan buttons)
 - ✅ Working: Backend API (creates "scan_storage", "scan_system", etc.)
 - ❌ Broken: History logging (stores only "scan", not subsystem)
 - ❌ Broken: History display (shows generic text, no subsystem parsing)
 **Full Analysis**: `/home/casey/Projects/RedFlag/UX_ISSUE_ANALYSIS_scan_history.md`
--- a/149
+++ b/149
@@ -1,149 +0,0 @@
 #!/bin/bash
 # Installer preamble: validates the optional target argument, defines the
 # download locations, and records which helper tools are available.
 set -e
 # Optional first argument: a release channel or an explicit version.
 TARGET="$1"
 # Only "stable", "latest", or MAJOR.MINOR.PATCH (with an optional
 # "-suffix" containing no whitespace) are accepted.
 target_pattern='^(stable|latest|[0-9]+\.[0-9]+\.[0-9]+(-[^[:space:]]+)?)$'
 if [[ -n "$TARGET" && ! "$TARGET" =~ $target_pattern ]]; then
    echo "Usage: $0 [stable|latest|VERSION]" >&2
    exit 1
 fi
 DOWNLOAD_DIR="$HOME/.claude/downloads"
 GCS_BUCKET="https://storage.googleapis.com/claude-code-dist-86c565f3-f756-42ad-8dfa-d59b1c096819/claude-code-releases"
 # Choose the first HTTP client found on PATH, preferring curl over wget.
 DOWNLOADER=""
 for candidate in curl wget; do
    if command -v "$candidate" >/dev/null 2>&1; then
        DOWNLOADER="$candidate"
        break
    fi
 done
 if [ -z "$DOWNLOADER" ]; then
    echo "Either curl or wget is required but neither is installed" >&2
    exit 1
 fi
 # jq is optional; remember whether it is installed.
 if command -v jq >/dev/null 2>&1; then
    HAS_JQ=true
 else
    HAS_JQ=false
 fi
 # download_file URL [OUTPUT]
 # Fetches URL with the tool named in $DOWNLOADER (curl or wget). When
 # OUTPUT is non-empty the body is written to that path; otherwise it is
 # streamed to stdout. Returns the tool's exit status, or 1 when
 # $DOWNLOADER names neither tool.
 download_file() {
    local src="$1"
    local dest="$2"
    case "$DOWNLOADER" in
        curl)
            if [ -z "$dest" ]; then
                curl -fsSL "$src"
            else
                curl -fsSL -o "$dest" "$src"
            fi
            ;;
        wget)
            if [ -z "$dest" ]; then
                wget -q -O - "$src"
            else
                wget -q -O "$dest" "$src"
            fi
            ;;
        *)
            return 1
            ;;
    esac
 }
 # get_checksum_from_manifest JSON PLATFORM
 # Minimal fallback parser used when jq is not available. Prints the
 # 64-character lowercase hex checksum that follows the "PLATFORM" key in
 # JSON and returns 0; returns 1 (printing nothing) when no such checksum
 # is found.
 get_checksum_from_manifest() {
    local json="$1"
    local platform="$2"
    # Flatten to a single line and squeeze runs of spaces. `tr -s` is used
    # instead of sed's `\+`, which is a GNU extension not defined for POSIX
    # basic regular expressions; printf avoids echo's option/escape handling.
    json=$(printf '%s' "$json" | tr -d '\n\r\t' | tr -s ' ')
    # [^}]* keeps the match inside the platform's own object so a checksum
    # belonging to a later platform is never picked up.
    if [[ $json =~ \"$platform\"[^}]*\"checksum\"[[:space:]]*:[[:space:]]*\"([a-f0-9]{64})\" ]]; then
        echo "${BASH_REMATCH[1]}"
        return 0
    fi
    return 1
 }
 # Main flow: detect the platform, resolve the stable version, download the
 # matching binary, verify its SHA256 against the manifest, then hand over
 # to the binary's own `install` subcommand.

 # Detect operating system. Windows-like shells get the original message;
 # every other unknown system is reported by its real name.
 case "$(uname -s)" in
    Darwin) os="darwin" ;;
    Linux) os="linux" ;;
    MINGW*|MSYS*|CYGWIN*) echo "Windows is not supported" >&2; exit 1 ;;
    *) echo "Unsupported operating system: $(uname -s)" >&2; exit 1 ;;
 esac
 case "$(uname -m)" in
    x86_64|amd64) arch="x64" ;;
    arm64|aarch64) arch="arm64" ;;
    *) echo "Unsupported architecture: $(uname -m)" >&2; exit 1 ;;
 esac
 # Check for musl on Linux and adjust platform accordingly
 if [ "$os" = "linux" ]; then
    if [ -f /lib/libc.musl-x86_64.so.1 ] || [ -f /lib/libc.musl-aarch64.so.1 ] || ldd /bin/ls 2>&1 | grep -q musl; then
        platform="linux-${arch}-musl"
    else
        platform="linux-${arch}"
    fi
 else
    platform="${os}-${arch}"
 fi
 mkdir -p "$DOWNLOAD_DIR"
 # Always download stable version (which has the most up-to-date installer)
 version=$(download_file "$GCS_BUCKET/stable")
 # An empty version would silently produce malformed URLs below.
 if [ -z "$version" ]; then
    echo "Could not determine the stable version" >&2
    exit 1
 fi
 # Download manifest and extract checksum
 manifest_json=$(download_file "$GCS_BUCKET/$version/manifest.json")
 # Use jq if available, otherwise fall back to pure bash parsing.
 # `|| checksum=""` matters: under `set -e` a failing command substitution
 # in a plain assignment aborts the script before the message below prints.
 if [ "$HAS_JQ" = true ]; then
    checksum=$(echo "$manifest_json" | jq -r ".platforms[\"$platform\"].checksum // empty") || checksum=""
 else
    checksum=$(get_checksum_from_manifest "$manifest_json" "$platform") || checksum=""
 fi
 # Validate checksum format (SHA256 = 64 hex characters)
 if [ -z "$checksum" ] || [[ ! "$checksum" =~ ^[a-f0-9]{64}$ ]]; then
    echo "Platform $platform not found in manifest" >&2
    exit 1
 fi
 # Download and verify
 binary_path="$DOWNLOAD_DIR/claude-$version-$platform"
 # Remove the downloaded file on every exit path, including a failing
 # `install` step that `set -e` turns into an immediate exit.
 trap 'rm -f "$binary_path"' EXIT
 if ! download_file "$GCS_BUCKET/$version/$platform/claude" "$binary_path"; then
    echo "Download failed" >&2
    exit 1
 fi
 # Pick the right checksum tool
 if [ "$os" = "darwin" ]; then
    actual=$(shasum -a 256 "$binary_path" | cut -d' ' -f1)
 else
    actual=$(sha256sum "$binary_path" | cut -d' ' -f1)
 fi
 if [ "$actual" != "$checksum" ]; then
    echo "Checksum verification failed" >&2
    exit 1
 fi
 chmod +x "$binary_path"
 # Run claude install to set up launcher and shell integration
 echo "Setting up Claude Code..."
 "$binary_path" install ${TARGET:+"$TARGET"}
 # Clean up downloaded file
 rm -f "$binary_path"
 echo ""
 echo "✅ Installation complete!"
 echo ""
--- a/BIN
+++ b/BIN
--- a/test_install_commands.sh
+++ b/test_install_commands.sh
@@ -1,54 +0,0 @@
 #!/bin/bash
 # Prints the corrected RedFlag agent installation commands and probes the
 # local install endpoint. Informational only: apart from the endpoint
 # probe, every line of output is fixed text.

 # say LINE...  -- print each argument on its own line ("" gives a blank line).
 say() {
    printf '%s\n' "$@"
 }

 say "=== Testing RedFlag Agent Installation Command Generation ===" ""

 # Test 1: probe the install endpoint; an unreachable server is only a warning.
 say "1. Testing API endpoint availability..."
 install_url="http://localhost:8080/api/v1/install/linux"
 if curl -sfL "$install_url" > /dev/null 2>&1; then
    probe_result="✓ API endpoint /api/v1/install/linux is accessible"
 else
    probe_result="⚠ API endpoint not accessible (server may not be running)"
 fi
 say "$probe_result" ""

 # Test 2: show examples of the corrected commands.
 say "2. Corrected Installation Commands:" \
    "-----------------------------------" \
    "" \
    "For Registration Token API (when creating a new token):" \
    'curl -sfL "http://localhost:8080/api/v1/install/linux?token=YOUR_TOKEN_HERE" | sudo bash' \
    "" \
    "For Agent Settings UI (Linux):" \
    'curl -sfL "http://localhost:8080/api/v1/install/linux?token=YOUR_TOKEN_HERE" | sudo bash' \
    "" \
    "For Agent Settings UI (Windows PowerShell):" \
    'iwr "http://localhost:8080/api/v1/install/windows?token=YOUR_TOKEN_HERE" -OutFile install.bat; .\install.bat' \
    ""

 # Test 3: list the variables passed to the install template.
 say "3. Template Variables Check:" \
    "-----------------------------" \
    "The following variables are now provided to the install template:" \
    "- AgentUser: redflag-agent" \
    "- AgentHome: /var/lib/redflag-agent" \
    "- ConfigDir: /etc/redflag" \
    "- LogDir: /var/log/redflag" \
    ""

 say "=== Summary of Fixes ===" \
    "✓ Fixed registration token API command generation" \
    "✓ Fixed agent settings UI command generation (both TokenManagement and AgentManagement)" \
    "✓ Fixed template error by adding .AgentUser and related variables" \
    "✓ Changed from command-line args to query parameters for token passing" \
    "✓ Added proper protocol handling (http:// for localhost)" \
    "✓ Added sudo to the bash command for proper permissions" \
    ""

 say "All installation commands now use the correct format:" \
    'curl -sfL "http://localhost:8080/api/v1/install/linux?token=<TOKEN>" | sudo bash' \
    ""