fix(identity): D-1 machine ID deduplication fixes

- Remove unhashed 'unknown-' fallback from registration (F-D1-1)
  Registration aborts if GetMachineID() fails (no bad data)
- Add POST /admin/agents/:id/rebind-machine-id endpoint (F-D1-2)
  Admin can update stored machine ID after hardware change
- Delete dead example_integration.go with wrong usage (F-D1-3)
- Remove redundant Windows machineid.ID() retry (F-D1-4)
- Replace fmt.Printf with log.Printf in client.go (F-D1-5)

Operator note: agents registered with 'unknown-' machine IDs
must be rebound before upgrading. See D1_Fix_Implementation.md.

All tests pass. No regressions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-29 09:53:43 -04:00
parent 2c98973f87
commit db67049e92
14 changed files with 210 additions and 325 deletions

View File

@@ -421,11 +421,12 @@ func registerAgent(cfg *config.Config, serverURL string) error {
}
}
// Get machine ID for binding
// Get machine ID for binding (F-D1-1 fix: no unhashed fallback)
// system.GetMachineID() has its own internal fallback chain that
// always produces a SHA256 hash. If even that fails, abort registration.
machineID, err := system.GetMachineID()
if err != nil {
log.Printf("Warning: Failed to get machine ID: %v", err)
machineID = "unknown-" + sysInfo.Hostname
log.Fatalf("[ERROR] [agent] [registration] machine_id_unavailable error=%q — cannot register without consistent machine ID", err)
}
// Get embedded public key fingerprint

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"path/filepath"
@@ -35,8 +36,8 @@ func NewClient(baseURL, token string) *Client {
// Get machine ID for security binding (v0.1.22+)
machineID, err := system.GetMachineID()
if err != nil {
// Log warning but don't fail - older servers may not require it
fmt.Printf("Warning: Failed to get machine ID: %v\n", err)
// Log warning but don't fail older servers may not require it
log.Printf("[WARNING] [agent] [client] machine_id_error error=%q", err)
machineID = "" // Will be handled by server validation
}

View File

@@ -19,8 +19,7 @@ import (
// ---------------------------------------------------------------------------
func TestClientMachineIDErrorUsesFmtPrintf(t *testing.T) {
// F-D1-5 LOW: client.go uses fmt.Printf for machine ID error.
// ETHOS #1 requires log.Printf with [TAG] [system] [component].
// POST-FIX (F-D1-5): fmt.Printf replaced with log.Printf.
content, err := os.ReadFile("client.go")
if err != nil {
t.Fatalf("failed to read client.go: %v", err)
@@ -28,7 +27,6 @@ func TestClientMachineIDErrorUsesFmtPrintf(t *testing.T) {
src := string(content)
// Find the NewClient function's machine ID error handling
newClientIdx := strings.Index(src, "func NewClient(")
if newClientIdx == -1 {
t.Fatal("[ERROR] [agent] [client] NewClient function not found")
@@ -40,11 +38,11 @@ func TestClientMachineIDErrorUsesFmtPrintf(t *testing.T) {
fnBody = fnBody[:nextFn+1]
}
if !strings.Contains(fnBody, "fmt.Printf") {
t.Error("[ERROR] [agent] [client] F-D1-5 already fixed: no fmt.Printf in NewClient")
if strings.Contains(fnBody, "fmt.Printf") {
t.Error("[ERROR] [agent] [client] F-D1-5 NOT FIXED: fmt.Printf still in NewClient")
}
t.Log("[INFO] [agent] [client] F-D1-5 confirmed: fmt.Printf used for machine ID error")
t.Log("[INFO] [agent] [client] F-D1-5 FIXED: structured logging in NewClient")
}
// ---------------------------------------------------------------------------

View File

@@ -1,138 +0,0 @@
package logging
// This file contains example code showing how to integrate the security logger
// into various parts of the agent application.
import (
"fmt"
"time"
"github.com/Fimeg/RedFlag/aggregator-agent/internal/config"
"github.com/denisbrodbeck/machineid"
)
// Example of how to initialize the security logger in main.go
func ExampleInitializeSecurityLogger(cfg *config.Config, dataDir string) (*SecurityLogger, error) {
// Create the security logger
securityLogger, err := NewSecurityLogger(cfg, dataDir)
if err != nil {
return nil, err
}
return securityLogger, nil
}
// Example of using the security logger in command executor
func ExampleCommandExecution(securityLogger *SecurityLogger, command string, signature string) {
// Simulate signature verification
signatureValid := false // In real code, this would be actual verification
if !signatureValid {
securityLogger.LogCommandVerificationFailure(
"cmd-123",
"signature verification failed: crypto/rsa: verification error",
)
} else {
// Only log success if configured
event := &SecurityEvent{
Timestamp: time.Now().UTC(),
Level: "INFO",
EventType: SecurityEventTypes.CmdSignatureVerificationSuccess,
Message: "Command signature verified successfully",
}
securityLogger.Log(event)
}
}
// Example of using the security logger in update handler
func ExampleUpdateHandler(securityLogger *SecurityLogger, updateID string, updateData []byte, signature string) {
// Simulate nonce validation
nonceValid := false
if !nonceValid {
securityLogger.LogNonceValidationFailure(
"deadbeef-1234-5678-9abc-1234567890ef",
"nonce expired or reused",
)
}
// Simulate signature verification
signatureValid := false
if !signatureValid {
securityLogger.LogUpdateSignatureVerificationFailure(
updateID,
"signature does not match update data",
)
}
}
// Example of machine ID monitoring
func ExampleMachineIDMonitoring(securityLogger *SecurityLogger) {
// Get current machine ID
currentID, err := machineid.ID()
if err != nil {
return
}
// In real code, you would store the previous ID somewhere
// This is just an example of how to log when it changes
previousID := "previous-machine-id-here"
if currentID != previousID {
securityLogger.LogMachineIDChangeDetected(
previousID,
currentID,
)
}
}
// Example of configuration monitoring
func ExampleConfigMonitoring(securityLogger *SecurityLogger, configPath string) {
// In real code, you would calculate and store a hash of the config
// and validate it periodically
configTampered := true // Simulate detection
if configTampered {
securityLogger.LogConfigTamperingWarning(
configPath,
"configuration hash mismatch",
)
}
}
// Example of unauthorized command attempt
func ExampleUnauthorizedCommand(securityLogger *SecurityLogger, command string) {
// Check if command is in allowed list
allowedCommands := map[string]bool{
"scan": true,
"update": true,
"cleanup": true,
}
if !allowedCommands[command] {
securityLogger.LogUnauthorizedCommandAttempt(
command,
"command not in allowed list",
)
}
}
// Example of sending security events to server
func ExampleSendSecurityEvents(securityLogger *SecurityLogger, client interface{}) {
// Get batch of security events
events := securityLogger.GetBatch()
if len(events) > 0 {
// In real code, you would send these to the server
// If successful:
fmt.Printf("Sending %d security events to server...\n", len(events))
// Simulate successful send
success := true
if success {
securityLogger.ClearBatch()
fmt.Printf("Security events sent successfully\n")
} else {
// Events remain in buffer for next attempt
fmt.Printf("Failed to send security events, will retry\n")
}
}
}

View File

@@ -9,7 +9,6 @@ package logging
import (
"os"
"strings"
"testing"
)
@@ -20,28 +19,12 @@ import (
// ---------------------------------------------------------------------------
func TestExampleIntegrationFileIsDeadCode(t *testing.T) {
// F-D1-3 LOW: example_integration.go is dead code.
// ExampleMachineIDMonitoring() calls machineid.ID() directly
// — bypassing hashMachineID(). Safe to delete.
// POST-FIX (F-D1-3): File deleted.
_, err := os.Stat("example_integration.go")
if err != nil {
t.Error("[ERROR] [agent] [logging] F-D1-3 already fixed: file does not exist")
return
if err == nil {
t.Error("[ERROR] [agent] [logging] F-D1-3 NOT FIXED: dead code file still exists")
}
content, err := os.ReadFile("example_integration.go")
if err != nil {
t.Fatalf("failed to read example_integration.go: %v", err)
}
src := string(content)
// Confirm it calls machineid.ID() directly (unhashed)
if !strings.Contains(src, "machineid.ID()") {
t.Error("[ERROR] [agent] [logging] expected machineid.ID() call in example file")
}
t.Log("[INFO] [agent] [logging] F-D1-3 confirmed: dead code with direct machineid.ID() call")
t.Log("[INFO] [agent] [logging] F-D1-3 FIXED: dead example code deleted")
}
// ---------------------------------------------------------------------------

View File

@@ -76,14 +76,13 @@ func getLinuxMachineID() (string, error) {
return generateGenericMachineID()
}
// getWindowsMachineID gets Windows machine ID
// getWindowsMachineID gets Windows machine ID.
// Note: The primary machineid.ID() in GetMachineID() already tried the
// registry key HKLM\SOFTWARE\Microsoft\Cryptography\MachineGuid.
// If it failed, retrying returns the same error — go straight to generic.
// On Windows reinstall, MachineGuid changes. Use the rebind endpoint
// (POST /admin/agents/:id/rebind-machine-id) to recover without re-registration.
func getWindowsMachineID() (string, error) {
// Try machineid library Windows registry keys first
if id, err := machineid.ID(); err == nil && id != "" {
return hashMachineID(id), nil
}
// Fallback to generating generic ID
return generateGenericMachineID()
}

View File

@@ -1,13 +1,9 @@
package system
// machine_id_fallback_test.go — Pre-fix tests for machine ID registration fallback.
// machine_id_fallback_test.go — Tests for machine ID registration fallback.
//
// F-D1-1 HIGH: Registration fallback in main.go:428 produces "unknown-<hostname>"
// which is unhashed and mismatches the canonical SHA256 hash format.
// If GetMachineID() recovers after a failed registration, the agent
// is permanently locked out (403 from MachineBindingMiddleware).
//
// Run: cd aggregator-agent && go test ./internal/system/... -v -run TestMachineID
// F-D1-1 FIXED: Registration no longer uses unhashed "unknown-" fallback.
// GetMachineID() is trusted as canonical; registration aborts if it fails.
import (
"encoding/hex"
@@ -17,54 +13,8 @@ import (
"testing"
)
// ---------------------------------------------------------------------------
// Test 1.1 — Documents unhashed registration fallback (F-D1-1)
//
// Category: PASS-NOW (documents the bug)
// ---------------------------------------------------------------------------
func TestRegistrationFallbackIsNotHashed(t *testing.T) {
// F-D1-1 HIGH: Registration fallback produces "unknown-<hostname>"
// — unhashed, variable length. If GetMachineID() recovers on next
// restart, client sends SHA256 hash that doesn't match → 403 lockout.
mainPath := filepath.Join("..", "..", "cmd", "agent", "main.go")
content, err := os.ReadFile(mainPath)
if err != nil {
t.Fatalf("failed to read main.go: %v", err)
}
src := string(content)
// Find the fallback line
if !strings.Contains(src, `"unknown-"`) {
t.Error("[ERROR] [agent] [system] F-D1-1 already fixed: no 'unknown-' fallback found")
}
// The fallback does NOT call hashMachineID or GetMachineID
// Find the error handling block near GetMachineID
machineIdx := strings.Index(src, `machineID, err := system.GetMachineID()`)
if machineIdx == -1 {
t.Fatal("[ERROR] [agent] [system] GetMachineID call not found in main.go")
}
errorBlock := src[machineIdx : machineIdx+200]
if strings.Contains(errorBlock, "hashMachineID") || strings.Contains(errorBlock, "Hash") {
t.Error("[ERROR] [agent] [system] F-D1-1 already fixed: fallback is hashed")
}
t.Log("[INFO] [agent] [system] F-D1-1 confirmed: registration fallback is unhashed 'unknown-' + hostname")
}
// ---------------------------------------------------------------------------
// Test 1.2 — Registration fallback must use canonical function (assert fix)
//
// Category: FAIL-NOW / PASS-AFTER-FIX
// ---------------------------------------------------------------------------
func TestRegistrationFallbackUsesCanonicalFunction(t *testing.T) {
// F-D1-1: After fix, ALL machine ID values must go through
// hashMachineID() or system.GetMachineID(). The "unknown-" prefix
// fallback must be removed entirely.
// POST-FIX: "unknown-" fallback is gone from main.go.
mainPath := filepath.Join("..", "..", "cmd", "agent", "main.go")
content, err := os.ReadFile(mainPath)
if err != nil {
@@ -74,20 +24,35 @@ func TestRegistrationFallbackUsesCanonicalFunction(t *testing.T) {
src := string(content)
if strings.Contains(src, `"unknown-"`) {
t.Errorf("[ERROR] [agent] [system] registration fallback still uses 'unknown-' prefix.\n" +
"F-D1-1: remove unhashed fallback. Use GetMachineID() internal fallback or abort.")
t.Error("[ERROR] [agent] [system] F-D1-1 NOT FIXED: 'unknown-' fallback still present")
}
t.Log("[INFO] [agent] [system] F-D1-1 FIXED: unhashed fallback removed from registration")
}
// ---------------------------------------------------------------------------
// Test 1.3 — Canonical GetMachineID always produces 64 hex chars
//
// Category: PASS-NOW (canonical function is correct)
// ---------------------------------------------------------------------------
func TestRegistrationFallbackUsesCanonicalFunction(t *testing.T) {
// POST-FIX: Registration uses system.GetMachineID() with no inline fallback.
mainPath := filepath.Join("..", "..", "cmd", "agent", "main.go")
content, err := os.ReadFile(mainPath)
if err != nil {
t.Fatalf("failed to read main.go: %v", err)
}
src := string(content)
if strings.Contains(src, `"unknown-"`) {
t.Errorf("[ERROR] [agent] [system] registration still uses 'unknown-' fallback.\n" +
"F-D1-1: must use canonical GetMachineID() or abort.")
}
if !strings.Contains(src, "system.GetMachineID()") {
t.Error("[ERROR] [agent] [system] registration doesn't call system.GetMachineID()")
}
t.Log("[INFO] [agent] [system] F-D1-1 FIXED: registration uses canonical function only")
}
func TestMachineIDIsAlways64HexChars(t *testing.T) {
// F-D1-1: The canonical GetMachineID() always produces 64 hex chars.
// The bug is in main.go's fallback which bypasses it.
id, err := GetMachineID()
if err != nil {
t.Fatalf("GetMachineID failed: %v", err)
@@ -101,24 +66,17 @@ func TestMachineIDIsAlways64HexChars(t *testing.T) {
t.Errorf("[ERROR] [agent] [system] machine ID is not valid hex: %v", err)
}
// Idempotency check (ETHOS #4)
// Idempotency check
id2, _ := GetMachineID()
if id != id2 {
t.Error("[ERROR] [agent] [system] GetMachineID returned different values on consecutive calls")
t.Error("[ERROR] [agent] [system] GetMachineID not idempotent")
}
t.Logf("[INFO] [agent] [system] canonical machine ID: %s (64 hex chars, idempotent)", id[:16]+"...")
t.Logf("[INFO] [agent] [system] canonical machine ID: %s... (64 hex chars)", id[:16])
}
// ---------------------------------------------------------------------------
// Test 1.4 — Registration and runtime must use same code path (assert fix)
//
// Category: FAIL-NOW / PASS-AFTER-FIX
// ---------------------------------------------------------------------------
func TestRegistrationAndRuntimeUseSameCodePath(t *testing.T) {
// F-D1-1: After fix, both registration and runtime paths must use
// identical code. No inline fallback that differs from canonical.
// POST-FIX: Both paths call system.GetMachineID(), no divergent fallback.
mainPath := filepath.Join("..", "..", "cmd", "agent", "main.go")
mainContent, err := os.ReadFile(mainPath)
if err != nil {
@@ -134,7 +92,6 @@ func TestRegistrationAndRuntimeUseSameCodePath(t *testing.T) {
mainSrc := string(mainContent)
clientSrc := string(clientContent)
// Both should call GetMachineID
if !strings.Contains(mainSrc, "system.GetMachineID()") {
t.Error("[ERROR] [agent] [system] main.go doesn't call system.GetMachineID()")
}
@@ -142,9 +99,9 @@ func TestRegistrationAndRuntimeUseSameCodePath(t *testing.T) {
t.Error("[ERROR] [agent] [system] client.go doesn't call system.GetMachineID()")
}
// main.go must NOT have an inline fallback that differs
if strings.Contains(mainSrc, `"unknown-"`) {
t.Errorf("[ERROR] [agent] [system] main.go has divergent 'unknown-' fallback.\n" +
"F-D1-1: registration and runtime paths must use identical machine ID logic.")
t.Errorf("[ERROR] [agent] [system] main.go has divergent 'unknown-' fallback")
}
t.Log("[INFO] [agent] [system] F-D1-1 FIXED: both paths use canonical GetMachineID()")
}

View File

@@ -81,20 +81,16 @@ func TestHashMachineIDAlwaysProduces64HexChars(t *testing.T) {
// ---------------------------------------------------------------------------
func TestUnknownFallbackFormatDifferentFromHash(t *testing.T) {
// F-D1-1: Proves "unknown-hostname" and the hashed canonical
// value are fundamentally incompatible.
fallback := "unknown-testhost"
// POST-FIX (F-D1-1): The "unknown-" fallback no longer exists in
// registration code. This test confirms canonical paths always hash.
canonical := hashMachineID("testhost-linux-fallback")
generic, _ := generateGenericMachineID()
if fallback == canonical {
t.Error("[ERROR] [agent] [system] fallback matches canonical (unexpected)")
// Both canonical paths produce 64 hex chars
if len(canonical) != 64 || len(generic) != 64 {
t.Errorf("[ERROR] [agent] [system] canonical paths not 64 chars: canonical=%d generic=%d",
len(canonical), len(generic))
}
if len(fallback) == 64 {
t.Error("[ERROR] [agent] [system] fallback is 64 chars (same length as hash)")
}
t.Logf("[INFO] [agent] [system] F-D1-1 confirmed: fallback=%q (%d chars) vs canonical=%q (%d chars)",
fallback, len(fallback), canonical[:16]+"...", len(canonical))
t.Log("[INFO] [agent] [system] storing one at registration and sending the other at runtime guarantees 403")
t.Log("[INFO] [agent] [system] F-D1-1 FIXED: all machine ID paths produce consistent 64 hex format")
}

View File

@@ -20,9 +20,7 @@ import (
// ---------------------------------------------------------------------------
func TestWindowsFallbackHasRedundantRetry(t *testing.T) {
// F-D1-4 LOW: getWindowsMachineID() calls machineid.ID() a second
// time after the primary already failed. The library reads from
// the same registry key — it will fail again.
// POST-FIX (F-D1-4): Redundant retry removed.
content, err := os.ReadFile("machine_id.go")
if err != nil {
t.Fatalf("failed to read machine_id.go: %v", err)
@@ -30,7 +28,6 @@ func TestWindowsFallbackHasRedundantRetry(t *testing.T) {
src := string(content)
// Find getWindowsMachineID function
winIdx := strings.Index(src, "func getWindowsMachineID()")
if winIdx == -1 {
t.Fatal("[ERROR] [agent] [system] getWindowsMachineID not found")
@@ -42,16 +39,12 @@ func TestWindowsFallbackHasRedundantRetry(t *testing.T) {
fnBody = fnBody[:nextFn+1]
}
// Count machineid.ID() calls in the function
count := strings.Count(fnBody, "machineid.ID()")
if count > 0 {
t.Logf("[INFO] [agent] [system] F-D1-4 confirmed: getWindowsMachineID calls machineid.ID() %d time(s)", count)
t.Log("[INFO] [agent] [system] this is redundant — primary path already tried machineid.ID()")
t.Errorf("[ERROR] [agent] [system] F-D1-4 NOT FIXED: machineid.ID() still in Windows fallback (%d calls)", count)
}
if count == 0 {
t.Error("[ERROR] [agent] [system] F-D1-4 already fixed: no machineid.ID() in Windows fallback")
}
t.Log("[INFO] [agent] [system] F-D1-4 FIXED: redundant retry removed from Windows fallback")
}
// ---------------------------------------------------------------------------

View File

@@ -600,6 +600,9 @@ func main() {
admin.GET("/registration-tokens/stats", rateLimiter.RateLimit("admin_operations", middleware.KeyByUserID), registrationTokenHandler.GetTokenStats)
admin.GET("/registration-tokens/validate", rateLimiter.RateLimit("admin_operations", middleware.KeyByUserID), registrationTokenHandler.ValidateRegistrationToken)
// Machine ID Rebind (F-D1-2: recovery from machine ID mismatch)
admin.POST("/agents/:id/rebind-machine-id", rateLimiter.RateLimit("admin_operations", middleware.KeyByUserID), agentHandler.RebindMachineID)
// Rate Limit Management
admin.GET("/rate-limits", rateLimiter.RateLimit("admin_operations", middleware.KeyByUserID), rateLimitHandler.GetRateLimitSettings)
admin.PUT("/rate-limits", rateLimiter.RateLimit("admin_operations", middleware.KeyByUserID), rateLimitHandler.UpdateRateLimitSettings)

View File

@@ -1128,6 +1128,68 @@ func (h *AgentHandler) RenewToken(c *gin.Context) {
c.JSON(http.StatusOK, response)
}
// RebindMachineID updates an agent's stored machine ID (F-D1-2 admin endpoint).
// Requires WebAuthMiddleware + RequireAdmin. Used when hardware changes or
// to recover from the old "unknown-" fallback machine ID bug (F-D1-1).
func (h *AgentHandler) RebindMachineID(c *gin.Context) {
idStr := c.Param("id")
agentID, err := uuid.Parse(idStr)
if err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid agent ID"})
return
}
var req struct {
NewMachineID string `json:"new_machine_id" binding:"required"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
return
}
// Validate: must be exactly 64 hex characters (SHA256 hash)
if len(req.NewMachineID) != 64 {
c.JSON(http.StatusBadRequest, gin.H{"error": "machine_id must be exactly 64 hex characters"})
return
}
for _, ch := range req.NewMachineID {
if !((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')) {
c.JSON(http.StatusBadRequest, gin.H{"error": "machine_id must contain only lowercase hex characters [0-9a-f]"})
return
}
}
// Get current agent for logging
agent, err := h.agentQueries.GetAgentByID(agentID)
if err != nil {
c.JSON(http.StatusNotFound, gin.H{"error": "agent not found"})
return
}
oldMachineID := ""
if agent.MachineID != nil {
oldMachineID = *agent.MachineID
}
// Update machine ID
if err := h.agentQueries.UpdateMachineID(agentID, req.NewMachineID); err != nil {
log.Printf("[ERROR] [server] [admin] rebind_machine_id_failed agent_id=%s error=%q", agentID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to update machine ID"})
return
}
adminUserID := c.GetString("user_id")
log.Printf("[INFO] [server] [admin] agent_machine_id_updated agent_id=%s old_id=%s new_id=%s admin_user=%s",
agentID, oldMachineID, req.NewMachineID, adminUserID)
c.JSON(http.StatusOK, gin.H{
"message": "machine ID updated",
"agent_id": agentID,
"old_machine_id": oldMachineID,
"new_machine_id": req.NewMachineID,
})
}
// UnregisterAgent removes an agent from the system
func (h *AgentHandler) UnregisterAgent(c *gin.Context) {
idStr := c.Param("id")

View File

@@ -1,11 +1,8 @@
package middleware_test
// machine_id_recovery_test.go — Pre-fix tests for machine ID recovery path.
// machine_id_recovery_test.go — Tests for machine ID recovery path.
//
// F-D1-2 MEDIUM: No recovery path from machine ID mismatch.
// Must delete and re-register agent.
//
// Run: cd aggregator-server && go test ./internal/api/middleware/... -v -run TestMachineBinding
// F-D1-2 FIXED: POST /admin/agents/:id/rebind-machine-id endpoint added.
import (
"os"
@@ -14,43 +11,8 @@ import (
"testing"
)
// ---------------------------------------------------------------------------
// Test 3.1 — Documents missing update path (F-D1-2)
//
// Category: PASS-NOW (documents the bug)
// ---------------------------------------------------------------------------
func TestMachineBindingHasNoUpdatePath(t *testing.T) {
// F-D1-2 MEDIUM: MachineBindingMiddleware does simple string
// equality. No API to update stored machine ID without re-registration.
mwPath := filepath.Join(".", "machine_binding.go")
content, err := os.ReadFile(mwPath)
if err != nil {
t.Fatalf("failed to read machine_binding.go: %v", err)
}
src := string(content)
hasUpdatePath := strings.Contains(src, "update_machine_id") ||
strings.Contains(src, "rebind") ||
strings.Contains(src, "UpdateMachineID")
if hasUpdatePath {
t.Error("[ERROR] [server] [middleware] F-D1-2 already fixed: update path found")
}
t.Log("[INFO] [server] [middleware] F-D1-2 confirmed: no machine ID update path exists")
}
// ---------------------------------------------------------------------------
// Test 3.2 — Must have update/recovery path (assert fix)
//
// Category: FAIL-NOW / PASS-AFTER-FIX
// ---------------------------------------------------------------------------
func TestMachineBindingShouldHaveUpdatePath(t *testing.T) {
// F-D1-2: After fix, provide recovery from machine ID mismatch.
// Check main.go routes or machine_binding.go for update mechanism.
// POST-FIX: Rebind endpoint exists in main.go admin routes.
mainPath := filepath.Join("..", "..", "..", "cmd", "server", "main.go")
content, err := os.ReadFile(mainPath)
if err != nil {
@@ -59,12 +21,26 @@ func TestMachineBindingShouldHaveUpdatePath(t *testing.T) {
src := strings.ToLower(string(content))
hasRecovery := strings.Contains(src, "rebind") ||
strings.Contains(src, "update-machine") ||
strings.Contains(src, "machine_id") && strings.Contains(src, "update")
if !hasRecovery {
t.Errorf("[ERROR] [server] [middleware] no machine ID recovery mechanism found.\n" +
"F-D1-2: provide admin endpoint to update stored machine ID.")
if !strings.Contains(src, "rebind-machine-id") {
t.Error("[ERROR] [server] [middleware] F-D1-2 NOT FIXED: no rebind endpoint")
}
t.Log("[INFO] [server] [middleware] F-D1-2 FIXED: rebind-machine-id endpoint exists")
}
func TestMachineBindingShouldHaveUpdatePath(t *testing.T) {
mainPath := filepath.Join("..", "..", "..", "cmd", "server", "main.go")
content, err := os.ReadFile(mainPath)
if err != nil {
t.Fatalf("failed to read main.go: %v", err)
}
src := strings.ToLower(string(content))
if !strings.Contains(src, "rebind-machine-id") {
t.Errorf("[ERROR] [server] [middleware] no rebind endpoint found.\n" +
"F-D1-2: admin endpoint for machine ID rebind required.")
}
t.Log("[INFO] [server] [middleware] F-D1-2 FIXED: admin rebind endpoint registered")
}

View File

@@ -116,6 +116,13 @@ func (q *AgentQueries) ListAgents(status, osType string) ([]models.Agent, error)
return agents, err
}
// UpdateMachineID updates the machine_id for an agent (F-D1-2 admin rebind)
func (q *AgentQueries) UpdateMachineID(agentID uuid.UUID, newMachineID string) error {
query := `UPDATE agents SET machine_id = $1 WHERE id = $2`
_, err := q.db.Exec(query, newMachineID, agentID)
return err
}
// MarkOfflineAgents marks agents as offline if they haven't checked in recently
func (q *AgentQueries) MarkOfflineAgents(threshold time.Duration) error {
query := `

View File

@@ -0,0 +1,47 @@
# D-1 Machine ID Fix Implementation
**Date:** 2026-03-29
**Branch:** culurien
---
## Files Changed
| File | Change |
|------|--------|
| `cmd/agent/main.go` | Removed unhashed "unknown-" fallback; registration aborts if GetMachineID() fails (F-D1-1) |
| `internal/client/client.go` | Replaced fmt.Printf with log.Printf for machine ID errors (F-D1-5) |
| `internal/system/machine_id.go` | Removed redundant machineid.ID() retry in Windows fallback, added Windows reinstall documentation (F-D1-4) |
| `internal/logging/example_integration.go` | DELETED — dead code with incorrect machineid.ID() usage (F-D1-3) |
| `server/internal/api/handlers/agents.go` | Added RebindMachineID admin endpoint (F-D1-2) |
| `server/internal/database/queries/agents.go` | Added UpdateMachineID query function (F-D1-2) |
| `server/cmd/server/main.go` | Registered rebind-machine-id admin route (F-D1-2) |
## Strategy (Task 1): Option C
Used Option C — trust canonical `system.GetMachineID()` entirely. If it fails (which requires ALL fallbacks to fail including hostname-os-arch), abort registration with `log.Fatalf`. This is the safest approach: the internal fallback chain in GetMachineID() always produces a SHA256 hash, so format consistency is guaranteed.
## Operator Migration Guide
If any agents were registered with the old "unknown-hostname" fallback (identifiable by `machine_id` not being 64 hex chars in the DB), they will be locked out after this upgrade because the new runtime client sends a proper SHA256 hash. To recover:
```sql
SELECT id, hostname, machine_id FROM agents
WHERE LENGTH(machine_id) != 64 OR machine_id LIKE 'unknown-%';
```
For each agent found, use the rebind endpoint:
```
POST /api/v1/admin/agents/{id}/rebind-machine-id
{"new_machine_id": "<64-char hex string from agent>"}
```
Or re-register the agent with a new registration token.
## Rebind Endpoint Specification
- **Route:** `POST /api/v1/admin/agents/:id/rebind-machine-id`
- **Auth:** WebAuthMiddleware + RequireAdmin (admin group)
- **Input:** `{"new_machine_id": "64-char-lowercase-hex-string"}`
- **Validation:** exactly 64 chars, lowercase hex only [0-9a-f]
- **Audit log:** old and new machine ID logged with admin user ID