fix: agent acknowledgment recursion and subsystem UI improvements
- Fix recursive call in reportLogWithAck that caused an infinite loop
- Add machine binding and security API endpoints
- Enhance AgentScanners component with security status display
- Improve scheduler and timeout service reliability
- Remove deprecated install.sh script
- Add subsystem configuration and logging improvements
This commit is contained in:
@@ -12,11 +12,12 @@ import (
|
||||
|
||||
// TimeoutService handles timeout management for long-running operations
|
||||
type TimeoutService struct {
|
||||
commandQueries *queries.CommandQueries
|
||||
updateQueries *queries.UpdateQueries
|
||||
ticker *time.Ticker
|
||||
stopChan chan bool
|
||||
timeoutDuration time.Duration
|
||||
commandQueries *queries.CommandQueries
|
||||
updateQueries *queries.UpdateQueries
|
||||
ticker *time.Ticker
|
||||
stopChan chan bool
|
||||
sentTimeout time.Duration // For commands already sent to agents
|
||||
pendingTimeout time.Duration // For commands stuck in queue
|
||||
}
|
||||
|
||||
// NewTimeoutService creates a new timeout service
|
||||
@@ -24,14 +25,16 @@ func NewTimeoutService(cq *queries.CommandQueries, uq *queries.UpdateQueries) *T
|
||||
return &TimeoutService{
|
||||
commandQueries: cq,
|
||||
updateQueries: uq,
|
||||
timeoutDuration: 2 * time.Hour, // 2 hours timeout - allows for system upgrades and large operations
|
||||
sentTimeout: 2 * time.Hour, // 2 hours for commands already sent to agents
|
||||
pendingTimeout: 30 * time.Minute, // 30 minutes for commands stuck in queue
|
||||
// TODO: Make these timeout durations user-adjustable in settings
|
||||
stopChan: make(chan bool),
|
||||
}
|
||||
}
|
||||
|
||||
// Start begins the timeout monitoring service
|
||||
func (ts *TimeoutService) Start() {
|
||||
log.Printf("Starting timeout service with %v timeout duration", ts.timeoutDuration)
|
||||
log.Printf("Starting timeout service with %v sent timeout, %v pending timeout", ts.sentTimeout, ts.pendingTimeout)
|
||||
|
||||
// Create a ticker that runs every 5 minutes
|
||||
ts.ticker = time.NewTicker(5 * time.Minute)
|
||||
@@ -59,25 +62,41 @@ func (ts *TimeoutService) Stop() {
|
||||
func (ts *TimeoutService) checkForTimeouts() {
|
||||
log.Println("Checking for timed out operations...")
|
||||
|
||||
// Get all commands that are in 'sent' status
|
||||
commands, err := ts.commandQueries.GetCommandsByStatus(models.CommandStatusSent)
|
||||
if err != nil {
|
||||
log.Printf("Error getting sent commands: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
timeoutThreshold := time.Now().Add(-ts.timeoutDuration)
|
||||
sentTimeoutThreshold := time.Now().Add(-ts.sentTimeout)
|
||||
pendingTimeoutThreshold := time.Now().Add(-ts.pendingTimeout)
|
||||
timedOutCommands := make([]models.AgentCommand, 0)
|
||||
|
||||
for _, command := range commands {
|
||||
// Check if command has been sent and is older than timeout threshold
|
||||
if command.SentAt != nil && command.SentAt.Before(timeoutThreshold) {
|
||||
timedOutCommands = append(timedOutCommands, command)
|
||||
// Check 'sent' commands (traditional timeout - 2 hours)
|
||||
sentCommands, err := ts.commandQueries.GetCommandsByStatus(models.CommandStatusSent)
|
||||
if err != nil {
|
||||
log.Printf("Error getting sent commands: %v", err)
|
||||
} else {
|
||||
for _, command := range sentCommands {
|
||||
// Check if command has been sent and is older than sent timeout threshold
|
||||
if command.SentAt != nil && command.SentAt.Before(sentTimeoutThreshold) {
|
||||
timedOutCommands = append(timedOutCommands, command)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check 'pending' commands (stuck in queue timeout - 30 minutes)
|
||||
pendingCommands, err := ts.commandQueries.GetCommandsByStatus(models.CommandStatusPending)
|
||||
if err != nil {
|
||||
log.Printf("Error getting pending commands: %v", err)
|
||||
} else {
|
||||
for _, command := range pendingCommands {
|
||||
// Check if command has been pending longer than pending timeout threshold
|
||||
if command.CreatedAt.Before(pendingTimeoutThreshold) {
|
||||
timedOutCommands = append(timedOutCommands, command)
|
||||
log.Printf("Found stuck pending command %s (type: %s, created: %s, age: %v)",
|
||||
command.ID, command.CommandType, command.CreatedAt.Format(time.RFC3339), time.Since(command.CreatedAt))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(timedOutCommands) > 0 {
|
||||
log.Printf("Found %d timed out commands", len(timedOutCommands))
|
||||
log.Printf("Found %d timed out commands (%d sent >2h, %d stuck pending >30m)",
|
||||
len(timedOutCommands), len(sentCommands), len(pendingCommands))
|
||||
|
||||
for _, command := range timedOutCommands {
|
||||
if err := ts.timeoutCommand(&command); err != nil {
|
||||
@@ -91,6 +110,14 @@ func (ts *TimeoutService) checkForTimeouts() {
|
||||
|
||||
// timeoutCommand marks a specific command as timed out and updates related entities
|
||||
func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
|
||||
// Determine which timeout duration was applied
|
||||
var appliedTimeout time.Duration
|
||||
if command.Status == models.CommandStatusSent {
|
||||
appliedTimeout = ts.sentTimeout
|
||||
} else {
|
||||
appliedTimeout = ts.pendingTimeout
|
||||
}
|
||||
|
||||
log.Printf("Timing out command %s (type: %s, agent: %s)",
|
||||
command.ID, command.CommandType, command.AgentID)
|
||||
|
||||
@@ -103,7 +130,7 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
|
||||
result := models.JSONB{
|
||||
"error": "operation timed out",
|
||||
"timeout_at": time.Now(),
|
||||
"duration": ts.timeoutDuration.String(),
|
||||
"duration": appliedTimeout.String(),
|
||||
"command_id": command.ID.String(),
|
||||
}
|
||||
|
||||
@@ -112,7 +139,7 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
|
||||
}
|
||||
|
||||
// Update related update package status if applicable
|
||||
if err := ts.updateRelatedPackageStatus(command); err != nil {
|
||||
if err := ts.updateRelatedPackageStatus(command, appliedTimeout); err != nil {
|
||||
log.Printf("Warning: failed to update related package status: %v", err)
|
||||
// Don't return error here as the main timeout operation succeeded
|
||||
}
|
||||
@@ -123,11 +150,11 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
|
||||
AgentID: command.AgentID,
|
||||
UpdatePackageID: ts.extractUpdatePackageID(command),
|
||||
Action: command.CommandType,
|
||||
Result: "timed_out",
|
||||
Result: "failed", // Use 'failed' to comply with database constraint
|
||||
Stdout: "",
|
||||
Stderr: fmt.Sprintf("Command %s timed out after %v", command.CommandType, ts.timeoutDuration),
|
||||
Stderr: fmt.Sprintf("Command %s timed out after %v (timeout_id: %s)", command.CommandType, appliedTimeout, command.ID),
|
||||
ExitCode: 124, // Standard timeout exit code
|
||||
DurationSeconds: int(ts.timeoutDuration.Seconds()),
|
||||
DurationSeconds: int(appliedTimeout.Seconds()),
|
||||
ExecutedAt: time.Now(),
|
||||
}
|
||||
|
||||
@@ -141,7 +168,7 @@ func (ts *TimeoutService) timeoutCommand(command *models.AgentCommand) error {
|
||||
}
|
||||
|
||||
// updateRelatedPackageStatus updates the status of related update packages when a command times out
|
||||
func (ts *TimeoutService) updateRelatedPackageStatus(command *models.AgentCommand) error {
|
||||
func (ts *TimeoutService) updateRelatedPackageStatus(command *models.AgentCommand, appliedTimeout time.Duration) error {
|
||||
// Extract update_id from command params if it exists
|
||||
_, ok := command.Params["update_id"].(string)
|
||||
if !ok {
|
||||
@@ -153,7 +180,7 @@ func (ts *TimeoutService) updateRelatedPackageStatus(command *models.AgentComman
|
||||
metadata := models.JSONB{
|
||||
"timeout": true,
|
||||
"timeout_at": time.Now(),
|
||||
"timeout_duration": ts.timeoutDuration.String(),
|
||||
"timeout_duration": appliedTimeout.String(),
|
||||
"command_id": command.ID.String(),
|
||||
"failure_reason": "operation timed out",
|
||||
}
|
||||
@@ -196,7 +223,7 @@ func (ts *TimeoutService) GetTimeoutStatus() (map[string]interface{}, error) {
|
||||
}
|
||||
|
||||
// Count commands approaching timeout (within 5 minutes of timeout)
|
||||
timeoutThreshold := time.Now().Add(-ts.timeoutDuration + 5*time.Minute)
|
||||
timeoutThreshold := time.Now().Add(-ts.sentTimeout + 5*time.Minute)
|
||||
approachingTimeout := 0
|
||||
for _, command := range activeCommands {
|
||||
if command.SentAt != nil && command.SentAt.Before(timeoutThreshold) {
|
||||
@@ -205,16 +232,30 @@ func (ts *TimeoutService) GetTimeoutStatus() (map[string]interface{}, error) {
|
||||
}
|
||||
|
||||
return map[string]interface{}{
|
||||
"total_timed_out": len(timedOutCommands),
|
||||
"total_active": len(activeCommands),
|
||||
"approaching_timeout": approachingTimeout,
|
||||
"timeout_duration": ts.timeoutDuration.String(),
|
||||
"last_check": time.Now(),
|
||||
"total_timed_out": len(timedOutCommands),
|
||||
"total_active": len(activeCommands),
|
||||
"approaching_timeout": approachingTimeout,
|
||||
"sent_timeout_duration": ts.sentTimeout.String(),
|
||||
"pending_timeout_duration": ts.pendingTimeout.String(),
|
||||
"last_check": time.Now(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// SetTimeoutDuration allows changing the timeout duration
|
||||
// SetTimeoutDuration allows changing the timeout duration for sent commands
|
||||
// TODO: This should be deprecated in favor of SetSentTimeout and SetPendingTimeout
|
||||
func (ts *TimeoutService) SetTimeoutDuration(duration time.Duration) {
|
||||
ts.timeoutDuration = duration
|
||||
log.Printf("Timeout duration updated to %v", duration)
|
||||
ts.sentTimeout = duration
|
||||
log.Printf("Sent timeout duration updated to %v", duration)
|
||||
}
|
||||
|
||||
// SetSentTimeout sets the timeout applied to commands already sent to agents
// and logs the new value. The duration is stored as given, without validation.
|
||||
func (ts *TimeoutService) SetSentTimeout(duration time.Duration) {
|
||||
ts.sentTimeout = duration
|
||||
log.Printf("Sent timeout duration updated to %v", duration)
|
||||
}
|
||||
|
||||
// SetPendingTimeout sets the timeout applied to commands still pending in the
// queue and logs the new value. The duration is stored as given, without validation.
|
||||
func (ts *TimeoutService) SetPendingTimeout(duration time.Duration) {
|
||||
ts.pendingTimeout = duration
|
||||
log.Printf("Pending timeout duration updated to %v", duration)
|
||||
}
|
||||
Reference in New Issue
Block a user