// com.seeq.link.agent.Watchdog Maven / Gradle / Ivy
package com.seeq.link.agent;
import java.lang.reflect.Method;
import java.time.Duration;
import java.util.HashMap;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Function;
import com.google.common.annotations.VisibleForTesting;
import com.seeq.link.agent.config.WatchdogConfigV1;
import com.seeq.link.sdk.utilities.Stopwatch;
import com.seeq.link.sdk.utilities.TimeInterval;
import lombok.extern.slf4j.Slf4j;
/**
 * Keeps a series of named timers and requires a call to {@link #reset(WatchdogTimer)} in order to
 * suppress "barking". Used to trigger a reset of the agent if the connection to the server goes
 * down or the heartbeat thread gets stuck or dies.
 *
 * <p>One stopwatch is kept per {@link WatchdogTimer}. A background thread wakes up every
 * {@code loopPeriod}, compares each stopwatch's elapsed time with that timer's configured timeout
 * and invokes the bark function for every timer that has reached its timeout. All access to the
 * timer map is guarded by synchronizing on the map itself.
 */
@Slf4j
public class Watchdog {

    /** The set of conditions being watched; each one gets its own stopwatch and timeout. */
    public enum WatchdogTimer {
        SeeqServerHttpConnection,
        SeeqServerWebSocketConnection,
        SeeqServerResponse,
        AgentHeartbeat,
        HighMemoryUse,
        ConnectorConfigChange
    }

    /** Background polling thread; {@code null} until {@link #initialize()} starts it. */
    private Thread watchdogThread = null;

    /** Invoked with the timer's name whenever that timer is found to have timed out. */
    private final Consumer<WatchdogTimer> barkFunction;

    /** Creates the stopwatch that backs a given timer. */
    private final Function<WatchdogTimer, Stopwatch> stopwatchFactory;

    /** Stopwatch per timer; every read and write happens while synchronized on this map. */
    private final HashMap<WatchdogTimer, Stopwatch> watchdogTimers;

    private final WatchdogConfigV1 config;

    /** How long the watchdog thread sleeps between checks. Package-private and non-final. */
    Duration loopPeriod = Duration.ofSeconds(1);

    /** For testing only: leaves the config, bark function and stopwatch factory {@code null}. */
    @VisibleForTesting
    Watchdog() {
        this(null, null, null);
    }

    /**
     * Creates a watchdog. Nothing is started until {@link #initialize()} is called.
     *
     * @param config           supplies the enabled flag and the per-timer timeout strings
     * @param barkFunction     called with the timer that timed out
     * @param stopwatchFactory produces the stopwatch used for each timer
     */
    public Watchdog(WatchdogConfigV1 config, Consumer<WatchdogTimer> barkFunction,
            Function<WatchdogTimer, Stopwatch> stopwatchFactory) {
        this.config = config;
        this.barkFunction = barkFunction;
        this.stopwatchFactory = stopwatchFactory;
        this.watchdogTimers = new HashMap<>();
    }

    /**
     * Creates and starts a stopwatch for every {@link WatchdogTimer}, then starts the watchdog
     * thread. Does nothing (beyond a debug log) when the config explicitly disables the watchdog.
     */
    public void initialize() {
        if (this.isDisabled()) {
            LOG.debug("Watchdog is disabled");
            return;
        }

        synchronized (this.watchdogTimers) {
            for (WatchdogTimer watchdogTimerName : WatchdogTimer.values()) {
                Stopwatch stopwatch = this.stopwatchFactory.apply(watchdogTimerName);
                this.watchdogTimers.put(watchdogTimerName, stopwatch);
                stopwatch.start();
                LOG.info("Watchdog timeout: {}: {}", watchdogTimerName, this.getTimeout(watchdogTimerName));
            }
        }

        this.watchdogThread = new Thread(this::watchdog, "Watchdog");
        this.watchdogThread.start();
        LOG.info("Watchdog initialized");
    }

    /**
     * Tells whether the config explicitly turns the watchdog off. A {@code null} enabled flag
     * counts as enabled.
     */
    private boolean isDisabled() {
        return Boolean.FALSE.equals(this.config.getWatchdogEnabled());
    }

    /**
     * Looks up the timeout string for a timer by reflectively calling the config getter named
     * {@code "get" + timerName}.
     *
     * @param watchdogTimerName the timer whose timeout is wanted
     * @return the configured timeout string, or {@link #getTimeoutDefault()} when the getter is
     *         missing, fails, or returns {@code null}
     */
    private String getTimeout(WatchdogTimer watchdogTimerName) {
        String timeoutString = null;
        try {
            Method timeoutMethod =
                    this.config.getClass().getDeclaredMethod("get" + watchdogTimerName.toString());
            timeoutString = (String) timeoutMethod.invoke(this.config);
        } catch (Exception e) {
            // Log the failure and fall through to the default below
            LOG.error("Error encountered getting config field '{}'", watchdogTimerName, e);
        }
        if (timeoutString == null) {
            timeoutString = this.getTimeoutDefault();
        }
        return timeoutString;
    }

    /** @return the timeout string used when the config does not provide one */
    private String getTimeoutDefault() {
        return "10m";
    }

    /**
     * Body of the watchdog thread. Sleeps for {@code loopPeriod}, then checks every timer and
     * barks for each one whose elapsed time has reached its timeout. The loop itself never
     * restarts a timer; only {@link #reset(WatchdogTimer)} does. Exits when interrupted; any
     * other exception is logged and the loop carries on with the next pass.
     */
    private void watchdog() {
        while (true) {
            try {
                Thread.sleep(this.loopPeriod.toMillis());

                for (WatchdogTimer watchdogTimerName : WatchdogTimer.values()) {
                    // The timeout is re-read from the config on every pass
                    String timeoutString = this.getTimeout(watchdogTimerName);
                    Duration timeout = TimeInterval.parseFriendlyDuration(timeoutString);

                    Duration elapsed;
                    synchronized (this.watchdogTimers) {
                        elapsed = Duration.ofMillis(
                                this.watchdogTimers.get(watchdogTimerName).elapsed(TimeUnit.MILLISECONDS));
                    }

                    if (elapsed.toMillis() >= timeout.toMillis()) {
                        LOG.warn("Watchdog for {} has timed out after {} seconds (timeout is {} second(s))",
                                watchdogTimerName, String.format("%.1f", elapsed.toMillis() / 1000.0),
                                (int) (timeout.toMillis() / 1000.0));
                        // Bark outside the lock so the callback cannot block reset() callers
                        this.barkFunction.accept(watchdogTimerName);
                    }
                }
            } catch (InterruptedException e) {
                LOG.debug("Watchdog thread interrupted");
                break;
            } catch (Exception e) {
                LOG.error("Watchdog thread encountered exception, ignoring", e);
            }
        }
    }

    /**
     * Restarts the stopwatch of the given timer, which suppresses its bark until the timeout
     * elapses again. Does nothing when the watchdog is disabled or when the timer has no
     * stopwatch yet (i.e. {@link #initialize()} has not populated it).
     *
     * @param watchdogTimer the timer to restart
     */
    public void reset(WatchdogTimer watchdogTimer) {
        if (this.isDisabled()) {
            return;
        }

        synchronized (this.watchdogTimers) {
            Stopwatch stopwatch = this.watchdogTimers.get(watchdogTimer);
            if (stopwatch == null) {
                // Nothing to restart yet; avoid a NullPointerException in the caller's thread
                LOG.debug("Watchdog timer {} was reset before it was initialized, ignoring", watchdogTimer);
                return;
            }
            stopwatch.restart();
        }
    }

    /**
     * Stops the watchdog thread, if one was started. Interrupts the thread and waits up to five
     * seconds for it to finish, repeating until it is no longer alive. If the calling thread is
     * itself interrupted while waiting, the wait is abandoned and the caller's interrupt status
     * is restored.
     */
    public void destroy() {
        if (this.watchdogThread == null) {
            return;
        }

        while (true) {
            this.watchdogThread.interrupt();
            try {
                this.watchdogThread.join(Duration.ofSeconds(5).toMillis());
            } catch (InterruptedException e) {
                LOG.error("Watchdog.destroy() interrupted before it could confirm shut down");
                // Preserve the interrupt for whoever called destroy()
                Thread.currentThread().interrupt();
                break;
            }
            if (!this.watchdogThread.isAlive()) {
                break;
            }
            LOG.error("Could not interrupt watchdog thread, trying again...");
        }
        this.watchdogThread = null;
    }
}