// org.jobrunr.server.ServerZooKeeper Maven / Gradle / Ivy
package org.jobrunr.server;
import org.jobrunr.server.dashboard.CpuAllocationIrregularityNotification;
import org.jobrunr.server.dashboard.DashboardNotificationManager;
import org.jobrunr.storage.BackgroundJobServerStatus;
import org.jobrunr.storage.ServerTimedOutException;
import org.jobrunr.storage.StorageProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.time.Instant;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import static org.jobrunr.server.DesktopUtils.hasSystemSleptRecently;
import static org.jobrunr.server.DesktopUtils.systemSupportsSleepDetection;
/**
 * {@link Runnable} that keeps a {@link BackgroundJobServer} registered with the {@link StorageProvider}.
 * <p>
 * Each invocation of {@link #run()} either announces the server (when it is still unannounced) or
 * signals that it is alive, removes servers that timed out and re-evaluates which server is master.
 * <p>
 * NOTE(review): this class is presumably invoked repeatedly by a scheduler owned by the
 * {@code BackgroundJobServer}; the scheduling itself is not visible in this file.
 */
public class ServerZooKeeper implements Runnable {

    private static final Logger LOGGER = LoggerFactory.getLogger(ServerZooKeeper.class);

    private final BackgroundJobServer backgroundJobServer;
    private final StorageProvider storageProvider;
    private final DashboardNotificationManager dashboardNotificationManager;
    /** Poll interval multiplied by the configured server-timeout multiplicand. */
    private final Duration timeoutDuration;
    /** Number of times a {@link ServerTimedOutException} has been handled by this instance. */
    private final AtomicInteger restartAttempts;
    /** Id of the server last seen as master; {@code null} until determined and after {@link #stop()}. */
    private UUID masterId;
    /** Heartbeat instant of the most recent announce / signal-alive call. */
    private Instant lastSignalAlive;
    /** Instant at which timed-out servers were last looked for. */
    private Instant lastServerTimeoutCheck;

    /**
     * Creates a zookeeper for the given server, taking the storage provider, the dashboard
     * notification manager and the timing configuration from it.
     *
     * @param backgroundJobServer the server this instance keeps alive in the storage provider
     */
    public ServerZooKeeper(BackgroundJobServer backgroundJobServer) {
        this.backgroundJobServer = backgroundJobServer;
        this.storageProvider = backgroundJobServer.getStorageProvider();
        this.dashboardNotificationManager = backgroundJobServer.getDashboardNotificationManager();
        this.timeoutDuration = backgroundJobServer.getConfiguration().getPollInterval()
                .multipliedBy(backgroundJobServer.getConfiguration().getServerTimeoutPollIntervalMultiplicand());
        this.restartAttempts = new AtomicInteger();
        this.lastSignalAlive = Instant.now();
        this.lastServerTimeoutCheck = Instant.now();
        LOGGER.trace(systemSupportsSleepDetection()
                ? "JobRunr can detect desktop sleeping."
                : "JobRunr can not detect desktop sleeping.");
    }

    /**
     * Performs one zookeeping cycle. Does nothing when the server is stopped. Any exception is
     * logged as unrecoverable and leads to the server being stopped on a separate thread.
     */
    @Override
    public void run() {
        if (backgroundJobServer.isStopped()) {
            return;
        }

        try {
            if (backgroundJobServer.isUnAnnounced()) {
                announceBackgroundJobServer();
            } else {
                signalBackgroundJobServerAliveAndDoZooKeeping();
            }
        } catch (Exception shouldNotHappen) {
            LOGGER.error("An unrecoverable error occurred. Shutting server down...", shouldNotHappen);
            // No master was ever determined: make that explicit on the server before stopping it.
            if (masterId == null) {
                backgroundJobServer.setIsMaster(null);
            }
            // NOTE(review): stopping on a new thread presumably avoids stopping the server from
            // the thread that runs this task - confirm against BackgroundJobServer.stop().
            new Thread(this::stopServer).start();
        }
    }

    /**
     * Signals to the storage provider that the server stopped. A failure to do so is only logged;
     * the known master id is cleared in every case.
     */
    public synchronized void stop() {
        try {
            storageProvider.signalBackgroundJobServerStopped(backgroundJobServer.getServerStatus());
        } catch (Exception e) {
            LOGGER.error("Error when signalling that BackgroundJobServer stopped", e);
        } finally {
            masterId = null;
        }
    }

    /** Announces the server to the storage provider, determines the master and records the heartbeat. */
    private void announceBackgroundJobServer() {
        final BackgroundJobServerStatus serverStatus = backgroundJobServer.getServerStatus();
        storageProvider.announceBackgroundJobServer(serverStatus);
        determineIfCurrentBackgroundJobServerIsMaster();
        lastSignalAlive = serverStatus.getLastHeartbeat();
    }

    /**
     * Signals the server alive, removes timed-out servers and re-determines the master.
     * <p>
     * When the storage provider reports this server as timed out, the server is restarted on a
     * separate thread for the first three occurrences and stopped on a separate thread afterwards.
     */
    private void signalBackgroundJobServerAliveAndDoZooKeeping() {
        try {
            signalBackgroundJobServerAlive();
            deleteServersThatTimedOut();
            determineIfCurrentBackgroundJobServerIsMaster();
        } catch (ServerTimedOutException e) {
            // Capture the attempt number once so the decision and the log message agree.
            final int attempt = restartAttempts.incrementAndGet();
            if (attempt <= 3) {
                LOGGER.error("SEVERE ERROR - Server timed out while it's still alive. Are all servers using NTP and in the same timezone? Are you having long GC cycles? Restart attempt {} out of 3", attempt);
                new Thread(this::resetServer).start();
            } else {
                LOGGER.error("FATAL - Server restarted 3 times but still times out by other servers. Shutting down.");
                new Thread(this::stopServer).start();
            }
        }
    }

    /**
     * Sends the current server status to the storage provider as heartbeat, raises a dashboard
     * notification when {@link #cpuAllocationIrregularity(Instant, Instant)} reports an
     * irregularity, and records the heartbeat instant.
     */
    private void signalBackgroundJobServerAlive() {
        final BackgroundJobServerStatus serverStatus = backgroundJobServer.getServerStatus();
        storageProvider.signalBackgroundJobServerAlive(serverStatus);
        cpuAllocationIrregularity(lastSignalAlive, serverStatus.getLastHeartbeat())
                .ifPresent(amountOfSeconds -> dashboardNotificationManager.notify(new CpuAllocationIrregularityNotification(amountOfSeconds)));
        lastSignalAlive = serverStatus.getLastHeartbeat();
    }

    /**
     * Asks the storage provider to remove timed-out servers, at most once per {@code timeoutDuration}.
     * <p>
     * The cut-off handed to the storage provider is the earlier of {@code now - timeoutDuration}
     * and {@code lastSignalAlive - 500ms}.
     */
    private void deleteServersThatTimedOut() {
        // A single clock reading is used for the rate-limit check, the cut-off and the bookkeeping.
        final Instant now = Instant.now();
        if (!now.isAfter(lastServerTimeoutCheck.plus(timeoutDuration))) {
            return;
        }

        final Instant defaultTimeoutInstant = now.minus(timeoutDuration);
        // NOTE(review): presumably guards against evicting peers when this server's own heartbeat
        // was late (e.g. after a pause) - confirm the intent of the 500 ms margin.
        final Instant timedOutInstantUsingLastSignalAlive = lastSignalAlive.minusMillis(500);
        final Instant timedOutInstant = min(defaultTimeoutInstant, timedOutInstantUsingLastSignalAlive);

        final int amountOfServersThatTimedOut = storageProvider.removeTimedOutBackgroundJobServers(timedOutInstant);
        if (amountOfServersThatTimedOut > 0) {
            LOGGER.info("Removed {} server(s) that timed out", amountOfServersThatTimedOut);
        }
        lastServerTimeoutCheck = now;
    }

    /**
     * Looks up the longest running server in the storage provider and, when it differs from the
     * master known so far, records it and updates the master flag of this server accordingly.
     */
    private void determineIfCurrentBackgroundJobServerIsMaster() {
        final UUID longestRunningBackgroundJobServerId = storageProvider.getLongestRunningBackgroundJobServerId();
        if (masterId != null && masterId.equals(longestRunningBackgroundJobServerId)) {
            return; // master unchanged, nothing to update or log
        }

        masterId = longestRunningBackgroundJobServerId;
        if (masterId.equals(backgroundJobServer.getId())) {
            backgroundJobServer.setIsMaster(true);
            LOGGER.info("Server {} is master (this BackgroundJobServer)", masterId);
        } else {
            backgroundJobServer.setIsMaster(false);
            LOGGER.info("Server {} is master (another BackgroundJobServer)", masterId);
        }
    }

    /** Stops and then starts the server again. */
    private void resetServer() {
        backgroundJobServer.stop();
        backgroundJobServer.start();
    }

    /** Stops the server. */
    private void stopServer() {
        backgroundJobServer.stop();
    }

    /**
     * Returns the earlier of the two instants; {@code instant2} when they are equal.
     */
    private static Instant min(Instant instant1, Instant instant2) {
        return instant1.isBefore(instant2) ? instant1 : instant2;
    }

    /**
     * Detects an unusually large gap between heartbeats.
     * <p>
     * Three gaps are measured in whole seconds: between the two given instants (absolute value),
     * between now and {@code lastSignalAlive}, and between now and {@code lastHeartbeat}. When the
     * largest of them exceeds twice the poll interval, that value is returned.
     *
     * @param lastSignalAlive heartbeat instant recorded by the previous cycle
     * @param lastHeartbeat   heartbeat instant of the current server status
     * @return the largest gap in seconds, or empty when it is within bounds or when sleep
     *         detection is supported and the system slept recently
     */
    private Optional<Integer> cpuAllocationIrregularity(Instant lastSignalAlive, Instant lastHeartbeat) {
        if (systemSupportsSleepDetection() && hasSystemSleptRecently()) {
            return Optional.empty();
        }

        final Instant now = Instant.now();
        final int amount1OfSec = (int) Math.abs(lastHeartbeat.getEpochSecond() - lastSignalAlive.getEpochSecond());
        final int amount2OfSec = (int) (now.getEpochSecond() - lastSignalAlive.getEpochSecond());
        final int amount3OfSec = (int) (now.getEpochSecond() - lastHeartbeat.getEpochSecond());

        final int max = Math.max(amount1OfSec, Math.max(amount2OfSec, amount3OfSec));
        if (max > backgroundJobServer.getConfiguration().getPollInterval().getSeconds() * 2L) {
            return Optional.of(max);
        }
        return Optional.empty();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy