
oracle.kv.impl.sna.ProcessMonitor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of oracle-nosql-server Show documentation
Show all versions of oracle-nosql-server Show documentation
NoSQL Database Server - supplies build and runtime support for the server (store) side of the Oracle NoSQL Database.
The newest version!
/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.impl.sna;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Logger;
import oracle.kv.impl.fault.ProcessExitCode;
import oracle.kv.impl.util.CommonLoggerUtils;
/**
* The class used to create, manage and restart processes.
*
* An instance of this object is created for each process to be managed. The
* caller is responsible for the command to be executed, setting a restart
* count, and the initial creation of the process, for example:
*
* List command = new ArrayList();
* command.add("/bin/sleep");
* command.add("1");
* ProcessMonitor monitor = new ProcessMonitor(command, -1);
* monitor.startProcess();
*
* Internal objects and threads are used to monitor and restart the process if
* desired. This will happen if the process terminates normally or abnormally.
* A restart count of -1 means restart indefinitely.
*
* Note: exit/restart of a managed process will not change the command line
* arguments used for starting it. Those are cached in this object. The major
* thing affected is JVMParams such as arguments to Java or logging
* configuration. This behavior could be changed but it's simple at this time.
*
* The caller can explicitly stop the process which will not result in restart.
*
* If an excessive number of process restarts are detected the managed service
* will eventually be permanently stopped. The algorithm, which may need
* tuning over time is:
* If there have been more than RESTART_MAX restarts in a period of
* RESTART_MILLIS milliseconds, stop restarting.
*/
public class ProcessMonitor {
private final ReentrantLock lock;
private Logger logger;
private List command;
private final Map env;
private ArrayList restarts;
private String serviceName;
private MonitorThread monitorThread;
private IOThread ioThread;
private int restartCount;
private ProcessState state;
private Process process;
private int totalRestarts;
private int exitCode;
protected StringBuilder startupBuffer;
/**
* Start with 5 restarts in under 60 seconds as a problem. TODO: tune this
* to maybe have both short- and long-term triggers. A long-term trigger
* could just log a warning that something's wrong (e.g. 1 restart/hour
* isn't horrible but probably means something is up).
*/
private static final int RESTART_RESET=30;
private static final int RESTART_MAX=5;
private static final long RESTART_MILLIS=60*1000;
enum ProcessState {
DOWN, RUNNING, STOPPING
}
public ProcessMonitor(List command,
Map env,
int restartCount,
String serviceName,
Logger logger) {
this.lock = new ReentrantLock();
this.restartCount = restartCount;
this.logger = logger;
this.command = command;
this.env = env;
this.serviceName = serviceName;
this.process = null;
this.monitorThread = null;
this.ioThread = null;
this.state = ProcessState.DOWN;
this.totalRestarts = 0;
this.restarts = new ArrayList();
this.exitCode = 0;
}
/**
* A convenience overloading of the above for cases where there is no
* environment associated with the process.
*/
public ProcessMonitor(List command,
int restartCount,
String serviceName,
Logger logger) {
this(command, null, restartCount, serviceName, logger);
}
public void reset(List newCommand,
String newServiceName) {
this.command = newCommand;
this.serviceName = newServiceName;
}
public void dontRestart() {
lock.lock();
restartCount = 0;
lock.unlock();
}
public boolean canRestart() {
return (restartCount != 0);
}
public boolean isRunning() {
return (state != ProcessState.DOWN);
}
public int getExitCode() {
return exitCode;
}
public void resetLogger(Logger logger1) {
this.logger = logger1;
}
private void logFine(String msg) {
if (logger != null) {
logger.fine(serviceName + ": ProcessMonitor: " + msg);
}
}
private void logInfo(String msg) {
if (logger != null) {
logger.info(serviceName + ": ProcessMonitor: " + msg);
}
}
private void logSevere(String msg) {
if (logger != null) {
logger.severe(serviceName + ": ProcessMonitor: " + msg);
}
}
protected void afterStart() {
/* no-op */
}
protected void onRestart() {
/* no-op */
}
protected void onExit() {
/* no-op */
}
public void startProcess()
throws IOException {
lock.lock();
try {
if (state == ProcessState.DOWN) {
ProcessBuilder builder = new ProcessBuilder(command);
if (env != null) {
final Map penv = builder.environment();
penv.putAll(env);
}
builder.redirectErrorStream(true);
process = builder.start();
state = ProcessState.RUNNING;
logInfo("startProcess");
ioThread = new IOThread("SNA.io." + serviceName);
ioThread.start();
monitorThread =
new MonitorThread("SNA.monitor." + serviceName, true);
monitorThread.start();
}
} finally {
lock.unlock();
}
afterStart();
}
/**
* Stop a running process. The process may still be running or it may have
* exited; this method cleans up the object and threads in both cases.
* This is complicated by the need to synchronize access from both the
* MonitorThread and the owning thread. If called by the owning thread the
* process will not be restarted by the MonitorThread. That case is
* simpler. Trickier races occur if the MonitorThread is in the middle of
* stopping/restarting the process and the owning thread calls. There is
* one window where the lock allows the owning thread in. If that occurs
* the restartCount will have been set to 0 causing the MonitorThread to
* simply exit if it is not interrupted.
*/
public void stopProcess(boolean isMonitor)
throws InterruptedException {
if (!isMonitor) {
logInfo("stopProcess");
}
lock.lock();
/* Setting restartCount to 0 ensures that the process won't restart. */
if (!isMonitor) {
restartCount = 0;
}
/**
* If the owning thread is stopping the process and the monitor calls,
* just return. The monitor will eventually exit. If the monitor is
* stopping the process and the owning thread calls, continue on so
* that the process is not restarted and the monitor is killed.
*/
if (state == ProcessState.DOWN ||
(state != ProcessState.RUNNING && isMonitor)) {
/* The process is already stopping or down. */
lock.unlock();
return;
}
state = ProcessState.STOPPING;
if (process != null) {
process.destroy();
/* don't null the process object until after joins, below */
}
lock.unlock();
/* The lock is unlocked, allowing threads to exit. */
try {
/**
* No lock is held but the only threads that will set monitorThread
* and ioThread to null are the object owner and the MonitorThread
* itself, and the Thread.join() synchronizes that race.
*/
if (monitorThread != null && !isMonitor) {
monitorThread.join();
monitorThread = null;
}
if (ioThread != null) {
ioThread.join();
ioThread = null;
}
} catch (InterruptedException e) {
logInfo("Exception in stopProcess");
if (Thread.interrupted() && isMonitor) {
/* Rethrow if MonitorThread was interrupted. */
throw e;
}
}
lock.lock();
state = ProcessState.DOWN;
process = null;
lock.unlock();
}
public void restartProcess()
throws IOException {
logInfo("restartProcess called, totalRestarts is " + totalRestarts +
", restartCount is " + restartCount);
lock.lock();
try {
if (restartCount != 0) {
/* This will lock recursively. */
startProcess();
restarts.add(System.currentTimeMillis());
++totalRestarts;
if (restartCount > 0) {
--restartCount;
}
}
} finally {
lock.unlock();
}
}
/**
* Wait for the managed process to exit to synchronize shutdown. This is
* called by the ProcessServiceManager when a service is shut down cleanly.
*/
public boolean waitProcess(long millis)
throws InterruptedException {
boolean retval = true;
if (monitorThread != null) {
logFine("waiting for MonitorThread");
monitorThread.join(millis);
if (monitorThread != null && monitorThread.isAlive()) {
retval = false;
}
monitorThread = null;
}
if (ioThread != null && retval == true) {
logFine("waiting for IOThread");
ioThread.join(millis);
if (ioThread != null && ioThread.isAlive()) {
retval = false;
}
ioThread = null;
}
return retval;
}
/**
* Forcibly terminate the process. This should only be called if an
* organized stop fails.
*/
public void destroyProcess() {
lock.lock();
Process p = process;
lock.unlock();
if (p != null) {
p.destroy();
}
}
/**
* The class responsible for monitoring a Process and restarting it on
* exit. A new instance of this thread/object is created on each restart.
*/
class MonitorThread extends Thread {
private final boolean useExitCode;
private MonitorThread(String name, boolean useExitCode) {
super(name);
this.useExitCode = useExitCode;
}
/**
* Should the process be restarted? Default is yes.
*/
private boolean okToRestart(int exitStatus) {
if (useExitCode &&
! ProcessExitCode.needsRestart(exitStatus)) {
logInfo("exit code:" + exitStatus);
return false;
}
logInfo("Process restart requested; exit code:" +
exitStatus +
((exitStatus == ProcessExitCode.RESTART_OOME.getValue()) ?
" Process experienced an OOME." : ""));
if (restartCount == 0) {
logInfo("restart count is 0");
return false;
}
return checkExcessiveRestarts();
}
/**
* Determine if there have been too many restarts of this process
* based on algorithm vs number.
*
* Return true if it is OK to restart (not too many restarts).
* Return false (bad) if there have been too many restarts.
*/
private boolean checkExcessiveRestarts() {
boolean ret = true;
if (restarts.size() >= RESTART_MAX) {
long last = restarts.get(restarts.size()-1);
long first = restarts.get(restarts.size()-RESTART_MAX);
if (last - first < RESTART_MILLIS) {
logSevere("excessive restarts (" + restarts +
"), disabling service");
dontRestart();
ret = false;
}
}
if (restarts.size() >= RESTART_RESET) {
/* Reset the list to avoid unbound growth */
restarts = new ArrayList();
}
return ret;
}
@Override
public void run() {
try {
assert (process != null);
exitCode = process.waitFor();
logInfo("exited, exit code: " + exitCode);
/**
* Let the IOThread exit -- it may have useful things to say if
* the exit occurred during startup.
*/
if (ioThread != null) {
ioThread.join();
ioThread = null;
}
/**
* If the process was explicitly stopped the stop and restart
* will be no-ops because the process state will be DOWN and
* the restart count will have been zeroed.
*/
stopProcess(true);
/**
* Restart or not? Default is to restart.
* TODO: manufacture ERROR_* ServiceStatus for the service
*/
if (okToRestart(exitCode)) {
onRestart();
restartProcess();
} else {
onExit();
logInfo("not restarting");
}
} catch (Exception e) {
String msg = "Unexpected exception in MonitorThread: " +
e + CommonLoggerUtils.getStackTrace(e);
logSevere(msg);
}
}
}
/**
* The class responsible for handling output from a managed Process. A new
* instance of this thread/object is created on each restart.
*/
class IOThread extends Thread {
public IOThread(String name) {
super(name);
}
@Override
public void run() {
try {
boolean startupOK = false;
/**
* Small delay to start to give the process a chance to do
* something.
*/
try {
Thread.sleep(500);
} catch (InterruptedException ignored) {}
/**
* Process may have already been stopped and nulled.
*/
startupBuffer = new StringBuilder(512);
logFine("IOThread initializing startup buffer");
BufferedReader reader = null;
lock.lock();
if (process != null) {
reader = new BufferedReader
(new InputStreamReader(process.getInputStream()));
} else {
logInfo("IOthread: no process, exiting");
lock.unlock();
return;
}
lock.unlock();
for (String line = reader.readLine();
line != null;
line = reader.readLine()) {
/**
* Logging output in the new process should only be
* generated by the SNA. Once the real service takes over
* it will log to its own files.
*/
logInfo(line);
if (line.contains(ManagedService.STARTUP_OK)) {
startupOK = true;
startupBuffer = null;
logFine("IOThread clearing startup buffer");
}
if (!startupOK) {
startupBuffer.append("\n" + line);
}
}
} catch (Exception e) {
logInfo("IOThread exception: " +
e.getMessage());
}
logInfo("IOThread exiting");
}
void closeInput()
throws IOException {
/* Provoke an IO exception to cause the thread to exit. */
process.getInputStream().close();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy