// com.dasasian.chok.master.OperatorThread (Maven / Gradle / Ivy)
/**
* Copyright (C) 2014 Dasasian ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dasasian.chok.master;
import com.dasasian.chok.operation.OperationId;
import com.dasasian.chok.operation.master.MasterOperation;
import com.dasasian.chok.operation.master.MasterOperation.ExecutionInstruction;
import com.dasasian.chok.protocol.MasterQueue;
import org.I0Itec.zkclient.ExceptionUtil;
import org.I0Itec.zkclient.exception.ZkInterruptedException;
import org.apache.log4j.Logger;
import java.util.List;
/**
* Responsible for executing all {@link MasterOperation}s which are offered to
* the master queue sequentially.
*/
class OperatorThread extends Thread {

    protected static final Logger LOG = Logger.getLogger(OperatorThread.class);

    private final MasterContext _context;
    private final MasterQueue _queue;
    private final OperationRegistry _registry;
    private final long _safeModeMaxTime;

    /**
     * Written by this thread in {@link #runInSafeMode()} and read by other
     * threads through {@link #isInSafeMode()}; volatile so that the reader is
     * guaranteed to observe the transition out of safe mode.
     */
    private volatile boolean _safeMode = true;

    /**
     * Creates the (daemon) operator thread. The thread is not started here.
     *
     * @param context         master context supplying the master queue and the protocol
     * @param safeModeMaxTime time in milliseconds the set of live nodes has to stay
     *                        unchanged before safe mode is left
     */
    public OperatorThread(final MasterContext context, final long safeModeMaxTime) {
        _context = context;
        _queue = context.getMasterQueue();
        _registry = new OperationRegistry(context);
        setDaemon(true);
        setName(getClass().getSimpleName());
        _safeModeMaxTime = safeModeMaxTime;
    }

    /**
     * @return {@code true} until {@link #runInSafeMode()} has finished (normally or
     *         exceptionally), {@code false} afterwards
     */
    public boolean isInSafeMode() {
        return _safeMode;
    }

    /**
     * @return the registry this thread hands operation watchdogs to
     */
    public OperationRegistry getOperationRegistry() {
        return _registry;
    }

    /**
     * @return the master context passed to the constructor
     */
    public MasterContext getContext() {
        return _context;
    }

    /**
     * Waits out safe mode, re-registers watchdogs already stored in the master
     * queue and then processes the queue head in an endless loop. The loop ends
     * when the thread is interrupted; the operation registry is shut down on
     * every exit path.
     */
    @Override
    public void run() {
        try {
            LOG.info("starting...");
            runInSafeMode();
            recreateWatchdogs();
            while (true) {
                try {
                    // TODO jz: poll only for a certain amount of time and then execute a
                    // global check operation ?
                    // NOTE(review): peek() presumably blocks until an operation is
                    // available - confirm against MasterQueue.
                    MasterOperation operation = _queue.peek();
                    List<OperationId> nodeOperationIds = null;
                    try {
                        List<MasterOperation> runningOperations = _registry.getRunningOperations();
                        ExecutionInstruction instruction = operation.getExecutionInstruction(runningOperations);
                        nodeOperationIds = executeOperation(operation, instruction, runningOperations);
                    } catch (Exception e) {
                        // interrupts must end the thread; any other failure is logged and
                        // the operation is dropped from the queue below
                        ExceptionUtil.rethrowInterruptedException(e);
                        LOG.error("failed to execute " + operation, e);
                    }
                    if (nodeOperationIds != null && !nodeOperationIds.isEmpty()) {
                        // the operation spawned node operations: keep watching it
                        OperationWatchdog watchdog = _queue.moveOperationToWatching(operation, nodeOperationIds);
                        _registry.watchFor(watchdog);
                    } else {
                        // nothing to watch: discard the queue head
                        _queue.remove();
                    }
                } catch (Throwable e) {
                    ExceptionUtil.rethrowInterruptedException(e);
                    LOG.fatal("master operation failure", e);
                }
            }
        } catch (final InterruptedException | ZkInterruptedException e) {
            // clear the interrupt flag and let go the thread
            Thread.interrupted();
        } finally {
            // also runs when an unexpected exception escapes safe mode or watchdog
            // recreation, so the registry is never left running without its operator
            _registry.shutdown();
            LOG.info("operator thread stopped");
        }
    }

    /**
     * Goes through the watchdogs stored in the master queue: finished ones are
     * removed from the queue, all others are registered with the operation
     * registry again.
     */
    private void recreateWatchdogs() {
        List<OperationWatchdog> watchdogs = _context.getMasterQueue().getWatchdogs();
        for (OperationWatchdog watchdog : watchdogs) {
            if (watchdog.isDone()) {
                LOG.info("release done watchdog " + watchdog);
                _queue.removeWatchdog(watchdog);
            } else {
                _registry.watchFor(watchdog);
            }
        }
    }

    /**
     * Applies the given execution instruction to the operation.
     *
     * @param operation         the operation taken from the head of the master queue
     * @param instruction       what to do with the operation
     * @param runningOperations the operations currently tracked by the registry
     * @return the ids returned by {@link MasterOperation#execute} for {@code EXECUTE},
     *         {@code null} for {@code CANCEL} and {@code ADD_TO_QUEUE_TAIL}
     * @throws IllegalStateException if the instruction is not one of the handled values
     * @throws Exception             whatever the operation's execution throws
     */
    private List<OperationId> executeOperation(MasterOperation operation, ExecutionInstruction instruction,
            List<MasterOperation> runningOperations) throws Exception {
        List<OperationId> operationIds = null;
        switch (instruction) {
            case EXECUTE:
                LOG.info("executing operation '" + operation + "'");
                operationIds = operation.execute(_context, runningOperations);
                break;
            case CANCEL:
                // just do nothing
                LOG.info("skipping operation '" + operation + "'");
                break;
            case ADD_TO_QUEUE_TAIL:
                LOG.info("adding operation '" + operation + "' to end of queue");
                _queue.add(operation);
                break;
            default:
                throw new IllegalStateException("execution instruction " + instruction + " not handled");
        }
        return operationIds;
    }

    /**
     * Blocks until at least one live node is known and the number of live nodes
     * has not changed for {@code _safeModeMaxTime} milliseconds. The safe-mode
     * flag is cleared when this method exits, whether normally or by exception.
     *
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    private void runInSafeMode() throws InterruptedException {
        _safeMode = true;
        // TODO jz: use known nodes (protocol.getKnownNodes()) ?
        List<String> previousLiveNodes = _context.getProtocol().getLiveNodes();
        long lastChange = System.currentTimeMillis();
        // Poll four times per stability window, but never with a zero or negative
        // interval: sleep(0) would busy-spin and a negative value makes sleep throw.
        final long pollInterval = Math.max(1L, _safeModeMaxTime / 4);
        try {
            while (previousLiveNodes.isEmpty() || lastChange + _safeModeMaxTime > System.currentTimeMillis()) {
                LOG.trace("SAFE MODE: No nodes available or state unstable within the last " + _safeModeMaxTime + " ms.");
                Thread.sleep(pollInterval);// TODO jz: listen on life nodes ?
                List<String> currentLiveNodes = _context.getProtocol().getLiveNodes();
                // only the node count is compared; a simultaneous leave and join
                // between two polls is not seen as a change
                if (currentLiveNodes.size() != previousLiveNodes.size()) {
                    lastChange = System.currentTimeMillis();
                    previousLiveNodes = currentLiveNodes;
                }
            }
            LOG.info("SAFE MODE: leaving safe mode with " + previousLiveNodes.size() + " connected nodes");
        } finally {
            _safeMode = false;
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy