// ru.taskurotta.service.recovery.impl.RecoveryServiceImpl Maven / Gradle / Ivy
package ru.taskurotta.service.recovery.impl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ru.taskurotta.service.console.model.Process;
import ru.taskurotta.service.dependency.DependencyService;
import ru.taskurotta.service.dependency.links.Graph;
import ru.taskurotta.service.dependency.links.GraphDao;
import ru.taskurotta.service.gc.GarbageCollectorService;
import ru.taskurotta.service.queue.QueueService;
import ru.taskurotta.service.recovery.RecoveryService;
import ru.taskurotta.service.storage.BrokenProcessService;
import ru.taskurotta.service.storage.ProcessService;
import ru.taskurotta.service.storage.TaskService;
import ru.taskurotta.transport.model.ArgContainer;
import ru.taskurotta.transport.model.DecisionContainer;
import ru.taskurotta.transport.model.ErrorContainer;
import ru.taskurotta.transport.model.TaskContainer;
import ru.taskurotta.transport.utils.TransportUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * {@link RecoveryService} implementation that tries to bring stalled or broken processes back to work.
 * <p>
 * Depending on what is found for a process it either re-enqueues its failed tasks, re-enqueues its
 * not finished tasks, force-finishes it (when the graph is already finished) or restarts it from
 * the start task.
 * <p>
 * User: stukushin
 * Date: 21.10.13
 * Time: 18:24
 */
public class RecoveryServiceImpl implements RecoveryService {

    private static final Logger logger = LoggerFactory.getLogger(RecoveryServiceImpl.class);

    // Activity counters. They are only incremented inside this class; kept public and non-final
    // exactly as before so that any existing external usage keeps compiling.
    public static AtomicInteger restartedProcessesCounter = new AtomicInteger();
    public static AtomicInteger restartedTasksCounter = new AtomicInteger();
    public static AtomicInteger resurrectedTasksCounter = new AtomicInteger();

    private QueueService queueService;
    private DependencyService dependencyService;
    private ProcessService processService;
    private TaskService taskService;
    private BrokenProcessService brokenProcessService;
    private GarbageCollectorService garbageCollectorService;

    // time out between recovery process in milliseconds
    private long recoveryProcessChangeTimeout;
    // subtracted from "now" to get the moment of the previous recovery run (see restartProcessTasks)
    private long findIncompleteProcessPeriod;

    public RecoveryServiceImpl() {
    }

    public RecoveryServiceImpl(QueueService queueService, DependencyService dependencyService,
                               ProcessService processService, TaskService taskService,
                               BrokenProcessService brokenProcessService,
                               GarbageCollectorService garbageCollectorService,
                               long recoveryProcessChangeTimeout, long findIncompleteProcessPeriod) {
        this.queueService = queueService;
        this.dependencyService = dependencyService;
        this.processService = processService;
        this.taskService = taskService;
        this.brokenProcessService = brokenProcessService;
        this.garbageCollectorService = garbageCollectorService;
        this.recoveryProcessChangeTimeout = recoveryProcessChangeTimeout;
        this.findIncompleteProcessPeriod = findIncompleteProcessPeriod;
    }

    /**
     * Re-enqueues every ready task of the process whose stored decision carries a fatal error.
     * When at least one task was re-enqueued the process is removed from the broken process
     * store and marked as started.
     *
     * @param processId id of the process to inspect
     * @return {@code true} if at least one task has been placed to a queue
     */
    public boolean restartBrokenTasks(final UUID processId) {
        Graph graph = dependencyService.getGraph(processId);
        if (graph == null) {
            // without a graph there are no ready items to inspect; the caller falls back to the general way
            logger.warn("restartBrokenTasks({}) graph was not found, nothing to restart", processId);
            return false;
        }

        final Map<UUID, Long> allReadyTaskIds = getAllReadyTaskIds(graph, true);
        if (logger.isDebugEnabled()) {
            logger.debug("restartBrokenTasks({}) getAllReadyTaskIds.size() = {}", processId, allReadyTaskIds.size());
        }

        boolean result = false;
        for (UUID taskId : allReadyTaskIds.keySet()) {
            logger.debug("restartBrokenTasks({}) analise task = {}", processId, taskId);

            DecisionContainer taskDecision = taskService.getDecision(taskId, processId);
            // skip tasks without decision
            if (taskDecision == null) {
                continue;
            }

            // skip decisions without error
            if (!taskDecision.containsError()) {
                logger.debug("{}/{} Can not resurrect task. Task has no error", taskId, processId);
                continue;
            }

            // skip not fatal errors
            ErrorContainer errorContainer = taskDecision.getErrorContainer();
            if (!errorContainer.isFatalError()) {
                logger.debug("{}/{} Can not resurrect task. Task has not fatal error", taskId, processId);
                continue;
            }

            TaskContainer taskContainer = taskService.getTask(taskId, processId);
            if (taskContainer == null) {
                // checked before retryTask() so that task state is not changed for a task we can not enqueue
                logger.warn("{}/{} Can not resurrect task. Task container was not found", taskId, processId);
                continue;
            }

            if (taskService.retryTask(taskId, processId, System.currentTimeMillis())) {
                queueService.enqueueItem(taskContainer.getActorId(), taskId, processId, -1L,
                        TransportUtils.getTaskList(taskContainer));
                result = true;
                logger.debug("restartBrokenTasks({}) enqueue task = {}", processId, taskId);
                resurrectedTasksCounter.incrementAndGet();
            } else {
                logger.warn("{}/{} Can not resurrect task. taskService.retryTask() return is false", taskId, processId);
            }
        }

        if (result) {
            brokenProcessService.delete(processId);
            // todo: process can receive new broken tasks before this point
            processService.markProcessAsStarted(processId);
        }

        return result;
    }

    /**
     * Tries to recover a single process.
     * <ul>
     * <li>broken process: first tries {@link #restartBrokenTasks(UUID)};</li>
     * <li>no graph: restarts the process from its start task;</li>
     * <li>finished graph but not finished process: force-finishes the process;</li>
     * <li>graph changed recently: does nothing;</li>
     * <li>otherwise: restarts not finished tasks (or the whole process when task data is missing).</li>
     * </ul>
     *
     * @param processId id of the process to recover
     * @return {@code true} if some tasks have been placed to a queue
     */
    @Override
    public boolean resurrectProcess(final UUID processId) {
        logger.trace("#[{}]: try to restart process", processId);

        Process process = processService.getProcess(processId);
        if (process == null) {
            // nothing is known about the process, so there is nothing to restart or to finish
            logger.warn("#[{}]: process was not found, skip it", processId);
            return false;
        }

        // check Broken process
        if (process.getState() == Process.BROKEN && restartBrokenTasks(processId)) {
            return true;
        }
        // else try to resurrect process in general way

        Graph graph = dependencyService.getGraph(processId);

        if (graph == null) {
            // have only process service info => restart whole process
            logger.warn("#[{}]: graph was not found (possible data loss?), try to restart process from start task", processId);
            return restartProcessFromBeginning(processId);
        }

        if (graph.isFinished()) {
            // process is already finished => just mark process as finished
            // check if Process is finished because Graph are marked as finished before Process
            if (process.getState() == Process.FINISH) {
                logger.debug("#[{}]: is finished, just skip it", processId);
                return false;
            }

            logger.debug("#[{}]: isn't finished, but graph is finished, force finish process", processId);
            TaskContainer startTaskContainer = processService.getStartTask(processId);
            if (startTaskContainer == null) {
                logger.error("#[{}]: start task was not found, can not force finish process", processId);
                return false;
            }
            finishProcess(processId, startTaskContainer.getTaskId(), graph.getProcessTasks());
            // finishing a process places nothing to a queue
            return false;
        }

        if (hasRecentActivity(graph)) {
            // was restarted or updated recently => leave it alone for now
            logger.debug("#[{}]: graph was recently applied or recovered, skip it", processId);
            return false;
        }

        // require restart => try to find process's tasks for restart
        final Collection<TaskContainer> taskContainers = findIncompleteTaskContainers(graph);
        if (taskContainers == null) {
            // there is a problem in task store => restart process
            logger.warn("#[{}]: task containers were not found (possible data loss?), try to restart process from start task", processId);
            return restartProcessFromBeginning(processId);
        }

        // restart unfinished tasks; single element array lets the anonymous updater report its outcome
        final boolean[] tasksRestarted = new boolean[1];
        logger.debug("#[{}]: try to update graph", processId);

        boolean graphUpdated = dependencyService.changeGraph(new GraphDao.Updater() {
            @Override
            public UUID getProcessId() {
                return processId;
            }

            @Override
            public boolean apply(Graph graphToUpdate) {
                graphToUpdate.setTouchTimeMillis(System.currentTimeMillis());
                if (logger.isDebugEnabled()) {
                    long touchTime = graphToUpdate.getTouchTimeMillis();
                    logger.debug("#[{}]: update touch time to [{} ({})]", processId, touchTime, new Date(touchTime));
                }

                int restartResult = restartProcessTasks(taskContainers, processId);
                restartedTasksCounter.addAndGet(restartResult);
                tasksRestarted[0] = restartResult > 0;
                return true;
            }
        });

        logger.debug("#[{}]: has been recovered, graph update result [{}]", processId, graphUpdated);
        return tasksRestarted[0];
    }

    /**
     * Checks whether the graph was applied or touched less than {@code recoveryProcessChangeTimeout}
     * milliseconds ago.
     *
     * @param graph graph to check, may be {@code null}
     * @return {@code true} if the last change is younger than the timeout; {@code false} for a
     * {@code null} graph or a graph without recorded changes
     */
    private boolean hasRecentActivity(Graph graph) {
        if (graph == null) {
            return false;
        }

        long lastChange = Math.max(graph.getLastApplyTimeMillis(), graph.getTouchTimeMillis());
        if (lastChange <= 0) {
            return false;
        }

        //has some modifications, check if they expired
        long changeTimeout = System.currentTimeMillis() - lastChange;
        logger.debug("#[{}]: activity check for graph: change timeout[{}], last change[{}]", graph.getGraphId(), changeTimeout, lastChange);

        // todo: may be we not need "recoveryProcessChangeTimeout" property?
        // we can have two properties: process-finish-timeout and process-idle-timeout.
        // And have different recovery strategies for each other.
        // to find processes of process-idle-timeout we can use mongodb Graph collection
        return changeTimeout < recoveryProcessChangeTimeout;
    }

    /**
     * Runs {@link #resurrectProcess(UUID)} for every id and removes the successfully restarted
     * processes from the broken process store.
     *
     * @param processIds ids of the processes to recover
     * @return ids of the processes for which {@link #resurrectProcess(UUID)} returned {@code true}
     */
    @Override
    public Collection<UUID> resurrectProcesses(Collection<UUID> processIds) {
        Collection<UUID> successfullyRestartedProcesses = new ArrayList<>();

        for (UUID processId : processIds) {
            if (resurrectProcess(processId)) {
                successfullyRestartedProcesses.add(processId);
            }
        }

        brokenProcessService.deleteCollection(successfullyRestartedProcesses);
        return successfullyRestartedProcesses;
    }

    /**
     * Checks the given tasks and re-enqueues those that are ready to be recovered.
     * <p>
     * Tasks that are not ready are removed from the passed collection. If a task with a fatal
     * error is met, no task is restarted. If a task can not be prepared for execution, the whole
     * process is restarted from the beginning instead and no task is restarted.
     *
     * @param taskContainers candidate tasks; the collection is modified by this method
     * @param processId      id of the owning process
     * @return number of tasks placed to a queue
     */
    private int restartProcessTasks(Collection<TaskContainer> taskContainers, UUID processId) {
        logger.trace("#[{}]: try to restart [{}] task containers", processId, taskContainers);

        int restartedTasks = 0;

        if (taskContainers != null && !taskContainers.isEmpty()) {
            long lastRecoveryStartTime = System.currentTimeMillis() - findIncompleteProcessPeriod;
            boolean restartAllowed = true;

            // check tasks
            for (Iterator<TaskContainer> it = taskContainers.iterator(); it.hasNext(); ) {
                TaskContainer candidate = it.next();
                UUID taskId = candidate.getTaskId();

                DecisionContainer decisionContainer = taskService.getDecision(taskId, processId);
                if (decisionContainer != null) {
                    ErrorContainer errorContainer = decisionContainer.getErrorContainer();
                    if (errorContainer != null && errorContainer.isFatalError()) {
                        // process is broken now. Skip it.
                        // Remaining tasks are still checked, but nothing will be restarted afterwards.
                        restartAllowed = false;
                        continue;
                    }
                }

                if (!isReadyToRecover(processId, taskId, candidate.getStartTime(), candidate.getActorId(),
                        TransportUtils.getTaskList(candidate), lastRecoveryStartTime)) {
                    // remove not ready task from collection
                    it.remove();
                    continue;
                }

                // try to prepare task
                boolean prepared;
                try {
                    prepared = taskService.getTaskToExecute(taskId, processId, true) != null;
                } catch (IllegalStateException ex) {
                    prepared = false;
                }

                // Need we restart process?
                if (!prepared) {
                    if (!restartProcessFromBeginning(processId)) {
                        logger.error("Can not restart process from beginning. Process has ready task without " +
                                "consistent arguments. Process id = {} task id = {}", processId, taskId);
                    }

                    // drop tasks. We not need them in cause its process has been restarted.
                    restartAllowed = false;
                    break;
                }
            }

            // Should we restart task?
            if (restartAllowed) {
                for (TaskContainer taskContainer : taskContainers) {
                    UUID taskId = taskContainer.getTaskId();
                    long startTime = taskContainer.getStartTime();
                    String taskList = TransportUtils.getTaskList(taskContainer);
                    String actorId = taskContainer.getActorId();

                    if (!taskService.restartTask(taskId, processId, System.currentTimeMillis(), false)) {
                        logger.debug("#[{}]/[{}]: can not restart task. taskService.restartTask() operation is false",
                                processId, taskId);
                        continue;
                    }

                    if (queueService.enqueueItem(actorId, taskId, processId, startTime, taskList)) {
                        logger.debug("#[{}]/[{}]: task container [{}] have been restarted", processId, taskId,
                                taskContainer);
                        restartedTasks++;
                    } else {
                        logger.debug("#[{}]/[{}]: can not restart task. enqueue operation is false", processId,
                                taskId);
                    }
                }
            }
        }

        logger.debug("#[{}]: complete restart of [{}] tasks", processId, restartedTasks);
        return restartedTasks;
    }

    /**
     * Decides whether a task may be re-enqueued now.
     * <p>
     * A task is not ready when it is scheduled for the future, when its queue has never been
     * polled, when the queue has not been polled since the previous recovery run, or when the last
     * polled item was enqueued before the task's own start time.
     *
     * @param processId             id of the owning process (used for logging)
     * @param taskId                id of the task (used for logging)
     * @param startTime             task start time in milliseconds
     * @param actorId               actor id used to build the queue name
     * @param taskList              task list used to build the queue name
     * @param lastRecoveryStartTime moment of the previous recovery run in milliseconds
     * @return {@code true} if the task may be restarted
     */
    private boolean isReadyToRecover(UUID processId, UUID taskId, long startTime, String actorId, String taskList,
                                     long lastRecoveryStartTime) {
        logger.trace("#[{}]/[{}]: check if task ready to restart", processId, taskId);

        // task must be started in future => skip it
        // recovery iterations may take some time so check current date here
        if (startTime > System.currentTimeMillis()) {
            if (logger.isDebugEnabled()) {
                logger.debug("#[{}]/[{}]: must be started later at [{}]", processId, taskId, new Date(startTime));
            }
            return false;
        }

        //task is OK but it should be checked if queue is ready
        String queueName = queueService.createQueueName(actorId, taskList);
        long lastEnqueueTime = queueService.getLastPolledTaskEnqueueTime(queueName);

        // is never polled? => not ready
        if (lastEnqueueTime <= 0L) {
            logger.debug("#[{}]/[{}]: skip process restart, because queue [{}] is not polled by any actor", processId, taskId, queueName);
            return false;
        }

        // not polled since last recovery? => has no workers, still filled with old tasks => not ready
        if (lastEnqueueTime < lastRecoveryStartTime) {
            if (logger.isDebugEnabled()) {
                logger.debug("#[{}]/[{}]: skip process restart, because queue not polled since last recovery " +
                                "activity, queue [{}] " +
                                "(last enqueue time [{}], last recovery start time [{}])",
                        processId, taskId, queueName, lastEnqueueTime, lastRecoveryStartTime);
            }
            return false;
        }

        //still filled with older tasks => this task already in queue
        if (lastEnqueueTime < startTime) {
            // todo: check decision analise time for this queueName
            if (logger.isDebugEnabled()) {
                logger.debug("#[{}]/[{}]: skip process restart, because earlier tasks in queue [{}] (last enqueue " +
                                "time [{}], last task start time [{}])",
                        processId, taskId, queueName, lastEnqueueTime, startTime);
            }
            return false;
        }

        //every other task is considered ready
        return true;
    }

    /**
     * Reads all ready items of the graph through {@code dependencyService.changeGraph()}.
     *
     * @param graph      graph whose id selects the graph to read, must not be {@code null}
     * @param touchGraph when {@code true} the graph touch time is set to "now" and the updater
     *                   reports a change
     * @return copy of the ready items keyed by task id
     */
    // NOTE(review): the value type Long is restored by assumption - confirm against Graph.getAllReadyItems()
    private Map<UUID, Long> getAllReadyTaskIds(final Graph graph, final boolean touchGraph) {
        final Map<UUID, Long> allReadyTaskIds = new HashMap<>();

        dependencyService.changeGraph(new GraphDao.Updater() {
            @Override
            public UUID getProcessId() {
                return graph.getGraphId();
            }

            @Override
            public boolean apply(Graph graphToRead) {
                allReadyTaskIds.putAll(graphToRead.getAllReadyItems());

                if (!touchGraph) {
                    return false;
                }

                graphToRead.setTouchTimeMillis(System.currentTimeMillis());
                return true;
            }
        });

        return allReadyTaskIds;
    }

    /**
     * Loads task containers for all ready items of the graph.
     *
     * @param graph graph of the process, may be {@code null}
     * @return loaded task containers (possibly empty), or {@code null} if the graph is
     * {@code null} or at least one task container is missing in the task store
     */
    private Collection<TaskContainer> findIncompleteTaskContainers(Graph graph) {
        if (graph == null) {
            return null;
        }

        final UUID processId = graph.getGraphId();
        logger.trace("#[{}]: try to find incomplete tasks", processId);

        final Map<UUID, Long> allReadyTaskIds = getAllReadyTaskIds(graph, false);
        if (logger.isDebugEnabled()) {
            logger.debug("#[{}]: found [{}] not finished taskIds", processId, allReadyTaskIds.size());
        }

        Collection<TaskContainer> taskContainers = new ArrayList<>(allReadyTaskIds.size());
        for (UUID taskId : allReadyTaskIds.keySet()) {
            TaskContainer taskContainer = taskService.getTask(taskId, processId);
            if (taskContainer == null) {
                // null (not an empty collection) tells the caller that task data is inconsistent
                logger.warn("#[{}]/[{}]: not found task container in task repository", processId, taskId);
                return null;
            }

            logger.trace("#[{}]/[{}]: found not finished task container [{}]", processId, taskId, taskContainer);
            taskContainers.add(taskContainer);
        }

        if (logger.isDebugEnabled()) {
            logger.debug("#[{}]: found [{}] not finished task containers", processId, taskContainers.size());
        }

        return taskContainers;
    }

    /**
     * Restarts the process from its start task: restarts the start task, re-registers it in the
     * dependency and task services and places it to its queue.
     *
     * @param processId id of the process, may be {@code null}
     * @return {@code true} if the start task has been enqueued; {@code false} if {@code processId}
     * is {@code null}, the start task is missing or the enqueue operation failed
     */
    private boolean restartProcessFromBeginning(UUID processId) {
        if (processId == null) {
            return false;
        }

        TaskContainer startTaskContainer = processService.getStartTask(processId);
        if (startTaskContainer == null) {
            logger.error("#[{}]: start task was not found, can not restart process from beginning", processId);
            return false;
        }

        // emulate TaskServer.startProcess()
        UUID taskId = startTaskContainer.getTaskId();
        taskService.restartTask(taskId, processId, System.currentTimeMillis(), true);
        dependencyService.startProcess(startTaskContainer);
        taskService.startProcess(startTaskContainer);

        logger.debug("#[{}]: restart process from start task [{}]", processId, startTaskContainer);

        boolean enqueued = queueService.enqueueItem(startTaskContainer.getActorId(), taskId, processId,
                startTaskContainer.getStartTime(), TransportUtils.getTaskList(startTaskContainer));
        if (enqueued) {
            restartedProcessesCounter.incrementAndGet();
        }

        return enqueued;
    }

    /**
     * Finishes the process using the decision of its start task as the process result and hands
     * the process over to the garbage collector service. Does nothing except logging when the
     * start task has no decision.
     *
     * @param processId       id of the process to finish
     * @param startTaskId     id of the start task whose decision value becomes the process result
     * @param finishedTaskIds ids of the process tasks passed to the task service, may be
     *                        {@code null} or empty
     */
    private void finishProcess(UUID processId, UUID startTaskId, Collection<UUID> finishedTaskIds) {
        // save result to process storage
        DecisionContainer decisionContainer = taskService.getDecision(startTaskId, processId);
        if (decisionContainer == null) {
            logger.error("#[{}]/[{}]: decision container for start task is null, stop finishing process",
                    processId, startTaskId);
            return;
        }

        // a decision without a value gives a null process result instead of a NullPointerException
        ArgContainer argContainer = decisionContainer.getValue();
        String returnValue = argContainer == null ? null : argContainer.getJSONValue();
        processService.finishProcess(processId, returnValue);

        if (finishedTaskIds != null && !finishedTaskIds.isEmpty()) {
            taskService.finishProcess(processId, finishedTaskIds);
        }

        logger.debug("#[{}]: finish process. Save result [{}] from [{}] as process result", processId, returnValue, startTaskId);

        // send process to GC
        garbageCollectorService.collect(processId);
    }

    public void setDependencyService(DependencyService dependencyService) {
        this.dependencyService = dependencyService;
    }

    public void setProcessService(ProcessService processService) {
        this.processService = processService;
    }

    public void setBrokenProcessService(BrokenProcessService brokenProcessService) {
        this.brokenProcessService = brokenProcessService;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy