// com.alibaba.dts.client.executor.grid.timer.CompensationTimer - Maven / Gradle / Ivy
package com.alibaba.dts.client.executor.grid.timer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import com.alibaba.dts.client.executor.grid.GridTaskSender;
import com.alibaba.dts.client.executor.job.context.ClientContextImpl;
import com.alibaba.dts.client.executor.job.context.JobContext;
import com.alibaba.dts.client.executor.job.context.JobContextImpl;
import com.alibaba.dts.client.store.access.TaskSnapshotAccess;
import com.alibaba.dts.common.constants.Constants;
import com.alibaba.dts.common.context.InvocationContext;
import com.alibaba.dts.common.domain.ExecutableTask;
import com.alibaba.dts.common.domain.remoting.RemoteMachine;
import com.alibaba.dts.common.domain.result.Result;
import com.alibaba.dts.common.domain.result.ResultCode;
import com.alibaba.dts.common.domain.store.ExecutionCounter;
import com.alibaba.dts.common.domain.store.Job;
import com.alibaba.dts.common.domain.store.JobInstanceSnapshot;
import com.alibaba.dts.common.domain.store.TaskSnapshot;
import com.alibaba.dts.common.exception.AccessException;
import com.alibaba.dts.common.logger.SchedulerXLoggerFactory;
import com.alibaba.dts.common.logger.innerlog.Logger;
/**
 * Periodically re-dispatches task snapshots read from the local store for compensation.
 *
 * <p>{@link #init()} schedules a job with a 60 second initial delay and a 60 second period.
 * Each run pages through the snapshots returned by the task snapshot DAO, groups them by job
 * instance, marks each group as retrying, and hands it to the {@link GridTaskSender}. When the
 * dispatch is accepted the matching execution counters are decremented; otherwise the group is
 * marked as failed.
 *
 * <p>The scheduled executor created by {@link #init()} is not retained, so this class offers no
 * way to shut it down.
 *
 * @author Ronan Zhan
 * @date 2016/11/3.
 */
public class CompensationTimer {

    private static final Logger logger = SchedulerXLoggerFactory.getLogger(CompensationTimer.class);

    /** Number of task snapshots requested from the store per page. */
    private static final int PAGE_SIZE = 1000;

    /** Initial delay and period of the compensation job, in seconds. */
    private static final long PERIOD_SECONDS = 60;

    private final ClientContextImpl clientContext;

    /** Jobs already fetched from a server during earlier runs; entries are never evicted. */
    private final List<Job> jobs = new ArrayList<Job>();

    /**
     * Job instance snapshots already fetched from a server. Every access synchronizes on the
     * list itself because {@link #removeJobInstanceSnapshot(long)} is public and mutates it
     * while the timer thread iterates and appends.
     */
    private final List<JobInstanceSnapshot> jobInstanceSnapshots = new ArrayList<JobInstanceSnapshot>();

    /**
     * Execution status: {@code Constants.STATUS_RUNNING} while a compensation run is in
     * progress, {@code Constants.STATUS_STOP} otherwise.
     */
    private volatile int status = Constants.STATUS_STOP;

    /**
     * @param clientContext client context providing the store, remoting, server service,
     *                      grid task sender and execution counter table used by this timer
     */
    public CompensationTimer(ClientContextImpl clientContext) {
        this.clientContext = clientContext;
    }

    /**
     * Starts the periodic compensation job on a new single-thread scheduled executor.
     */
    public void init() {
        Executors.newScheduledThreadPool(1).scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                runOnce();
            }
        }, PERIOD_SECONDS, PERIOD_SECONDS, TimeUnit.SECONDS);
    }

    /**
     * Executes one compensation run unless another one is already in progress.
     * Never throws: every failure is logged instead.
     */
    private void runOnce() {
        boolean acquired = false;
        try {
            acquired = tryMarkRunning();
            if (!acquired) {
                String serverAddress = clientContext.getNodeConfig().getLocalAddress();
                logger.warn("[Compensation]: start compensation task is running, so return, serverAddress:" + serverAddress);
                return;
            }
            compensateAllPages();
        } catch (Throwable throwable) {
            // Nothing may escape: an exception thrown out of a task scheduled with
            // scheduleAtFixedRate suppresses all subsequent executions.
            logger.error(throwable.getMessage(), throwable);
        } finally {
            // Only the run that set the flag may clear it; a skipped run must not
            // reset the status of the run that is still in progress.
            if (acquired) {
                status = Constants.STATUS_STOP;
            }
        }
    }

    /**
     * Atomically switches the status to running.
     *
     * @return {@code true} if this call set the flag, {@code false} if a run was already active
     */
    private synchronized boolean tryMarkRunning() {
        if (Constants.STATUS_RUNNING == status) {
            return false;
        }
        status = Constants.STATUS_RUNNING;
        return true;
    }

    /**
     * Walks through all pages of task snapshots and dispatches each page grouped by job instance.
     * The id of the last snapshot of a page is used as the start id of the next page.
     */
    private void compensateAllPages() {
        GridTaskSender gridSender = clientContext.getGridTaskSender();
        List<TaskSnapshot> page = listRetryTasks(0, PAGE_SIZE);
        while (page != null && !page.isEmpty()) {
            for (ExecutableTask executableTask : groupByJobInstance(page)) {
                dispatch(gridSender, executableTask);
            }
            long lastId = page.get(page.size() - 1).getId();
            page = listRetryTasks(lastId, PAGE_SIZE);
        }
    }

    /**
     * Flags every snapshot as a compensation task and groups the snapshots by job instance id.
     * Snapshots whose job instance or job cannot be resolved are skipped.
     *
     * @param compensationTasks one page of task snapshots
     * @return one executable task per job instance, in order of first appearance
     */
    private List<ExecutableTask> groupByJobInstance(List<TaskSnapshot> compensationTasks) {
        List<ExecutableTask> executableTasks = new ArrayList<ExecutableTask>();
        for (TaskSnapshot compensationTask : compensationTasks) {
            compensationTask.setCompensation(true);
            long jobInstanceId = compensationTask.getJobInstanceId();

            JobInstanceSnapshot jobInstanceSnapshot = resolveJobInstance(jobInstanceId);
            if (jobInstanceSnapshot == null) {
                continue;
            }
            Job job = resolveJob(jobInstanceSnapshot.getJobId());
            if (job == null) {
                continue;
            }

            ExecutableTask executableTask = getExecutableTask(executableTasks, jobInstanceId);
            if (executableTask == null) {
                executableTask = new ExecutableTask();
                executableTask.setJobInstanceSnapshot(jobInstanceSnapshot);
                executableTask.setJob(job);
                executableTasks.add(executableTask);
            }
            executableTask.getTaskSnapshotList().add(compensationTask);
        }
        return executableTasks;
    }

    /**
     * Returns the job instance snapshot from the local cache, fetching and caching it on a miss.
     *
     * @return the snapshot, or {@code null} if no server returned a successful, non-empty result
     */
    private JobInstanceSnapshot resolveJobInstance(long jobInstanceId) {
        JobInstanceSnapshot cached = getJobInstanceExisted(jobInstanceId);
        if (cached != null) {
            return cached;
        }
        Result<JobInstanceSnapshot> result = getJobInstance(jobInstanceId);
        if (result == null || result.getResultCode() != ResultCode.SUCCESS || result.getData() == null) {
            return null;
        }
        JobInstanceSnapshot fetched = result.getData();
        synchronized (jobInstanceSnapshots) {
            jobInstanceSnapshots.add(fetched);
        }
        return fetched;
    }

    /**
     * Returns the job from the local cache, fetching and caching it on a miss.
     *
     * @return the job, or {@code null} if no server returned a successful, non-empty result
     */
    private Job resolveJob(long jobId) {
        Job cached = getJobExisted(jobId);
        if (cached != null) {
            return cached;
        }
        Result<Job> result = getJob(jobId);
        if (result == null || result.getResultCode() != ResultCode.SUCCESS || result.getData() == null) {
            return null;
        }
        Job fetched = result.getData();
        jobs.add(fetched);
        return fetched;
    }

    /**
     * Marks the group's snapshots as retrying and dispatches them. On an accepted dispatch the
     * execution counters are adjusted; on anything else the snapshots are marked as failed.
     */
    private void dispatch(GridTaskSender gridSender, ExecutableTask executableTask) {
        JobContext jobContext = new JobContextImpl();
        jobContext.setJob(executableTask.getJob());
        jobContext.setJobInstanceSnapshot(executableTask.getJobInstanceSnapshot());

        updateTasksStatus2Retrying(executableTask);
        Result<Boolean> result = gridSender.dispatchCompensateTaskList(executableTask.getTaskSnapshotList(), jobContext);
        // Boolean.TRUE.equals avoids the NullPointerException that unboxing a null payload would raise.
        if (result != null && Boolean.TRUE.equals(result.getData())) {
            decrementCounters(executableTask);
        } else {
            updateTasksStatus2Failure(executableTask);
        }
    }

    /**
     * Decrements the fail and total counters of every snapshot in the group, skipping snapshots
     * for which no counter is registered.
     */
    private void decrementCounters(ExecutableTask executableTask) {
        // TODO adjust the related counter state, revising the previously recorded execution result
        for (TaskSnapshot taskSnapshot : executableTask.getTaskSnapshotList()) {
            String receiveNodeAddress = taskSnapshot.getReceiveNodeAddress();
            String taskName = taskSnapshot.getTaskName();
            // Each level may be missing; a missing level means "no counter", not a failure.
            ExecutionCounter executionCounter = lookup(
                    lookup(
                            lookup(clientContext.getExecutionCounterTable(), executableTask.getJobInstanceSnapshot().getId()),
                            receiveNodeAddress),
                    taskName);
            if (executionCounter != null) {
                executionCounter.getFailCounter().getAndDecrement();
                executionCounter.getTotalCounter().getAndDecrement();
            }
        }
    }

    /**
     * Null-tolerant {@link Map#get(Object)}.
     *
     * @return the mapped value, or {@code null} if the map or the key is {@code null}
     */
    private static <V> V lookup(Map<?, V> map, Object key) {
        return (map == null || key == null) ? null : map.get(key);
    }

    /**
     * Finds the executable task already created for the given job instance.
     *
     * @return the matching task, or {@code null} if none exists yet
     */
    private static ExecutableTask getExecutableTask(List<ExecutableTask> executableTasks, long jobInstanceId) {
        for (ExecutableTask executableTask : executableTasks) {
            if (executableTask.getJobInstanceSnapshot().getId() == jobInstanceId) {
                return executableTask;
            }
        }
        return null;
    }

    /**
     * Looks up a job in the local cache.
     *
     * @return the cached job, or {@code null} on a cache miss
     */
    private Job getJobExisted(long jobId) {
        for (Job job : jobs) {
            if (job.getId() == jobId) {
                return job;
            }
        }
        return null;
    }

    /**
     * Looks up a job instance snapshot in the local cache.
     *
     * @return the cached snapshot, or {@code null} on a cache miss
     */
    private JobInstanceSnapshot getJobInstanceExisted(long jobInstanceId) {
        synchronized (jobInstanceSnapshots) {
            for (JobInstanceSnapshot jobInstanceSnapshot : jobInstanceSnapshots) {
                if (jobInstanceSnapshot == null) {
                    continue;
                }
                if (jobInstanceSnapshot.getId() == jobInstanceId) {
                    return jobInstanceSnapshot;
                }
            }
        }
        return null;
    }

    /**
     * Reads one page of task snapshots from the store, ordered by id ascending.
     *
     * @param start id the page starts from
     * @param limit maximum number of snapshots to return
     * @return the page, or {@code null} if the store access failed
     */
    private List<TaskSnapshot> listRetryTasks(long start, int limit) {
        try {
            return clientContext.getStore().getTaskSnapshotDao().listTaskSnapshotsFromStartId(start, limit);
        } catch (AccessException e) {
            logger.error("listRetryTasks error, start id {}, limit {} ", start, limit, e);
            return null;
        }
    }

    /**
     * Removes the first cached job instance snapshot with the given id, if any.
     *
     * @param jobInstanceId id of the job instance snapshot to drop from the cache
     */
    public void removeJobInstanceSnapshot(long jobInstanceId) {
        synchronized (jobInstanceSnapshots) {
            Iterator<JobInstanceSnapshot> iterator = jobInstanceSnapshots.iterator();
            while (iterator.hasNext()) {
                JobInstanceSnapshot jobInstanceSnapshot = iterator.next();
                if (jobInstanceSnapshot != null && jobInstanceSnapshot.getId() == jobInstanceId) {
                    iterator.remove();
                    return;
                }
            }
        }
    }

    /**
     * Asks each known server in turn for the job instance and returns the first non-null result.
     *
     * @return the first non-null result, or {@code null} if there is no server list or every
     *         server returned {@code null}
     */
    private Result<JobInstanceSnapshot> getJobInstance(long jobInstanceId) {
        List<String> serverList = clientContext.getClientRemoting().getServerList();
        if (serverList == null) {
            return null;
        }
        for (String server : serverList) {
            InvocationContext.setRemoteMachine(new RemoteMachine(server));
            Result<JobInstanceSnapshot> result = clientContext.getServerService().getJobInstanceById(jobInstanceId);
            if (result != null) {
                return result;
            }
            logger.error("clientContext getServerService getJobInstanceById error from server {} with job instance id {}", server, jobInstanceId);
        }
        return null;
    }

    /**
     * Asks each known server in turn for the job and returns the first non-null result.
     *
     * @return the first non-null result, or {@code null} if there is no server list or every
     *         server returned {@code null}
     */
    private Result<Job> getJob(long jobId) {
        List<String> serverList = clientContext.getClientRemoting().getServerList();
        if (serverList == null) {
            return null;
        }
        for (String server : serverList) {
            InvocationContext.setRemoteMachine(new RemoteMachine(server));
            Result<Job> result = clientContext.getServerService().getJobById(jobId);
            if (result != null) {
                return result;
            }
            logger.error("clientContext getServerService getJobById error from server {} with job id {}", server, jobId);
        }
        return null;
    }

    /**
     * Best-effort batch update of the group's snapshots to the retrying status; failures are
     * logged and not propagated.
     */
    private void updateTasksStatus2Retrying(ExecutableTask executableTask) {
        try {
            clientContext.getStore().getTaskSnapshotDao().updateStatusBatch(executableTask.getTaskSnapshotList(), Constants.TASK_STATUS_RETRYING);
        } catch (Throwable throwable) {
            logger.error("update tasksnapshot status error, id={}", executableTask, throwable);
        }
    }

    /**
     * Best-effort batch update of the group's snapshots to the failure status; failures are
     * logged and not propagated.
     */
    private void updateTasksStatus2Failure(ExecutableTask executableTask) {
        try {
            clientContext.getStore().getTaskSnapshotDao().updateStatusBatch(executableTask.getTaskSnapshotList(), Constants.TASK_STATUS_FAILURE);
        } catch (Throwable throwable) {
            logger.error("update tasksnapshot status error, id={}", executableTask, throwable);
        }
    }
}