// com.alibaba.schedulerx.worker.timer.ZombieContainerCheckTimer (Maven / Gradle / Ivy artifact source listing)
package com.alibaba.schedulerx.worker.timer;
import akka.actor.ActorSelection;
import com.alibaba.schedulerx.common.util.JsonUtil;
import com.alibaba.schedulerx.protocol.Worker.ContainerCheckZombieRequest;
import com.alibaba.schedulerx.protocol.Worker.ContainerCheckZombieResponse;
import com.alibaba.schedulerx.protocol.utils.FutureUtils;
import com.alibaba.schedulerx.worker.SchedulerxWorker;
import com.alibaba.schedulerx.worker.batch.ContainerStatusReqHandlerPool;
import com.alibaba.schedulerx.worker.container.ContainerFactory;
import com.alibaba.schedulerx.worker.container.ContainerPool;
import com.alibaba.schedulerx.worker.domain.WorkerConstants;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
* check local zombie container according task master response
* @author yanxun on 2019/1/16.
*/
public class ZombieContainerCheckTimer extends AbstractTimerTask{
private ContainerStatusReqHandlerPool statusReqBatchHandlerPool = ContainerStatusReqHandlerPool.INSTANCE;
private ContainerPool containerPool = ContainerFactory.getContainerPool();
@Override
public String getName() {
return "ZombieContainerCheckTimer";
}
@Override
public long getInitialDelay() {
// 5min
return 60 * 5;
}
@Override
public long getPeriod() {
// 10 min
return 60 * 10;
}
@Override
public void run() {
Map> masterPath2JobInstanceIds = Maps.newHashMap();
// Long jobInstanceId;
// String masterPath;
// for (Entry> entry : statusReqBatchHandlerPool.getHandlers().entrySet()) {
// jobInstanceId = entry.getKey();
// masterPath = entry.getValue().getTaskMasterAkkaPath();
// if (!masterPath2JobInstanceIds.containsKey(masterPath)) {
// masterPath2JobInstanceIds.put(masterPath, Lists.newArrayList(jobInstanceId));
// } else {
// masterPath2JobInstanceIds.get(masterPath).add(jobInstanceId);
// }
// }
ContainerPool containerPool = ContainerFactory.getContainerPool();
containerPool.getInstanceMasterActorPathMap();
for (Entry entry:containerPool.getInstanceMasterActorPathMap().entrySet()) {
if (!masterPath2JobInstanceIds.containsKey(entry.getValue())) {
masterPath2JobInstanceIds.put(entry.getValue(), Lists.newArrayList(entry.getKey()));
} else {
masterPath2JobInstanceIds.get(entry.getValue()).add(entry.getKey());
}
}
for (Entry> entry : masterPath2JobInstanceIds.entrySet()) {
String masterCheckPath = entry.getKey().replace(WorkerConstants.WORKER_AKKA_TASK_ROUTING_PATH, WorkerConstants.WORKER_AKKA_HEARTBEAT_ROUTING_PATH);
List jobInstanceIds = entry.getValue();
ContainerCheckZombieRequest request = ContainerCheckZombieRequest
.newBuilder()
.addAllJobInstanceId(jobInstanceIds)
.build();
ActorSelection selection = SchedulerxWorker.actorSystem.actorSelection(masterCheckPath);
try {
ContainerCheckZombieResponse response = (ContainerCheckZombieResponse)FutureUtils.awaitResult(selection,
request, 10);
List zombieJobInstanceIds = response.getZombieJobInstanceIdList();
if (!CollectionUtils.isEmpty(zombieJobInstanceIds)) {
LOGGER.warn("detect zombieJobInstanceIds:{}, clean...", StringUtils.join(zombieJobInstanceIds, ","));
for (Long zombieJobInstanceId : zombieJobInstanceIds) {
if (zombieJobInstanceId != 0) {
statusReqBatchHandlerPool.stop(zombieJobInstanceId);
containerPool.destroyByInstance(zombieJobInstanceId, true);
}
}
}
} catch (Throwable e) {
LOGGER.warn("JobInstanceId={}, masterCheckPath={}, ZombieContainerCheckTimer check error ", JsonUtil.toJson(jobInstanceIds), masterCheckPath, e);
}
}
}
}