cn.ponfee.disjob.supervisor.scanner.RunningInstanceScanner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of disjob-supervisor Show documentation
Show all versions of disjob-supervisor Show documentation
Distributed job supervisor module
The newest version!
/*
* Copyright 2022-2024 Ponfee (http://www.ponfee.cn/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cn.ponfee.disjob.supervisor.scanner;
import cn.ponfee.disjob.common.base.SingletonClassConstraint;
import cn.ponfee.disjob.common.collect.Collects;
import cn.ponfee.disjob.common.concurrent.AbstractHeartbeatThread;
import cn.ponfee.disjob.common.concurrent.PeriodExecutor;
import cn.ponfee.disjob.common.lock.LockTemplate;
import cn.ponfee.disjob.supervisor.component.JobManager;
import cn.ponfee.disjob.supervisor.component.JobQuerier;
import cn.ponfee.disjob.supervisor.component.WorkerClient;
import cn.ponfee.disjob.supervisor.configuration.SupervisorProperties;
import cn.ponfee.disjob.supervisor.model.SchedInstance;
import cn.ponfee.disjob.supervisor.model.SchedJob;
import cn.ponfee.disjob.supervisor.model.SchedTask;
import org.apache.commons.collections4.CollectionUtils;
import java.util.Date;
import java.util.List;
import static cn.ponfee.disjob.core.base.JobConstants.PROCESS_BATCH_SIZE;
/**
 * Scans {@code sched_instance} records that have stayed in the running state for a long time
 * and, depending on the state of their tasks, either re-dispatches the waiting tasks or purges
 * the instance.
 *
 * @author Ponfee
 */
public class RunningInstanceScanner extends AbstractHeartbeatThread {

    private final LockTemplate lockTemplate;
    private final WorkerClient workerClient;
    private final JobManager jobManager;
    private final JobQuerier jobQuerier;

    /** How far in the past (in milliseconds) the expire time passed to the instance query lies. */
    private final long beforeMilliseconds;

    /** Emits the "no worker" warning through a {@link PeriodExecutor} configured with 30000. */
    private final PeriodExecutor logPrinter = new PeriodExecutor(30000, () -> log.warn("Not discovered any worker."));

    /**
     * Creates the scanner.
     *
     * @param conf         supervisor configuration, supplies the scan period in milliseconds
     * @param lockTemplate lock wrapper around each scan round
     * @param workerClient used to check worker discovery and task liveness
     * @param jobManager   used to update, re-dispatch and purge instances
     * @param jobQuerier   used to load instances, tasks and jobs
     */
    public RunningInstanceScanner(SupervisorProperties conf,
                                  LockTemplate lockTemplate,
                                  WorkerClient workerClient,
                                  JobManager jobManager,
                                  JobQuerier jobQuerier) {
        super(conf.getScanRunningInstancePeriodMs());
        SingletonClassConstraint.constrain(this);

        this.lockTemplate = lockTemplate;
        this.jobManager = jobManager;
        this.workerClient = workerClient;
        this.jobQuerier = jobQuerier;
        // twelve heartbeat periods, e.g. 20s * 12 = 240s for a 20s heartbeat period
        this.beforeMilliseconds = (heartbeatPeriodMs * 12);
    }

    /**
     * Runs one scan round under the lock, unless no worker has been discovered at all.
     *
     * @return {@code true} when no worker is discovered or when {@link #process()} returned
     *         {@code true}; {@code false} otherwise, including when the lock template yields
     *         {@code null}. NOTE(review): the meaning of the return value is defined by
     *         {@code AbstractHeartbeatThread} - presumably "nothing more to do right now"; confirm.
     */
    @Override
    protected boolean heartbeat() {
        if (workerClient.hasNotDiscoveredWorkers()) {
            logPrinter.execute();
            return true;
        }

        Boolean result = lockTemplate.execute(this::process);
        // null-safe unboxing: a null result is treated as false
        return Boolean.TRUE.equals(result);
    }

    /**
     * Loads one batch of expired running instances and handles each of them.
     *
     * @return {@code true} if nothing was found or fewer than {@code PROCESS_BATCH_SIZE}
     *         instances were found, {@code false} if a full batch was returned
     */
    private boolean process() {
        Date expireTime = new Date(System.currentTimeMillis() - beforeMilliseconds);
        List<SchedInstance> instances = jobQuerier.findExpireRunningInstance(expireTime, PROCESS_BATCH_SIZE);
        if (CollectionUtils.isEmpty(instances)) {
            return true;
        }

        for (SchedInstance instance : instances) {
            processEach(instance);
        }
        return instances.size() < PROCESS_BATCH_SIZE;
    }

    /**
     * Handles a single instance according to the state of its tasks: some waiting,
     * all terminal, or otherwise (still has executing tasks).
     *
     * @param instance the scanned instance
     */
    private void processEach(SchedInstance instance) {
        // skip the instance when its next scan time could not be updated
        if (!jobManager.updateInstanceNextScanTime(instance, new Date())) {
            return;
        }

        List<SchedTask> tasks = jobQuerier.findBaseInstanceTasks(instance.getInstanceId());
        List<SchedTask> waitingTasks = Collects.filter(tasks, SchedTask::isWaiting);

        if (CollectionUtils.isNotEmpty(waitingTasks)) {
            processHasWaitingTask(instance, waitingTasks);
        } else if (tasks.stream().allMatch(SchedTask::isTerminal)) {
            processAllTerminatedTask(instance);
        } else {
            processHasExecutingTask(instance, tasks);
        }
    }

    /**
     * Re-dispatches those waiting tasks for which the worker client reports a re-dispatch is needed.
     *
     * @param instance     the scanned instance
     * @param waitingTasks the waiting tasks of the instance
     */
    private void processHasWaitingTask(SchedInstance instance, List<SchedTask> waitingTasks) {
        // sieve the (un-dispatch) or (assigned worker dead) waiting tasks to do re-dispatch
        List<SchedTask> redispatchingTasks = Collects.filter(waitingTasks, workerClient::shouldRedispatch);
        if (CollectionUtils.isEmpty(redispatchingTasks)) {
            return;
        }

        SchedJob job = jobQuerier.getJob(instance.getJobId());
        if (job == null) {
            log.error("Scanned running state instance not found job: {}", instance.getJobId());
            return;
        }

        // nothing can be dispatched while the job's group has no discovered worker
        if (workerClient.hasNotDiscoveredWorkers(job.getGroup())) {
            log.error("Scanned running state instance not discovered worker: {}, {}", instance.getInstanceId(), job.getGroup());
            return;
        }

        boolean result = jobManager.redispatch(job, instance, redispatchingTasks);
        log.info("Scanned running state instance re-dispatch task: {}, {}", result, instance.getInstanceId());
    }

    /**
     * Purges an instance whose tasks are all terminal, after re-reading the instance to make
     * sure it still exists and is not terminal itself.
     *
     * @param instance the scanned instance
     */
    private void processAllTerminatedTask(SchedInstance instance) {
        // double check instance run state
        SchedInstance reloadInstance = jobQuerier.getInstance(instance.getInstanceId());
        if (reloadInstance == null) {
            log.error("Scanned running state instance not exists: {}", instance.getInstanceId());
            return;
        }
        if (reloadInstance.isTerminal()) {
            return;
        }

        boolean purged = jobManager.purgeInstance(instance);
        log.info("Purge scanned running instance task all terminated: {}, {}", instance.getInstanceId(), purged);
    }

    /**
     * Purges an instance when the worker client reports no alive executing task among its tasks.
     *
     * @param instance the scanned instance
     * @param tasks    all tasks of the instance
     */
    private void processHasExecutingTask(SchedInstance instance, List<SchedTask> tasks) {
        // check has alive executing state task
        if (workerClient.hasAliveExecutingTasks(tasks)) {
            return;
        }

        boolean purged = jobManager.purgeInstance(instance);
        log.info("Purge scanned running instance was dead: {}, {}", instance.getInstanceId(), purged);
    }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy