
com.vip.saturn.job.sharding.task.AbstractAsyncShardingTask Maven / Gradle / Ivy
package com.vip.saturn.job.sharding.task;
import com.google.common.collect.Lists;
import com.vip.saturn.job.integrate.service.ReportAlarmService;
import com.vip.saturn.job.sharding.entity.Executor;
import com.vip.saturn.job.sharding.entity.Shard;
import com.vip.saturn.job.sharding.node.SaturnExecutorsNode;
import com.vip.saturn.job.sharding.service.NamespaceShardingContentService;
import com.vip.saturn.job.sharding.service.NamespaceShardingService;
import org.apache.commons.lang3.StringUtils;
import org.apache.curator.framework.CuratorFramework;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;
import java.util.concurrent.ExecutorService;
public abstract class AbstractAsyncShardingTask implements Runnable {
private static final Logger log = LoggerFactory.getLogger(AbstractAsyncShardingTask.class);
private static final int LOAD_LEVEL_DEFAULT = 1;
private static final String NAME_ENABLE_JOB_BASED_SHARDING = "VIP_SATURN_ENABLE_JOB_BASED_SHARDING";
public static boolean ENABLE_JOB_BASED_SHARDING = true;
static {
String enableJobAverage = System
.getProperty(NAME_ENABLE_JOB_BASED_SHARDING, System.getenv(NAME_ENABLE_JOB_BASED_SHARDING));
if (StringUtils.isNotBlank(enableJobAverage)) {
ENABLE_JOB_BASED_SHARDING = Boolean.parseBoolean(enableJobAverage);
}
log.info("ENABLE_JOB_BASED_SHARDING is {}", ENABLE_JOB_BASED_SHARDING);
}
protected NamespaceShardingService namespaceShardingService;
protected CuratorFramework curatorFramework;
protected NamespaceShardingContentService namespaceShardingContentService;
protected ExecutorService executorService;
protected ReportAlarmService reportAlarmService;
public AbstractAsyncShardingTask(NamespaceShardingService namespaceShardingService) {
this.namespaceShardingService = namespaceShardingService;
this.curatorFramework = namespaceShardingService.getCuratorFramework();
this.namespaceShardingContentService = namespaceShardingService.getNamespaceShardingContentService();
this.executorService = namespaceShardingService.getExecutorService();
this.reportAlarmService = namespaceShardingService.getReportAlarmService();
}
protected abstract void logStartInfo();
/**
* Special enable jobs that need to be notified prior, not consider whether whose shards are changed.
* By default, notify enable jobs whose shards are changed.
*/
protected List notifyEnableJobsPrior() {
return null;
}
@Override
public void run() {
logStartInfo();
boolean isAllShardingTask = this instanceof ExecuteAllShardingTask;
try {
// 如果当前变为非leader,则直接返回
if (!namespaceShardingService.isLeadershipOnly()) {
return;
}
// 如果需要全量分片,且当前线程不是全量分片线程,则直接返回,没必要做分片
if (namespaceShardingService.isNeedAllSharding() && !isAllShardingTask) {
log.info("the {} will be ignored, because there will be {}", this.getClass().getSimpleName(),
ExecuteAllShardingTask.class.getSimpleName());
return;
}
List allJobs = getAllJobs();
List allEnableJobs = getAllEnableJobs(allJobs);
List oldOnlineExecutorList = getLastOnlineExecutorList();
List customLastOnlineExecutorList = customLastOnlineExecutorList();
List lastOnlineExecutorList = customLastOnlineExecutorList == null ?
copyOnlineExecutorList(oldOnlineExecutorList) :
customLastOnlineExecutorList;
List lastOnlineTrafficExecutorList = getTrafficExecutorList(lastOnlineExecutorList);
List shardList = new ArrayList<>();
// 摘取
if (pick(allJobs, allEnableJobs, shardList, lastOnlineExecutorList, lastOnlineTrafficExecutorList)) {
// 放回
putBackBalancing(allEnableJobs, shardList, lastOnlineExecutorList, lastOnlineTrafficExecutorList);
// 如果当前变为非leader,则返回
if (!namespaceShardingService.isLeadershipOnly()) {
return;
}
// 持久化分片结果
if (shardingContentIsChanged(oldOnlineExecutorList, lastOnlineExecutorList)) {
namespaceShardingContentService.persistDirectly(lastOnlineExecutorList);
}
// notify the shards-changed jobs of all enable jobs.
Map>> enabledAndShardsChangedJobShardContent = getEnabledAndShardsChangedJobShardContent(
isAllShardingTask, allEnableJobs, oldOnlineExecutorList, lastOnlineExecutorList);
namespaceShardingContentService
.persistJobsNecessaryInTransaction(enabledAndShardsChangedJobShardContent);
// sharding count ++
increaseShardingCount();
}
} catch (InterruptedException e) {
log.info("{}-{} {} is interrupted", namespaceShardingService.getNamespace(),
namespaceShardingService.getHostValue(), this.getClass().getSimpleName());
Thread.currentThread().interrupt();
} catch (Throwable t) {
log.error(t.getMessage(), t);
if (!isAllShardingTask) { // 如果当前不是全量分片,则需要全量分片来拯救异常
namespaceShardingService.setNeedAllSharding(true);
namespaceShardingService.shardingCountIncrementAndGet();
executorService.submit(new ExecuteAllShardingTask(namespaceShardingService));
} else { // 如果当前是全量分片,则告警并关闭当前服务,重选leader来做事情
raiseAlarm();
shutdownNamespaceShardingService();
}
} finally {
if (isAllShardingTask) { // 如果是全量分片,不再进行全量分片
namespaceShardingService.setNeedAllSharding(false);
}
namespaceShardingService.shardingCountDecrementAndGet();
}
}
private void shutdownNamespaceShardingService() {
try {
namespaceShardingService.shutdownInner(false);
} catch (InterruptedException e) {
log.info("{}-{} {}-shutdownInner is interrupted", namespaceShardingService.getNamespace(),
namespaceShardingService.getHostValue(), this.getClass().getSimpleName());
Thread.currentThread().interrupt();
} catch (Throwable t) {
log.error(t.getMessage(), t);
}
}
private void raiseAlarm() {
if (reportAlarmService != null) {
try {
reportAlarmService.allShardingError(namespaceShardingService.getNamespace(),
namespaceShardingService.getHostValue());
} catch (Throwable t) {
log.error(t.getMessage(), t);
}
}
}
private boolean shardingContentIsChanged(List oldOnlineExecutorList,
List lastOnlineExecutorList) {
return !namespaceShardingContentService.toShardingContent(oldOnlineExecutorList)
.equals(namespaceShardingContentService.toShardingContent(lastOnlineExecutorList));
}
private List copyOnlineExecutorList(List oldOnlineExecutorList) {
List newOnlineExecutorList = new ArrayList<>();
for (Executor oldExecutor : oldOnlineExecutorList) {
Executor newExecutor = new Executor();
newExecutor.setTotalLoadLevel(oldExecutor.getTotalLoadLevel());
newExecutor.setIp(oldExecutor.getIp());
newExecutor.setNoTraffic(oldExecutor.isNoTraffic());
newExecutor.setExecutorName(oldExecutor.getExecutorName());
if (oldExecutor.getJobNameList() != null) {
newExecutor.setJobNameList(new ArrayList());
for (String jobName : oldExecutor.getJobNameList()) {
newExecutor.getJobNameList().add(jobName);
}
}
if (oldExecutor.getShardList() != null) {
newExecutor.setShardList(new ArrayList());
for (Shard oldShard : oldExecutor.getShardList()) {
Shard newShard = new Shard();
newShard.setItem(oldShard.getItem());
newShard.setJobName(oldShard.getJobName());
newShard.setLoadLevel(oldShard.getLoadLevel());
newExecutor.getShardList().add(newShard);
}
}
newOnlineExecutorList.add(newExecutor);
}
return newOnlineExecutorList;
}
/**
* 移除特定作业的shard
*/
protected List removeJobShardsOnExecutors(List lastOnlineTrafficExecutorList, String jobName) {
List removedShards = Lists.newArrayList();
for (int i = 0; i < lastOnlineTrafficExecutorList.size(); i++) {
Executor executor = lastOnlineTrafficExecutorList.get(i);
Iterator iterator = executor.getShardList().iterator();
while (iterator.hasNext()) {
Shard shard = iterator.next();
if (jobName.equals(shard.getJobName())) {
executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel());
iterator.remove();
removedShards.add(shard);
}
}
}
return removedShards;
}
/**
* 修正lastOnlineExecutorList中的jobNameList
*/
protected boolean fixJobNameList(List lastOnlineExecutorList, String jobName) throws Exception {
boolean fixed = false;
for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
if (executor.getJobNameList() == null) {
executor.setJobNameList(new ArrayList());
}
List jobNameList = executor.getJobNameList();
String jobServersExecutorStatusNodePath = SaturnExecutorsNode
.getJobServersExecutorStatusNodePath(jobName, executor.getExecutorName());
if (curatorFramework.checkExists().forPath(jobServersExecutorStatusNodePath) != null) {
if (!jobNameList.contains(jobName)) {
jobNameList.add(jobName);
fixed = true;
}
} else {
if (jobNameList.contains(jobName)) {
jobNameList.remove(jobName);
fixed = true;
}
}
}
return fixed;
}
private void increaseShardingCount() throws Exception {
Integer shardingCount = 1;
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH) != null) {
byte[] shardingCountData = curatorFramework.getData().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH);
if (shardingCountData != null) {
try {
shardingCount = Integer.parseInt(new String(shardingCountData, StandardCharsets.UTF_8.name())) + 1;
} catch (NumberFormatException e) {
log.error("parse shardingCount error", e);
}
}
curatorFramework.setData().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH,
shardingCount.toString().getBytes(StandardCharsets.UTF_8.name()));
} else {
curatorFramework.create().creatingParentsIfNeeded().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH,
shardingCount.toString().getBytes(StandardCharsets.UTF_8.name()));
}
}
/**
* Get the jobs, that are enabled, and whose shards are changed. Specially, return all enabled jobs when the current
* thread is all-shard-task
* Return the jobs and their shardContent.
*/
private Map>> getEnabledAndShardsChangedJobShardContent(boolean isAllShardingTask,
List allEnableJobs, List oldOnlineExecutorList, List lastOnlineExecutorList) {
Map>> jobShardContent = new HashMap<>();
if (isAllShardingTask) {
for (String enableJob : allEnableJobs) {
Map> lastShardingItems = namespaceShardingContentService
.getShardingItems(lastOnlineExecutorList, enableJob);
jobShardContent.put(enableJob, lastShardingItems);
}
return jobShardContent;
}
List enableJobsPrior = notifyEnableJobsPrior();
for (String enableJob : allEnableJobs) {
Map> lastShardingItems = namespaceShardingContentService
.getShardingItems(lastOnlineExecutorList, enableJob);
// notify prior jobs that are in all enable jobs
if (enableJobsPrior != null && enableJobsPrior.contains(enableJob)) {
jobShardContent.put(enableJob, lastShardingItems);
continue;
}
Map> oldShardingItems = namespaceShardingContentService
.getShardingItems(oldOnlineExecutorList, enableJob);
// just compare whether or not contains the same executorName, and it's shardList
boolean isChanged = false;
Iterator>> oldIterator = oldShardingItems.entrySet().iterator();
wl_loop:
while (oldIterator.hasNext()) {
Entry> next = oldIterator.next();
String executorName = next.getKey();
if (!lastShardingItems.containsKey(executorName)) {
isChanged = true;
break;
}
List shards = next.getValue();
List newShard = lastShardingItems.get(executorName);
if ((shards == null && newShard != null) || (shards != null && newShard == null)) {
isChanged = true;
break;
}
if (shards == null || newShard == null) {
continue;
}
for (Integer shard : shards) {
if (!newShard.contains(shard)) {
isChanged = true;
break wl_loop;
}
}
}
if (!isChanged) {
Iterator>> newIterator = lastShardingItems.entrySet().iterator();
while (newIterator.hasNext()) {
Entry> next = newIterator.next();
String executorName = next.getKey();
if (!oldShardingItems.containsKey(executorName)) {
isChanged = true;
break;
}
List shards = next.getValue();
List oldShard = oldShardingItems.get(executorName);
if ((shards == null && oldShard != null) || (shards != null && oldShard == null)) {
isChanged = true;
break;
}
if (shards == null || oldShard == null) {
continue;
}
if (hasShardChanged(shards, oldShard)) {
isChanged = true;
break;
}
}
}
if (isChanged) {
jobShardContent.put(enableJob, lastShardingItems);
}
}
return jobShardContent;
}
private boolean hasShardChanged(List shards, List oldShard) {
for (Integer shard : shards) {
if (!oldShard.contains(shard)) {
return true;
}
}
return false;
}
protected boolean isLocalMode(String jobName) throws Exception {
String localNodePath = SaturnExecutorsNode.getJobConfigLocalModeNodePath(jobName);
if (curatorFramework.checkExists().forPath(localNodePath) != null) {
byte[] data = curatorFramework.getData().forPath(localNodePath);
if (data != null) {
return Boolean.parseBoolean(new String(data, StandardCharsets.UTF_8.name()));
}
}
return false;
}
protected int getShardingTotalCount(String jobName) throws Exception {
int shardingTotalCount = 0;
String jobConfigShardingTotalCountNodePath = SaturnExecutorsNode
.getJobConfigShardingTotalCountNodePath(jobName);
if (curatorFramework.checkExists().forPath(jobConfigShardingTotalCountNodePath) != null) {
byte[] shardingTotalCountData = curatorFramework.getData().forPath(jobConfigShardingTotalCountNodePath);
if (shardingTotalCountData != null) {
try {
shardingTotalCount = Integer
.parseInt(new String(shardingTotalCountData, StandardCharsets.UTF_8.name()));
} catch (NumberFormatException e) {
log.error("parse shardingTotalCount error, will use the default value", e);
}
}
}
return shardingTotalCount;
}
protected int getLoadLevel(String jobName) throws Exception {
int loadLevel = LOAD_LEVEL_DEFAULT;
String jobConfigLoadLevelNodePath = SaturnExecutorsNode.getJobConfigLoadLevelNodePath(jobName);
if (curatorFramework.checkExists().forPath(jobConfigLoadLevelNodePath) != null) {
byte[] loadLevelData = curatorFramework.getData().forPath(jobConfigLoadLevelNodePath);
try {
if (loadLevelData != null) {
loadLevel = Integer.parseInt(new String(loadLevelData, StandardCharsets.UTF_8.name()));
}
} catch (NumberFormatException e) {
log.error("parse loadLevel error, will use the default value", e);
}
}
return loadLevel;
}
/**
* 获取Executor集合,默认从sharding/content获取
*/
private List getLastOnlineExecutorList() throws Exception {
return namespaceShardingContentService.getExecutorList();
}
/**
* Custom the lastOnlineExecutorList, attention, cannot return null
*/
protected List customLastOnlineExecutorList() throws Exception {
return null;
}
private List getTrafficExecutorList(List executorList) {
List trafficExecutorList = new ArrayList<>();
for (Executor executor : executorList) {
if (!executor.isNoTraffic()) {
trafficExecutorList.add(executor);
}
}
return trafficExecutorList;
}
/**
* 摘取
*
* @param allJobs 该域下所有作业
* @param allEnableJobs 该域下所有启用的作业
* @param shardList 默认为空集合
* @param lastOnlineExecutorList 默认为当前存储的数据,如果不想使用存储数据,请重写{@link #customLastOnlineExecutorList()}}方法
* @param lastOnlineTrafficExecutorList lastOnlineExecutorList中所有noTraffic为false的Executor,注意Executor是同一个对象
* @return true摘取成功;false摘取失败,不需要继续下面的逻辑
*/
protected abstract boolean pick(List allJobs, List allEnableJobs, List shardList,
List lastOnlineExecutorList, List lastOnlineTrafficExecutorList) throws Exception;
/**
* 按照loadLevel降序排序,如果loadLevel相同,按照作业名降序排序
*/
protected void sortShardList(List shardList) {
Collections.sort(shardList, new Comparator() {
@Override
public int compare(Shard o1, Shard o2) {
int loadLevelSub = o2.getLoadLevel() - o1.getLoadLevel();
return loadLevelSub == 0 ? o2.getJobName().compareTo(o1.getJobName()) : loadLevelSub;
}
});
}
private List getNotDockerExecutors(List lastOnlineExecutorList) throws Exception {
// if CONTAINER_ALIGN_WITH_PHYSICAL = false, return all executors; otherwise, return all non-container executors.
if (NamespaceShardingService.CONTAINER_ALIGN_WITH_PHYSICAL) {
return lastOnlineExecutorList;
}
List nonDockerExecutors = new ArrayList<>();
for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
String executorName = executor.getExecutorName();
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorTaskNodePath(executorName))
== null) {
nonDockerExecutors.add(executor);
}
}
return nonDockerExecutors;
}
protected void putBackBalancing(List allEnableJobs, List shardList,
List lastOnlineExecutorList, List lastOnlineTrafficExecutorList) throws Exception {
if (lastOnlineExecutorList.isEmpty()) {
log.warn("Unnecessary to put shards back to executors balanced because of no executor");
return;
}
sortShardList(shardList);
// 获取所有非容器的executors
List notDockerExecutors = getNotDockerExecutors(lastOnlineTrafficExecutorList);
// 获取shardList中的作业能够被接管的executors
Map> noDockerTrafficExecutorsMapByJob = new HashMap<>();
Map> lastOnlineTrafficExecutorListMapByJob = new HashMap<>();
// 是否为本地模式作业的映射
Map localModeMap = new HashMap<>();
// 是否配置优先节点的作业的映射
Map preferListIsConfiguredMap = new HashMap<>();
// 优先节点的作业的映射
Map> preferListConfiguredMap = new HashMap<>();
// 是否使用非优先节点的作业的映射
Map useDispreferListMap = new HashMap<>();
Iterator iterator = shardList.iterator();
while (iterator.hasNext()) {
String jobName = iterator.next().getJobName();
checkAndPutIntoMap(jobName, filterExecutorsByJob(notDockerExecutors, jobName),
noDockerTrafficExecutorsMapByJob);
checkAndPutIntoMap(jobName, filterExecutorsByJob(lastOnlineTrafficExecutorList, jobName),
lastOnlineTrafficExecutorListMapByJob);
checkAndPutIntoMap(jobName, isLocalMode(jobName), localModeMap);
checkAndPutIntoMap(jobName, preferListIsConfigured(jobName), preferListIsConfiguredMap);
checkAndPutIntoMap(jobName, getPreferListConfigured(jobName), preferListConfiguredMap);
checkAndPutIntoMap(jobName, useDispreferList(jobName), useDispreferListMap);
}
// 整体算法放回算法:拿取Shard,放进负荷最小的executor
// 1、放回localMode的Shard
// 如果配置了preferList,则选取preferList中的executor。
// 如果preferList中的executor都挂了,则不转移;否则,选取没有接管该作业的executor列表的loadLevel最小的一个。
// 如果没有配置preferList,则选取没有接管该作业的executor列表的loadLevel最小的一个。
putBackShardWithLocalMode(shardList, noDockerTrafficExecutorsMapByJob, lastOnlineTrafficExecutorListMapByJob,
localModeMap, preferListIsConfiguredMap, preferListConfiguredMap);
// 2、放回配置了preferList的Shard
putBackShardWithPreferList(shardList, lastOnlineTrafficExecutorListMapByJob, preferListIsConfiguredMap,
preferListConfiguredMap, useDispreferListMap);
// 3、放回没有配置preferList的Shard
putBackShardWithoutPreferList(shardList, noDockerTrafficExecutorsMapByJob);
}
private void checkAndPutIntoMap(String key, T value, Map targetMap) {
if (!targetMap.containsKey(key)) {
targetMap.put(key, value);
}
}
private void putBackShardWithoutPreferList(List shardList,
Map> noDockerTrafficExecutorsMapByJob) {
Iterator iterator = shardList.iterator();
while (iterator.hasNext()) {
Shard shard = iterator.next();
List executors = noDockerTrafficExecutorsMapByJob.get(shard.getJobName());
Executor executor = ENABLE_JOB_BASED_SHARDING ?
getExecutorWithMinJobLoadLevel(executors, shard.getJobName()) :
getExecutorWithMinLoadLevel(executors);
putShardIntoExecutor(shard, executor);
iterator.remove();
}
}
private void putBackShardWithPreferList(List shardList,
Map> lastOnlineTrafficExecutorListMapByJob,
Map preferListIsConfiguredMap, Map> preferListConfiguredMap,
Map useDispreferListMap) {
Iterator iterator = shardList.iterator();
while (iterator.hasNext()) {
Shard shard = iterator.next();
String jobName = shard.getJobName();
if (preferListIsConfiguredMap.get(jobName)) { // fix,
// preferList为空不能作为判断是否配置preferList的依据,比如说配置了容器资源,但是全部下线了。
List preferList = preferListConfiguredMap.get(jobName);
List preferExecutorList = getPreferExecutors(lastOnlineTrafficExecutorListMapByJob, jobName,
preferList);
// 如果preferList的Executor都offline,则放回到全部online的Executor中某一个。如果是这种情况,则后续再操作,避免不均衡的情况
// 如果存在preferExecutor,择优放回
if (!preferExecutorList.isEmpty()) {
Executor executor = ENABLE_JOB_BASED_SHARDING ?
getExecutorWithMinJobLoadLevel(preferExecutorList, jobName) :
getExecutorWithMinLoadLevel(preferExecutorList);
putShardIntoExecutor(shard, executor);
iterator.remove();
} else { // 如果不存在preferExecutor
// 如果“只使用preferExecutor”,则丢弃;否则,等到后续(在第3步)进行放回操作,避免不均衡的情况
if (!useDispreferListMap.get(jobName)) {
iterator.remove();
}
}
}
}
}
private void putBackShardWithLocalMode(List shardList,
Map> noDockerTrafficExecutorsMapByJob,
Map> lastOnlineTrafficExecutorListMapByJob, Map localModeMap,
Map preferListIsConfiguredMap, Map> preferListConfiguredMap) {
Iterator iterator = shardList.iterator();
while (iterator.hasNext()) {
Shard shard = iterator.next();
String jobName = shard.getJobName();
if (!localModeMap.get(jobName)) {
continue;
}
if (preferListIsConfiguredMap.get(jobName)) {
List preferListConfigured = preferListConfiguredMap.get(jobName);
if (!preferListConfigured.isEmpty()) {
List preferExecutorList = getPreferExecutors(lastOnlineTrafficExecutorListMapByJob,
jobName, preferListConfigured);
if (!preferExecutorList.isEmpty()) {
Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(preferExecutorList, jobName);
putShardIntoExecutor(shard, executor);
}
}
} else {
Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(
noDockerTrafficExecutorsMapByJob.get(jobName), jobName);
putShardIntoExecutor(shard, executor);
}
iterator.remove();
}
}
private List getPreferExecutors(Map> lastOnlineTrafficExecutorListMapByJob,
String jobName, List preferListConfigured) {
List preferExecutorList = new ArrayList<>();
List lastOnlineTrafficExecutorListByJob = lastOnlineTrafficExecutorListMapByJob.get(jobName);
for (int i = 0; i < lastOnlineTrafficExecutorListByJob.size(); i++) {
Executor executor = lastOnlineTrafficExecutorListByJob.get(i);
if (preferListConfigured.contains(executor.getExecutorName())) {
preferExecutorList.add(executor);
}
}
return preferExecutorList;
}
/**
* 是否使用非preferList:
* 1、存在结点,并且该结点值为false,返回false;
* 2、其他情况,返回true
*/
protected boolean useDispreferList(String jobName) throws Exception {
String jobConfigUseDispreferListNodePath = SaturnExecutorsNode.getJobConfigUseDispreferListNodePath(jobName);
if (curatorFramework.checkExists().forPath(jobConfigUseDispreferListNodePath) != null) {
byte[] useDispreferListData = curatorFramework.getData().forPath(jobConfigUseDispreferListNodePath);
if (useDispreferListData != null && !Boolean
.parseBoolean(new String(useDispreferListData, StandardCharsets.UTF_8.name()))) {
return false;
}
}
return true;
}
private Executor getExecutorWithMinLoadLevel(List executorList) {
Executor minLoadLevelExecutor = null;
for (int i = 0; i < executorList.size(); i++) {
Executor executor = executorList.get(i);
if (minLoadLevelExecutor == null || minLoadLevelExecutor.getTotalLoadLevel() > executor
.getTotalLoadLevel()) {
minLoadLevelExecutor = executor;
}
}
return minLoadLevelExecutor;
}
/**
* 获取该作业负荷最小的executor,如果相同,那么取所有作业负荷最小的executor
*/
private Executor getExecutorWithMinJobLoadLevel(List executorList, String jobName) {
Executor minLoadLevelExecutor = null;
int minTotalLoadLevel = 0;
for (int i = 0; i < executorList.size(); i++) {
Executor executor = executorList.get(i);
int totalJobLoadLevel = getTotalJobLoadLevel(executor, jobName);
if (minLoadLevelExecutor == null || minTotalLoadLevel > totalJobLoadLevel
|| minTotalLoadLevel == totalJobLoadLevel && minLoadLevelExecutor.getTotalLoadLevel() > executor
.getTotalLoadLevel()) {
minLoadLevelExecutor = executor;
minTotalLoadLevel = totalJobLoadLevel;
}
}
return minLoadLevelExecutor;
}
/**
* 计算指定作业在特定executor上的负荷
*/
private int getTotalJobLoadLevel(Executor executor, String jobName) {
int totalJobLoadLevel = 0;
List shardList = executor.getShardList();
for (int i = 0; i < shardList.size(); i++) {
Shard shard = shardList.get(i);
if (jobName.equals(shard.getJobName())) {
totalJobLoadLevel += shard.getLoadLevel();
}
}
return totalJobLoadLevel;
}
private Executor getExecutorWithMinLoadLevelAndNoThisJob(List executorList, String jobName) {
Executor minLoadLevelExecutor = null;
for (int i = 0; i < executorList.size(); i++) {
Executor executor = executorList.get(i);
List shardList = executor.getShardList();
boolean containThisJob = false;
for (int j = 0; j < shardList.size(); j++) {
Shard shard = shardList.get(j);
if (shard.getJobName().equals(jobName)) {
containThisJob = true;
break;
}
}
if (!containThisJob && (minLoadLevelExecutor == null || minLoadLevelExecutor.getTotalLoadLevel() > executor
.getTotalLoadLevel())) {
minLoadLevelExecutor = executor;
}
}
return minLoadLevelExecutor;
}
private void putShardIntoExecutor(Shard shard, Executor executor) {
if (executor != null) {
if (isIn(shard, executor.getShardList())) {
log.error("The shard({}-{}) is running in the executor of {}, cannot be put again", shard.getJobName(),
shard.getItem(), executor.getExecutorName());
} else {
executor.getShardList().add(shard);
executor.setTotalLoadLevel(executor.getTotalLoadLevel() + shard.getLoadLevel());
}
} else {
log.info("No executor to take over the shard: {}-{}", shard.getJobName(), shard.getItem());
}
}
/**
* 获取该域下的所有作业
*/
private List getAllJobs() throws Exception {
List allJob = new ArrayList<>();
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.JOBSNODE_PATH) == null) {
curatorFramework.create().creatingParentsIfNeeded().forPath(SaturnExecutorsNode.JOBSNODE_PATH);
}
List tmp = curatorFramework.getChildren().forPath(SaturnExecutorsNode.JOBSNODE_PATH);
if (tmp != null) {
allJob.addAll(tmp);
}
return allJob;
}
/**
* 获取该域下的所有enable的作业
*/
protected List getAllEnableJobs(List allJob) throws Exception {
List allEnableJob = new ArrayList<>();
for (int i = 0; i < allJob.size(); i++) {
String job = allJob.get(i);
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigEnableNodePath(job)) != null) {
byte[] enableData = curatorFramework.getData()
.forPath(SaturnExecutorsNode.getJobConfigEnableNodePath(job));
if (enableData != null && Boolean.parseBoolean(new String(enableData, StandardCharsets.UTF_8.name()))) {
allEnableJob.add(job);
}
}
}
return allEnableJob;
}
protected boolean isIn(Shard shard, List shardList) {
for (int i = 0; i < shardList.size(); i++) {
Shard tmp = shardList.get(i);
if (tmp.getJobName().equals(shard.getJobName()) && tmp.getItem() == shard.getItem()) {
return true;
}
}
return false;
}
protected boolean preferListIsConfigured(String jobName) throws Exception {
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName))
!= null) {
byte[] preferListData = curatorFramework.getData()
.forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName));
if (preferListData != null) {
return new String(preferListData, StandardCharsets.UTF_8.name()).trim().length() > 0;
}
}
return false;
}
/**
* 获取配置态的preferList,即使配置的executor不存在,也会返回。 特别的是,对于docker task,如果存在,才去解析出executor列表。
*/
protected List getPreferListConfigured(String jobName) throws Exception {
List preferList = new ArrayList<>();
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName))
!= null) {
byte[] preferListData = curatorFramework.getData()
.forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName));
if (preferListData != null) {
List allExistsExecutors = getAllExistingExecutors();
String[] split = new String(preferListData, StandardCharsets.UTF_8.name()).split(",");
for (String tmp : split) {
String tmpTrim = tmp.trim();
if (!"".equals(tmpTrim)) {
fillRealPreferListIfIsDockerOrNot(preferList, tmpTrim, allExistsExecutors);
}
}
}
}
return preferList;
}
private List getAllExistingExecutors() throws Exception {
List allExistsExecutors = new ArrayList<>();
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorsNodePath()) != null) {
List executors = curatorFramework.getChildren().forPath(SaturnExecutorsNode.getExecutorsNodePath());
if (executors != null) {
allExistsExecutors.addAll(executors);
}
}
return allExistsExecutors;
}
/**
* 如果prefer不是docker容器,并且preferList不包含,则直接添加;
*
* 如果prefer是docker容器(以@开头),则prefer为task,获取该task下的所有executor,如果不包含,添加进preferList。
*/
private void fillRealPreferListIfIsDockerOrNot(List preferList, String prefer,
List allExistsExecutors) throws Exception {
if (!prefer.startsWith("@")) { // not docker server
if (!preferList.contains(prefer)) {
preferList.add(prefer);
}
return;
}
String task = prefer.substring(1);
for (int i = 0; i < allExistsExecutors.size(); i++) {
String executor = allExistsExecutors.get(i);
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorTaskNodePath(executor)) != null) {
byte[] taskData = curatorFramework.getData()
.forPath(SaturnExecutorsNode.getExecutorTaskNodePath(executor));
if (taskData != null && task.equals(new String(taskData, StandardCharsets.UTF_8.name())) && !preferList
.contains(executor)) {
preferList.add(executor);
}
}
}
}
protected List filterExecutorsByJob(List executorList, String jobName) {
List executorListByJob = new ArrayList<>();
for (int i = 0; i < executorList.size(); i++) {
Executor executor = executorList.get(i);
List jobNameList = executor.getJobNameList();
if (jobNameList != null && jobNameList.contains(jobName)) {
executorListByJob.add(executor);
}
}
return executorListByJob;
}
private List getPreferListOnlineByJob(String jobName, List preferListConfigured,
List lastOnlineExecutorList) {
List preferListOnlineByJob = new ArrayList<>();
for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
if (preferListConfigured.contains(executor.getExecutorName()) && executor.getJobNameList()
.contains(jobName)) {
preferListOnlineByJob.add(executor);
}
}
return preferListOnlineByJob;
}
private List createShards(String jobName, int number, int loadLevel) {
List shards = new ArrayList<>();
for (int i = 0; i < number; i++) {
Shard shard = new Shard();
shard.setJobName(jobName);
shard.setItem(i);
shard.setLoadLevel(loadLevel);
shards.add(shard);
}
return shards;
}
protected List createShards(String jobName, List lastOnlineExecutorList) throws Exception {
List shardList = new ArrayList<>();
boolean preferListIsConfigured = preferListIsConfigured(jobName);
List preferListConfigured = getPreferListConfigured(jobName);
List preferListOnlineByJob = getPreferListOnlineByJob(jobName, preferListConfigured,
lastOnlineExecutorList);
boolean localMode = isLocalMode(jobName);
int shardingTotalCount = getShardingTotalCount(jobName);
int loadLevel = getLoadLevel(jobName);
if (localMode) {
if (preferListIsConfigured) {
// 如果当前存在优先节点在线,则新建在线的优先节点的数量的分片
if (!preferListOnlineByJob.isEmpty()) {
shardList.addAll(createShards(jobName, preferListOnlineByJob.size(), loadLevel));
}
} else {
// 新建在线的executor的数量的分片
shardList.addAll(createShards(jobName, lastOnlineExecutorList.size(), loadLevel));
}
} else {
// 新建shardingTotalCount数量的分片
shardList.addAll(createShards(jobName, shardingTotalCount, loadLevel));
}
return shardList;
}
protected boolean getExecutorNoTraffic(String executorName) throws Exception {
return curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorNoTrafficNodePath(executorName))
!= null;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy