
package com.vip.saturn.job.sharding.service;
import com.vip.saturn.job.sharding.entity.Executor;
import com.vip.saturn.job.sharding.entity.Shard;
import com.vip.saturn.job.sharding.node.SaturnExecutorsNode;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.api.transaction.CuratorTransactionFinal;
import org.apache.curator.framework.recipes.leader.LeaderLatch;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
/**
*
* @author xiaopeng.he
*/
public class NamespaceShardingService {
static Logger log = LoggerFactory.getLogger(NamespaceShardingService.class);
private static final int LOAD_LEVEL_DEFAULT = 1;
private CuratorFramework curatorFramework;
private AtomicInteger shardingCount;
private AtomicBoolean needAllSharding;
private ExecutorService executorService;
private String namespace;
private String hostValue;
private NamespaceShardingContentService namespaceShardingContentService;
public NamespaceShardingService(CuratorFramework curatorFramework, String hostValue) {
this.curatorFramework = curatorFramework;
this.hostValue = hostValue;
this.shardingCount = new AtomicInteger(0);
this.needAllSharding = new AtomicBoolean(false);
this.executorService = newSingleThreadExecutor();
this.namespace = curatorFramework.getNamespace();
this.namespaceShardingContentService = new NamespaceShardingContentService(curatorFramework);
}
private ExecutorService newSingleThreadExecutor() {
return Executors.newSingleThreadExecutor(new ThreadFactory() {
@Override
public Thread newThread(Runnable r) {
return new Thread(r, namespace + "-" + r.getClass().getSimpleName());
}
});
}
private abstract class AbstractAsyncShardingTask implements Runnable {
protected abstract void logStartInfo();
@Override
public void run() {
logStartInfo();
boolean isAllShardingTask = this instanceof ExecuteAllShardingTask;
try {
// If this node is no longer the leader, return immediately
if(!isLeadership()) {
return;
}
// If a full sharding is required and this is not the full-sharding task, return immediately; there is no point in sharding here
if(needAllSharding.get() && !isAllShardingTask) {
log.info("the {} will be ignored, because there will be {}", this.getClass().getSimpleName(), ExecuteAllShardingTask.class.getSimpleName());
return;
}
List<String> allJobs = getAllJobs();
List<String> allEnableJobs = getAllEnableJobs(allJobs);
List<Executor> lastOnlineExecutorList = getLastOnlineExecutorList();
List<Shard> shardList = new ArrayList<>();
// Pick
if(pick(allEnableJobs, shardList, lastOnlineExecutorList)) {
// Put back
putBackBalancing(allEnableJobs, shardList, lastOnlineExecutorList);
// If this node is no longer the leader, return
if (!isLeadership()) {
return;
}
// Persist the sharding result
namespaceShardingContentService.persistDirectly(lastOnlineExecutorList);
// fix, notify all enable jobs whatever.
notifyJobShardingNecessary(allEnableJobs);
// sharding count ++
increaseShardingCount();
}
} catch (Throwable t) {
log.error(t.getMessage(), t);
if(!isAllShardingTask) { // If this is not the full-sharding task, trigger a full sharding to recover from the error; if it is, do not trigger another one
needAllSharding.set(true);
shardingCount.incrementAndGet();
executorService.submit(new ExecuteAllShardingTask());
}
} finally {
if(isAllShardingTask) { // If this is the full-sharding task, no further full sharding is needed
needAllSharding.set(false);
}
shardingCount.decrementAndGet();
}
}
private void increaseShardingCount() throws Exception {
Integer _shardingCount = 1;
if (null != curatorFramework.checkExists().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH)) {
byte[] shardingCountData = curatorFramework.getData().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH);
if(shardingCountData != null) {
try {
_shardingCount = Integer.parseInt(new String(shardingCountData, "UTF-8")) + 1;
} catch (NumberFormatException e) {
log.error(e.getMessage(), e);
}
}
curatorFramework.setData().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH, _shardingCount.toString().getBytes());
} else {
curatorFramework.create().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH, _shardingCount.toString().getBytes());
}
}
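// For illustration: SHARDING_COUNT_PATH accumulates the number of successful shardings of this
// namespace, so an external check could read it with plain Curator calls, e.g.
//   byte[] countData = curatorFramework.getData().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH);
//   int count = Integer.parseInt(new String(countData, "UTF-8"));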
private void notifyJobShardingNecessary(List<String> allEnableJobs) throws Exception {
CuratorTransactionFinal curatorTransactionFinal = curatorFramework.inTransaction().check().forPath("/").and();
for(int i=0; i<allEnableJobs.size(); i++) {
String jobName = allEnableJobs.get(i);
// mark the job's sharding-necessary node within the transaction (the exact node update is not recoverable from this copy)
}
curatorTransactionFinal.commit();
}
protected List<Executor> getLastOnlineExecutorList() throws Exception {
return namespaceShardingContentService.getExecutorList();
}
/**
* Pick shards
* @param allEnableJobs all enabled jobs under this namespace
* @param shardList an empty list by default
* @param lastOnlineExecutorList the currently persisted data by default; override {@link #getLastOnlineExecutorList()} if the persisted data should not be used
* @return true if picking succeeded; false if it failed and the following logic is unnecessary
*/
protected abstract boolean pick(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception;
/**
* Sort by loadLevel in descending order; if the loadLevel is equal, sort by job name in descending order
*/
protected void sortShardList(List<Shard> shardList) {
Collections.sort(shardList, new Comparator<Shard>() {
@Override
public int compare(Shard o1, Shard o2) {
int loadLevelSub = o2.getLoadLevel() - o1.getLoadLevel();
return loadLevelSub == 0 ? o2.getJobName().compareTo(o1.getJobName()) : loadLevelSub;
}
});
}
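// For illustration: given the shards (jobA, loadLevel=1), (jobB, loadLevel=3) and (jobC, loadLevel=3),
// sortShardList orders them as (jobC, 3), (jobB, 3), (jobA, 1): higher loadLevel first, and for equal
// loadLevel the lexicographically larger job name first, so heavy shards are put back before light ones.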
private List<Executor> getNotDockerExecutors(List<Executor> lastOnlineExecutorList) throws Exception {
List<Executor> notDockerExecutors = new ArrayList<>();
for(int i=0; i<lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
// add the executor to notDockerExecutors if it is not a docker (container) executor; the original check is not recoverable from this copy
}
return notDockerExecutors;
}
protected void putBackBalancing(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
if(lastOnlineExecutorList.isEmpty()) {
log.warn("Unnecessary to put shards back to executors balanced because of no executor");
return;
}
sortShardList(shardList);
// Get the non-docker executors
List<Executor> notDockerExecutors = getNotDockerExecutors(lastOnlineExecutorList);
// Get, per job in shardList, the executors that can take over that job
Map<String, List<Executor>> notDockerExecutorsMapByJob = new HashMap<>();
Map<String, List<Executor>> lastOnlineExecutorListMapByJob = new HashMap<>();
// Whether each job runs in local mode
Map<String, Boolean> localModeMap = new HashMap<>();
// Whether each job has a preferList configured
Map<String, Boolean> preferListIsConfiguredMap = new HashMap<>();
// The configured preferList of each job
Map<String, List<String>> preferListConfiguredMap = new HashMap<>();
// Whether each job may use non-preferred executors
Map<String, Boolean> useDispreferListMap = new HashMap<>();
Iterator<Shard> iterator0 = shardList.iterator();
while(iterator0.hasNext()) {
String jobName = iterator0.next().getJobName();
if(!notDockerExecutorsMapByJob.containsKey(jobName)) {
notDockerExecutorsMapByJob.put(jobName, filterExecutorsByJob(notDockerExecutors, jobName));
}
if(!lastOnlineExecutorListMapByJob.containsKey(jobName)) {
lastOnlineExecutorListMapByJob.put(jobName, filterExecutorsByJob(lastOnlineExecutorList, jobName));
}
if(!localModeMap.containsKey(jobName)) {
localModeMap.put(jobName, isLocalMode(jobName));
}
if(!preferListIsConfiguredMap.containsKey(jobName)) {
preferListIsConfiguredMap.put(jobName, preferListIsConfigured(jobName));
}
if(!preferListConfiguredMap.containsKey(jobName)) {
preferListConfiguredMap.put(jobName, getPreferListConfigured(jobName));
}
if(!useDispreferListMap.containsKey(jobName)) {
useDispreferListMap.put(jobName, useDispreferList(jobName));
}
}
// Overall put-back algorithm: take each shard and put it into the executor with the lowest load
// 1. Put back the localMode shards
// If a preferList is configured, choose among the preferList executors; if they are all offline, do not transfer the shard. Otherwise choose, among the executors that have not taken over this job, the one with the lowest loadLevel.
// If no preferList is configured, choose, among the executors that have not taken over this job, the one with the lowest loadLevel.
Iterator<Shard> shardIterator = shardList.iterator();
while(shardIterator.hasNext()) {
Shard shard = shardIterator.next();
String jobName = shard.getJobName();
if(localModeMap.get(jobName)) {
if(preferListIsConfiguredMap.get(jobName)) {
List<String> preferListConfigured = preferListConfiguredMap.get(jobName);
if (!preferListConfigured.isEmpty()) {
List<Executor> preferExecutorList = new ArrayList<>();
List<Executor> lastOnlineExecutorListByJob = lastOnlineExecutorListMapByJob.get(jobName);
for (int i = 0; i < lastOnlineExecutorListByJob.size(); i++) {
Executor executor = lastOnlineExecutorListByJob.get(i);
if (preferListConfigured.contains(executor.getExecutorName())) {
preferExecutorList.add(executor);
}
}
if (!preferExecutorList.isEmpty()) {
Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(preferExecutorList, jobName);
putShardIntoExecutor(shard, executor);
}
}
} else {
Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(notDockerExecutorsMapByJob.get(jobName), jobName);
putShardIntoExecutor(shard, executor);
}
shardIterator.remove();
}
}
// 2. Put back the shards whose jobs have a preferList configured
Iterator<Shard> shardIterator2 = shardList.iterator();
while(shardIterator2.hasNext()) {
Shard shard = shardIterator2.next();
String jobName = shard.getJobName();
if(preferListIsConfiguredMap.get(jobName)) { // fix: an empty preferList cannot be used to decide whether a preferList is configured, e.g. container resources may be configured but all of them are offline.
List<String> preferList = preferListConfiguredMap.get(jobName);
List<Executor> preferExecutorList = new ArrayList<>();
List<Executor> lastOnlineExecutorListByJob = lastOnlineExecutorListMapByJob.get(jobName);
for(int i=0; i<lastOnlineExecutorListByJob.size(); i++) {
Executor executor = lastOnlineExecutorListByJob.get(i);
if(preferList.contains(executor.getExecutorName())) {
preferExecutorList.add(executor);
}
}
if(!preferExecutorList.isEmpty()) {
Executor executor = getExecutorWithMinLoadLevel(preferExecutorList);
putShardIntoExecutor(shard, executor);
shardIterator2.remove();
} else if(!useDispreferListMap.get(jobName)) { // the shard may not fall back to non-preferred executors, so drop it here (reconstructed; the original branch is not recoverable from this copy)
shardIterator2.remove();
}
}
}
// 3. Put back the remaining shards to the executor with the lowest load
Iterator<Shard> shardIterator3 = shardList.iterator();
while(shardIterator3.hasNext()) {
Shard shard = shardIterator3.next();
Executor executor = getExecutorWithMinLoadLevel(notDockerExecutorsMapByJob.get(shard.getJobName()));
putShardIntoExecutor(shard, executor);
shardIterator3.remove();
}
}
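// For illustration: if shardList holds a single shard of jobA with loadLevel 2 and the non-docker
// executors able to run jobA are exec1 (totalLoadLevel 3) and exec2 (totalLoadLevel 1), step 3 above
// selects exec2 via getExecutorWithMinLoadLevel, and putShardIntoExecutor raises exec2's
// totalLoadLevel to 3 (the executor names are made up for the example).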
/**
* Whether non-preferred executors may be used
* 1. If the node exists and its value is false, return false;
* 2. otherwise, return true
*/
protected boolean useDispreferList(String jobName) {
try {
String jobConfigUseDispreferListNodePath = SaturnExecutorsNode.getJobConfigUseDispreferListNodePath(jobName);
if (curatorFramework.checkExists().forPath(jobConfigUseDispreferListNodePath) != null) {
byte[] useDispreferListData = curatorFramework.getData().forPath(jobConfigUseDispreferListNodePath);
if (useDispreferListData != null && !Boolean.valueOf(new String(useDispreferListData, "UTF-8"))) {
return false;
}
}
return true;
} catch (Exception e) {
log.error(e.getMessage(), e);
return true;
}
}
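// For illustration: this returns false only when the job's useDispreferList config node exists and
// holds a value other than "true" (e.g. "false"), in which case its shards may only be put back onto
// executors of the configured preferList; a missing node, or an error while reading it, yields true.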
private Executor getExecutorWithMinLoadLevel(List<Executor> executorList) {
Executor minLoadLevelExecutor = null;
for(int i=0; i<executorList.size(); i++) {
Executor executor = executorList.get(i);
if(minLoadLevelExecutor == null || minLoadLevelExecutor.getTotalLoadLevel() > executor.getTotalLoadLevel()) {
minLoadLevelExecutor = executor;
}
}
return minLoadLevelExecutor;
}
private Executor getExecutorWithMinLoadLevelAndNoThisJob(List<Executor> executorList, String jobName) {
Executor minLoadLevelExecutor = null;
for(int i=0; i<executorList.size(); i++) {
Executor executor = executorList.get(i);
List<Shard> shardList = executor.getShardList();
boolean containThisJob = false;
for(int j=0; j<shardList.size(); j++) {
Shard shard = shardList.get(j);
if(shard.getJobName().equals(jobName)) {
containThisJob = true;
break;
}
}
if(!containThisJob && (minLoadLevelExecutor == null || minLoadLevelExecutor.getTotalLoadLevel() > executor.getTotalLoadLevel())) {
minLoadLevelExecutor = executor;
}
}
return minLoadLevelExecutor;
}
private void putShardIntoExecutor(Shard shard, Executor executor) {
if(executor != null) {
if(isIn(shard, executor.getShardList())) {
log.error("The shard({}-{}) is running in the executor of {}, cannot be put again", shard.getJobName(), shard.getItem(), executor.getExecutorName());
} else {
executor.getShardList().add(shard);
executor.setTotalLoadLevel(executor.getTotalLoadLevel() + shard.getLoadLevel());
}
} else {
log.info("No executor to take over the shard: {}-{}", shard.getJobName(), shard.getItem());
}
}
/**
* Get all jobs under this namespace
*/
private List<String> getAllJobs() throws Exception {
List<String> allJob = new ArrayList<>();
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.$JOBSNODE_PATH) == null) {
curatorFramework.create().creatingParentsIfNeeded().forPath(SaturnExecutorsNode.$JOBSNODE_PATH);
}
List<String> tmp = curatorFramework.getChildren().forPath(SaturnExecutorsNode.$JOBSNODE_PATH);
if(tmp != null) {
allJob.addAll(tmp);
}
return allJob;
}
/**
* Get all enabled jobs under this namespace. (The loop body below, together with the isLocalMode,
* getShardingTotalCount and getLoadLevel helpers referenced elsewhere in this class, could not be
* recovered from this copy of the source.)
*/
protected List<String> getAllEnableJobs(List<String> allJob) throws Exception {
List<String> allEnableJob = new ArrayList<>();
for(int i=0; i<allJob.size(); i++) {
// add allJob.get(i) to allEnableJob if the job's enabled config node holds true
}
return allEnableJob;
}
// reconstructed from its usage in putShardIntoExecutor: whether the shard is already in the list
private boolean isIn(Shard shard, List<Shard> shardList) {
for(int i=0; i<shardList.size(); i++) {
Shard tmp = shardList.get(i);
if(tmp.getJobName().equals(shard.getJobName()) && tmp.getItem() == shard.getItem()) {
return true;
}
}
return false;
}
/**
* Whether the job has a non-blank preferList configured
*/
protected boolean preferListIsConfigured(String jobName) throws Exception {
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName)) != null) {
byte[] preferListData = curatorFramework.getData().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName));
if(preferListData != null) {
return new String(preferListData, "UTF-8").trim().length() > 0;
}
}
return false;
}
/**
* Get the configured preferList; configured executors are returned even if they do not exist. In particular, for a docker task, its executors are resolved only if the task exists.
*/
protected List<String> getPreferListConfigured(String jobName) throws Exception {
List<String> preferList = new ArrayList<>();
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName)) != null) {
byte[] preferListData = curatorFramework.getData().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName));
if(preferListData != null) {
List<String> allExistsExecutors = getAllExistingExecutors();
String[] split = new String(preferListData, "UTF-8").split(",");
for(String tmp : split) {
String tmpTrim = tmp.trim();
if(!"".equals(tmpTrim)) {
fillRealPreferListIfIsDockerOrNot(preferList, tmpTrim, allExistsExecutors);
}
}
}
}
return preferList;
}
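// For illustration: a preferList value of "executorA, @task1" yields executorA plus every existing
// executor resolved for the docker task "task1" (see fillRealPreferListIfIsDockerOrNot below);
// entries are trimmed and blank entries are ignored, so " executorA ,," is equivalent to "executorA".
// The names executorA and task1 are made up for the example.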
private List<String> getAllExistingExecutors() throws Exception {
List<String> allExistsExecutors = new ArrayList<>();
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorsNodePath()) != null) {
List<String> executors = curatorFramework.getChildren().forPath(SaturnExecutorsNode.getExecutorsNodePath());
if(executors != null) {
allExistsExecutors.addAll(executors);
}
}
return allExistsExecutors;
}
/**
* If prefer is not a docker container and preferList does not contain it, add it directly;
* if prefer is a docker container (starting with @), prefer is a task: get all executors under that task and add those not already contained to preferList.
*/
private void fillRealPreferListIfIsDockerOrNot(List<String> preferList, String prefer, List<String> allExistsExecutors) throws Exception {
if(!prefer.startsWith("@")) { // not docker server
if(!preferList.contains(prefer)) {
preferList.add(prefer);
}
} else { // docker server, get the real executorList by task
String task = prefer.substring(1);
for(int i=0; i<allExistsExecutors.size(); i++) {
String executor = allExistsExecutors.get(i);
// add the executor to preferList if it belongs to this task and is not already contained (the original task check is not recoverable from this copy)
}
}
}
private List<Executor> filterExecutorsByJob(List<Executor> executorList, String jobName) throws Exception {
List<Executor> executorListByJob = new ArrayList<>();
for(int i=0; i<executorList.size(); i++) {
Executor executor = executorList.get(i);
List<String> jobNameList = executor.getJobNameList();
if(jobNameList != null && jobNameList.contains(jobName)) {
executorListByJob.add(executor);
}
}
return executorListByJob;
}
private List<String> getPreferListOnlineByJob(String jobName, List<String> preferListConfigured, List<Executor> lastOnlineExecutorList) {
List<String> preferListOnlineByJob = new ArrayList<>();
for(int i=0; i<lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
if(preferListConfigured.contains(executor.getExecutorName())) {
preferListOnlineByJob.add(executor.getExecutorName()); // reconstructed; the original may additionally require the executor to support this job
}
}
return preferListOnlineByJob;
}
private List<Shard> createShards(String jobName, int number, int loadLevel) {
List<Shard> shards = new ArrayList<>();
for(int i=0; i<number; i++) {
Shard shard = new Shard();
shard.setJobName(jobName);
shard.setItem(i);
shard.setLoadLevel(loadLevel);
shards.add(shard);
}
return shards;
}
protected List<Shard> createShards(String jobName, List<Executor> lastOnlineExecutorList) throws Exception {
List<Shard> shardList = new ArrayList<>();
boolean preferListIsConfigured = preferListIsConfigured(jobName);
List<String> preferListConfigured = getPreferListConfigured(jobName);
List<String> preferListOnlineByJob = getPreferListOnlineByJob(jobName, preferListConfigured, lastOnlineExecutorList);
boolean localMode = isLocalMode(jobName);
int shardingTotalCount = getShardingTotalCount(jobName);
int loadLevel = getLoadLevel(jobName);
if(localMode) {
if(preferListIsConfigured) {
// If any preferred executors are online, create as many shards as there are online preferred executors
if(!preferListOnlineByJob.isEmpty()) {
shardList.addAll(createShards(jobName, preferListOnlineByJob.size(), loadLevel));
}
} else {
// Create as many shards as there are online executors
shardList.addAll(createShards(jobName, lastOnlineExecutorList.size(), loadLevel));
}
} else {
// Create shardingTotalCount shards
shardList.addAll(createShards(jobName, shardingTotalCount, loadLevel));
}
return shardList;
}
}
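// For illustration: for a local-mode job with a configured preferList of which 3 executors are
// online, createShards(jobName, lastOnlineExecutorList) returns 3 shards with items 0..2; for a
// non-local job with shardingTotalCount = 5 it returns 5 shards with items 0..4, each carrying the
// job's configured loadLevel (item numbering follows the createShards reconstruction above).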
/**
* Re-shard the whole namespace: drop all existing executors, re-read the executors and re-create the shards of all jobs
*/
private class ExecuteAllShardingTask extends AbstractAsyncShardingTask {
@Override
protected void logStartInfo() {
log.info("Execute the {} ", this.getClass().getSimpleName());
}
@Override
protected boolean pick(List<String> allEnableJob, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
// Get all running executors from under the $SaturnExecutors node
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorsNodePath()) != null) {
List<String> zkExecutors = curatorFramework.getChildren().forPath(SaturnExecutorsNode.getExecutorsNodePath());
if(zkExecutors != null) {
for(int i=0; i<zkExecutors.size(); i++) {
String zkExecutor = zkExecutors.get(i);
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorIpNodePath(zkExecutor)) != null) {
byte[] ipData = curatorFramework.getData().forPath(SaturnExecutorsNode.getExecutorIpNodePath(zkExecutor));
if(ipData != null) {
Executor executor = new Executor();
executor.setExecutorName(zkExecutor);
executor.setIp(new String(ipData, "UTF-8"));
executor.setTotalLoadLevel(0);
executor.setShardList(new ArrayList<Shard>());
executor.setJobNameList(getJobNameListSupportedByExecutor(zkExecutor, allEnableJob));
lastOnlineExecutorList.add(executor);
}
}
}
}
}
// Get all shards of all enabled jobs under this namespace
for(int i=0; i<allEnableJob.size(); i++) {
shardList.addAll(createShards(allEnableJob.get(i), lastOnlineExecutorList));
}
return true;
}
private List<String> getJobNameListSupportedByExecutor(String executorName, List<String> allEnableJob) throws Exception {
List<String> jobNameList = new ArrayList<>();
for(int i=0; i<allEnableJob.size(); i++) {
String jobName = allEnableJob.get(i);
// add jobName to jobNameList if this executor has registered a server node under the job (the original check is not recoverable from this copy)
}
return jobNameList;
}
@Override
protected List<Executor> getLastOnlineExecutorList() {
// A full re-sharding ignores the previously persisted executor list
return new ArrayList<>();
}
}
/**
* An executor comes online: only add an empty executor shell if it does not exist yet; if it already exists, reset its ip, in case ExecuteJobServerOnlineShardingTask ran first and did not set the ip.
* In particular, if there is currently no executor at all, i.e. this is the first executor to come online, a full sharding of the namespace is needed, because some jobs may already be enabled.
*/
private class ExecuteOnlineShardingTask extends AbstractAsyncShardingTask {
private String executorName;
private String ip;
public ExecuteOnlineShardingTask(String executorName, String ip) {
this.executorName = executorName;
this.ip = ip;
}
@Override
protected void logStartInfo() {
log.info("Execute the {} with {} online", this.getClass().getSimpleName(), executorName);
}
@Override
protected boolean pick(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {//NOSONAR
// If no executor is running, a full sharding is needed
if(lastOnlineExecutorList.isEmpty()) {
log.warn("There are no running executors, need all sharding");
needAllSharding.set(true);
shardingCount.incrementAndGet();
executorService.submit(new ExecuteAllShardingTask());
return false;
}
Executor executor = null;
boolean included = false;
for(int i=0; i< lastOnlineExecutorList.size(); i++) {
Executor tmp = lastOnlineExecutorList.get(i);
if(tmp.getExecutorName().equals(executorName)) {
included = true;
executor = tmp;
break;
}
}
if(!included) {
executor = new Executor();
executor.setExecutorName(executorName);
executor.setIp(ip);
executor.setShardList(new ArrayList<Shard>());
executor.setJobNameList(new ArrayList<String>());
lastOnlineExecutorList.add(executor);
} else { // reset the ip
executor.setIp(ip);
}
return true;
}
}
/**
* An executor goes offline: pick all non-local-mode shards running on that executor and remove the executor
*/
private class ExecuteOfflineShardingTask extends AbstractAsyncShardingTask {
private String executorName;
public ExecuteOfflineShardingTask(String executorName) {
this.executorName = executorName;
}
@Override
protected void logStartInfo() {
log.info("Execute the {} with {} offline", this.getClass().getSimpleName(), executorName);
}
@Override
protected boolean pick(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
/**
* Pick all shards of the offline executor
*/
boolean wasOffline = true;
Iterator<Executor> iterator = lastOnlineExecutorList.iterator();
while(iterator.hasNext()) {
Executor executor = iterator.next();
if(executor.getExecutorName().equals(executorName)) {
wasOffline = false;
iterator.remove();
shardList.addAll(executor.getShardList());
break;
}
}
// If the executor actually went offline before this, picking fails
if(wasOffline) {
return false;
}
// Remove the shards of local-mode jobs
Iterator<Shard> shardIterator = shardList.iterator();
while(shardIterator.hasNext()) {
Shard shard = shardIterator.next();
if(isLocalMode(shard.getJobName())) {
shardIterator.remove();
}
}
return true;
}
}
/**
* A job is enabled: get the job's shards, taking care to filter out executors that cannot run the job
*/
private class ExecuteJobEnableShardingTask extends AbstractAsyncShardingTask {
private String jobName;
public ExecuteJobEnableShardingTask(String jobName) {
this.jobName = jobName;
}
@Override
protected void logStartInfo() {
log.info("Execute the {} with {} enable", this.getClass().getSimpleName(), jobName);
}
@Override
protected boolean pick(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
// Remove all shards of this job that are already running on executors
boolean hasRemove = false;
for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
Iterator<Shard> iterator = executor.getShardList().iterator();
while (iterator.hasNext()) {
Shard shard = iterator.next();
if (jobName.equals(shard.getJobName())) {
executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel());
iterator.remove();
hasRemove = true;
}
}
}
// Get the job's shards
shardList.addAll(createShards(jobName, lastOnlineExecutorList));
// If shardList is empty and no shard was removed, putting back is unnecessary and picking fails
if (shardList.isEmpty() && !hasRemove) {
return false;
}
return true;
}
}
/**
* A job is disabled: pick the job's shards from all executors, remembering to decrease their loadLevel accordingly; no put-back is needed
*/
private class ExecuteJobDisableShardingTask extends AbstractAsyncShardingTask {
private String jobName;
public ExecuteJobDisableShardingTask(String jobName) {
this.jobName = jobName;
}
@Override
protected void logStartInfo() {
log.info("Execute the {} with {} disable", this.getClass().getSimpleName(), jobName);
}
@Override
protected boolean pick(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) {
// Pick all shards of this job
for(int i=0; i< lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
Iterator<Shard> iterator = executor.getShardList().iterator();
while(iterator.hasNext()) {
Shard shard = iterator.next();
if (shard.getJobName().equals(jobName)) {
executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel());
iterator.remove();
shardList.add(shard);
}
}
}
// If shardList is empty, putting back is unnecessary and picking fails
if(shardList.isEmpty()) {
return false;
}
return true;
}
@Override
protected void putBackBalancing(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) {
// no-op
}
}
/**
* A job is force-resharded: remove the job's shards from all executors, re-create the job's shards, and finally delete the forceShard node
*/
private class ExecuteJobForceShardShardingTask extends AbstractAsyncShardingTask {
private String jobName;
public ExecuteJobForceShardShardingTask(String jobName) {
this.jobName = jobName;
}
@Override
protected void logStartInfo() {
log.info("Execute the {} with {} forceShard", this.getClass().getSimpleName(), jobName);
}
@Override
public void run() {
try {
super.run();
} finally {
deleteForceShardNode();
}
}
private void deleteForceShardNode() {
try {
String jobConfigForceShardNodePath = SaturnExecutorsNode.getJobConfigForceShardNodePath(jobName);
if (curatorFramework.checkExists().forPath(jobConfigForceShardNodePath) != null) {
curatorFramework.delete().forPath(jobConfigForceShardNodePath);
}
} catch (Throwable t) {
log.error("delete forceShard node error", t);
}
}
@Override
protected boolean pick(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
// Remove all shards of this job that are already running on executors
boolean hasRemove = false;
for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
Iterator<Shard> iterator = executor.getShardList().iterator();
while (iterator.hasNext()) {
Shard shard = iterator.next();
if (jobName.equals(shard.getJobName())) {
executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel());
iterator.remove();
hasRemove = true;
}
}
}
if(allEnableJobs.contains(jobName)) {
// Get the job's shards
shardList.addAll(createShards(jobName, lastOnlineExecutorList));
}
// If shardList is empty and no shard was removed, putting back is unnecessary and picking fails
if (shardList.isEmpty() && !hasRemove) {
return false;
}
return true;
}
}
/**
* A job's executor comes online: pick shards at the executor level for balancing, but only shards of this job; new shards may also be created
*/
private class ExecuteJobServerOnlineShardingTask extends AbstractAsyncShardingTask {
private String jobName;
private String executorName;
public ExecuteJobServerOnlineShardingTask(String jobName, String executorName) {
this.jobName = jobName;
this.executorName = executorName;
}
@Override
protected void logStartInfo() {
log.info("Execute the {}, jobName is {}, executorName is {}", this.getClass().getSimpleName(), jobName, executorName);
}
private String getExecutorIp() {
String ip = null;
try {
String executorIpNodePath = SaturnExecutorsNode.getExecutorIpNodePath(executorName);
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorIpNodePath(executorName)) != null) {
byte[] ipBytes = curatorFramework.getData().forPath(executorIpNodePath);
if (ipBytes != null) {
ip = new String(ipBytes, "UTF-8");
}
}
} catch (Exception e) {
log.error(e.getMessage(), e);
}
return ip;
}
private Shard createLocalShard(List<Executor> lastOnlineExecutorList, int loadLevel) {
Shard shard = null;
List<Integer> itemList = new ArrayList<>();
for (int i = 0; i < lastOnlineExecutorList.size(); i++) {
List<Shard> shardList = lastOnlineExecutorList.get(i).getShardList();
for (int j = 0; j < shardList.size(); j++) {
Shard shardAlreadyExists = shardList.get(j);
if (shardAlreadyExists.getJobName().equals(jobName)) {
itemList.add(shardAlreadyExists.getItem());
}
}
}
Collections.sort(itemList, new Comparator<Integer>() {
@Override
public int compare(Integer o1, Integer o2) {
return o1 - o2;
}
});
int item = 0;
if(!itemList.isEmpty()) {
boolean[] flags = new boolean[itemList.size() + 1];
for(int i=0; i<itemList.size(); i++) {
Integer itemAlreadyExists = itemList.get(i);
if(itemAlreadyExists < flags.length) {
flags[itemAlreadyExists] = true;
}
}
for(int i=0; i<flags.length; i++) { // take the smallest item not yet in use (reconstructed; the original selection code is not recoverable from this copy)
if(!flags[i]) {
item = i;
break;
}
}
}
shard = new Shard();
shard.setJobName(jobName);
shard.setItem(item);
shard.setLoadLevel(loadLevel);
return shard;
}
private boolean hasShardRunning(List<Executor> lastOnlineExecutorList) {
for(int i=0; i<lastOnlineExecutorList.size(); i++) {
List<Shard> shardList = lastOnlineExecutorList.get(i).getShardList();
for(int j=0; j<shardList.size(); j++) {
if(shardList.get(j).getJobName().equals(jobName)) {
return true;
}
}
}
return false;
}
private List<Shard> pickShardsRunningInDispreferList(List<String> preferListConfigured, List<Executor> lastOnlineExecutorList) {
List<Shard> shards = new ArrayList<>();
for(int i=0; i<lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
if(!preferListConfigured.contains(executor.getExecutorName())) {
Iterator<Shard> iterator = executor.getShardList().iterator();
while(iterator.hasNext()) {
Shard shard = iterator.next();
if(shard.getJobName().equals(jobName)) {
executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel());
iterator.remove();
shards.add(shard);
}
}
}
}
return shards;
}
private int getTotalLoadLevel(List<Shard> shardList, List<Executor> executorList) {
int total = 0;
for(int i=0; i<shardList.size(); i++) {
total += shardList.get(i).getLoadLevel();
}
for(int i=0; i<executorList.size(); i++) {
total += executorList.get(i).getTotalLoadLevel();
}
return total;
}
private void pickBalance(List<Shard> shardList, List<Executor> allExecutors) {
int totalLoadLevel = getTotalLoadLevel(shardList, allExecutors);
int averageTotalLoad = totalLoadLevel / (allExecutors.size());
for (int i = 0; i < allExecutors.size(); i++) {
Executor executor = allExecutors.get(i);
while (true) {
int pickLoadLevel = executor.getTotalLoadLevel() - averageTotalLoad;
if (pickLoadLevel > 0 && !executor.getShardList().isEmpty()) {
Shard pickShard = null;
for (int j = 0; j < executor.getShardList().size(); j++) {
Shard shard = executor.getShardList().get(j);
if (!shard.getJobName().equals(jobName)) { // If this shard does not belong to the job, do not pick it; continue with the next one
continue;
}
if (pickShard == null) {
pickShard = shard;
} else {
if (pickShard.getLoadLevel() >= pickLoadLevel) {
if (shard.getLoadLevel() >= pickLoadLevel && shard.getLoadLevel() < pickShard.getLoadLevel()) {
pickShard = shard;
}
} else {
if (shard.getLoadLevel() >= pickLoadLevel) {
pickShard = shard;
} else {
if (shard.getLoadLevel() > pickShard.getLoadLevel()) {
pickShard = shard;
}
}
}
}
}
if (pickShard != null) {
executor.setTotalLoadLevel(executor.getTotalLoadLevel() - pickShard.getLoadLevel());
executor.getShardList().remove(pickShard);
shardList.add(pickShard);
} else { // nothing qualifies for picking, stop picking from this executor
break;
}
} else { // no more picking is needed
break;
}
}
}
}
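// For illustration: with two executors at totalLoadLevel 6 and 2 and an empty shardList,
// getTotalLoadLevel (as reconstructed above) gives 8 and the average is 4; the first executor is
// 2 above average, so pickBalance keeps picking shards of this job from it, preferring the
// smallest shard whose loadLevel still covers the surplus (otherwise the largest available),
// until it is at or below the average or has no shard of this job left.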
private List<Shard> createUnLocalShards(int shardingTotalCount, int loadLevel) {
List<Shard> shards = new ArrayList<>();
for(int i=0; i<shardingTotalCount; i++) {
Shard shard = new Shard();
shard.setJobName(jobName);
shard.setItem(i);
shard.setLoadLevel(loadLevel);
shards.add(shard);
}
return shards;
}
private boolean shardsAllRunningInDispreferList(List<String> preferListConfigured, List<Executor> lastOnlineExecutorList) {
for(int i=0; i<lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
List<Shard> shardList = executor.getShardList();
for(int j=0; j<shardList.size(); j++) {
Shard shard = shardList.get(j);
if(shard.getJobName().equals(jobName) && preferListConfigured.contains(executor.getExecutorName())) { // reconstructed: a shard of this job on a preferred executor means not all shards run on non-preferred executors
return false;
}
}
}
return true;
}
@Override
protected boolean pick(List<String> allEnableJobs, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
boolean preferListIsConfigured = preferListIsConfigured(jobName); // whether a preferList is configured
boolean useDispreferList = useDispreferList(jobName); // whether non-preferred executors may be used
List<String> preferListConfigured = getPreferListConfigured(jobName); // the configured preferList
boolean localMode = isLocalMode(jobName);
int shardingTotalCount = getShardingTotalCount(jobName);
int loadLevel = getLoadLevel(jobName);
// Small possibility: the status-added event arrives before the ip-added event.
// In that case, if lastOnlineExecutorList does not contain executorName, add a new Executor.
// Add the current job to its jobNameList.
Executor theExecutor = null;
for(int i=0; i< lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
if(executor.getExecutorName().equals(executorName)) {
theExecutor = executor;
break;
}
}
if(theExecutor == null) {
theExecutor = new Executor();
theExecutor.setExecutorName(executorName);
theExecutor.setIp(getExecutorIp());
theExecutor.setShardList(new ArrayList<Shard>());
theExecutor.setJobNameList(new ArrayList<String>());
theExecutor.setTotalLoadLevel(0);
lastOnlineExecutorList.add(theExecutor);
}
if(!theExecutor.getJobNameList().contains(jobName)) {
theExecutor.getJobNameList().add(jobName);
}
if(localMode) {
if(!preferListIsConfigured || preferListConfigured.contains(executorName)) {
shardList.add(createLocalShard(lastOnlineExecutorList, loadLevel));
}
} else {
boolean hasShardRunning = hasShardRunning(lastOnlineExecutorList);
if(preferListIsConfigured) {
if(preferListConfigured.contains(executorName)) {
// If shards are running, pick all shards running on non-preferred executors, and also pick for balance
if(hasShardRunning) {
shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList));
pickBalance(shardList, lastOnlineExecutorList);
} else {
// If no shard is running, new shards must be created; no balanced picking is needed
if(allEnableJobs.contains(jobName)) {
shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel));
}
}
} else {
if(useDispreferList) {
// If shards are running and all of them run on non-preferred executors, pick shards for balance
// If shards are running and some run on preferred executors, pick all shards running on non-preferred executors, without further balanced picking
if(hasShardRunning) {
boolean shardsAllRunningInDispreferList = shardsAllRunningInDispreferList(preferListConfigured, lastOnlineExecutorList);
if(shardsAllRunningInDispreferList) {
pickBalance(shardList, lastOnlineExecutorList);
} else {
shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList));
}
} else {
// If no shard is running, new shards must be created; no balanced picking is needed
if(allEnableJobs.contains(jobName)) {
shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel));
}
}
} else { // no further balanced picking is allowed
// pick all shards running on non-preferred executors
shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList));
}
}
} else {
// If shards are running, pick for balance
if(hasShardRunning) {
pickBalance(shardList, lastOnlineExecutorList);
} else {
// If no shard is running, new shards must be created; no balanced picking is needed
if(allEnableJobs.contains(jobName)) {
shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel));
}
}
}
}
return true;
}
}
/**
* A job's executor goes offline: pick all shards of the job running on that executor; if the job is local-mode, simply remove them
*/
private class ExecuteJobServerOfflineShardingTask extends AbstractAsyncShardingTask {
private String jobName;
private String executorName;
@Override
protected void logStartInfo() {
log.info("Execute the {}, jobName is {}, executorName is {}", this.getClass().getSimpleName(), jobName, executorName);
}
public ExecuteJobServerOfflineShardingTask(String jobName, String executorName) {
this.jobName = jobName;
this.executorName = executorName;
}
@Override
protected boolean pick(List<String> allJob, List<Shard> shardList, List<Executor> lastOnlineExecutorList) throws Exception {
boolean localMode = isLocalMode(jobName);
boolean find = false;
for(int i=0; i<lastOnlineExecutorList.size(); i++) {
Executor executor = lastOnlineExecutorList.get(i);
if(executor.getExecutorName().equals(executorName)) {
Iterator<Shard> iterator = executor.getShardList().iterator();
while(iterator.hasNext()) {
Shard shard = iterator.next();
if(shard.getJobName().equals(jobName)) {
find = true;
if(!localMode) {
shardList.add(shard);
}
iterator.remove();
}
}
find = find || executor.getJobNameList().remove(jobName);
break;
}
}
return find;
}
}
/**
* Handle an executor coming online
* @param executorName
* @throws Exception
*/
public void asyncShardingWhenExecutorOnline(String executorName, String ip) throws Exception {
if(isLeadership()) {
shardingCount.incrementAndGet();
executorService.submit(new ExecuteOnlineShardingTask(executorName, ip));
}
}
/**
* Handle an executor going offline
* @param executorName
* @throws Exception
*/
public void asyncShardingWhenExecutorOffline(String executorName) throws Exception {
if(isLeadership()) {
shardingCount.incrementAndGet();
executorService.submit(new ExecuteOfflineShardingTask(executorName));
}
}
/**
* Handle a job-enabled event
* @param jobName
* @throws Exception
*/
public void asyncShardingWhenJobEnable(String jobName) throws Exception {
if(isLeadership()) {
shardingCount.incrementAndGet();
executorService.submit(new ExecuteJobEnableShardingTask(jobName));
}
}
/**
* Handle a job-disabled event
* @param jobName
* @throws Exception
*/
public void asyncShardingWhenJobDisable(String jobName) throws Exception {
if(isLeadership()) {
shardingCount.incrementAndGet();
executorService.submit(new ExecuteJobDisableShardingTask(jobName));
}
}
/**
* Handle a job force-reshard event
*/
public void asyncShardingWhenJobForceShard(String jobName) throws Exception {
if (isLeadership()) {
shardingCount.incrementAndGet();
executorService.submit(new ExecuteJobForceShardShardingTask(jobName));
}
}
/**
* Handle a job's executor coming online
*/
public void asyncShardingWhenJobServerOnline(String jobName, String executorName) throws Exception {
if (isLeadership()) {
shardingCount.incrementAndGet();
executorService.submit(new ExecuteJobServerOnlineShardingTask(jobName, executorName));
}
}
/**
* Handle a job's executor going offline
*/
public void asyncShardingWhenJobServerOffline(String jobName, String executorName) throws Exception {
if (isLeadership()) {
shardingCount.incrementAndGet();
executorService.submit(new ExecuteJobServerOfflineShardingTask(jobName, executorName));
}
}
/**
* Leader election
* @throws Exception
*/
public void leaderElection() throws Exception {
log.info("{}-{} leadership election", namespace, hostValue);
LeaderLatch leaderLatch = new LeaderLatch(curatorFramework, SaturnExecutorsNode.LEADER_LATCHNODE_PATH);
try {
leaderLatch.start();
leaderLatch.await();
if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH) == null) {
// Persist the $Jobs node
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.$JOBSNODE_PATH) == null) {
curatorFramework.create().creatingParentsIfNeeded().forPath(SaturnExecutorsNode.$JOBSNODE_PATH);
}
// Persist the leader host value
curatorFramework.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH, hostValue.getBytes("UTF-8"));
// Clean up and reset state
executorService.shutdownNow();
while(!executorService.isTerminated()) { // wait until all tasks have exited
Thread.sleep(200);
}
needAllSharding.set(false);
shardingCount.set(0);
executorService = newSingleThreadExecutor();
// Submit the full-sharding task
needAllSharding.set(true);
shardingCount.incrementAndGet();
executorService.submit(new ExecuteAllShardingTask());
log.info("{}-{} become leadership", namespace, hostValue);
}
} catch (Exception e) {
log.error(namespace + "-" + hostValue + " leadership election failed", e);
throw e;
} finally {
try {
leaderLatch.close();
} catch (IOException e) {
log.error(e.getMessage(), e);
}
}
}
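// Usage sketch (illustrative; the surrounding wiring is an assumption, not part of this class):
// a namespace-level component would typically create the service, run the election once, and then
// forward ZooKeeper events to the async methods above, for example:
//   NamespaceShardingService service = new NamespaceShardingService(curatorFramework, hostValue);
//   service.leaderElection();
//   service.asyncShardingWhenExecutorOnline("executor-1", "10.0.0.1");
//   service.asyncShardingWhenJobEnable("demoJob");
//   // ...
//   service.shutdown();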
private boolean hasLeadership() throws Exception {
return curatorFramework.checkExists().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH) != null;
}
private boolean isLeadership() throws Exception {
while (!hasLeadership()) {
leaderElection();
}
return new String(curatorFramework.getData().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH), "UTF-8").equals(hostValue);
}
private void deleteLeadership() throws Exception {
if(isLeadership()) {
curatorFramework.delete().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH);
}
}
/**
* Shutdown
*/
public void shutdown() {
try {
if(curatorFramework.getZookeeperClient().isConnected()){
deleteLeadership();
}
} catch (Exception e) {
log.error("delete leadership failed", e);
}
if(executorService != null) {
executorService.shutdownNow();
}
}
}