All downloads are free. Search and download functionality uses the official Maven repository.

com.vip.saturn.job.sharding.service.NamespaceShardingService Maven / Gradle / Ivy

There is a newer version: 3.5.1
Show newest version
package com.vip.saturn.job.sharding.service;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.api.transaction.CuratorTransactionFinal;
import org.apache.curator.framework.recipes.leader.LeaderLatch;
import org.apache.zookeeper.CreateMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.vip.saturn.job.sharding.entity.Executor;
import com.vip.saturn.job.sharding.entity.Shard;
import com.vip.saturn.job.sharding.node.SaturnExecutorsNode;

/**
 *
 * @author hebelala
 */
public class NamespaceShardingService {
	// Shared SLF4J logger for this service and its inner sharding tasks.
	static Logger log = LoggerFactory.getLogger(NamespaceShardingService.class);

	// Default load level constant.
	// NOTE(review): presumably the fallback used when a job has no load level configured — confirm against getLoadLevel.
	private static final int LOAD_LEVEL_DEFAULT = 1;

	// ZooKeeper client (Curator) through which the service reads and writes its nodes.
	private CuratorFramework curatorFramework;

	// Incremented right before a sharding task is submitted to executorService and
	// decremented in the finally block of the task's run(), so it tracks tasks that are queued or running.
	private AtomicInteger shardingCount;

	// Set to true when an all-sharding task is requested; while true, other sharding tasks return
	// without doing work. The all-sharding task resets it to false when it finishes.
	private AtomicBoolean needAllSharding;

	// Single-thread executor (see newSingleThreadExecutor) to which the sharding tasks are submitted.
	private ExecutorService executorService;

	// Namespace reported by curatorFramework.getNamespace(); used as the prefix of the worker thread name.
	private String namespace;

	// Value passed in through the constructor.
	// NOTE(review): its use is not visible in this part of the file — confirm where it is read.
	private String hostValue;

	// Helper used to load the stored executor list, persist sharding results and read sharding items per job.
	private NamespaceShardingContentService namespaceShardingContentService;

    /**
     * Creates the sharding service for the namespace of the given client.
     *
     * <p>The namespace is captured before the executor is built because the executor's
     * thread factory reads it when it names the worker thread.
     *
     * @param curatorFramework client used for all node access; its namespace becomes this service's namespace
     * @param hostValue value stored unchanged on this instance
     */
    public NamespaceShardingService(CuratorFramework curatorFramework, String hostValue) {
        this.hostValue = hostValue;
        this.curatorFramework = curatorFramework;
        this.namespace = curatorFramework.getNamespace();
        this.namespaceShardingContentService = new NamespaceShardingContentService(curatorFramework);
        // No all-sharding is pending and no task has been queued yet.
        this.needAllSharding = new AtomicBoolean(false);
        this.shardingCount = new AtomicInteger(0);
        this.executorService = newSingleThreadExecutor();
    }

    /**
     * Builds the single-thread executor on which the sharding tasks run.
     *
     * <p>The worker thread is named {@code <namespace>-<simple class name of the runnable>}.
     * NOTE(review): the runnable a thread pool hands to its ThreadFactory is normally the pool's
     * internal worker rather than the submitted task, so the suffix is likely constant — confirm.
     *
     * @return a new executor backed by one thread
     */
    private ExecutorService newSingleThreadExecutor() {
        final ThreadFactory namedThreadFactory = new ThreadFactory() {
            @Override
            public Thread newThread(Runnable runnable) {
                // The namespace field is read when the thread is created, not when the factory is built.
                String threadName = namespace + "-" + runnable.getClass().getSimpleName();
                return new Thread(runnable, threadName);
            }
        };
        return Executors.newSingleThreadExecutor(namedThreadFactory);
    }

    private abstract class AbstractAsyncShardingTask implements Runnable {

		protected abstract void logStartInfo();

		/**
		 * Special enable jobs that need to be notified prior, not consider whether whose shards are changed.
* By default, notify enable jobs whose shards are changed. */ protected abstract List notifyEnableJobsPrior(); @Override public void run() { logStartInfo(); boolean isAllShardingTask = this instanceof ExecuteAllShardingTask; try { // 如果当前变为非leader,则直接返回 if(!isLeadership()) { return; } // 如果需要全量分片,且当前线程不是全量分片线程,则直接返回,没必要做分片 if(needAllSharding.get() && !isAllShardingTask) { log.info("the {} will be ignored, because there will be {}", this.getClass().getSimpleName(), ExecuteAllShardingTask.class.getSimpleName()); return; } List allJobs = getAllJobs(); List allEnableJobs = getAllEnableJobs(allJobs); List oldOnlineExecutorList = getLastOnlineExecutorList(); List lastOnlineExecutorList = isAllShardingTask ? oldOnlineExecutorList : copyOnlineExecutorList(oldOnlineExecutorList); // if all-shard-task, unnecessary to copy List shardList = new ArrayList<>(); // 摘取 if(pick(allJobs, allEnableJobs, shardList, lastOnlineExecutorList)) { // 放回 putBackBalancing(allEnableJobs, shardList, lastOnlineExecutorList); // 如果当前变为非leader,则返回 if (!isLeadership()) { return; } // 持久化分片结果 namespaceShardingContentService.persistDirectly(lastOnlineExecutorList); // fix, notify the shards-changed jobs of all enable jobs. 
notifyJobShardingNecessary(getEnabledAndShardsChangedJobs(isAllShardingTask, allEnableJobs, oldOnlineExecutorList, lastOnlineExecutorList)); // sharding count ++ increaseShardingCount(); } } catch (Throwable t) { log.error(t.getMessage(), t); if(!isAllShardingTask) { // 如果当前不是全量分片,则需要全量分片来拯救异常; 如果当前是全量分片,不再全量分片 needAllSharding.set(true); shardingCount.incrementAndGet(); executorService.submit(new ExecuteAllShardingTask()); } } finally { if(isAllShardingTask) { // 如果是全量分片,不再进行全量分片 needAllSharding.set(false); } shardingCount.decrementAndGet(); } } private List copyOnlineExecutorList(List oldOnlineExecutorList) { List newOnlineExecutorList = new ArrayList<>(); for (Executor oldExecutor : oldOnlineExecutorList) { Executor newExecutor = new Executor(); newExecutor.setTotalLoadLevel(oldExecutor.getTotalLoadLevel()); newExecutor.setIp(oldExecutor.getIp()); newExecutor.setExecutorName(oldExecutor.getExecutorName()); if (oldExecutor.getJobNameList() != null) { newExecutor.setJobNameList(new ArrayList()); for (String jobName : oldExecutor.getJobNameList()) { newExecutor.getJobNameList().add(jobName); } } if (oldExecutor.getShardList() != null) { newExecutor.setShardList(new ArrayList()); for (Shard oldShard : oldExecutor.getShardList()) { Shard newShard = new Shard(); newShard.setItem(oldShard.getItem()); newShard.setJobName(oldShard.getJobName()); newShard.setLoadLevel(oldShard.getLoadLevel()); newExecutor.getShardList().add(newShard); } } newOnlineExecutorList.add(newExecutor); } return newOnlineExecutorList; } /** * 修正lastOnlineExecutorList中的jobNameList */ protected boolean fixJobNameList(List lastOnlineExecutorList, String jobName) throws Exception { boolean fixed = false; for(int i=0; i()); } List jobNameList = executor.getJobNameList(); String jobServersExecutorStatusNodePath = SaturnExecutorsNode.getJobServersExecutorStatusNodePath(jobName, executor.getExecutorName()); if (curatorFramework.checkExists().forPath(jobServersExecutorStatusNodePath) != null) { if 
(!jobNameList.contains(jobName)) { jobNameList.add(jobName); fixed = true; } } else { if (jobNameList.contains(jobName)) { jobNameList.remove(jobName); fixed = true; } } } return fixed; } private void increaseShardingCount() throws Exception { Integer _shardingCount = 1; if (null != curatorFramework.checkExists().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH)) { byte[] shardingCountData = curatorFramework.getData().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH); if(shardingCountData != null) { try { _shardingCount = Integer.parseInt(new String(shardingCountData, "UTF-8")) + 1; } catch (NumberFormatException e) { log.error(e.getMessage(), e); } } curatorFramework.setData().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH, _shardingCount.toString().getBytes()); } else { curatorFramework.create().forPath(SaturnExecutorsNode.SHARDING_COUNT_PATH, _shardingCount.toString().getBytes()); } } /** * Get the jobs, that are enabled, and whose shards are changed. Specially, return all enabled jobs when the current thread is all-shard-task */ private List getEnabledAndShardsChangedJobs(boolean isAllShardingTask, List allEnableJobs, List oldOnlineExecutorList, List lastOnlineExecutorList) throws Exception { if (isAllShardingTask) { return allEnableJobs; } List enabledAndShardsChangedJobs = new ArrayList<>(); List enableJobsPrior = notifyEnableJobsPrior(); for (String enableJob : allEnableJobs) { // notify prior jobs that are in all enable jobs if(enableJobsPrior != null && enableJobsPrior.contains(enableJob)) { enabledAndShardsChangedJobs.add(enableJob); continue; } Map> oldShardingItems = namespaceShardingContentService.getShardingItems(oldOnlineExecutorList, enableJob); Map> lastShardingItems = namespaceShardingContentService.getShardingItems(lastOnlineExecutorList, enableJob); // just compare executorName's shardList boolean isChanged = false; Iterator>> oldIterator = oldShardingItems.entrySet().iterator(); wl_loop: while (oldIterator.hasNext()) { Map.Entry> next = 
oldIterator.next(); String executorName = next.getKey(); List shards = next.getValue(); List newShard = lastShardingItems.get(executorName); if ((shards == null || shards.isEmpty()) && (newShard != null && !newShard.isEmpty()) || (shards != null && !shards.isEmpty()) && (newShard == null || newShard.isEmpty())) { isChanged = true; break; } if (shards != null && newShard != null) { for (Integer shard : shards) { if (!newShard.contains(shard)) { isChanged = true; break wl_loop; } } } } if (!isChanged) { Iterator>> newIterator = lastShardingItems.entrySet().iterator(); wl_loop2: while (newIterator.hasNext()) { Map.Entry> next = newIterator.next(); String executorName = next.getKey(); List shards = next.getValue(); List oldShard = oldShardingItems.get(executorName); if ((shards == null || shards.isEmpty()) && (oldShard != null && !oldShard.isEmpty()) || (shards != null && !shards.isEmpty()) && (oldShard == null || oldShard.isEmpty())) { isChanged = true; break; } if (shards != null && oldShard != null) { for (Integer shard : shards) { if (!oldShard.contains(shard)) { isChanged = true; break wl_loop2; } } } } } if (isChanged) { enabledAndShardsChangedJobs.add(enableJob); } } return enabledAndShardsChangedJobs; } private void notifyJobShardingNecessary(List enabledAndShardsChangedJobs) throws Exception { if(enabledAndShardsChangedJobs != null && !enabledAndShardsChangedJobs.isEmpty()) { log.info("notify jobs sharding necessary, jobs is {}", enabledAndShardsChangedJobs); CuratorTransactionFinal curatorTransactionFinal = curatorFramework.inTransaction().check().forPath("/").and(); for(int i=0; i getLastOnlineExecutorList() throws Exception { return namespaceShardingContentService.getExecutorList(); } /** * 摘取 * @param allJobs 该域下所有作业 * @param allEnableJobs 该域下所有启用的作业 * @param shardList 默认为空集合 * @param lastOnlineExecutorList 默认为当前存储的数据,如果不想使用存储数据,请重写{@link #getLastOnlineExecutorList()}}方法 * @return true摘取成功;false摘取失败,不需要继续下面的逻辑 */ protected abstract boolean pick(List 
allJobs, List allEnableJobs, List shardList, List lastOnlineExecutorList) throws Exception; /** * 按照loadLevel降序排序,如果loadLevel相同,按照作业名降序排序 */ protected void sortShardList(List shardList) { Collections.sort(shardList, new Comparator() { @Override public int compare(Shard o1, Shard o2) { int loadLevelSub = o2.getLoadLevel() - o1.getLoadLevel(); return loadLevelSub == 0 ? o2.getJobName().compareTo(o1.getJobName()) : loadLevelSub; } }); } private List getNotDockerExecutors(List lastOnlineExecutorList) throws Exception { List notDockerExecutors = new ArrayList<>(); for(int i=0; i allEnableJobs, List shardList, List lastOnlineExecutorList) throws Exception { if(lastOnlineExecutorList.isEmpty()) { log.warn("Unnecessary to put shards back to executors balanced because of no executor"); return; } sortShardList(shardList); // 获取非容器executor List notDockerExecutors = getNotDockerExecutors(lastOnlineExecutorList); // 获取shardList中的作业能够被接管的executors Map> notDockerExecutorsMapByJob = new HashMap<>(); Map> lastOnlineExecutorListMapByJob = new HashMap<>(); // 是否为本地模式作业的映射 Map localModeMap = new HashMap<>(); // 是否配置优先节点的作业的映射 Map preferListIsConfiguredMap = new HashMap<>(); // 优先节点的作业的映射 Map> preferListConfiguredMap = new HashMap<>(); // 是否使用非优先节点的作业的映射 Map useDispreferListMap = new HashMap<>(); Iterator iterator0 = shardList.iterator(); while(iterator0.hasNext()) { String jobName = iterator0.next().getJobName(); if(!notDockerExecutorsMapByJob.containsKey(jobName)) { notDockerExecutorsMapByJob.put(jobName, filterExecutorsByJob(notDockerExecutors, jobName)); } if(!lastOnlineExecutorListMapByJob.containsKey(jobName)) { lastOnlineExecutorListMapByJob.put(jobName, filterExecutorsByJob(lastOnlineExecutorList, jobName)); } if(!localModeMap.containsKey(jobName)) { localModeMap.put(jobName, isLocalMode(jobName)); } if(!preferListIsConfiguredMap.containsKey(jobName)) { preferListIsConfiguredMap.put(jobName, preferListIsConfigured(jobName)); } if(!preferListConfiguredMap.containsKey(jobName)) { 
preferListConfiguredMap.put(jobName, getPreferListConfigured(jobName)); } if(!useDispreferListMap.containsKey(jobName)) { useDispreferListMap.put(jobName, useDispreferList(jobName)); } } // 整体算法放回算法:拿取Shard,放进负荷最小的executor // 1、放回localMode的Shard // 如果配置了preferList,则选取preferList中的executor。 如果preferList中的executor都挂了,则不转移;否则,选取没有接管该作业的executor列表的loadLevel最小的一个。 // 如果没有配置preferList,则选取没有接管该作业的executor列表的loadLevel最小的一个。 Iterator shardIterator = shardList.iterator(); while(shardIterator.hasNext()) { Shard shard = shardIterator.next(); String jobName = shard.getJobName(); if(localModeMap.get(jobName)) { if(preferListIsConfiguredMap.get(jobName)) { List preferListConfigured = preferListConfiguredMap.get(jobName); if (!preferListConfigured.isEmpty()) { List preferExecutorList = new ArrayList<>(); List lastOnlineExecutorListByJob = lastOnlineExecutorListMapByJob.get(jobName); for (int i = 0; i < lastOnlineExecutorListByJob.size(); i++) { Executor executor = lastOnlineExecutorListByJob.get(i); if (preferListConfigured.contains(executor.getExecutorName())) { preferExecutorList.add(executor); } } if (!preferExecutorList.isEmpty()) { Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(preferExecutorList, jobName); putShardIntoExecutor(shard, executor); } } } else { Executor executor = getExecutorWithMinLoadLevelAndNoThisJob(notDockerExecutorsMapByJob.get(jobName), jobName); putShardIntoExecutor(shard, executor); } shardIterator.remove(); } } // 2、放回配置了preferList的Shard Iterator shardIterator2 = shardList.iterator(); while(shardIterator2.hasNext()) { Shard shard = shardIterator2.next(); String jobName = shard.getJobName(); if(preferListIsConfiguredMap.get(jobName)) { // fix, preferList为空不能作为判断是否配置preferList的依据,比如说配置了容器资源,但是全部下线了。 List preferList = preferListConfiguredMap.get(jobName); List preferExecutorList = new ArrayList<>(); List lastOnlineExecutorListByJob = lastOnlineExecutorListMapByJob.get(jobName); for(int i=0; i shardIterator3 = shardList.iterator(); 
while(shardIterator3.hasNext()) { Shard shard = shardIterator3.next(); Executor executor = getExecutorWithMinLoadLevel(notDockerExecutorsMapByJob.get(shard.getJobName())); putShardIntoExecutor(shard, executor); shardIterator3.remove(); } } /** * 是否使用非preferList
* 1、存在结点,并且该结点值为false,返回false;
* 2、其他情况,返回true */ protected boolean useDispreferList(String jobName) { try { String jobConfigUseDispreferListNodePath = SaturnExecutorsNode.getJobConfigUseDispreferListNodePath(jobName); if (curatorFramework.checkExists().forPath(jobConfigUseDispreferListNodePath) != null) { byte[] useDispreferListData = curatorFramework.getData().forPath(jobConfigUseDispreferListNodePath); if (useDispreferListData != null && !Boolean.valueOf(new String(useDispreferListData, "UTF-8"))) { return false; } } return true; } catch (Exception e) { log.error(e.getMessage(), e); return true; } } private Executor getExecutorWithMinLoadLevel(List executorList) { Executor minLoadLevelExecutor = null; for(int i=0; i executor.getTotalLoadLevel()) { minLoadLevelExecutor = executor; } } return minLoadLevelExecutor; } private Executor getExecutorWithMinLoadLevelAndNoThisJob(List executorList, String jobName) { Executor minLoadLevelExecutor = null; for(int i=0; i shardList = executor.getShardList(); boolean containThisJob = false; for(int j=0; j executor.getTotalLoadLevel())) { minLoadLevelExecutor = executor; } } return minLoadLevelExecutor; } private void putShardIntoExecutor(Shard shard, Executor executor) { if(executor != null) { if(isIn(shard, executor.getShardList())) { log.error("The shard({}-{}) is running in the executor of {}, cannot be put again", shard.getJobName(), shard.getItem(), executor.getExecutorName()); } else { executor.getShardList().add(shard); executor.setTotalLoadLevel(executor.getTotalLoadLevel() + shard.getLoadLevel()); } } else { log.info("No executor to take over the shard: {}-{}", shard.getJobName(), shard.getItem()); } } /** * 获取该域下的所有作业 */ private List getAllJobs() throws Exception { List allJob = new ArrayList<>(); if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.$JOBSNODE_PATH) == null) { curatorFramework.create().creatingParentsIfNeeded().forPath(SaturnExecutorsNode.$JOBSNODE_PATH); } List tmp = 
curatorFramework.getChildren().forPath(SaturnExecutorsNode.$JOBSNODE_PATH); if(tmp != null) { allJob.addAll(tmp); } return allJob; } /** * 获取该域下的所有enable的作业 */ protected List getAllEnableJobs(List allJob) throws Exception { List allEnableJob = new ArrayList<>(); for(int i=0; i shardList) { for(int i=0; i 0; } } return false; } /** * 获取配置态的preferList,即使配置的executor不存在,也会返回。 特别的是,对于docker task,如果存在,才去解析出executor列表。 */ protected List getPreferListConfigured(String jobName) throws Exception { List preferList = new ArrayList<>(); if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName)) != null) { byte[] preferListData = curatorFramework.getData().forPath(SaturnExecutorsNode.getJobConfigPreferListNodePath(jobName)); if(preferListData != null) { List allExistsExecutors = getAllExistingExecutors(); String[] split = new String(preferListData, "UTF-8").split(","); for(String tmp : split) { String tmpTrim = tmp.trim(); if(!"".equals(tmpTrim)) { fillRealPreferListIfIsDockerOrNot(preferList, tmpTrim, allExistsExecutors); } } } } return preferList; } private List getAllExistingExecutors() throws Exception { List allExistsExecutors = new ArrayList<>(); if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorsNodePath()) != null) { List executors = curatorFramework.getChildren().forPath(SaturnExecutorsNode.getExecutorsNodePath()); if(executors != null) { allExistsExecutors.addAll(executors); } } return allExistsExecutors; } /** * 如果prefer不是docker容器,并且preferList不包含,则直接添加;
* 如果prefer是docker容器(以@开头),则prefer为task,获取该task下的所有executor,如果不包含,添加进preferList。 */ private void fillRealPreferListIfIsDockerOrNot(List preferList, String prefer, List allExistsExecutors) throws Exception { if(!prefer.startsWith("@")) { // not docker server if(!preferList.contains(prefer)) { preferList.add(prefer); } } else { // docker server, get the real executorList by task String task = prefer.substring(1); for(int i=0; i filterExecutorsByJob(List executorList, String jobName) throws Exception { List executorListByJob = new ArrayList<>(); for(int i=0; i jobNameList = executor.getJobNameList(); if(jobNameList != null && jobNameList.contains(jobName)) { executorListByJob.add(executor); } } return executorListByJob; } private List getPreferListOnlineByJob(String jobName, List preferListConfigured, List lastOnlineExecutorList) { List preferListOnlineByJob = new ArrayList<>(); for(int i=0; i createShards(String jobName, int number, int loadLevel) { List shards = new ArrayList<>(); for(int i=0; i createShards(String jobName, List lastOnlineExecutorList) throws Exception { List shardList = new ArrayList<>(); boolean preferListIsConfigured = preferListIsConfigured(jobName); List preferListConfigured = getPreferListConfigured(jobName); List preferListOnlineByJob = getPreferListOnlineByJob(jobName, preferListConfigured, lastOnlineExecutorList); boolean localMode = isLocalMode(jobName); int shardingTotalCount = getShardingTotalCount(jobName); int loadLevel = getLoadLevel(jobName); if(localMode) { if(preferListIsConfigured) { // 如果当前存在优先节点在线,则新建在线的优先节点的数量的分片 if(!preferListOnlineByJob.isEmpty()) { shardList.addAll(createShards(jobName, preferListOnlineByJob.size(), loadLevel)); } } else { // 新建在线的executor的数量的分片 shardList.addAll(createShards(jobName, lastOnlineExecutorList.size(), loadLevel)); } } else { // 新建shardingTotalCount数量的分片 shardList.addAll(createShards(jobName, shardingTotalCount, loadLevel)); } return shardList; } } /** * 
域下重排,移除已经存在所有executor,重新获取executors,重新获取作业shards */ private class ExecuteAllShardingTask extends AbstractAsyncShardingTask { @Override protected void logStartInfo() { log.info("Execute the {} ", this.getClass().getSimpleName()); } @Override protected List notifyEnableJobsPrior() { return null; } @Override protected boolean pick(List allJobs, List allEnableJob, List shardList, List lastOnlineExecutorList) throws Exception { // 修正所有executor对所有作业的jobNameList for(int j=0; j getLastOnlineExecutorList() throws Exception { // 从$SaturnExecutors节点下,获取所有正在运行的Executor List lastOnlineExecutorList = new ArrayList<>(); if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorsNodePath()) != null) { List zkExecutors = curatorFramework.getChildren().forPath(SaturnExecutorsNode.getExecutorsNodePath()); if(zkExecutors != null) { for(int i=0; i()); executor.setJobNameList(new ArrayList()); lastOnlineExecutorList.add(executor); } } } } } return lastOnlineExecutorList; } } /** * executor上线,仅仅添加executor空壳,如果其不存在;如果已经存在,重新设置下ip,防止ExecuteJobServerOnlineShardingTask先于执行而没设ip
* 特别的,如果当前没有executor,也就是这是第一台executor上线,则需要域全量分片,因为可能已经有作业处理启用状态了。 */ private class ExecuteOnlineShardingTask extends AbstractAsyncShardingTask { private String executorName; private String ip; public ExecuteOnlineShardingTask(String executorName, String ip) { this.executorName = executorName; this.ip = ip; } @Override protected void logStartInfo() { log.info("Execute the {} with {} online", this.getClass().getSimpleName(), executorName); } @Override protected List notifyEnableJobsPrior() { return null; } @Override protected boolean pick(List allJobs, List allEnableJobs, List shardList, List lastOnlineExecutorList) throws Exception {//NOSONAR // 如果没有Executor在运行,则需要进行全量分片 if(lastOnlineExecutorList.isEmpty()) { log.warn("There are no running executors, need all sharding"); needAllSharding.set(true); shardingCount.incrementAndGet(); executorService.submit(new ExecuteAllShardingTask()); return false; } Executor theExecutor = null; for(int i=0; i< lastOnlineExecutorList.size(); i++) { Executor tmp = lastOnlineExecutorList.get(i); if(tmp.getExecutorName().equals(executorName)) { theExecutor = tmp; break; } } if(theExecutor == null) { theExecutor = new Executor(); theExecutor.setExecutorName(executorName); theExecutor.setIp(ip); theExecutor.setShardList(new ArrayList()); theExecutor.setJobNameList(new ArrayList()); lastOnlineExecutorList.add(theExecutor); } else { // 重新设置下ip theExecutor.setIp(ip); } return true; } } /** * executor下线,摘取该executor运行的所有非本地模式作业,移除该executor */ private class ExecuteOfflineShardingTask extends AbstractAsyncShardingTask { private String executorName; public ExecuteOfflineShardingTask(String executorName) { this.executorName = executorName; } @Override protected void logStartInfo() { log.info("Execute the {} with {} offline", this.getClass().getSimpleName(), executorName); } @Override protected List notifyEnableJobsPrior() { return null; } @Override protected boolean pick(List allJobs, List allEnableJobs, List shardList, List 
lastOnlineExecutorList) throws Exception { /** * 摘取下线的executor全部Shard */ boolean wasOffline = true; Iterator iterator = lastOnlineExecutorList.iterator(); while(iterator.hasNext()) { Executor executor = iterator.next(); if(executor.getExecutorName().equals(executorName)) { wasOffline = false; iterator.remove(); shardList.addAll(executor.getShardList()); break; } } // 如果该executor实际上已经在此之前下线,则摘取失败 if(wasOffline) { return false; } // 移除本地模式的作业分片 Iterator shardIterator = shardList.iterator(); while(shardIterator.hasNext()) { Shard shard = shardIterator.next(); if(isLocalMode(shard.getJobName())) { shardIterator.remove(); } } return true; } } /** * 作业启用,获取该作业的shards,注意要过滤不能运行该作业的executors */ private class ExecuteJobEnableShardingTask extends AbstractAsyncShardingTask { private String jobName; public ExecuteJobEnableShardingTask(String jobName) { this.jobName = jobName; } @Override protected void logStartInfo() { log.info("Execute the {} with {} enable", this.getClass().getSimpleName(), jobName); } @Override protected List notifyEnableJobsPrior() { List notifyEnableJobsPrior = new ArrayList<>(); notifyEnableJobsPrior.add(jobName); return notifyEnableJobsPrior; } @Override protected boolean pick(List allJobs, List allEnableJobs, List shardList, List lastOnlineExecutorList) throws Exception { // 移除已经在Executor运行的该作业的所有Shard boolean hasRemove = false; for (int i = 0; i < lastOnlineExecutorList.size(); i++) { Executor executor = lastOnlineExecutorList.get(i); Iterator iterator = executor.getShardList().iterator(); while (iterator.hasNext()) { Shard shard = iterator.next(); if (jobName.equals(shard.getJobName())) { executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel()); iterator.remove(); hasRemove = true; } } } // 修正该所有executor的对该作业的jobNameList fixJobNameList(lastOnlineExecutorList, jobName); // 获取该作业的Shard shardList.addAll(createShards(jobName, lastOnlineExecutorList)); // need notify this job, so return true return true; } } /** * 
作业禁用,摘取所有executor运行的该作业的shard,注意要相应地减loadLevel,不需要放回 */ private class ExecuteJobDisableShardingTask extends AbstractAsyncShardingTask { private String jobName; public ExecuteJobDisableShardingTask(String jobName) { this.jobName = jobName; } @Override protected void logStartInfo() { log.info("Execute the {} with {} disable", this.getClass().getSimpleName(), jobName); } @Override protected List notifyEnableJobsPrior() { return null; } @Override protected boolean pick(List allJobs, List allEnableJobs, List shardList, List lastOnlineExecutorList) { // 摘取所有该作业的Shard for(int i=0; i< lastOnlineExecutorList.size(); i++) { Executor executor = lastOnlineExecutorList.get(i); Iterator iterator = executor.getShardList().iterator(); while(iterator.hasNext()) { Shard shard = iterator.next(); if (shard.getJobName().equals(jobName)) { executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel()); iterator.remove(); shardList.add(shard); } } } // 如果shardList为空,则没必要进行放回等操作,摘取失败 if(shardList.isEmpty()) { return false; } return true; } @Override protected void putBackBalancing(List allEnableJobs, List shardList, List lastOnlineExecutorList) { // 不做操作 } } /** * 作业重排,移除所有executor的该作业shard,重新获取该作业的shards,finally删除forceShard结点 */ private class ExecuteJobForceShardShardingTask extends AbstractAsyncShardingTask { private String jobName; public ExecuteJobForceShardShardingTask(String jobName) { this.jobName = jobName; } @Override protected void logStartInfo() { log.info("Execute the {} with {} forceShard", this.getClass().getSimpleName(), jobName); } @Override protected List notifyEnableJobsPrior() { List notifyEnableJobsPrior = new ArrayList<>(); notifyEnableJobsPrior.add(jobName); return notifyEnableJobsPrior; } @Override public void run() { try { super.run(); } finally { deleteForceShardNode(); } } private void deleteForceShardNode() { try { String jobConfigForceShardNodePath = SaturnExecutorsNode.getJobConfigForceShardNodePath(jobName); if 
(curatorFramework.checkExists().forPath(jobConfigForceShardNodePath) != null) { curatorFramework.delete().forPath(jobConfigForceShardNodePath); } } catch (Throwable t) { log.error("delete forceShard node error", t); } } @Override protected boolean pick(List allJobs, List allEnableJobs, List shardList, List lastOnlineExecutorList) throws Exception { // 移除已经在Executor运行的该作业的所有Shard for (int i = 0; i < lastOnlineExecutorList.size(); i++) { Executor executor = lastOnlineExecutorList.get(i); Iterator iterator = executor.getShardList().iterator(); while (iterator.hasNext()) { Shard shard = iterator.next(); if (jobName.equals(shard.getJobName())) { executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel()); iterator.remove(); } } } // 修正所有executor对该作业的jobNameList fixJobNameList(lastOnlineExecutorList, jobName); // 如果该作业是启用状态,则创建该作业的Shard if(allEnableJobs.contains(jobName)) { shardList.addAll(createShards(jobName, lastOnlineExecutorList)); } // need notify this enabled job, so return true return true; } } /** * 作业的executor上线,executor级别平衡摘取,但是只能摘取该作业的shard;添加的新的shard */ private class ExecuteJobServerOnlineShardingTask extends AbstractAsyncShardingTask { private String jobName; private String executorName; public ExecuteJobServerOnlineShardingTask(String jobName, String executorName) { this.jobName = jobName; this.executorName = executorName; } @Override protected void logStartInfo() { log.info("Execute the {}, jobName is {}, executorName is {}", this.getClass().getSimpleName(), jobName, executorName); } @Override protected List notifyEnableJobsPrior() { List notifyEnableJobsPrior = new ArrayList<>(); notifyEnableJobsPrior.add(jobName); return notifyEnableJobsPrior; } private String getExecutorIp() { String ip = null; try { String executorIpNodePath = SaturnExecutorsNode.getExecutorIpNodePath(executorName); if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.getExecutorIpNodePath(executorName)) != null) { byte[] ipBytes = 
curatorFramework.getData().forPath(executorIpNodePath); if (ipBytes != null) { ip = new String(ipBytes, "UTF-8"); } } } catch (Exception e) { log.error(e.getMessage(), e); } return ip; } private Shard createLocalShard(List lastOnlineExecutorList, int loadLevel) { Shard shard = null; List itemList = new ArrayList<>(); for (int i = 0; i < lastOnlineExecutorList.size(); i++) { List shardList = lastOnlineExecutorList.get(i).getShardList(); for (int j = 0; j < shardList.size(); j++) { Shard shardAlreadyExists = shardList.get(j); if (shardAlreadyExists.getJobName().equals(jobName)) { itemList.add(shardAlreadyExists.getItem()); } } } Collections.sort(itemList, new Comparator() { @Override public int compare(Integer o1, Integer o2) { return 01 - 02; } }); int item = 0; if(!itemList.isEmpty()) { boolean[] flags = new boolean[itemList.size() + 1]; for(int i=0; i lastOnlineExecutorList) { for(int i=0; i shardList = lastOnlineExecutorList.get(i).getShardList(); for(int j=0; j pickShardsRunningInDispreferList(List preferListConfigured, List lastOnlineExecutorList) { List shards = new ArrayList<>(); for(int i=0; i iterator = executor.getShardList().iterator(); while(iterator.hasNext()) { Shard shard = iterator.next(); if(shard.getJobName().equals(jobName)) { executor.setTotalLoadLevel(executor.getTotalLoadLevel() - shard.getLoadLevel()); iterator.remove(); shards.add(shard); } } } } return shards; } private int getTotalLoadLevel(List shardList, List executorList) { int total = 0; for(int i=0; i shardList, List allExecutors) { int totalLoalLevel = getTotalLoadLevel(shardList, allExecutors); int averageTotalLoal = totalLoalLevel / (allExecutors.size()); for (int i = 0; i < allExecutors.size(); i++) { Executor executor = allExecutors.get(i); while (true) { int pickLoadLevel = executor.getTotalLoadLevel() - averageTotalLoal; if (pickLoadLevel > 0 && !executor.getShardList().isEmpty()) { Shard pickShard = null; for (int j = 0; j < executor.getShardList().size(); j++) { Shard shard = 
executor.getShardList().get(j); if (!shard.getJobName().equals(jobName)) { // 如果当前Shard不属于该作业,则不摘取,继续下一个 continue; } if (pickShard == null) { pickShard = shard; } else { if (pickShard.getLoadLevel() >= pickLoadLevel) { if (shard.getLoadLevel() >= pickLoadLevel && shard.getLoadLevel() < pickShard.getLoadLevel()) { pickShard = shard; } } else { if (shard.getLoadLevel() >= pickLoadLevel) { pickShard = shard; } else { if (shard.getLoadLevel() > pickShard.getLoadLevel()) { pickShard = shard; } } } } } if (pickShard != null) { executor.setTotalLoadLevel(executor.getTotalLoadLevel() - pickShard.getLoadLevel()); executor.getShardList().remove(pickShard); shardList.add(pickShard); } else { // 没有符合摘取条件的,无需再选择摘取 break; } } else { // 无需再选择摘取 break; } } } } private List createUnLocalShards(int shardingTotalCount, int loadLevel) { List shards = new ArrayList<>(); for(int i=0; i preferListConfigured, List lastOnlineExecutorList) { for(int i=0; i shardList = executor.getShardList(); for(int j=0; j allJobs, List allEnableJobs, List shardList, List lastOnlineExecutorList) throws Exception { boolean preferListIsConfigured = preferListIsConfigured(jobName); // 是否配置了preferList boolean useDispreferList = useDispreferList(jobName); // 是否useDispreferList List preferListConfigured = getPreferListConfigured(jobName); // 配置态的preferList boolean localMode = isLocalMode(jobName); int shardingTotalCount = getShardingTotalCount(jobName); int loadLevel = getLoadLevel(jobName); // 很小的可能性:status的新增事件先于ip的新增事件 // 那么,如果lastOnlineExecutorList不包含executorName,则添加一个新的Executor // 添加当前作业至jobNameList Executor theExecutor = null; for(int i=0; i< lastOnlineExecutorList.size(); i++) { Executor executor = lastOnlineExecutorList.get(i); if(executor.getExecutorName().equals(executorName)) { theExecutor = executor; break; } } if(theExecutor == null) { theExecutor = new Executor(); theExecutor.setExecutorName(executorName); theExecutor.setIp(getExecutorIp()); theExecutor.setShardList(new ArrayList()); 
theExecutor.setJobNameList(new ArrayList()); theExecutor.setTotalLoadLevel(0); lastOnlineExecutorList.add(theExecutor); } if(!theExecutor.getJobNameList().contains(jobName)) { theExecutor.getJobNameList().add(jobName); } if(localMode) { if(!preferListIsConfigured || preferListConfigured.contains(executorName)) { if(allEnableJobs.contains(jobName)) { shardList.add(createLocalShard(lastOnlineExecutorList, loadLevel)); } } } else { boolean hasShardRunning = hasShardRunning(lastOnlineExecutorList); if(preferListIsConfigured) { if(preferListConfigured.contains(executorName)) { // 如果有分片正在运行,摘取全部运行在非优先节点上的分片,还可以平衡摘取 if(hasShardRunning) { shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList)); pickBalance(shardList, lastOnlineExecutorList); } else { // 如果没有分片正在运行,则需要新建,无需平衡摘取 if(allEnableJobs.contains(jobName)) { shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel)); } } } else { if(useDispreferList) { // 如果有分片正在运行,并且都是运行在非优先节点上,可以平衡摘取分片 // 如果有分片正在运行,并且有运行在优先节点上,则摘取全部运行在非优先节点上的分片,不能再平衡摘取 if(hasShardRunning) { boolean shardsAllRunningInDispreferList = shardsAllRunningInDispreferList(preferListConfigured, lastOnlineExecutorList); if(shardsAllRunningInDispreferList) { pickBalance(shardList, lastOnlineExecutorList); } else { shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList)); } } else { // 如果没有分片正在运行,则需要新建,无需平衡摘取 if(allEnableJobs.contains(jobName)) { shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel)); } } } else { // 不能再平衡摘取 // 摘取全部运行在非优先节点上的分片 shardList.addAll(pickShardsRunningInDispreferList(preferListConfigured, lastOnlineExecutorList)); } } } else { // 如果有分片正在运行,则平衡摘取 if(hasShardRunning) { pickBalance(shardList, lastOnlineExecutorList); } else { // 如果没有分片正在运行,则需要新建,无需平衡摘取 if(allEnableJobs.contains(jobName)) { shardList.addAll(createUnLocalShards(shardingTotalCount, loadLevel)); } } } } return true; } } /** * 
作业的executor下线,将该executor运行的该作业分片都摘取,如果是本地作业,则移除 */ private class ExecuteJobServerOfflineShardingTask extends AbstractAsyncShardingTask { private String jobName; private String executorName; @Override protected void logStartInfo() { log.info("Execute the {}, jobName is {}, executorName is {}", this.getClass().getSimpleName(), jobName, executorName); } @Override protected List notifyEnableJobsPrior() { List notifyEnableJobsPrior = new ArrayList<>(); notifyEnableJobsPrior.add(jobName); return notifyEnableJobsPrior; } public ExecuteJobServerOfflineShardingTask(String jobName, String executorName) { this.jobName = jobName; this.executorName = executorName; } @Override protected boolean pick(List allJobs, List allEnableJobs, List shardList, List lastOnlineExecutorList) throws Exception { boolean localMode = isLocalMode(jobName); for(int i=0; i iterator = executor.getShardList().iterator(); while(iterator.hasNext()) { Shard shard = iterator.next(); if(shard.getJobName().equals(jobName)) { if(!localMode) { shardList.add(shard); } iterator.remove(); } } executor.getJobNameList().remove(jobName); break; } } // need notify this enabled job, so return true return true; } } /** * 进行全量分片 * @throws Exception */ public void asyncShardingWhenExecutorAll() throws Exception { if(isLeadership()) { needAllSharding.set(true); shardingCount.incrementAndGet(); executorService.submit(new ExecuteAllShardingTask()); try { String shardAllAtOnce = SaturnExecutorsNode.getExecutorShardingNodePath("shardAllAtOnce"); if (curatorFramework.checkExists().forPath(shardAllAtOnce) != null) { curatorFramework.delete().deletingChildrenIfNeeded().forPath(shardAllAtOnce); } } catch (Exception e) { log.error(e.getMessage(), e); } } } /** * 结点上线处理 * @param executorName * @throws Exception */ public void asyncShardingWhenExecutorOnline(String executorName, String ip) throws Exception { if(isLeadership()) { shardingCount.incrementAndGet(); executorService.submit(new ExecuteOnlineShardingTask(executorName, ip)); 
} } /** * 结点掉线处理 * @param executorName * @throws Exception */ public void asyncShardingWhenExecutorOffline(String executorName) throws Exception { if(isLeadership()) { shardingCount.incrementAndGet(); executorService.submit(new ExecuteOfflineShardingTask(executorName)); } } /** * 作业启用事件 * @param jobName * @throws Exception */ public void asyncShardingWhenJobEnable(String jobName) throws Exception { if(isLeadership()) { shardingCount.incrementAndGet(); executorService.submit(new ExecuteJobEnableShardingTask(jobName)); } } /** * 处理作业禁用事件 * @param jobName * @throws Exception */ public void asyncShardingWhenJobDisable(String jobName) throws Exception { if(isLeadership()) { shardingCount.incrementAndGet(); executorService.submit(new ExecuteJobDisableShardingTask(jobName)); } } /** * 处理作业全排 */ public void asyncShardingWhenJobForceShard(String jobName) throws Exception { if (isLeadership()) { shardingCount.incrementAndGet(); executorService.submit(new ExecuteJobForceShardShardingTask(jobName)); } } /** * 处理作业executor上线 */ public void asyncShardingWhenJobServerOnline(String jobName, String executorName) throws Exception { if (isLeadership()) { shardingCount.incrementAndGet(); executorService.submit(new ExecuteJobServerOnlineShardingTask(jobName, executorName)); } } /** * 处理作业executor下线 */ public void asyncShardingWhenJobServerOffline(String jobName, String executorName) throws Exception { if (isLeadership()) { shardingCount.incrementAndGet(); executorService.submit(new ExecuteJobServerOfflineShardingTask(jobName, executorName)); } } /** * 选举 * @throws Exception */ public void leaderElection() throws Exception { log.info("{}-{} leadership election", namespace, hostValue); LeaderLatch leaderLatch = new LeaderLatch(curatorFramework, SaturnExecutorsNode.LEADER_LATCHNODE_PATH); try { leaderLatch.start(); leaderLatch.await(); if (curatorFramework.checkExists().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH) == null) { // 持久化$Jobs节点 
if(curatorFramework.checkExists().forPath(SaturnExecutorsNode.$JOBSNODE_PATH) == null) { curatorFramework.create().creatingParentsIfNeeded().forPath(SaturnExecutorsNode.$JOBSNODE_PATH); } // 持久化LeaderValue curatorFramework.create().creatingParentsIfNeeded().withMode(CreateMode.EPHEMERAL).forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH, hostValue.getBytes("UTF-8")); // 清理、重置变量 executorService.shutdownNow(); while(!executorService.isTerminated()) { // 等待全部任务已经退出 Thread.sleep(200); } needAllSharding.set(false); shardingCount.set(0); executorService = newSingleThreadExecutor(); // 提交全量分片线程 needAllSharding.set(true); shardingCount.incrementAndGet(); executorService.submit(new ExecuteAllShardingTask()); log.info("{}-{} become leadership", namespace, hostValue); } } catch (Exception e) { log.error(namespace + "-" + hostValue + " leadership election failed", e); throw e; } finally { try { leaderLatch.close(); } catch (IOException e) { log.error(e.getMessage(), e); } } } private boolean hasLeadership() throws Exception { return curatorFramework.checkExists().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH) != null; } private boolean isLeadership() throws Exception { while (!hasLeadership()) { leaderElection(); } return new String(curatorFramework.getData().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH), "UTF-8").equals(hostValue); } private void deleteLeadership() throws Exception { if(isLeadership()) { curatorFramework.delete().forPath(SaturnExecutorsNode.LEADER_HOSTNODE_PATH); } } /** * 关闭 */ public void shutdown() { try { if(curatorFramework.getZookeeperClient().isConnected()){ deleteLeadership(); } } catch (Exception e) { log.error("delete leadership failed", e); } if(executorService != null) { executorService.shutdownNow(); } } public NamespaceShardingContentService getNamespaceShardingContentService() { return namespaceShardingContentService; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy