All downloads are free. Search and download functionality uses the official Maven repository.

com.yoloho.schedule.processor.ScheduleManagerStatic Maven / Gradle / Ivy

The newest version!
package com.yoloho.schedule.processor;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.yoloho.schedule.ScheduleManagerFactory;
import com.yoloho.schedule.types.TaskItemRuntime;
import com.yoloho.schedule.types.InitialResult;
import com.yoloho.schedule.types.ScheduleServer;
import com.yoloho.schedule.types.TaskItem;
import com.yoloho.schedule.util.ScheduleUtil;

public class ScheduleManagerStatic extends AbstractScheduleManager {
	private static transient Logger logger = LoggerFactory.getLogger(ScheduleManagerStatic.class);
    /**
     * 总的任务数量
     */
    protected int taskItemCount = 0;

    protected long lastFetchVersion = -1;
    /**
     * 最近一起重新装载调度任务的时间。
     * 当前实际  - 上此装载时间  > intervalReloadTaskItemList,则向配置中心请求最新的任务分配情况
     */
    private long lastReloadTaskItemListTime = 0;
    private final Object NeedReloadTaskItemLock = new Object();
    private boolean isRuntimeInfoInitial = false;

    public ScheduleManagerStatic(ScheduleManagerFactory factory, String taskName, String ownSign) 
            throws Exception {
        super(factory, taskName, ownSign);
    }

    private void initialRunningInfo() throws Exception {
        // Any node can do cleaning work
        cleanExpiredServer(getTask().getJudgeDeadInterval());
        if (isLeader()) {
            // Only leader can initialize things
            factory.getStorage().emptyTaskItems(this.currentServer().getTaskName(),
                    this.currentServer().getOwnSign());
            factory.getStorage().initTaskItems(this.currentServer().getTaskName(),
                    this.currentServer().getOwnSign());
            factory.getStorage().updateTaskItemsInitialResult(this.currentServer().getTaskName(),
                    this.currentServer().getOwnSign(), this.currentServer().getUuid());
        }
    }
    
    private boolean isInitialRunningInfoSuccess(String taskName, String ownSign) throws Exception {
        List serverUuidList = factory.getStorage().getServerUuidList(taskName, ownSign);
        String leader = ScheduleUtil.getLeader(serverUuidList);
        InitialResult result = factory.getStorage().getInitialRunningInfoResult(taskName, ownSign);
        return result != null && StringUtils.equals(result.getUuid(), leader);
    }

    public void initial() throws Exception {
        new Thread(this.currentServer().getRunningEntry() + "-" + this.currentSerialNumber + "-StartProcess") {
            public void run() {
                try {
                    logger.info("Fetching task items for {}", currentServer().getUuid());
                    while (isRuntimeInfoInitial == false) {
                        if (isStopSchedule == true) {
                            logger.debug("Stop scheduling due to stop flag: {}", currentServer().getUuid());
                            return;
                        }
                        //logger.info("isRuntimeInfoInitial={}", isRuntimeInfoInitial);
                        try {
                            initialRunningInfo();
                            isRuntimeInfoInitial = isInitialRunningInfoSuccess(
                                    currentServer().getTaskName(), currentServer().getOwnSign());
                        } catch (Throwable e) {
                            // ignore exceptions and retry
                            logger.error(e.getMessage(), e);
                        }
                        if (isRuntimeInfoInitial == false) {
                            sleep(1000);
                        }
                    }
                    int count = 0;
                    lastReloadTaskItemListTime = factory.getStorage().getGlobalTime();
                    while (getCurrentScheduleTaskItemListNow().size() <= 0) {
                        if (isStopSchedule == true) {
                            logger.debug("Stop scheduling due to stop flag: {}", currentServer().getUuid());
                            return;
                        }
                        //logger.info("Try to fetch any task item: {}", count) ;
                        Thread.sleep(1000);
                        count = count + 1;
                    }
                    String tmpStr = "TaskItemDefine:";
                    for (int i = 0; i < currentTaskItemList.size(); i++) {
                        if (i > 0) {
                            tmpStr = tmpStr + ",";
                        }
                        tmpStr = tmpStr + currentTaskItemList.get(i);
                    }
                    logger.info("Got task item(s), begin to schedule {} of {}", tmpStr, currentServer().getUuid());

                    // 任务总量
                    taskItemCount = factory.getStorage().getTaskItems(currentServer().getTaskName(),
                            currentServer().getOwnSign()).size();
                    // 只有在已经获取到任务处理队列后才开始启动任务处理器
                    computerStart();
                } catch (Exception e) {
                    logger.error(e.getMessage(), e);
                    String str = e.getMessage();
                    if (str.length() > 300) {
                        str = str.substring(0, 300);
                    }
                    startErrorInfo = "StartProcess error with " + str;
                }
            }
        }.start();
    }
	
	/**
	 * Send heartbeat to center (storage)
	 * 

* If expired for this server it cannot do the heartbeat, instead, * it should reregister as a new server * * @throws Exception */ @Override public void refreshScheduleServerInfo() throws Exception { if (!this.factory.isEnableSchedule()) { return; } try { rewriteScheduleInfo(); // if uninitialized wait 2 seconds most int timeout = 2000; while (this.isRuntimeInfoInitial == false) { Thread.sleep(100); timeout -= 100; if (timeout <= 0) { return; } } // try to reassign assignScheduleTask(); // 判断是否需要重新加载任务队列,避免任务处理进程不必要的检查和等待 boolean tmpBoolean = this.isNeedReLoadTaskItemList(); if (tmpBoolean != this.isNeedReloadTaskItem) { // 只要不相同,就设置需要重新装载,因为在心跳异常的时候,做了清理队列的事情,恢复后需要重新装载。 synchronized (NeedReloadTaskItemLock) { this.isNeedReloadTaskItem = true; } rewriteScheduleInfo(); } if (this.isPauseSchedule == true || this.processor != null && processor.isSleeping() == true) { // 如果服务已经暂停了,则需要重新定时更新 cur_server 和 req_server // 如果服务没有暂停,一定不能调用的 this.getCurrentScheduleTaskItemListNow(); } } catch (Throwable e) { // 清除内存中所有的已经取得的数据和任务队列,避免心跳线程失败时候导致的数据重复 this.clearMemoInfo(); if (e instanceof Exception) { throw (Exception) e; } else { throw new Exception(e.getMessage(), e); } } } /** * 在leader重新分配任务,在每个server释放原来占有的任务项时,都会修改这个版本号 * @return * @throws Exception */ private boolean isNeedReLoadTaskItemList() throws Exception{ return this.lastFetchVersion < factory.getStorage().getServerSchedulingVersion(this.currentServer().getTaskName(), this.currentServer().getOwnSign()); } /** * 判断某个任务对应的线程组是否处于僵尸状态。 true 表示有线程组处于僵尸状态。需要告警。 * * @param runningEntry * @param serverList * @return * @throws Exception */ private boolean isExistZombieServ(String taskName, String ownSign, List uuidList) throws Exception { boolean exist = false; for (String uuid : uuidList) { ScheduleServer server = factory.getStorage().getServer(taskName, ownSign, uuid); if (server == null) { continue; } if (factory.getStorage().getGlobalTime() - server.getHeartBeatTime().getTime() > getTask().getHeartBeatRate() * 40) { 
logger.error("Detect zombie server! server={}, task={}, ownsign={}", server.getUuid(), taskName, ownSign); exist = true; } } return exist; } /** * Release all the task items current server may operated which maybe held by invalid servers * * @param taskType * @param serverList * @return * @throws Exception */ private int clearTaskItemsHeldByInvalidServer() throws Exception { List uuidList = factory.getStorage().getServerUuidList(this.currentServer().getTaskName(), this.currentServer().getOwnSign()); List taskItemList = factory.getStorage().getTaskItems(currentServer().getTaskName(), currentServer().getOwnSign()); int result = 0; for (TaskItemRuntime item : taskItemList) { if (StringUtils.isNotEmpty(item.getCurrentScheduleServer())) { if (!uuidList.contains(item.getCurrentScheduleServer())) { // invalid server found factory.getStorage().updateTaskItemCurrentServer(currentServer().getTaskName(), currentServer().getOwnSign(), item.getTaskItem(), ""); logger.info("Clear invalid server's task items: {} -> {}", item.getTaskItem(), item.getCurrentScheduleServer()); result++; } } else { result = result + 1; } } return result; } private void assignTaskItem(int maxNumOfOneServer) throws Exception { List serverList = factory.getStorage().getServerUuidList(this.currentServer().getTaskName(), this.currentServer().getOwnSign()); String taskName = currentServer().getTaskName(); String ownSign = currentServer().getOwnSign(); String currentUuid = currentServer().getUuid(); //设置初始化成功标准,避免在leader转换的时候,新增的线程组初始化失败 // flag success first factory.getStorage().updateTaskItemsInitialResult(taskName, ownSign, currentUuid); if (logger.isDebugEnabled()) { logger.debug("{}: Begin to distribute task", currentUuid); } if (serverList.size() <= 0) { // 在服务器动态调整的时候,可能出现服务器列表为空的清空 return; } List taskItemList = factory.getStorage().getTaskItems(taskName, ownSign); int unModifyCount = 0; int[] taskNums = ScheduleUtil.generateSequence(serverList.size(), taskItemList.size(), maxNumOfOneServer); int 
point = 0; int count = 0; String NO_SERVER_DEAL = "NO SERVER"; int taskNumsTotal = 0; for (int i = 0; i < taskNums.length; i++) { taskNumsTotal += taskNums[i]; } for (int i = 0; i < taskItemList.size(); i++) { TaskItemRuntime item = taskItemList.get(i); if (point < serverList.size() && i >= count + taskNums[point]) { count = count + taskNums[point]; point = point + 1; } String serverName = NO_SERVER_DEAL; if (point < serverList.size()) { serverName = serverList.get(point); } if (StringUtils.isEmpty(item.getCurrentScheduleServer()) || StringUtils.equals(item.getCurrentScheduleServer(), NO_SERVER_DEAL)) { // no current server and target server not equal to "NO SERVER" if (StringUtils.equals(NO_SERVER_DEAL, serverName)) { // Do nothing unModifyCount ++; } else { factory.getStorage().updateTaskItemCurrentServer(taskName, ownSign, item.getTaskItem(), serverName); factory.getStorage().updateTaskItemRequestServer(taskName, ownSign, item.getTaskItem(), ""); } } else if (StringUtils.equals(item.getCurrentScheduleServer(), serverName) && StringUtils.isEmpty(item.getRequestScheduleServer())) { // Do nothing unModifyCount ++; } else { factory.getStorage().updateTaskItemRequestServer(taskName, ownSign, item.getTaskItem(), serverName); } } if (unModifyCount < taskItemList.size()) { // reload configuration logger.info("Request all nodes to reload configuration on {}${}, currentUuid {}", taskName, ownSign, currentUuid); factory.getStorage().increaseServerSchedulingVersion(taskName, ownSign); } } /** * 根据当前调度服务器的信息,重新计算分配所有的调度任务 * 任务的分配是需要加锁,避免数据分配错误。为了避免数据锁带来的负面作用,通过版本号来达到锁的目的 * * 1、获取任务状态的版本号 * 2、获取所有的服务器注册信息和任务队列信息 * 3、清除已经超过心跳周期的服务器注册信息 * 3、重新计算任务分配 * 4、更新任务状态的版本号【乐观锁】 * 5、根系任务队列的分配信息 * @throws Exception */ private void assignScheduleTask() throws Exception { cleanExpiredServer(getTask().getJudgeDeadInterval()); if (!isLeader()) { if (logger.isDebugEnabled()) { logger.debug("{}: It's not the Leader, skip", this.currentServer().getUuid()); } return; } 
clearTaskItemsHeldByInvalidServer(); assignTaskItem(getTask().getMaxTaskItemsOfOneThreadGroup()); } /** * 重新加载当前服务器的任务队列 * 1、释放当前服务器持有,但有其它服务器进行申请的任务队列 * 2、重新获取当前服务器的处理队列 * * 为了避免此操作的过度,阻塞真正的数据处理能力。系统设置一个重新装载的频率。例如1分钟 * * 特别注意: * 此方法的调用必须是在当前所有任务都处理完毕后才能调用,否则是否任务队列后可能数据被重复处理 */ public List getCurrentScheduleTaskItemList() { try { if (this.isNeedReloadTaskItem == true) { // 特别注意:需要判断数据队列是否已经空了,否则可能在队列切换的时候导致数据重复处理 // 主要是在线程不休眠就加载数据的时候一定需要这个判断 if (this.processor != null) { while (this.processor.hasRemainedTask()) { Thread.sleep(50); } } // 真正开始处理数据 synchronized (NeedReloadTaskItemLock) { this.getCurrentScheduleTaskItemListNow(); this.isNeedReloadTaskItem = false; } } this.lastReloadTaskItemListTime = factory.getStorage().getGlobalTime(); return this.currentTaskItemList; } catch (Exception e) { throw new RuntimeException(e); } } /** * Get current schedule server's task items * * @return * @throws Exception */ private List getTaskItemsShouldScheduled() throws Exception { List taskItems = factory.getStorage().getTaskItems(currentServer().getTaskName(), currentServer().getOwnSign()); List result = new ArrayList(); for (TaskItemRuntime item : taskItems) { if (StringUtils.equals(currentServer().getUuid(), item.getCurrentScheduleServer())) { // current server result.add(new TaskItem(item.getTaskItem(), item.getDealParameter())); } } return result; } /** * @throws Exception */ private void releaseTaskItemsIfNeeded() throws Exception { int released = factory.getStorage().releaseTaskItemByOwner(currentServer().getTaskName(), currentServer().getOwnSign(), currentServer().getUuid()); if (released > 0) { // Request to reload factory.getStorage().increaseServerSchedulingVersion(currentServer().getTaskName(), currentServer().getOwnSign()); } } //由于上面在数据执行时有使用到synchronized ,但是心跳线程并没有对应加锁。 //所以在此方法上加一下synchronized。20151015 private synchronized List getCurrentScheduleTaskItemListNow() throws Exception { //如果已经稳定了,理论上不需要加载去扫描所有的叶子结点 //20151019 by kongxuan.zlj try { List uuidList = 
factory.getStorage().getServerUuidList(this.currentServer().getTaskName(), this.currentServer().getOwnSign()); // server下面的机器节点的运行时环境是否在刷新,如果 isExistZombieServ(this.currentServer().getTaskName(), this.currentServer().getOwnSign(), uuidList); } catch (Exception e) { logger.error("zombie serverList exists", e); } //获取最新的版本号 this.lastFetchVersion = factory.getStorage().getServerSchedulingVersion(this.currentServer().getTaskName(), this.currentServer().getOwnSign()); logger.debug("this.currentServer().getTaskType()={}, need reload={}", this.currentServer().getRunningEntry(), isNeedReloadTaskItem); try{ // Release task items if any releaseTaskItemsIfNeeded(); //重新查询当前服务器能够处理的队列 //为了避免在休眠切换的过程中出现队列瞬间的不一致,先清除内存中的队列 this.currentTaskItemList.clear(); this.currentTaskItemList = getTaskItemsShouldScheduled(); //如果超过10个心跳周期还没有获取到调度队列,则报警 if(this.currentTaskItemList.size() == 0 && factory.getStorage().getGlobalTime() - this.lastReloadTaskItemListTime > getTask().getHeartBeatRate() * 20) { StringBuffer buf = new StringBuffer(); buf.append("调度服务器"); buf.append(this.currentServer().getUuid()); buf.append("[TASK_TYPE="); buf.append(this.currentServer().getRunningEntry()); buf.append("]自启动以来,超过20个心跳周期,还 没有获取到分配的任务队列;"); buf.append(" currentTaskItemList.size() =" + currentTaskItemList.size()); buf.append(", scheduleCenter.getSystemTime()=" + factory.getStorage().getGlobalTime()); buf.append(", lastReloadTaskItemListTime=" + lastReloadTaskItemListTime); buf.append(", taskTypeInfo.getHeartBeatRate()=" + getTask().getHeartBeatRate() * 10); logger.warn(buf.toString()); } if (this.currentTaskItemList.size() > 0) { // 更新时间戳 this.lastReloadTaskItemListTime = factory.getStorage().getGlobalTime(); } return this.currentTaskItemList; } catch (Throwable e) { this.lastFetchVersion = -1; // 必须把把版本号设置小,避免任务加载失败 if (e instanceof Exception) { throw (Exception) e; } else { throw new Exception(e); } } } public int getTaskItemCount(){ return this.taskItemCount; } private int cleanExpiredServer(long 
expirationInMillis) throws Exception { String taskName = currentServer().getTaskName(); String ownSign = currentServer().getOwnSign(); List list = factory.getStorage().getServerUuidList(taskName, ownSign); long now = factory.getStorage().getGlobalTime(); int clean_cnt = 0; for (String uuid : list) { ScheduleServer server = factory.getStorage().getServer(taskName, ownSign, uuid); if (server == null) { continue; } if (now - server.getHeartBeatTime().getTime() > expirationInMillis) { factory.getStorage().removeServer(taskName, ownSign, server.getUuid()); clean_cnt ++; } } return clean_cnt; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy