/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * com.alibaba.schedulerx.worker.master.ParallelTaskMater (Maven / Gradle / Ivy)
 *
 * There is a newer version: 1.12.2 (show newest version).
 */
package com.alibaba.schedulerx.worker.master;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.collections.CollectionUtils;

import com.alibaba.schedulerx.common.constants.CommonConstants;
import com.alibaba.schedulerx.common.domain.JobInstanceInfo;
import com.alibaba.schedulerx.common.domain.MapTaskXAttrs;
import com.alibaba.schedulerx.common.domain.TaskDispatchMode;
import com.alibaba.schedulerx.common.domain.TaskProgressCounter;
import com.alibaba.schedulerx.common.domain.TaskStatus;
import com.alibaba.schedulerx.common.domain.WorkerProgressCounter;
import com.alibaba.schedulerx.common.util.ConfigUtil;
import com.alibaba.schedulerx.common.util.IdUtil;
import com.alibaba.schedulerx.common.util.JsonUtil;
import com.alibaba.schedulerx.protocol.Server.RetryTaskEntity;
import com.alibaba.schedulerx.protocol.Worker.WorkerReportTaskListStatusRequest;
import com.alibaba.schedulerx.protocol.Worker.WorkerReportTaskListStatusResponse;
import com.alibaba.schedulerx.protocol.utils.FutureUtils;
import com.alibaba.schedulerx.worker.batch.ReqQueue;
import com.alibaba.schedulerx.worker.batch.TMStatusReqHandler;
import com.alibaba.schedulerx.worker.batch.TaskPushReqHandler;
import com.alibaba.schedulerx.worker.discovery.GroupManager;
import com.alibaba.schedulerx.worker.domain.WorkerConstants;
import com.alibaba.schedulerx.worker.log.LogFactory;
import com.alibaba.schedulerx.worker.log.Logger;
import com.alibaba.schedulerx.worker.logcollector.ClientLoggerMessage;
import com.alibaba.schedulerx.worker.logcollector.LogCollector;
import com.alibaba.schedulerx.worker.logcollector.LogCollectorFactory;
import com.alibaba.schedulerx.worker.master.persistence.ServerTaskPersistence;
import com.alibaba.schedulerx.worker.pull.TaskPullReqHandler;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;

import akka.actor.ActorContext;

/**
 * Task master for parallel (map) jobs that uses {@link ServerTaskPersistence} as its
 * task persistence.
 *
 * <p>On top of {@link MapTaskMaster} this class:
 * <ul>
 *   <li>wires the status/dispatch request queues and batch handlers in its constructor,
 *       choosing a pull or push dispatch handler from the job instance's xattrs;</li>
 *   <li>rejects {@link #map(List, String)} calls once the accumulated task count exceeds the
 *       configured maximum;</li>
 *   <li>implements {@link #retryTasks(List)} by resetting the tasks to {@code INIT} on the
 *       server and adjusting the local progress counters.</li>
 * </ul>
 *
 * @author xiaomeng.hxm
 */
public class ParallelTaskMater extends MapTaskMaster {
    private static final Logger LOGGER = LogFactory.getLogger(ParallelTaskMater.class);

    /** Base unit used to size the request queues and the batch handlers. */
    private static final int BATCH_SIZE = 256;

    private final LogCollector logCollector = LogCollectorFactory.get();

    /**
     * Creates the master and wires its queues and batch handlers.
     *
     * @param jobInstanceInfo the job instance this master drives; its worker list is used to
     *                        scale the batch sizes
     * @param actorContext    actor context handed to the parent master
     * @throws Exception if the parent constructor or any handler construction fails
     */
    public ParallelTaskMater(JobInstanceInfo jobInstanceInfo, ActorContext actorContext) throws Exception {
        super(jobInstanceInfo, actorContext);
        this.taskPersistence = new ServerTaskPersistence(jobInstanceInfo.getGroupId());
        long jobInstanceId = jobInstanceInfo.getJobInstanceId();
        int workerCount = jobInstanceInfo.getAllWorkers().size();

        taskStatusReqQueue = new ReqQueue<>(jobInstanceId, 1024);
        taskStatusReqBatchHandler = new TMStatusReqHandler<>(jobInstanceId, 1, 1,
                BATCH_SIZE * 2 * workerCount, taskStatusReqQueue);
        taskBlockingQueue = new ReqQueue<>(jobInstanceId, BATCH_SIZE * 4);

        if (jobInstanceInfo.getXattrs() != null) {
            this.xAttrs = JsonUtil.fromJson(jobInstanceInfo.getXattrs(), MapTaskXAttrs.class);
        }

        // Guard against xattrs that carry no dispatch mode: treat that as "not pull" instead of
        // failing construction with a NullPointerException.
        boolean pullMode = xAttrs != null
                && xAttrs.getTaskDispatchMode() != null
                && xAttrs.getTaskDispatchMode().equals(TaskDispatchMode.PULL.getValue());

        if (pullMode) {
            taskDispatchReqHandler = new TaskPullReqHandler<>(jobInstanceId, 1, 2,
                    BATCH_SIZE * workerCount, taskBlockingQueue);
        } else {
            int batchSize = BATCH_SIZE * workerCount;
            if (isWorkerLoadRouter()) {
                batchSize = 2 * workerCount;
            }
            Long dispatchDelay = parseDispatchSpeed();
            if (dispatchDelay != null) {
                // A configured dispatch delay forces one task per batch.
                batchSize = 1;
            }
            taskDispatchReqHandler = new TaskPushReqHandler<>(jobInstanceId, 1, 2, batchSize,
                    taskBlockingQueue, BATCH_SIZE, dispatchDelay);
        }
    }

    /**
     * Maps a list of tasks under the given task name, enforcing the per-instance task limit.
     *
     * <p>The limit is read from the worker configuration key
     * {@link CommonConstants#WORKER_PARALLEL_TASK_MAX_SIZE}; its default depends on whether the
     * group is an advanced-version group. The running task counter is increased before the
     * limit is checked.
     *
     * @param taskList tasks to map; an empty or {@code null} list is rejected
     * @param taskName name of the tasks being mapped
     * @return {@code false} if {@code taskList} is empty, otherwise the result of
     *         {@link MapTaskMaster#map(List, String)}
     * @throws IOException if the accumulated task count exceeds the configured maximum
     * @throws Exception   if the parent implementation fails
     */
    @Override
    public boolean map(List<ByteString> taskList, String taskName) throws Exception {
        String uniqueId = IdUtil.getUniqueIdWithoutTask(jobInstanceInfo.getJobId(), jobInstanceInfo.getJobInstanceId());
        if (CollectionUtils.isEmpty(taskList)) {
            LOGGER.warn("map taskList is empty, taskName:{}", taskName);
            logCollector.collect(jobInstanceInfo.getAppGroupId(), uniqueId, ClientLoggerMessage.appendMessage(ClientLoggerMessage.MAP_INSTANCE_FAIL,
                    "map taskList is empty"), jobInstanceInfo.getGroupId());
            return false;
        }

        LOGGER.info("map taskList, jobInstanceId={}, taskName={}, taskList size={}", jobInstanceInfo.getJobInstanceId(), taskName, taskList.size());
        int counter = taskCounter.addAndGet(taskList.size());
        int defaultTaskMaxSize = GroupManager.INSTANCE.isAdvancedVersion(jobInstanceInfo.getGroupId())
                ? WorkerConstants.PARALLEL_TASK_LIST_SIZE_MAX_ADVANCED
                : WorkerConstants.PARALLEL_TASK_LIST_SIZE_MAX;
        int parallelTaskMaxSize = ConfigUtil.getWorkerConfig().getInt(CommonConstants.WORKER_PARALLEL_TASK_MAX_SIZE, defaultTaskMaxSize);
        if (counter > parallelTaskMaxSize) {
            LOGGER.error("jobInstanceId={}, task counter={}, beyond {} !", jobInstanceInfo.getJobInstanceId(), counter, parallelTaskMaxSize);
            logCollector.collect(jobInstanceInfo.getAppGroupId(), uniqueId, ClientLoggerMessage.appendMessage(ClientLoggerMessage.MAP_INSTANCE_FAIL,
                    "task list size beyond " + parallelTaskMaxSize), jobInstanceInfo.getGroupId());
            throw new IOException("task list size beyond " + parallelTaskMaxSize + " !");
        }
        return super.map(taskList, taskName);
    }

    /**
     * Retries the given tasks by asking the server to reset them to {@code INIT}.
     *
     * <p>Root tasks are never retried and are left out of the request. When the server
     * confirms the reset, the local progress counters are adjusted: if this master has not
     * been initialized yet it is (re)initialized and each task is counted into its task-name
     * total; otherwise the success/failed counts recorded for the tasks' previous status are
     * rolled back. Failures are logged and not propagated to the caller.
     *
     * @param taskEntities the tasks to retry
     */
    @Override
    public void retryTasks(List<RetryTaskEntity> taskEntities) {
        // Collect the ids of all retryable (non-root) tasks; their status is reset to INIT.
        List<Long> taskIdList = Lists.newArrayList();
        for (RetryTaskEntity taskEntity : taskEntities) {
            if (taskEntity.getTaskName().equals(WorkerConstants.MAP_TASK_ROOT_NAME)) {
                LOGGER.warn("root task can't retry");
            } else {
                taskIdList.add(taskEntity.getTaskId());
            }
        }
        // Supports retrying second-level sub-tasks: restart the id generator from the current
        // time (in seconds) to prevent duplicate task ids.
        taskIdGenerator = new AtomicLong(System.currentTimeMillis() / 1000);

        WorkerReportTaskListStatusRequest request = WorkerReportTaskListStatusRequest.newBuilder()
                .setJobInstanceId(jobInstanceInfo.getJobInstanceId())
                .addAllTaskId(taskIdList)
                .setStatus(TaskStatus.INIT.getValue())
                .build();
        try {
            WorkerReportTaskListStatusResponse response = (WorkerReportTaskListStatusResponse) FutureUtils.awaitResult(
                SERVER_DISCOVERY.getMapMasterRouter(), request, 30);
            if (!response.getSuccess()) {
                LOGGER.error("retry tasks rejected by server, jobInstanceId={}, message={}",
                        jobInstanceInfo.getJobInstanceId(), response.getMessage());
                // TODO: on send failure, another server should be tried
                return;
            }
            if (!INITED) {
                // Not initialized yet: start the handlers and initialize again.
                startBatchHandler();
                init();
                registerRetriedTaskTotals(taskEntities);
            } else {
                rollbackRetriedTaskCounters(taskEntities);
            }
        } catch (Throwable e) {
            LOGGER.error("retry tasks failed, jobInstanceId=" + jobInstanceInfo.getJobInstanceId(), e);
            // TODO: on send failure, another server should be tried
        }
    }

    /** Adds each retried task to the total of its task-name progress counter, creating the counter if absent. */
    private void registerRetriedTaskTotals(List<RetryTaskEntity> taskEntities) {
        for (RetryTaskEntity taskEntity : taskEntities) {
            String taskName = taskEntity.getTaskName();
            if (!taskProgressMap.containsKey(taskName)) {
                synchronized (this) {
                    // Re-check under the lock so the counter is created only once.
                    if (!taskProgressMap.containsKey(taskName)) {
                        taskProgressMap.put(taskName, new TaskProgressCounter(taskName));
                    }
                }
            }
            taskProgressMap.get(taskName).incrementTotal();
        }
    }

    /** Undoes the success/failed count that each retried task's previous status contributed to the task and worker counters. */
    private void rollbackRetriedTaskCounters(List<RetryTaskEntity> taskEntities) {
        int successStatus = TaskStatus.SUCCESS.getValue();
        int failedStatus = TaskStatus.FAILED.getValue();
        for (RetryTaskEntity taskEntity : taskEntities) {
            int oldStatus = taskEntity.getOldStatus();

            TaskProgressCounter taskProgressCounter = taskProgressMap.get(taskEntity.getTaskName());
            if (taskProgressCounter != null) {
                if (oldStatus == successStatus) {
                    taskProgressCounter.decrementSuccess();
                } else if (oldStatus == failedStatus) {
                    taskProgressCounter.decrementFailed();
                }
            }

            WorkerProgressCounter workerProgressCounter = workerProgressMap.get(taskEntity.getWorkerAddr());
            if (workerProgressCounter != null) {
                if (oldStatus == successStatus) {
                    workerProgressCounter.decrementSuccess();
                } else if (oldStatus == failedStatus) {
                    workerProgressCounter.decrementFailed();
                }
            }
        }
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy