All downloads are free. The search and download features use the official Maven repository.

com.alibaba.schedulerx.worker.container.ThreadContainer Maven / Gradle / Ivy

There is a newer version: 1.12.2
Show newest version
package com.alibaba.schedulerx.worker.container;

import java.io.IOException;
import java.util.Collection;
import java.util.concurrent.Future;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;

import com.alibaba.schedulerx.common.constants.CommonConstants;
import com.alibaba.schedulerx.common.domain.InstanceStatus;
import com.alibaba.schedulerx.common.util.ExceptionUtil;
import com.alibaba.schedulerx.protocol.Worker.ContainerReportTaskStatusRequest;
import com.alibaba.schedulerx.worker.SchedulerxWorker;
import com.alibaba.schedulerx.worker.batch.ContainerStatusReqHandlerPool;
import com.alibaba.schedulerx.worker.domain.JobContext;
import com.alibaba.schedulerx.worker.listener.ListenerServiceLoader;
import com.alibaba.schedulerx.worker.listener.ThreadContainerListener;
import com.alibaba.schedulerx.worker.log.LogFactory;
import com.alibaba.schedulerx.worker.log.Logger;
import com.alibaba.schedulerx.worker.logcollector.ClientLoggerMessage;
import com.alibaba.schedulerx.worker.logcollector.LogCollector;
import com.alibaba.schedulerx.worker.logcollector.LogCollectorFactory;
import com.alibaba.schedulerx.worker.processor.JobProcessor;
import com.alibaba.schedulerx.worker.processor.JobProcessorEx;
import com.alibaba.schedulerx.worker.processor.MapJobProcessor;
import com.alibaba.schedulerx.worker.processor.ProcessResult;
import com.alibaba.schedulerx.worker.util.JobProcessorUtil;
import com.alibaba.schedulerx.worker.util.WorkerConfigUtil;
import com.alibaba.schedulerx.worker.util.WorkerIdGenerator;

import akka.actor.ActorSelection;
import akka.actor.Address;

/**
 * Container that executes one task of a job instance on the calling thread.
 *
 * <p>{@link #run()} delegates to {@link #start()}, which loads the {@link JobProcessor} for the
 * {@link JobContext}, invokes it, optionally retries failed map sub-tasks, and finally reports
 * the task status through {@link ContainerStatusReqHandlerPool} (falling back to a direct message
 * to the instance master actor when the submission is rejected).
 *
 * @author xiaomeng.hxm
 */
public class ThreadContainer implements MarkedRunnable, Container {

    private JobContext context;
    //BaseProcessor=>JobProcessor when SimpleJobProcessor removed
    private JobProcessor jobProcessor;
    protected ContainerPool containerPool;
    protected ActorSelection masterActorSelection;
    private static final int RESULT_SIZE_MAX = CommonConstants.INSTANCE_RESULT_SIZE_MAX;
    private LogCollector logCollector = LogCollectorFactory.get();
    private static final Logger LOGGER = LogFactory.getLogger(ThreadContainer.class);

    /** Handle of the submitted execution; cancelled by {@link #kill(boolean)} when set. */
    private Future future;

    /**
     * Creates an empty container; the context must be supplied via {@link #setContext(JobContext)}.
     * Note that {@code containerPool} and {@code masterActorSelection} stay unset on this path.
     */
    public ThreadContainer() {}

    /**
     * Creates a container bound to the given context and pool, and resolves the actor selection
     * of the instance master from the context's actor path.
     *
     * @param context       the job context describing the task to run
     * @param containerPool the pool this container is registered in
     * @throws Exception an {@link IOException} if the master actor selection resolves to {@code null}
     */
    public ThreadContainer(JobContext context, ContainerPool containerPool) throws Exception {
        this.context = context;
        this.containerPool = containerPool;
        this.masterActorSelection = SchedulerxWorker.actorSystem.actorSelection(context.getInstanceMasterActorPath());
        if (masterActorSelection == null) {
            String errMsg = "get taskMaster akka path error, path=" + context.getInstanceMasterActorPath();
            LOGGER.error(errMsg);
            throw new IOException(errMsg);
        }
    }

    /**
     * Runs the task via {@link #start()}. Anything that escapes {@code start()} is logged, the
     * container is removed from the pool and a failed result is reported.
     */
    @Override
    public void run() {
        try {
            start();
        } catch(Throwable t){
            LOGGER.error("start processor thread fail uniqueId={}, serialNum={}", context.getUniqueId(), context.getSerialNum(), t);
            // Report the final (failed) result status.
            containerPool.remove(context.getUniqueId());
            Address address = SchedulerxWorker.actorSystem.provider().getDefaultAddress();
            String workerAddr = address.host().get() + ":" + address.port().get();
            reportTaskStatus(new ProcessResult(false, "start processor thread failed, uniqueId="+context.getUniqueId()), workerAddr);
            containerPool.removeContext();
        }
    }

    /**
     * Invokes {@code before} on every registered {@link ThreadContainerListener}.
     * A failing listener is logged and does not prevent the remaining listeners from running.
     *
     * @param context the job context passed to each listener
     */
    public void executeBeforeListener(final JobContext context) {
        Collection<ThreadContainerListener> listeners = ListenerServiceLoader.INSTANCE.getListeners(ThreadContainerListener.class);
        if (CollectionUtils.isNotEmpty(listeners)) {
            for (ThreadContainerListener listener : listeners) {
                try {
                    listener.before(context);
                } catch (Throwable t) {
                    LOGGER.warn("ThreadContainerListener<{}> before exec failed.", listener.getClass().getSimpleName(), t);
                }
            }
        }
    }

    /**
     * Invokes {@code after} on every registered {@link ThreadContainerListener}.
     * Failures of a single listener, as well as failures while looking the listeners up,
     * are logged and never propagated.
     *
     * @param context the job context passed to each listener
     * @param result  the final processing result passed to each listener
     */
    public void executeAfterListener(final JobContext context, final ProcessResult result) {
        try {
            Collection<ThreadContainerListener> listeners = ListenerServiceLoader.INSTANCE.getListeners(ThreadContainerListener.class);
            if (CollectionUtils.isNotEmpty(listeners)) {
                for (ThreadContainerListener listener : listeners) {
                    try {
                        listener.after(context, result);
                    } catch (Throwable t) {
                        LOGGER.warn("ThreadContainerListener<{}> after exec failed.", listener.getClass().getSimpleName(), t);
                    }
                }
            }
        } catch (Throwable t) {
            LOGGER.warn("ThreadContainerListener after exec failed. ", t);
        }
    }

    /**
     * Executes the task on the current thread.
     *
     * <p>Steps: rename the thread, run the before-listeners, publish the context to the pool,
     * report {@code RUNNING} on the first attempt, load and invoke the processor, and then either
     * retry (by calling this method again) or remove the container, report the final status,
     * clear the pool context and run the after-listeners.
     *
     * <p>If the execution was interrupted, the thread's interrupt flag is restored just before
     * this method returns, after the final status has been reported.
     */
    @Override
    public void start() {
        String threadName = ThreadContainerPool.getInstance().genThreadName(context.getJobId(), context.getJobInstanceId(), context.getTaskId());
        Thread.currentThread().setName(threadName);
        // Run the before-listeners.
        executeBeforeListener(context);
        containerPool.setContext(context);
        long start = System.currentTimeMillis();
        LOGGER.debug("start run container, uniqueId={}, cost={}ms", context.getUniqueId(),
                (start - context.getScheduleTime().getMillis()));

        ProcessResult result = new ProcessResult(false);
        Address address = SchedulerxWorker.actorSystem.provider().getDefaultAddress();
        String workerAddr = address.host().get() + ":" + address.port().get();
        String uniqueId = context.getUniqueId();
        // Remembered so the interrupt flag can be restored once reporting is done.
        boolean interrupted = false;

        try {
            // Only the first attempt announces RUNNING; retries stay silent.
            if (context.getTaskAttempt() == 0) {
                reportTaskStatus(new ProcessResult(InstanceStatus.RUNNING), workerAddr);
            }
            // Load the processor for this context.
            jobProcessor = JobProcessorUtil.getJobProcessor(context);
            if (jobProcessor != null) {
                try {
                    // Null-safe comparison: a missing execute mode simply takes the standard path.
                    if ("java".equalsIgnoreCase(context.getJobType()) &&
                            (jobProcessor instanceof MapJobProcessor || "broadcast".equals(context.getExecuteMode()))) {
                        result = jobProcessor.process(context);
                    } else {
                        if (jobProcessor instanceof JobProcessorEx) {
                            ((JobProcessorEx) jobProcessor).preProcess(context);
                            result = jobProcessor.process(context);
                            ((JobProcessorEx) jobProcessor).postProcess(context);
                        } else {
                            result = jobProcessor.process(context);
                        }
                    }
                } catch (InterruptedException e1) {
                    // Handled by the outer catch so interruption is not logged twice.
                    throw e1;
                } catch (Throwable t) {
                    LOGGER.error("run fail uniqueId={}, serialNum={}", uniqueId, context.getSerialNum(), t);
                    String fixedErrMsg = ExceptionUtil.getFixedErrMsgByThrowable(t, RESULT_SIZE_MAX);
                    result = new ProcessResult(InstanceStatus.FAILED, fixedErrMsg);
                    logCollector.collect(context.getAppGroupId(), uniqueId, ClientLoggerMessage.JOB_PROCESSOR_EXEC_FAIL, t, context.getGroupId());
                }
                long end = System.currentTimeMillis();
                LOGGER.debug("container run finished, uniqueId={}, cost={}ms", uniqueId, (end - start));
                // A processor returning null is treated as a failure.
                if (result == null) {
                    result = new ProcessResult(InstanceStatus.FAILED, "result can't be null");
                }
            } else {
                result = new ProcessResult(InstanceStatus.FAILED, "jobProcessor is null");
                logCollector.collect(context.getAppGroupId(), uniqueId, ClientLoggerMessage.appendMessage(ClientLoggerMessage.JOB_PROCESSOR_EXEC_FAIL, result.getResult()),
                    context.getGroupId());
            }

            // Automatic retry for failed sub-tasks of the map model (non-root tasks, taskId > 0).
            // NOTE(review): Thread.currentThread().isAlive() is always true for the running thread;
            // an interruption check may have been intended here - confirm before changing.
            if (context.getTaskMaxAttempt() > 0 && context.getTaskId() > 0 && result.getStatus().equals(InstanceStatus.FAILED)
                    && Thread.currentThread().isAlive()) {
                int taskAttempt = context.getTaskAttempt();
                if (taskAttempt < context.getTaskMaxAttempt()) {
                    taskAttempt++;
                    // 1000L forces long arithmetic so a large interval cannot overflow int.
                    Thread.sleep(1000L * context.getTaskAttemptInterval());
                    context.setTaskAttempt(taskAttempt);
                    start();
                    // The retried run reports its own result; nothing to report for this attempt.
                    return;
                }
            }
        } catch (Throwable t) {
            if (t instanceof InterruptedException) {
                interrupted = true;
            }
            LOGGER.error("run fail uniqueId={}, serialNum={}", uniqueId, context.getSerialNum(), t);
            String fixedErrMsg = ExceptionUtil.getFixedErrMsgByThrowable(t, RESULT_SIZE_MAX);
            result = new ProcessResult(InstanceStatus.FAILED, fixedErrMsg);
            logCollector.collect(context.getAppGroupId(), uniqueId, ClientLoggerMessage.JOB_PROCESSOR_EXEC_FAIL, t, context.getGroupId());
        }
        // Remove the container BEFORE reporting the final status.
        // Removing it later (or from another thread) is racy: once the final task status has been
        // reported, the next round of a second-level job may arrive with the same uniqueId, and a
        // late remove would then delete the new round's container instead of this one.
        containerPool.remove(context.getUniqueId());
        reportTaskStatus(result, workerAddr);
        containerPool.removeContext();
        // Run the after-listeners.
        executeAfterListener(context, result);
        if (interrupted) {
            // Restore the flag that was cleared when InterruptedException was thrown.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Stops this container: forwards the kill to the processor when it is a
     * {@link JobProcessorEx}, cancels the execution future if one was set, and removes the
     * container from the pool. Cancellation and removal happen even if the processor's
     * kill hook throws; the exception is still propagated to the caller.
     *
     * @param mayInterruptIfRunning passed through to {@link Future#cancel(boolean)}
     */
    @Override
    public void kill(boolean mayInterruptIfRunning) {
        LOGGER.info("kill container, jobInstanceId={}", context.getJobInstanceId());
        try {
            // instanceof is false for null, so no separate null check is needed.
            if (jobProcessor instanceof JobProcessorEx) {
                ((JobProcessorEx) jobProcessor).kill(context);
            }
        } finally {
            if (this.future != null) {
                this.future.cancel(mayInterruptIfRunning);
            }
            containerPool.remove(context.getUniqueId());
        }
    }

    /**
     * Builds a task-status request from the context and the given result and submits it to
     * {@link ContainerStatusReqHandlerPool}. If the submission is rejected, the request is sent
     * directly to the instance master actor.
     *
     * @param result     the result whose status (and optional message) is reported
     * @param workerAddr this worker's {@code host:port} address
     */
    private void reportTaskStatus(ProcessResult result, String workerAddr) {
        ContainerReportTaskStatusRequest.Builder resultBuilder = ContainerReportTaskStatusRequest.newBuilder();
        resultBuilder.setJobId(context.getJobId());
        resultBuilder.setJobInstanceId(context.getJobInstanceId());
        resultBuilder.setTaskId(context.getTaskId());
        resultBuilder.setStatus(result.getStatus().getValue());
        resultBuilder.setWorkerAddr(workerAddr);
        resultBuilder.setWorkerId(WorkerIdGenerator.get());
        resultBuilder.setSerialNum(context.getSerialNum());
        // Optional fields are only set when present.
        if (StringUtils.isNotBlank(context.getTraceId())) {
            resultBuilder.setTraceId(context.getTraceId());
        }
        resultBuilder.setInstanceMasterActorPath(context.getInstanceMasterActorPath());
        if (context.getTaskName() != null) {
            resultBuilder.setTaskName(context.getTaskName());
        }
        if (result.getResult() != null) {
            resultBuilder.setResult(result.getResult());
        }

        // Build once and reuse for both the pooled submission and the fallback send.
        ContainerReportTaskStatusRequest request = resultBuilder.build();

        boolean enableShareContainerPool = WorkerConfigUtil.isEnableShareContainerPool();
        boolean submitResult = false;
        if (enableShareContainerPool) {
            // With a shared container pool all requests are submitted under key 0.
            submitResult = ContainerStatusReqHandlerPool.INSTANCE.submitReq(0, request);
        } else {
            submitResult = ContainerStatusReqHandlerPool.INSTANCE.submitReq(context.getJobInstanceId(), request);
        }
        LOGGER.info("reportTaskStatus instanceId={}, serialNum={}, submitResult={}, processResult={}", context.getUniqueId(), context.getSerialNum(),
                submitResult, result);
        if (!submitResult) {
            // Fallback: send straight to the instance master, without a sender.
            masterActorSelection.tell(request, null);
        }
    }

    /** @return the job context this container executes */
    public JobContext getContext() {
        return context;
    }

    /** @param context the job context this container should execute */
    public void setContext(JobContext context) {
        this.context = context;
    }

    /** @return the execution future, or {@code null} if none has been set */
    public Future getFuture() {
        return future;
    }

    /** @param future the execution future that {@link #kill(boolean)} should cancel */
    public void setFuture(Future future) {
        this.future = future;
    }

    /** @return the processor loaded by {@link #start()}, or {@code null} if none has been loaded */
    public JobProcessor getJobProcessor() {
        return jobProcessor;
    }

    /** @return the job instance id taken from the context */
    @Override
    public Long identify() {
        return context.getJobInstanceId();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy