All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.shoulder.batch.service.impl.BatchManager Maven / Gradle / Ivy

Go to download

Shoulder 扩展-批处理模块,提供批量数据导入、导出、异步校验、导入历史记录管理等能力。

The newest version!
package org.shoulder.batch.service.impl;

import org.shoulder.batch.constant.BatchConstants;
import org.shoulder.batch.enums.BatchDetailResultStatusEnum;
import org.shoulder.batch.log.ShoulderBatchLoggers;
import org.shoulder.batch.model.BatchData;
import org.shoulder.batch.model.BatchDataSlice;
import org.shoulder.batch.model.BatchRecord;
import org.shoulder.batch.model.BatchRecordDetail;
import org.shoulder.batch.progress.BatchProgressRecord;
import org.shoulder.batch.progress.ProgressAble;
import org.shoulder.batch.repository.BatchRecordDetailPersistentService;
import org.shoulder.batch.repository.BatchRecordPersistentService;
import org.shoulder.batch.spi.DefaultTaskSplitHandler;
import org.shoulder.batch.spi.TaskSplitHandler;
import org.shoulder.core.context.AppContext;
import org.shoulder.core.exception.CommonErrorCodeEnum;
import org.shoulder.core.log.Logger;
import org.shoulder.core.util.AssertUtils;
import org.shoulder.core.util.ContextUtils;
import org.shoulder.log.operation.context.OpLogContextHolder;
import org.shoulder.log.operation.enums.OperationResult;

import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.stream.Collectors;

/**
 * 批处理管理员
 * 负责分成小的任务,交给 Worker 执行
 *
 * @author lym
 */
public class BatchManager implements Runnable, ProgressAble {

    protected final static Logger log = ShoulderBatchLoggers.DEFAULT;

    /**
     * 异步工作单元的最大数量
     */
    private static final int MAX_WORKER_SIZE = 4;

    /**
     * 线程池
     */
    protected ExecutorService threadPool = ContextUtils.getBean(BatchConstants.BATCH_THREAD_POOL_NAME);

    /**
     * 批量处理记录
     */
    protected BatchRecordPersistentService batchRecordPersistentService =
            ContextUtils.getBean(BatchRecordPersistentService.class);

    /**
     * 批处理记录详情
     */
    protected BatchRecordDetailPersistentService batchRecordDetailPersistentService =
            ContextUtils.getBean(BatchRecordDetailPersistentService.class);

    // ------------------------------------------------

    /**
     * 操作用户
     */
    protected Long userId;
    /**
     * 语言标识
     */
    protected String languageId;

    /**
     * 本次要批量处理的数据
     */
    protected BatchData batchData;

    /**
     * 当前进度
     */
    protected BatchProgressRecord progress;

    /**
     * 本次任务结果汇总
     */
    protected BatchRecord result;

    /**
     * 任务队列,任务向这里丢
     * todo 【扩展性】后续考虑共享队列,将属性抽出来,一个 manager 可以同时处理多次批处理任务
     */
    protected BlockingQueue jobQueue;

    /**
     * 结果队列,结果从这里取
     */
    protected BlockingQueue resultQueue;

    public BatchManager(BatchData batchData) {
        batchData.setBatchId(generateBatchId());
        AssertUtils.notNull(batchData, CommonErrorCodeEnum.ILLEGAL_PARAM);
        AssertUtils.notNull(batchData.getDataType(), CommonErrorCodeEnum.ILLEGAL_PARAM);
        AssertUtils.notNull(batchData.getOperation(), CommonErrorCodeEnum.ILLEGAL_PARAM);
        AssertUtils.notEmpty(batchData.getDataList(), CommonErrorCodeEnum.ILLEGAL_PARAM);

        String currentUserId = AppContext.getUserId();
        this.userId = currentUserId == null ? 0 : Long.parseLong(currentUserId);
        this.languageId = AppContext.getLocaleOrDefault().toString();
        this.batchData = batchData;

        // 初始化进度对象(保证在构造器中完成)
        this.progress = new BatchProgressRecord();
        this.progress.setId(batchData.getBatchId());
        // 这里的 total 不一定是最终的,分片后才能完全确认 total
        this.progress.setTotal(batchData.getDataList().size());
    }

    private static String generateBatchId() {
        return UUID.randomUUID().toString();
    }

    /**
     * 职责:任务分片,安排工人,汇总结果,保存批处理记录
     */
    @Override
    public void run() {

        log.debug("batch task start, dataType={}, operation={}", batchData.getDataType(), batchData.getOperation());

        // 任务分片,初始化任务队列、结果队列
        List batchSliceList = splitTask(batchData);

        int jobSize = batchSliceList.size();
        AssertUtils.isTrue(jobSize > 0, CommonErrorCodeEnum.ILLEGAL_PARAM, "after splitTask, jobSize can't be 0");
        jobQueue = new LinkedBlockingQueue<>(jobSize);
        jobQueue.addAll(batchSliceList);

        // 安排工人
        int dataItemTotalNum = batchSliceList.stream()
                .map(BatchDataSlice::calculateDataSize)
                .reduce(Integer::sum).orElse(0);
        // 最大不能超过 MAX_WORKER_SIZE
        int workerNum = Integer.min(MAX_WORKER_SIZE, jobSize);
        resultQueue = new LinkedBlockingQueue<>(dataItemTotalNum);
        if (dataItemTotalNum < 2) {
            // 数据量极少还用批处理框架通常是不符合预期的,打印 warn
            log.warn("batch task split and Total only={}! subJobNum={}, workNum={}", dataItemTotalNum, jobSize, workerNum);
        } else {
            log.debug("batch task split, total={}, subJobNum={}, workNum={}", dataItemTotalNum, jobSize, workerNum);
        }
        this.progress.setTotal(dataItemTotalNum);
        compositeBatchRecords(dataItemTotalNum);
        progress.start();

        // 开始分配任务
        for (int i = 0; i < workerNum - 1; i++) {
            BatchProcessor worker = new BatchProcessor(batchData.getBatchId(), jobQueue, resultQueue);
            if (!canEmployWorker(worker)) {
                // 提交失败,说明当且服务器较忙,无法雇佣工人,因此中断委派,转由当前线程执行全部任务
                log.warnWithErrorCode(CommonErrorCodeEnum.SERVER_BUSY.getCode(),
                        "employ workers fail, fail back to execute by current, it may cost more time.");
                break;
            }
        }
        // 当且线程也参与做任务
        BatchProcessor worker = new BatchProcessor(batchData.getBatchId(), jobQueue, resultQueue);
        worker.run();

        // 阻塞式处理结果
        handleResult(dataItemTotalNum);
        progress.finish();
        //result.setTotalNum();
        result.setSuccessNum(progress.getSuccessNum());
        result.setFailNum(progress.getFailNum());
        // todo 【进阶】持久化时机优化
        persistentImportRecord();
        fillOperationLog();
        log.info("batch task finished.");
    }

    /**
     * 提前组装批处理记录,方便后续写入到数据库
     */
    private void compositeBatchRecords(int total) {
        // 初始化数据处理结果对象 Record todo total
        this.result = BatchRecord.builder()
                .id(batchData.getBatchId())
                .dataType(batchData.getDataType())
                .operation(batchData.getOperation())
                .totalNum(total)
                .createTime(new Date())
                .creator(userId)
                .build();

        // 初始化数据处理详情对象 List
        this.result.setDetailList(new ArrayList<>(total));
    }


    // ================================= 任务分片与执行 ==================================

    /**
     * 任务分片
     *
     * @param batchData 所有数据
     * @return 分片后的
     */
    protected List splitTask(BatchData batchData) {
        Map taskSplitHandlerMap = ContextUtils.getBeansOfType(TaskSplitHandler.class);

        List result = taskSplitHandlerMap.values().stream()
                .filter(s -> !(s instanceof DefaultTaskSplitHandler))
                .filter(s -> s.support(batchData))
                .findFirst()
                .map(s -> s.splitTask(batchData))
                .orElse(null);
        if (result != null) {
            return result;
        }

        TaskSplitHandler defaultTaskSplitHandler = ContextUtils.getBean(DefaultTaskSplitHandler.class);
        return defaultTaskSplitHandler.splitTask(batchData);
    }

    /**
     * 雇佣工人(提交到线程池)
     *
     * @param worker 工作线程
     */
    private boolean canEmployWorker(BatchProcessor worker) {
        try {
            threadPool.execute(worker);
            return true;
        } catch (Exception e) {
            // 这里有可能线程池满了,拒绝执行
            log.warn(CommonErrorCodeEnum.SERVER_BUSY, e);
            return false;
        }
    }

    // ================================= 处理结果 ==================================

    /**
     * 处理结果
     *
     * @param n 期待多少个结果
     */
    private void handleResult(int n) {
        for (int i = 0; i < n; i++) {
            // worker 未捕获的异常会交给 UncaughtExceptionHandler,这里设计时让worker保证一定返回结果
            BatchRecordDetail taskResultDetail = takeUnExceptInterrupted(resultQueue);
//            if (taskResultDetail.isCalculateProgress()) {
            boolean success = BatchDetailResultStatusEnum.SUCCESS.getCode() == taskResultDetail.getStatus();
            if (success) {
                progress.addSuccess(1);
            } else {
                progress.addFail(1);
            }
            // 调度者只能修改处理结果和原因
            taskResultDetail.setOperation(batchData.getOperation());
            taskResultDetail.setRecordId(batchData.getBatchId());
            result.getDetailList().add(taskResultDetail);
//            }
        }
        if (!jobQueue.isEmpty()) {
            // 已经结束,不应该还有
            //throw new IllegalStateException("jobQueue not empty")
            log.errorWithErrorCode(CommonErrorCodeEnum.CODING.getCode(), "jobQueue not empty!");
            jobQueue.clear();
        }
    }

    /**
     * 自信取,自信地认为不会比中断
     */
    private  T takeUnExceptInterrupted(BlockingQueue queue) {
        try {
            return queue.take();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }

    // ================================= 后置,如保存记录 ==================================

    /**
     * 根据任务处理的总体结果,增加一条处理记录
     */
    protected void persistentImportRecord() {
        if (!batchData.isPersistentRecord()) {
            return;
        }
        boolean notAllSetSourceStr = result.getDetailList().stream()
                .map(BatchRecordDetail::getSource)
                .anyMatch(Objects::isNull);
        AssertUtils.isFalse(notAllSetSourceStr, CommonErrorCodeEnum.CODING, "impl need invoke setSource().");

        try {
            batchRecordPersistentService.insert(result);
            // 当前: 最后保存一次, 简单。
            // 考虑:事前保存一次,每个 worker 持久化,最终更新;满足事务一致性,同时避免大事务

            batchRecordDetailPersistentService.batchSave(result.getId(),
                    result.getDetailList().stream()
                            .sorted(Comparator.comparingInt(BatchRecordDetail::getIndex))
                            .collect(Collectors.toList())
            );
        } catch (Exception e) {
            log.warnWithErrorCode(CommonErrorCodeEnum.DATA_STORAGE_FAIL.getCode(), "persistentImportRecord fail", e);
            throw CommonErrorCodeEnum.DATA_STORAGE_FAIL.toException(e);
        }
    }

    /**
     * 填充操作日志
     */
    private void fillOperationLog() {
        // 根据处理结果判断总体结果
        OperationResult opResult = OperationResult.of(progress.getSuccessNum() > 0, progress.getFailNum() > 0);
        if (OpLogContextHolder.getContext() == null || OpLogContextHolder.getLog() == null) {
            // 当前没有上下文
            return;
        }
        OpLogContextHolder.getLog().setResult(opResult)
                .addDetailItem(String.valueOf(progress.getSuccessNum()))
                .addDetailItem(String.valueOf(progress.getFailNum()))
                .setObjectId(batchData.getBatchId())
                .setObjectType(batchData.getDataType());
        OpLogContextHolder.enableAutoLog();
    }

    public String getBatchId() {
        return batchData.getBatchId();
    }

    public BatchProgressRecord getBatchProgress() {
        return progress;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy