All downloads are free. Search and download functionality uses the official Maven repository.

xin.bluesky.leiothrix.server.action.allocate.TableOneByOnePartitionAllocator Maven / Gradle / Ivy

The newest version!
package xin.bluesky.leiothrix.server.action.allocate;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import xin.bluesky.leiothrix.common.util.CollectionsUtils2;
import xin.bluesky.leiothrix.model.task.TaskConfig;
import xin.bluesky.leiothrix.model.task.partition.PartitionTask;
import xin.bluesky.leiothrix.server.action.exception.NoTaskException;
import xin.bluesky.leiothrix.server.action.exception.WaitAndTryLaterException;
import xin.bluesky.leiothrix.server.storage.TableStorage;
import xin.bluesky.leiothrix.server.storage.TaskStorage;

import java.util.ArrayList;
import java.util.List;

/**
 * 按照表为维度,一张表一张表地处理.
 *
 * 

假设有两张表table-1和table-2需要处理,则需要在table-1全部处理完毕之后再处理table-2的数据(比如数据清洗), * 此时需要保证表之间严格有序.在table-1处理完之前,即使有空闲worker,也要等待. *

* * @author 张轲 */ public class TableOneByOnePartitionAllocator extends AbstractPartitionAllocator { public static final Logger logger = LoggerFactory.getLogger(TableOneByOnePartitionAllocator.class); @Override protected List getAllTables(String taskId) { // 从config中取能保证表的有序,即按照从前往后的配置顺序 TaskConfig taskConfig = TaskStorage.getTaskConfig(taskId); List tableList = taskConfig.getTableNameList(); // 如果application没有配table列表,那就只能从zookeeper中取了,这往往意味这application配置不正确 if (CollectionsUtils2.isEmpty(tableList)) { logger.warn("application没有传table列表,这样应该是不正确的!"); return super.getAllTables(taskId); } return tableList; } /** * 这种场景的分配调度比较复杂. * 在不考虑tableStatus为WAIT_FOR_REALLOCATE的情况下: * 1. 如表状态为已结束,则可进入下一张表去分配range * 2. 如表状态为处理中,则抛出{@link WaitAndTryLaterException},通知worker等待该表处理完成 * 3. 如表状态为未分配,则进入.此时所有range都是未分配的(因为取未分配的任务片的原则是一次性将table下的所有未分配range都取出来),所以一定能够找到未分配range来返回 * * 考虑tableStatus为WAIT_FOR_REALLOCATE的情况下,此时该表下可能部分range是已结束,部分range是处理中, * 但一定有range是未分配的,因为在判定range处理超时的时候才会给table置WAIT_FOR_REALLOCATE状态,同时重置超时range为未分配状态. 
* * @param taskId taskId * @param tableNameList table name list * @return partition task list * @throws NoTaskException if no task can be found * @throws WaitAndTryLaterException if client need wait for current table finished */ @Override public List findRange(String taskId, List tableNameList) throws NoTaskException, WaitAndTryLaterException { List result = new ArrayList(); for (int i = 0; i < tableNameList.size(); i++) { String tableName = tableNameList.get(i); switch (TableStorage.getStatus(taskId, tableName)) { case FINISHED: break; case PROCESSING: if (i == tableNameList.size() - 1) {//如果是最后一张表,则无需worker再等待 throw new NoTaskException(); } else {//该表尚未结束,通知worker等待并稍后尝试 throw new WaitAndTryLaterException(); } case UNALLOCATED: case WAIT_FOR_REALLOCATE: RangeScanner scanner = new RangeScanner(taskId, tableName); RangeScanResult scanResult = scanner.scan(); result = preAllocate(taskId, tableName, scanResult.getUnallocatedRangeNameList()); break; default://这不应该发生 throw new NoTaskException(); } if (!result.isEmpty()) { return result; } } // 如果执行到这还没有未分配任务片,则表明所有table都是FINISHED状态了,抛出NoTask的异常 throw new NoTaskException(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy