All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.olapdb.obase.data.ParallelScanner Maven / Gradle / Ivy

The newest version!
package com.olapdb.obase.data;

import com.olapdb.obase.utils.Obase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.client.AbstractClientScanner;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.BlockingDeque;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;

/**
 * Implements the scanner interface for the HBase client.
 * If there are multiple regions in a table, this scanner will iterate
 * through them all in parallel.
 *
 * <p>One sub-scan is created per region returned by
 * {@code Obase.getRegionsInRange}. Each sub-scan runs as a task on a private
 * {@link ForkJoinPool} and pushes its rows into a single bounded buffer, so
 * rows of different regions reach the caller interleaved in arrival order.
 * {@link #next()} drains that buffer.
 *
 * <p>{@code taskCount} is only read and written by the thread that calls
 * {@link #next()}; this class does not synchronize concurrent callers of
 * {@code next()}.
 */
public class ParallelScanner extends AbstractClientScanner {
    private final Log LOG = LogFactory.getLog(this.getClass());

    // special marker to indicate when a scanning task has finished
    private static final Result MARKER = new Result();
    // buffer capacity used when the requested size is not positive
    private static final int DEFAULT_BUFFER_SIZE = 1000;
    // how long next() waits on the buffer before re-checking failure/close flags
    private static final long POLL_INTERVAL_MS = 100L;

    // the size limited buffer to use
    private final BlockingDeque<Result> results;
    // the thread pool that is used to execute tasks; stays null when no region matched
    private ForkJoinPool pool;
    // number of scanning tasks that have not yet delivered their MARKER
    private int taskCount;

    // first failure reported by a scanning task (best effort, later ones are dropped)
    private volatile IOException exception;
    // set by close(); tells workers and next() to stop
    private volatile boolean closed;


    /**
     * Creates a scanner whose buffer holds {@code threads * scan.getCaching()} rows
     * (or the default size when that product is not positive).
     *
     * @param entityClass entity class handed to {@code Obase} to locate the table
     * @param scan        the scan to split by region
     * @param threads     requested minimum pool parallelism
     * @throws IOException if the region lookup fails
     */
    public ParallelScanner(Class entityClass, Scan scan,
                           int threads) throws IOException {
        this(entityClass, scan, threads, threads * scan.getCaching());
    }

    /**
     * Creates a scanner and immediately starts the per-region scanning tasks.
     *
     * @param entityClass entity class handed to {@code Obase} to locate the table
     * @param scan        the scan to split by region
     * @param threads     requested minimum pool parallelism
     * @param bufferSize  capacity of the row buffer; values {@code <= 0} select 1000
     * @throws IOException if the region lookup fails
     */
    public ParallelScanner(Class entityClass, Scan scan,
                           int threads, int bufferSize) throws IOException {
        this.results = new LinkedBlockingDeque<>(bufferSize > 0 ? bufferSize : DEFAULT_BUFFER_SIZE);
        init(entityClass, scan, threads);
    }

    /**
     * Looks up the regions covered by {@code scan}, derives one sub-scan per
     * region and submits them to a freshly created pool.
     */
    private void init(Class entityClass, Scan scan, int threads) throws IOException {
        List<HRegionLocation> locs =
                Obase.getRegionsInRange(entityClass, scan.getStartRow(), scan.getStopRow(), false, false);
        LOG.debug("Found "+locs.size()+" regions.");
        if (locs.isEmpty()) {
            // nothing to scan: taskCount stays 0 and pool stays null
            return;
        }

        taskCount = locs.size();
        List<Scan> scans = new Vector<>();

        // build the sub-scans (one per region)
        int last = locs.size() - 1;
        int i = 0;
        for (HRegionLocation loc : locs) {
            Scan s = new Scan(scan);
            // The copy already carries the caller's start/stop rows. Only the inner
            // edges are replaced by region boundaries, so the outer edges keep
            // whatever the caller configured on the original scan.
            if (i > 0) {
                s.withStartRow(loc.getRegion().getStartKey());
            }
            if (i < last) {
                s.withStopRow(loc.getRegion().getEndKey());
            }
            i++;
            LOG.debug("Submitting "+Bytes.toStringBinary(s.getStartRow()));
            scans.add(s);
        }

        pool = new ForkJoinPool(Math.max(threads, taskCount));
        pool.submit(() -> scans.stream().parallel().forEach(s -> scanRegion(entityClass, s)));
    }

    /**
     * Runs one sub-scan: copies every row into the buffer, then enqueues
     * {@code MARKER}. A failure is recorded in {@code exception} instead of
     * being thrown, and no marker is enqueued in that case.
     */
    private void scanRegion(Class entityClass, Scan regionScan) {
        LOG.debug("Starting " + Bytes.toStringBinary(regionScan.getStartRow()));
        try (ResultScanner rs = Obase.getTable(entityClass).getScanner(regionScan)) {
            for (Result r : rs) {
                if (closed) {
                    // consumer is gone; stop producing
                    return;
                }
                results.put(r);
            }
            results.put(MARKER);
        } catch (InterruptedException x) {
            // keep the interrupt visible to the pool thread
            Thread.currentThread().interrupt();
            recordFailure(new IOException(x));
        } catch (Exception x) {
            recordFailure(x instanceof IOException ? (IOException) x : new IOException(x));
        }
        LOG.debug("Finished " + Bytes.toStringBinary(regionScan.getStartRow()));
    }

    // Remembers the first reported failure; the check-then-set is not atomic.
    private void recordFailure(IOException failure) {
        if (this.exception == null) {
            this.exception = failure;
        }
    }

    /**
     * Returns the next buffered row.
     *
     * @return the next row, or {@code null} once every task has finished and the
     *         buffer is empty, or once this scanner has been closed
     * @throws IOException if a scanning task failed, or if the calling thread is
     *                     interrupted while waiting
     */
    @Override
    public Result next() throws IOException {
        if (closed) {
            return null;
        }
        try {
            // if at least one task is active wait for results to arrive.
            while (taskCount > 0 && exception == null && !closed) {
                // Timed wait instead of take(): a failed task never enqueues its
                // marker, so the flags must be re-checked periodically.
                Result r = results.poll(POLL_INTERVAL_MS, TimeUnit.MILLISECONDS);
                if (r == null) {
                    continue;
                }
                // skip markers, adjust task count if needed
                if (r == MARKER) {
                    --taskCount;
                    continue;
                }
                return r;
            }

            if (closed) {
                return null;
            }
            if (exception != null) {
                throw exception;
            }

            return results.poll();
        } catch (InterruptedException x) {
            Thread.currentThread().interrupt();
            throw new IOException(x);
        }
    }

    /**
     * Stops the scanning tasks and drops any rows still buffered. Safe to call
     * when no region matched and no pool was created.
     */
    @Override
    public void close() {
        LOG.debug("Scan pool closed.");
        closed = true;
        if (pool == null) {
            results.clear();
            return;
        }
        pool.shutdownNow();
        // release buffered rows; this also frees capacity for a producer stuck in put()
        results.clear();
        try {
            pool.awaitTermination(1, TimeUnit.SECONDS);
        } catch (InterruptedException x) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Always reports success; this implementation performs no lease renewal itself.
     */
    @Override
    public boolean renewLease(){
        return true;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy