All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.phoenix.coprocessor.HashJoinRegionScanner Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.phoenix.coprocessor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.regionserver.ScannerContext;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.phoenix.cache.GlobalCache;
import org.apache.phoenix.cache.HashCache;
import org.apache.phoenix.cache.TenantCache;
import org.apache.phoenix.coprocessorclient.HashJoinCacheNotFoundException;
import org.apache.phoenix.execute.TupleProjector;
import org.apache.phoenix.execute.TupleProjector.ProjectedValueTuple;
import org.apache.phoenix.expression.Expression;
import org.apache.phoenix.expression.KeyValueColumnExpression;
import org.apache.phoenix.iterate.RegionScannerFactory;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
import org.apache.phoenix.join.HashJoinInfo;
import org.apache.phoenix.parse.JoinTableNode.JoinType;
import org.apache.phoenix.schema.IllegalDataException;
import org.apache.phoenix.schema.KeyValueSchema;
import org.apache.phoenix.schema.ValueBitSet;
import org.apache.phoenix.schema.tuple.MultiKeyValueTuple;
import org.apache.phoenix.schema.tuple.PositionBasedResultTuple;
import org.apache.phoenix.schema.tuple.ResultTuple;
import org.apache.phoenix.schema.tuple.SingleKeyValueTuple;
import org.apache.phoenix.schema.tuple.Tuple;
import org.apache.phoenix.util.ClientUtil;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.TupleUtil;

import static org.apache.phoenix.util.ScanUtil.getDummyResult;
import static org.apache.phoenix.util.ScanUtil.getPageSizeMsForRegionScanner;
import static org.apache.phoenix.util.ScanUtil.isDummy;

/**
 * {@link RegionScanner} decorator that joins every row read from a delegate scanner against the
 * hash caches referenced by a {@link HashJoinInfo}.
 *
 * <p>Joined rows are buffered in an internal queue and handed out one per call to
 * {@link #nextRaw(List)}; the delegate scanner is only advanced while that queue is empty.
 * Only {@link #nextRaw(List)} is supported for reading; the {@code next(...)} variants and the
 * {@link ScannerContext} variant of {@code nextRaw} always throw.
 */
public class HashJoinRegionScanner implements RegionScanner {

    private final RegionScanner scanner;
    private final TupleProjector projector;
    private final HashJoinInfo joinInfo;
    private final RegionCoprocessorEnvironment env;
    /** Joined rows waiting to be returned by {@link #nextRaw(List)}. */
    private final Queue<Tuple> resultQueue;
    /** Last value returned by the delegate scanner's {@code nextRaw}. */
    private boolean hasMore;
    /** Number of rows handed out so far by {@link #nextInQueue(List)}. */
    private long count;
    /** Row limit taken from the join info, or {@code Long.MAX_VALUE} when none is set. */
    private long limit;
    /** One hash cache per join id; {@code null} for joins with an empty join id. */
    private final HashCache[] hashCaches;
    /** Scratch space: the hash-cache matches of the row being processed, per join. */
    private final List<Tuple>[] tempTuples;
    private ValueBitSet tempDestBitSet;
    private final ValueBitSet[] tempSrcBitSet;
    private final boolean useQualifierAsListIndex;
    private final boolean useNewValueColumnQualifier;
    /** Whether {@link #offerResult} must re-attach the array cell to projected tuples. */
    private final boolean addArrayCell;
    /** Time budget in milliseconds after which {@link #nextRaw(List)} returns a dummy result. */
    private final long pageSizeMs;

    /**
     * Creates a scanner without array-cell propagation; delegates to the full constructor with
     * {@code null} array references.
     *
     * @throws IOException if an unsupported join type is requested or a hash cache is missing
     */
    public HashJoinRegionScanner(RegionScanner scanner, Scan scan, TupleProjector projector,
                                 HashJoinInfo joinInfo, ImmutableBytesPtr tenantId,
                                 RegionCoprocessorEnvironment env, boolean useQualifierAsIndex,
                                 boolean useNewValueColumnQualifier)
        throws IOException {

        this(env, scanner, scan, null, null, projector, joinInfo,
             tenantId, useQualifierAsIndex, useNewValueColumnQualifier);
    }

    /**
     * Creates a scanner and resolves the hash cache of every join from the tenant cache.
     *
     * @param env coprocessor environment, used to look up the tenant cache and to name the
     *            region in wrapped exceptions
     * @param scanner delegate scanner supplying the left-hand-side rows
     * @param scan scan from which the page size (in milliseconds) is derived
     * @param arrayKVRefs array key-value references; may be {@code null}
     * @param arrayFuncRefs array function references; may be {@code null}. Array-cell
     *                      propagation is enabled only when both this and {@code arrayKVRefs}
     *                      are non-empty
     * @param projector projector applied to scanned rows; may be {@code null}
     * @param joinInfo join ids, types, expressions, schemas, optional limit and post-join filter
     * @param tenantId tenant whose cache holds the hash caches
     * @param useQualifierAsIndex whether scanned cells are wrapped in a position-based tuple
     * @param useNewValueColumnQualifier passed through to projection and merge calls
     * @throws DoNotRetryIOException if a join type other than INNER, LEFT, SEMI or ANTI is
     *                               requested, or if a referenced hash cache is not found
     * @throws IOException declared for callers; see above
     */
    @SuppressWarnings("unchecked") // generic array creation for tempTuples
    public HashJoinRegionScanner(RegionCoprocessorEnvironment env, RegionScanner scanner, Scan scan,
                                 final Set<KeyValueColumnExpression> arrayKVRefs,
                                 final Expression[] arrayFuncRefs, TupleProjector projector,
                                 HashJoinInfo joinInfo, ImmutableBytesPtr tenantId,
                                 boolean useQualifierAsIndex, boolean useNewValueColumnQualifier)
        throws IOException {

        this.env = env;
        this.scanner = scanner;
        this.projector = projector;
        this.joinInfo = joinInfo;
        this.resultQueue = new LinkedList<>();
        this.hasMore = true;
        this.count = 0;
        this.limit = Long.MAX_VALUE;
        for (JoinType type : joinInfo.getJoinTypes()) {
            if (type != JoinType.Inner && type != JoinType.Left
                    && type != JoinType.Semi && type != JoinType.Anti) {
                throw new DoNotRetryIOException("Got join type '" + type
                        + "'. Expect only INNER, LEFT, SEMI or ANTI with hash-joins.");
            }
        }
        if (joinInfo.getLimit() != null) {
            this.limit = joinInfo.getLimit();
        }
        int joinCount = joinInfo.getJoinIds().length;
        this.tempTuples = new List[joinCount];
        this.hashCaches = new HashCache[joinCount];
        this.tempSrcBitSet = new ValueBitSet[joinCount];
        TenantCache cache = GlobalCache.getTenantCache(env, tenantId);
        for (int i = 0; i < joinCount; i++) {
            ImmutableBytesPtr joinId = joinInfo.getJoinIds()[i];
            if (joinId.getLength() == 0) { // semi-join optimized into skip-scan
                hashCaches[i] = null;
                tempSrcBitSet[i] = null;
                tempTuples[i] = null;
                continue;
            }
            HashCache hashCache = (HashCache) cache.getServerCache(joinId);
            if (hashCache == null) {
                Exception cause = new HashJoinCacheNotFoundException(Bytes.toLong(joinId.get()));
                throw new DoNotRetryIOException(cause.getMessage(), cause);
            }

            hashCaches[i] = hashCache;
            tempSrcBitSet[i] = ValueBitSet.newInstance(joinInfo.getSchemas()[i]);
        }
        if (this.projector != null) {
            this.tempDestBitSet = ValueBitSet.newInstance(joinInfo.getJoinedSchema());
            this.projector.setValueBitSet(tempDestBitSet);
        }
        this.useQualifierAsListIndex = useQualifierAsIndex;
        this.useNewValueColumnQualifier = useNewValueColumnQualifier;
        this.addArrayCell = (arrayFuncRefs != null && arrayFuncRefs.length > 0 &&
                             arrayKVRefs != null && arrayKVRefs.size() > 0);
        this.pageSizeMs = getPageSizeMsForRegionScanner(scan);
    }

    /**
     * Joins one scanned row against the hash caches and appends the surviving joined rows to
     * the result queue. Does nothing when {@code result} is empty.
     *
     * @param result cells of the row read from the delegate scanner
     * @param hasBatchLimit must be {@code false}; joins over batch-limited scans are unsupported
     * @throws UnsupportedOperationException if {@code hasBatchLimit} is {@code true}
     * @throws IOException if merging projected values fails
     */
    private void processResults(List<Cell> result, boolean hasBatchLimit) throws IOException {
        if (result.isEmpty()) {
            return;
        }
        Tuple tuple = useQualifierAsListIndex
                ? new PositionBasedResultTuple(result)
                : new ResultTuple(Result.create(result));
        boolean projected = false;

        // For backward compatibility. In new versions, HashJoinInfo.forceProjection()
        // always returns true.
        if (joinInfo.forceProjection()) {
            tuple = projector.projectResults(tuple, useNewValueColumnQualifier);
            projected = true;
        }

        // TODO: fix below Scanner.next() and Scanner.nextRaw() methods as well.
        if (hasBatchLimit) {
            throw new UnsupportedOperationException("Cannot support join operations in scans with limit");
        }

        int joinCount = joinInfo.getJoinIds().length;

        // Early evaluation: probe each eligible hash cache with the unjoined row, and drop the
        // row as soon as one join rules it out.
        for (int i = 0; i < joinCount; i++) {
            if (!(joinInfo.earlyEvaluation()[i]) || hashCaches[i] == null) {
                continue;
            }
            ImmutableBytesPtr key = TupleUtil.getConcatenatedValue(tuple, joinInfo.getJoinExpressions()[i]);
            tempTuples[i] = hashCaches[i].get(key);
            JoinType type = joinInfo.getJoinTypes()[i];
            boolean matched = tempTuples[i] != null;
            if (((type == JoinType.Inner || type == JoinType.Semi) && !matched)
                    || (type == JoinType.Anti && matched)) {
                return;
            }
        }

        if (projector == null) {
            // Without a projector the row itself is emitted once per combination of matches.
            int dup = 1;
            for (int i = 0; i < joinCount; i++) {
                dup *= (tempTuples[i] == null ? 1 : tempTuples[i].size());
            }
            for (int i = 0; i < dup; i++) {
                offerResult(tuple, projected, result);
            }
        } else {
            KeyValueSchema schema = joinInfo.getJoinedSchema();
            if (!joinInfo.forceProjection()) { // backward compatibility
                tuple = projector.projectResults(tuple, useNewValueColumnQualifier);
                projected = true;
            }
            offerResult(tuple, projected, result);
            for (int i = 0; i < joinCount; i++) {
                boolean earlyEvaluation = joinInfo.earlyEvaluation()[i];
                JoinType type = joinInfo.getJoinTypes()[i];
                if (earlyEvaluation && (type == JoinType.Semi || type == JoinType.Anti)) {
                    continue;
                }
                // Drain exactly the tuples queued before this join; the joined tuples produced
                // below are appended behind them and become the input of the next join.
                int j = resultQueue.size();
                while (j-- > 0) {
                    Tuple lhs = resultQueue.poll();
                    if (!earlyEvaluation) {
                        ImmutableBytesPtr key = TupleUtil.getConcatenatedValue(lhs, joinInfo.getJoinExpressions()[i]);
                        tempTuples[i] = hashCaches[i].get(key);
                        if (tempTuples[i] == null) {
                            if (type == JoinType.Inner || type == JoinType.Semi) {
                                continue;
                            } else if (type == JoinType.Anti) {
                                offerResult(lhs, projected, result);
                                continue;
                            }
                        }
                    }
                    if (tempTuples[i] == null) {
                        // No match: merge a null right-hand side into the row.
                        Tuple joined = tempSrcBitSet[i] == ValueBitSet.EMPTY_VALUE_BITSET ?
                                lhs : mergeProjectedValue(
                                        lhs, schema, tempDestBitSet, null,
                                        joinInfo.getSchemas()[i], tempSrcBitSet[i],
                                        joinInfo.getFieldPositions()[i]);
                        offerResult(joined, projected, result);
                        continue;
                    }
                    for (Tuple t : tempTuples[i]) {
                        Tuple joined = tempSrcBitSet[i] == ValueBitSet.EMPTY_VALUE_BITSET ?
                                lhs : mergeProjectedValue(
                                        lhs, schema, tempDestBitSet, t,
                                        joinInfo.getSchemas()[i], tempSrcBitSet[i],
                                        joinInfo.getFieldPositions()[i]);
                        offerResult(joined, projected, result);
                    }
                }
            }
        }

        applyPostJoinFilter();
    }

    /**
     * Removes from the result queue every tuple for which the post-join filter, if any, does
     * not evaluate to {@code TRUE}.
     */
    private void applyPostJoinFilter() {
        Expression postFilter = joinInfo.getPostJoinFilterExpression();
        if (postFilter == null) {
            return;
        }
        for (Iterator<Tuple> iter = resultQueue.iterator(); iter.hasNext();) {
            Tuple t = iter.next();
            postFilter.reset();
            ImmutableBytesPtr tempPtr = new ImmutableBytesPtr();
            try {
                if (!postFilter.evaluate(t, tempPtr) || tempPtr.getLength() == 0) {
                    iter.remove();
                    continue;
                }
            } catch (IllegalDataException ignored) {
                // A tuple the filter cannot be evaluated on is treated as filtered out.
                iter.remove();
                continue;
            }
            Boolean b = (Boolean) postFilter.getDataType().toObject(tempPtr);
            if (!Boolean.TRUE.equals(b)) {
                iter.remove();
            }
        }
    }

    /**
     * @return {@code true} when the result queue is empty and the delegate scanner last
     *         reported more rows
     */
    private boolean shouldAdvance() {
        if (!resultQueue.isEmpty()) {
            return false;
        }

        return hasMore;
    }

    /**
     * Moves the cells of the next queued tuple into {@code results}.
     *
     * @param results list receiving the cells of the dequeued tuple
     * @return {@code false} when the queue was empty; otherwise whether the limit has not been
     *         reached yet and more rows are queued or expected from the delegate scanner
     */
    private boolean nextInQueue(List<Cell> results) {
        if (resultQueue.isEmpty()) {
            return false;
        }

        Tuple tuple = resultQueue.poll();
        for (int i = 0; i < tuple.size(); i++) {
            results.add(tuple.getValue(i));
        }
        // NOTE(review): the counter is compared before it is incremented, so the call that
        // first returns false is the (limit + 1)-th row — confirm callers rely on this.
        boolean underLimit = count++ < limit;
        return underLimit && (!resultQueue.isEmpty() || hasMore);
    }

    @Override
    public long getMvccReadPoint() {
        return scanner.getMvccReadPoint();
    }

    @Override
    public RegionInfo getRegionInfo() {
        return scanner.getRegionInfo();
    }

    /** The filter is done only once the delegate is done and no joined rows remain queued. */
    @Override
    public boolean isFilterDone() throws IOException {
        return scanner.isFilterDone() && resultQueue.isEmpty();
    }

    /**
     * Returns the next joined row, advancing the delegate scanner while no joined rows are
     * queued. A dummy result from the delegate is passed through unchanged, and a dummy result
     * for the current row key is returned once {@code pageSizeMs} has elapsed in this call.
     *
     * @param result list receiving the cells of the returned row
     * @return whether more rows may follow
     * @throws IOException wrapping any failure, tagged with the region name
     */
    @Override
    public boolean nextRaw(List<Cell> result) throws IOException {
        try {
            long startTime = EnvironmentEdgeManager.currentTimeMillis();
            while (shouldAdvance()) {
                hasMore = scanner.nextRaw(result);
                if (isDummy(result)) {
                    return true;
                }
                if (result.isEmpty()) {
                    return hasMore;
                }
                // Remember the first cell so its row key is available for a dummy result.
                Cell cell = result.get(0);
                processResults(result, false);
                if (EnvironmentEdgeManager.currentTimeMillis() - startTime >= pageSizeMs) {
                    byte[] rowKey = CellUtil.cloneRow(cell);
                    result.clear();
                    getDummyResult(rowKey, result);
                    return true;
                }
                result.clear();
            }

            return nextInQueue(result);
        } catch (Throwable t) {
            ClientUtil.throwIOException(env.getRegion().getRegionInfo().getRegionNameAsString(), t);
            return false; // impossible
        }
    }

    /** Unsupported; always throws. */
    @Override
    public boolean nextRaw(List<Cell> result, ScannerContext scannerContext)
            throws IOException {
        throw new IOException("Next with scannerContext should not be called in Phoenix environment");
    }

    @Override
    public boolean reseek(byte[] row) throws IOException {
        return scanner.reseek(row);
    }

    @Override
    public void close() throws IOException {
        scanner.close();
    }

    /** Unsupported; always throws. */
    @Override
    public boolean next(List<Cell> result) throws IOException {
        throw new IOException("Next should not be used in HashJoin scanner");
    }

    /** Unsupported; always throws. */
    @Override
    public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
        throw new IOException("Next with scannerContext should not be called in Phoenix environment");
    }

    @Override
    public long getMaxResultSize() {
        return this.scanner.getMaxResultSize();
    }

    @Override
    public int getBatch() {
        return this.scanner.getBatch();
    }

    /**
     * Appends a tuple to the result queue (PHOENIX-4791: propagate array element cell through
     * hash join).
     *
     * <p>When the tuple is projected and array-cell propagation is enabled, the queued tuple
     * consists of the tuple's first cell followed by the array cell located in {@code result};
     * otherwise the tuple is queued as is.
     *
     * @param tuple tuple to enqueue
     * @param projected whether {@code tuple} went through the projector
     * @param result cells of the scanned row, searched for the array cell
     */
    private void offerResult(Tuple tuple, boolean projected, List<Cell> result) {
        if (!projected || !addArrayCell) {
            resultQueue.offer(tuple);
            return;
        }

        Cell projectedCell = tuple.getValue(0);
        int arrayCellPosition = RegionScannerFactory.getArrayCellPosition(result);
        Cell arrayCell = result.get(arrayCellPosition);

        List<Cell> cells = new ArrayList<>(2);
        cells.add(projectedCell);
        cells.add(arrayCell);
        resultQueue.offer(new MultiKeyValueTuple(cells));
    }

    /**
     * Merges {@code src} into {@code dest} (PHOENIX-4917: merge array element cell through
     * hash join).
     *
     * <p>A {@link ProjectedValueTuple} is merged directly. For any other tuple, only the first
     * cell is projected and merged; the remaining cells of {@code dest} are re-attached after
     * the merged cell.
     *
     * @param dest left-hand-side tuple
     * @param destSchema joined schema (not used by this method)
     * @param destBitSet bit set of the destination values
     * @param src right-hand-side tuple, or {@code null} when there is no match
     * @param srcSchema schema of the right-hand side (not used by this method)
     * @param srcBitSet bit set of the source values
     * @param offset field position at which the source values are merged
     * @return the merged tuple
     * @throws IOException if the merge fails
     */
    private Tuple mergeProjectedValue(
        Tuple dest, KeyValueSchema destSchema, ValueBitSet destBitSet, Tuple src,
        KeyValueSchema srcSchema, ValueBitSet srcBitSet, int offset)
        throws IOException {

        if (dest instanceof ProjectedValueTuple) {
            return TupleProjector.mergeProjectedValue(
                (ProjectedValueTuple) dest, destBitSet, src,
                srcBitSet, offset, useNewValueColumnQualifier);
        }

        ProjectedValueTuple first = projector.projectResults(
            new SingleKeyValueTuple(dest.getValue(0)));
        ProjectedValueTuple merged = TupleProjector.mergeProjectedValue(
            first, destBitSet, src,
            srcBitSet, offset, useNewValueColumnQualifier);

        int size = dest.size();
        if (size == 1) {
            return merged;
        }

        // Re-attach the cells that followed the projected one (e.g. the array cell).
        List<Cell> cells = new ArrayList<>(size);
        cells.add(merged.getValue(0));
        for (int i = 1; i < size; i++) {
            cells.add(dest.getValue(i));
        }
        return new MultiKeyValueTuple(cells);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy