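// NOTE: the lines below are repository-browser page text that was captured together with this file;
// they are kept only as comments because they are not part of the Java source.
// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// org.modeshape.jcr.query.engine.process.HashJoinSequence Maven / Gradle / Ivy
//
// There is a newer version: 5.4.1.Final
// Show newest version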
/*
 * ModeShape (http://www.modeshape.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.modeshape.jcr.query.engine.process;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.mapdb.Serializer;
import org.modeshape.common.annotation.NotThreadSafe;
import org.modeshape.common.collection.MultiIterator;
import org.modeshape.common.collection.SequentialIterator;
import org.modeshape.jcr.cache.CachedNode;
import org.modeshape.jcr.cache.CachedNodeSupplier;
import org.modeshape.jcr.query.BufferManager;
import org.modeshape.jcr.query.BufferManager.DistinctBuffer;
import org.modeshape.jcr.query.NodeSequence;
import org.modeshape.jcr.query.RowExtractors.ExtractFromRow;
import org.modeshape.jcr.query.engine.process.BufferedRows.BufferedRow;
import org.modeshape.jcr.query.model.JoinType;
import org.modeshape.jcr.query.model.TypeSystem.TypeFactory;

/**
 * A {@link NodeSequence} implementation that performs an equijoin of two delegate sequences. The hash-join algorithm loads all
 * values on the right side into a buffer that hashes the right join condition value of each row. Then, it iterates through all
 * tuples on the left side and finds which of the values on the right have a matching join condition value.
 *
 * @author Randall Hauch
 */
@NotThreadSafe
public class HashJoinSequence extends JoinSequence {

    protected final DistinctBuffer rightMatchedRowKeys;
    protected final DistinctBuffer rightRowsWithNullKey;
    protected final RangeProducer rangeProducer;

    /**
     * Create a hash-join sequence over the two supplied sequences.
     * <p>
     * All join parameters are handed to the {@code JoinSequence} constructor. When {@link #useNonMatchingRightRows()} reports
     * that unmatched right rows have to be returned, two distinct buffers are created: one keyed with the serializer for the
     * right extractor's type (the join keys of right rows that were matched) and one that uses the buffered-row serializer
     * (passed to {@code loadAll} by {@link #initialize()}; judging by its name it holds right rows whose key is null).
     * Otherwise both buffers stay {@code null}.
     * </p>
     *
     * @param workspaceName the name of the workspace, passed through to the superclass
     * @param left the left-hand sequence
     * @param right the right-hand sequence
     * @param leftExtractor the extractor of the join value from left rows
     * @param rightExtractor the extractor of the join value from right rows; its type selects the key serializer
     * @param joinType the kind of join
     * @param bufferMgr the manager used to obtain serializers and to create the distinct buffers
     * @param nodeCache the node supplier used by the buffered-row serializer
     * @param rangeProducer the producer of key ranges for left values; may be null, in which case exact-key lookups are used
     * @param pack passed through to the superclass
     * @param useHeap whether the buffers created here should use the heap; also passed through to the superclass
     */
    @SuppressWarnings( "unchecked" )
    public HashJoinSequence( String workspaceName,
                             NodeSequence left,
                             NodeSequence right,
                             ExtractFromRow leftExtractor,
                             ExtractFromRow rightExtractor,
                             JoinType joinType,
                             BufferManager bufferMgr,
                             CachedNodeSupplier nodeCache,
                             RangeProducer rangeProducer,
                             boolean pack,
                             boolean useHeap ) {
        super(workspaceName, left, right, leftExtractor, rightExtractor, joinType, bufferMgr, nodeCache, pack, useHeap, true);
        this.rangeProducer = (RangeProducer)rangeProducer;

        if (!useNonMatchingRightRows()) {
            // Unmatched right rows are never returned for this join, so no bookkeeping buffers are required ...
            rightMatchedRowKeys = null;
            rightRowsWithNullKey = null;
            return;
        }

        // Buffer of the right-hand join keys, serialized according to the right extractor's type ...
        TypeFactory rightKeyType = rightExtractor.getType();
        Serializer rightKeySerializer = bufferMgr.serializerFor(rightKeyType);
        rightMatchedRowKeys = (DistinctBuffer)bufferMgr.createDistinctBuffer(rightKeySerializer).keepSize(true)
                                                               .useHeap(useHeap).make();

        // Buffer of whole right-hand rows, serialized as buffered rows of this sequence's width ...
        Serializer bufferedRowSerializer = (Serializer)BufferedRows.serializer(nodeCache, width);
        rightRowsWithNullKey = bufferMgr.createDistinctBuffer(bufferedRowSerializer).keepSize(true).useHeap(useHeap).make();
    }

    /**
     * Buffer the right-hand sequence and choose the batch factory that fits the join type.
     * <p>
     * When {@code loadAll} reports zero, the join degenerates: outer joins that keep left rows get a left-only factory and
     * every other join type yields no rows. Otherwise a cross join gets its own factory, and all remaining join types use
     * the range-based factory when a range producer was supplied or the exact-key factory when it was not.
     * </p>
     *
     * @return the batch factory for this join; never null
     */
    @Override
    protected BatchFactory initialize() {
        // The whole right side is buffered up front ...
        final int loadedFromRight = loadAll(delegate, extractor, rightRowsWithNullKey);

        if (loadedFromRight == 0) {
            // Nothing was loaded from the right ...
            switch (joinType) {
                case FULL_OUTER:
                case LEFT_OUTER:
                    // ... but left rows must still be returned on their own
                    return new LeftOnlyBatchFactory();
                case CROSS:
                case RIGHT_OUTER:
                case INNER:
                    // ... and without right rows these joins cannot produce anything
                    return new EmptyBatchFactory();
            }
        }

        // There is something on the right to join against ...
        switch (joinType) {
            case CROSS:
                // Every left row is paired with every right row, which needs slightly different logic ...
                return new HashCrossJoinBatchFactory();
            default:
                // INNER and all OUTER joins match left and right rows (and may also emit unmatched right rows) ...
                if (rangeProducer == null) {
                    return new HashJoinBatchFactory();
                }
                return new HashJoinRangeBatchFactory();
        }
    }

    /**
     * Obtain an iterator over every buffered right-hand row.
     *
     * @return the rows of the main buffer in ascending order, preceded by the contents of {@code rightRowsWithNullKey}
     *         when that buffer exists
     */
    protected Iterator allRightRows() {
        if (rightRowsWithNullKey == null) {
            // No separate buffer exists, so the main buffer is all there is ...
            return buffer.ascending();
        }
        // Rows from the separate buffer come first, followed by the main buffer ...
        return SequentialIterator.create(rightRowsWithNullKey.iterator(), buffer.ascending());
    }

    /**
     * Close this sequence and then the two right-side tracking buffers. Each step runs even if an earlier one threw.
     */
    @Override
    public void close() {
        try {
            super.close();
        } finally {
            closeUnmatchedRightTrackingBuffers();
        }
    }

    /**
     * Close {@code rightMatchedRowKeys} and {@code rightRowsWithNullKey} where they exist; the second buffer is closed even
     * when closing the first one fails.
     */
    private void closeUnmatchedRightTrackingBuffers() {
        try {
            if (rightMatchedRowKeys != null) {
                rightMatchedRowKeys.close();
            }
        } finally {
            if (rightRowsWithNullKey != null) {
                rightRowsWithNullKey.close();
            }
        }
    }

    /**
     * Describe this join for diagnostics: its width, join type, both input sequences and the two extractors that form
     * the join condition.
     *
     * @return the description; never null
     */
    @Override
    public String toString() {
        StringBuilder description = new StringBuilder("(hash-join width=");
        description.append(width());
        description.append(" ").append(joinType);
        description.append(" left=").append(left);
        description.append(", right=").append(delegate);
        description.append(", on ").append(leftExtractor).append("=").append(extractor);
        description.append(" )");
        return description.toString();
    }

    /**
     * The factory of batches for joins that match left rows against buffered right rows. It first produces one join batch
     * per non-empty left batch; once the left side is exhausted and unmatched right rows are being tracked, it produces
     * batches of right-only rows.
     */
    protected class HashJoinBatchFactory implements BatchFactory {
        // Lazily obtained when the first right-only batch is requested ...
        private Iterator rightRows;

        @Override
        public Batch nextBatch() {
            final Batch nextLeft = findNextNonEmptyLeftBatch();
            if (nextLeft == null) {
                // The left side is finished, so only unmatched right rows can remain ...
                return nextUnmatchedRightBatch();
            }
            currentLeft = null; // reset ...
            return createBatch(nextLeft);
        }

        /**
         * Produce the next batch of right-only rows.
         *
         * @return the batch, or null when unmatched right rows are not tracked or no right rows remain
         */
        private Batch nextUnmatchedRightBatch() {
            if (rightMatchedRowKeys == null) {
                // Unused/unmatched right rows never have to be returned, so we're done ...
                return null;
            }
            if (rightRows == null) {
                // First right-only batch, so start iterating over the right rows ...
                rightRows = allRightRows();
            }
            // Each right-only batch is capped at 100 rows ...
            return rightRows.hasNext() ? new RightRowsBatch(rightRows, 100) : null;
        }

        /**
         * Create the join batch that wraps the supplied left batch. Subclasses override this to supply their own batch type.
         *
         * @param leftBatch the non-empty left batch
         * @return the join batch
         */
        protected Batch createBatch( Batch leftBatch ) {
            return new HashJoinBatch(leftBatch);
        }
    }

    /**
     * A batch that contains rows that will have a left value and a right value.
     * <p>
     * The batch walks the rows of one left batch. For each left row it looks up the buffered right rows whose join key
     * matches the left join value(s) and exposes one joined row per match. When no right row matches and
     * {@code useAllLeftRowsWhenNoMatchingRightRows()} is true, the left row is exposed once with an empty right side.
     * </p>
     * <p>
     * {@link #hasNext()} may be called any number of times before {@link #nextRow()} without skipping rows.
     * </p>
     *
     * @author Randall Hauch
     */
    protected class HashJoinBatch implements Batch {
        private final Batch currentLeft;
        private Iterator<BufferedRow> rightMatchingRows;
        private BufferedRow currentRight;
        /** True while a left row without any right match has been found by hasNext() but not yet consumed by nextRow(). */
        private boolean unmatchedLeftPending;

        protected HashJoinBatch( Batch currentLeft ) {
            this.currentLeft = currentLeft;
            assert this.currentLeft != null;
        }

        @Override
        public int width() {
            return totalWidth;
        }

        @Override
        public String getWorkspaceName() {
            return workspaceName;
        }

        @Override
        public boolean isEmpty() {
            return false;
        }

        @Override
        public long rowCount() {
            return -1; // don't really know how many ...
        }

        /**
         * Determine whether another joined row is available, advancing the left batch when the matches for the current left
         * row are used up.
         *
         * @return true if {@link #nextRow()} can be called, or false if this batch is exhausted
         */
        @Override
        public boolean hasNext() {
            if (unmatchedLeftPending) {
                // A left row without right matches is still waiting to be consumed; do not advance past it ...
                return true;
            }
            if (rightMatchingRows != null && rightMatchingRows.hasNext()) {
                // There are more rows on the right that match the current left row ...
                return true;
            }
            // Advance to the next left row and find the matching rows on the right. The left batch is asked exactly once
            // per advance, so a left batch whose own 'hasNext' moves a cursor is not advanced twice ...
            while (currentLeft.hasNext()) {
                currentLeft.nextRow();
                Object matchingValue = leftExtractor.getValueInRow(currentLeft);
                rightMatchingRows = getAllRightRowsFor(matchingValue);
                if (rightMatchingRows != null && rightMatchingRows.hasNext()) {
                    // Found a match which will be recorded when we go through the right matching rows...
                    return true;
                }
                // Did not find any matching rows on the right ...
                if (useAllLeftRowsWhenNoMatchingRightRows()) {
                    // We still have to include the left row ...
                    rightMatchingRows = null;
                    unmatchedLeftPending = true;
                    return true;
                }
                // Otherwise, we don't include the left row without matching rows on the right, so find the next left row ...
            }

            // No more left rows ...
            return false;
        }

        /**
         * Find the right rows for a left join value that may be a single value, null, or an array of values.
         *
         * @param leftValue the value extracted from the current left row
         * @return an iterator over the matching right rows, or possibly null if there are none
         */
        private Iterator<BufferedRow> getAllRightRowsFor( Object leftValue ) {
            if (leftValue instanceof Object[]) {
                // There are multiple left-hand values, so we have to look for each one ...
                List<Iterator<BufferedRow>> iterators = new LinkedList<>();
                for (Object value : (Object[])leftValue) {
                    Iterator<BufferedRow> matching = getRightRowsFor(value);
                    if (matching != null && matching.hasNext()) {
                        iterators.add(matching);
                    }
                }
                if (iterators.isEmpty()) return null;
                if (iterators.size() == 1) return iterators.get(0);
                return MultiIterator.fromIterators(iterators);
            }
            // This is just a single value or even null
            return getRightRowsFor(leftValue);
        }

        /**
         * Look up the buffered right rows for a single left join value. Subclasses override this to change the matching rule.
         *
         * @param leftValue a single left join value; may be null
         * @return an iterator over the matching right rows; overriding implementations may return null for "no rows"
         */
        protected Iterator<BufferedRow> getRightRowsFor( Object leftValue ) {
            return buffer.getAll(leftValue);
        }

        /**
         * Remember that the right rows with the given key were matched, when unmatched right rows are being tracked.
         *
         * @param rightKey the join key of the right row that was just matched
         */
        protected void recordRightRowsMatched( Object rightKey ) {
            if (rightMatchedRowKeys != null) {
                // We only record the non-null values, since NULL never matches and they will always be unmatched ...
                // logger.trace("Join found matching rows on right with value {0}", matchingValue);
                rightMatchedRowKeys.addIfAbsent(rightKey);
            }
        }

        @Override
        public void nextRow() {
            // This presumes that 'hasNext' was called and that either 'rightMatchingRows' is null (because there
            // was no match (e.g., left outer join) or that it is non-null and has at least one value ...
            if (rightMatchingRows != null) {
                currentRight = rightMatchingRows.next();
                // since there might be multiple rows on the right, we need to make sure we record each one
                recordRightRowsMatched(extractor.getValueInRow(currentRight));
            } else {
                currentRight = null;
            }
            // Whatever 'hasNext' found has now been consumed ...
            unmatchedLeftPending = false;
        }

        @Override
        public CachedNode getNode() {
            return currentLeft.getNode();
        }

        @Override
        public CachedNode getNode( int index ) {
            if (index < leftWidth) {
                return currentLeft.getNode(index);
            }
            // Right-hand columns are null when the current left row had no match ...
            if (currentRight == null) return null;
            return currentRight.getNode(index - leftWidth);
        }

        @Override
        public float getScore() {
            return currentLeft.getScore();
        }

        @Override
        public float getScore( int index ) {
            if (index < leftWidth) {
                return currentLeft.getScore(index);
            }
            // Right-hand columns score 0 when the current left row had no match ...
            if (currentRight == null) return 0.0f;
            return currentRight.getScore(index - leftWidth);
        }
    }

    /**
     * The batch factory for joins that have a range producer: identical to {@link HashJoinBatchFactory} except that each
     * left batch is wrapped in a {@link HashJoinRangeBatch}.
     */
    protected class HashJoinRangeBatchFactory extends HashJoinBatchFactory {
        @Override
        protected Batch createBatch( final Batch leftBatch ) {
            final Batch rangeJoinBatch = new HashJoinRangeBatch(leftBatch);
            return rangeJoinBatch;
        }
    }

    /**
     * A join batch whose right-hand matches are the buffered rows with keys inside the range that the sequence's range
     * producer derives from each left value, rather than the rows with a key equal to the left value.
     *
     * @author Randall Hauch
     */
    protected class HashJoinRangeBatch extends HashJoinBatch {
        protected HashJoinRangeBatch( Batch currentLeft ) {
            super(currentLeft);
            assert HashJoinSequence.this.rangeProducer != null;
        }

        /**
         * Look up the buffered right rows whose keys fall within the range produced for the supplied left value.
         *
         * @param leftValue a single left join value; may be null
         * @return the right rows in the range, or null when the left value is null or no range is produced for it
         */
        @Override
        protected Iterator getRightRowsFor( Object leftValue ) {
            if (leftValue != null) {
                Range bounds = HashJoinSequence.this.rangeProducer.getRange(leftValue);
                if (bounds != null) {
                    return buffer.getAll(bounds.lowerBound(), bounds.isLowerBoundIncluded(), bounds.upperBound(),
                                         bounds.isUpperBoundIncluded());
                }
            }
            // Nothing on the right ever matches a NULL on the left, and without a range there is nothing to look up ...
            return null;
        }
    }

    /**
     * A batch for cross-joins: every row on the left is paired with all of the buffered rows on the right, regardless of
     * the join values. Matched right keys are not recorded.
     *
     * @author Randall Hauch
     */
    protected class HashCrossJoinBatch extends HashJoinBatch {

        protected HashCrossJoinBatch( Batch currentLeft ) {
            super(currentLeft);
        }

        /**
         * Return all right rows, whatever the left value is.
         *
         * @param leftValue the left join value; ignored
         * @return a new iterator over every buffered right row
         */
        @Override
        protected Iterator getRightRowsFor( Object leftValue ) {
            final Iterator everyRightRow = allRightRows();
            return everyRightRow;
        }

        @Override
        protected void recordRightRowsMatched( Object rightKey ) {
            // Intentionally empty: this batch does not record matched right keys ...
        }
    }

    /**
     * The batch factory for cross-joins: identical to {@link HashJoinBatchFactory} except that each left batch is wrapped
     * in a {@link HashCrossJoinBatch}.
     */
    protected class HashCrossJoinBatchFactory extends HashJoinBatchFactory {
        @Override
        protected Batch createBatch( final Batch leftBatch ) {
            final Batch crossJoinBatch = new HashCrossJoinBatch(leftBatch);
            return crossJoinBatch;
        }
    }

    /**
     * A batch that contains rows that will have no left value and a right value.
     * <p>
     * The batch pulls rows from the supplied iterator of right rows and exposes those whose join key is null or was not yet
     * present in {@code rightMatchedRowKeys}, up to {@code maxSize} rows. The iterator is shared with later batches, so rows
     * not consumed here remain available to them.
     * </p>
     * <p>
     * {@link #hasNext()} may be called any number of times before {@link #nextRow()} without skipping rows or inflating
     * the row count.
     * </p>
     *
     * @author Randall Hauch
     */
    protected class RightRowsBatch implements Batch {
        private final Iterator<BufferedRow> rightRows;
        private final int maxSize;
        private BufferedRow currentRight;
        /** True while 'currentRight' was positioned by hasNext() but has not yet been consumed by nextRow(). */
        private boolean rowPending;
        private int count = 0;

        /**
         * @param rightRows the iterator over the remaining right rows; may not be null
         * @param maxSize the maximum number of rows this batch will expose; must be positive
         */
        protected RightRowsBatch( Iterator<BufferedRow> rightRows,
                                  int maxSize ) {
            this.rightRows = rightRows;
            this.maxSize = maxSize;
            assert this.rightRows != null;
            assert this.maxSize > 0;
        }

        @Override
        public int width() {
            return totalWidth;
        }

        @Override
        public String getWorkspaceName() {
            return workspaceName;
        }

        @Override
        public boolean isEmpty() {
            return false;
        }

        @Override
        public long rowCount() {
            return -1; // don't really know how many ...
        }

        /**
         * Determine whether another unmatched right row is available in this batch, positioning on it if so.
         *
         * @return true if a row is available, or false if the right rows are exhausted or the batch is full
         */
        @Override
        public boolean hasNext() {
            if (rowPending) {
                // Already positioned on a row that has not been consumed; do not advance past it ...
                return true;
            }
            // Find the next buffered row that was not used ...
            while (rightRows.hasNext() && count < maxSize) {
                // Use a local so that rows skipped as "matched" never become the current row ...
                BufferedRow candidate = rightRows.next();
                Object key = extractor.getValueInRow(candidate);
                // NOTE(review): 'addIfAbsent' doubles as the "was this key matched?" test. If it also records the key, as its
                // name suggests, a later unmatched right row with the same key would be treated as matched and skipped --
                // confirm against the DistinctBuffer contract.
                if (key == null || rightMatchedRowKeys.addIfAbsent(key)) {
                    logger.trace("Join found non-matched rows on right with value {0}", key);
                    ++count;
                    currentRight = candidate;
                    rowPending = true;
                    return true;
                }
                logger.trace("Join found matched rows on right with value {0}", key);
            }
            return false;
        }

        /**
         * Move to the row found by {@link #hasNext()}, looking for one first if {@code hasNext()} was not called.
         *
         * @throws java.util.NoSuchElementException if there is no further row in this batch
         */
        @Override
        public void nextRow() {
            if (!rowPending && !hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            // 'currentRight' was already set by 'hasNext'; simply mark it as consumed ...
            rowPending = false;
        }

        @Override
        public CachedNode getNode() {
            // We're only returning right values, so there is never a value at index 0 ...
            return null;
        }

        @Override
        public CachedNode getNode( int index ) {
            if (currentRight != null && index >= leftWidth) {
                return currentRight.getNode(index - leftWidth);
            }
            // Left-hand columns are always empty in a right-only row ...
            return null;
        }

        @Override
        public float getScore() {
            // We're only returning right values, so there is never a value at index 0 ...
            return 0.0f;
        }

        @Override
        public float getScore( int index ) {
            if (currentRight != null && index >= leftWidth) {
                return currentRight.getScore(index - leftWidth);
            }
            // Left-hand columns always score 0 in a right-only row ...
            return 0.0f;
        }
    }
}