org.apache.cassandra.db.tries.CollectionMergeTrie Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.db.tries;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import com.google.common.collect.Iterables;

/**
 * A merged view of multiple tries.
 *
 * This is accomplished by walking the cursors in parallel; the merged cursor takes the position and features of the
 * smallest and advances with it; when multiple cursors are equal, all of them are advanced. The ordered view of the
 * cursors is maintained using a custom binary min-heap, built for efficiently reforming the heap when the top elements
 * are advanced (see {@link CollectionMergeCursor}).
 *
 * Crucial for the efficiency of this is the fact that when they are advanced like this, we can compare cursors'
 * positions by their depth descending and then incomingTransition ascending.
 *
 * See Trie.md for further details.
 */
class CollectionMergeTrie extends Trie
{
    private final CollectionMergeResolver resolver;  // only called on more than one input
    protected final Collection> inputs;

    CollectionMergeTrie(Collection> inputs, CollectionMergeResolver resolver)
    {
        this.resolver = resolver;
        this.inputs = inputs;
    }

    @Override
    protected Cursor cursor()
    {
        return new CollectionMergeCursor<>(resolver, inputs);
    }

    /**
     * Compare the positions of two cursors. One is before the other when
     * - its depth is greater, or
     * - its depth is equal, and the incoming transition is smaller.
     */
    static  boolean greaterCursor(Cursor c1, Cursor c2)
    {
        int c1depth = c1.depth();
        int c2depth = c2.depth();
        if (c1depth != c2depth)
            return c1depth < c2depth;
        return c1.incomingTransition() > c2.incomingTransition();
    }

    static  boolean equalCursor(Cursor c1, Cursor c2)
    {
        return c1.depth() == c2.depth() && c1.incomingTransition() == c2.incomingTransition();
    }

    /*
     * The merge cursor is a variation of the idea of a merge iterator with one key observation: because we advance
     * the source iterators together, we can compare them just by depth and incoming transition.
     *
     * The most straightforward way to implement merging of iterators is to use a {@code PriorityQueue},
     * {@code poll} it to find the next item to consume, then {@code add} the iterator back after advancing.
     * This is not very efficient as {@code poll} and {@code add} in all cases require at least
     * {@code log(size)} comparisons and swaps (usually more than {@code 2*log(size)}) per consumed item, even
     * if the input is suitable for fast iteration.
     *
     * The implementation below makes use of the fact that replacing the top element in a binary heap can be
     * done much more efficiently than separately removing it and placing it back, especially in the cases where
     * the top iterator is to be used again very soon (e.g. when there are large sections of the output where
     * only a limited number of input iterators overlap, which is normally the case in many practically useful
     * situations, e.g. levelled compaction).
     *
     * The implementation builds and maintains a binary heap of sources (stored in an array), where we do not
     * add items after the initial construction. Instead we advance the smallest element (which is at the top
     * of the heap) and push it down to find its place for its new position. Should this source be exhausted,
     * we swap it with the last source in the heap and proceed by pushing that down in the heap.
     *
     * In the case where we have multiple sources with matching positions, the merging algorithm
     * must be able to merge all equal values. To achieve this {@code content} walks the heap to
     * find all equal cursors without advancing them, and separately {@code advance} advances
     * all equal sources and restores the heap structure.
     *
     * The latter is done equivalently to the process of initial construction of a min-heap using back-to-front
     * heapification as done in the classic heapsort algorithm. It only needs to heapify subheaps whose top item
     * is advanced (i.e. one whose position matches the current), and we can do that recursively from
     * bottom to top. Should a source be exhausted when advancing, it can be thrown away by swapping in the last
     * source in the heap (note: we must be careful to advance that source too if required).
     *
     * To make it easier to advance efficienty in single-sourced branches of tries, we extract the current smallest
     * cursor (the head) separately, and start any advance with comparing that to the heap's first. When the smallest
     * cursor remains the same (e.g. in branches coming from a single source) this makes it possible to advance with
     * just one comparison instead of two at the expense of increasing the number by one in the general case.
     *
     * Note: This is a simplification of the MergeIterator code from CASSANDRA-8915, without the leading ordered
     * section and equalParent flag since comparisons of cursor positions are cheap.
     */
    static class CollectionMergeCursor implements Cursor
    {
        private final CollectionMergeResolver resolver;

        /**
         * The smallest cursor, tracked separately to improve performance in single-source sections of the trie.
         */
        private Cursor head;

        /**
         * Binary heap of the remaining cursors. The smallest element is at position 0.
         * Every element i is smaller than or equal to its two children, i.e.
         *     heap[i] <= heap[i*2 + 1] && heap[i] <= heap[i*2 + 2]
         */
        private final Cursor[] heap;

        /**
         * A list used to collect contents during content() calls.
         */
        private final List contents;

        public CollectionMergeCursor(CollectionMergeResolver resolver, Collection> inputs)
        {
            this.resolver = resolver;
            int count = inputs.size();
            // Get cursors for all inputs. Put one of them in head and the rest in the heap.
            heap = new Cursor[count - 1];
            contents = new ArrayList<>(count);
            int i = -1;
            for (Trie trie : inputs)
            {
                Cursor cursor = trie.cursor();
                assert cursor.depth() == 0;
                if (i >= 0)
                    heap[i] = cursor;
                else
                    head = cursor;
                ++i;
            }
            // The cursors are all currently positioned on the root and thus in valid heap order.
        }

        /**
         * Interface for internal operations that can be applied to the equal top elements of the heap.
         */
        interface HeapOp
        {
            void apply(CollectionMergeCursor self, Cursor cursor, int index);
        }

        /**
         * Apply a non-interfering operation, i.e. one that does not change the cursor state, to all inputs in the heap
         * that are on equal position to the head.
         * For interfering operations like advancing the cursors, use {@link #advanceEqualAndRestoreHeap(AdvancingHeapOp)}.
         */
        private void applyToEqualOnHeap(HeapOp action)
        {
            applyToEqualElementsInHeap(action, 0);
        }

        /**
         * Interface for internal advancing operations that can be applied to the heap cursors. This interface provides
         * the code to restore the heap structure after advancing the cursors.
         */
        interface AdvancingHeapOp extends HeapOp
        {
            void apply(Cursor cursor);

            default void apply(CollectionMergeCursor self, Cursor cursor, int index)
            {
                // Apply the operation, which should advance the position of the element.
                apply(cursor);

                // This method is called on the back path of the recursion. At this point the heaps at both children are
                // advanced and well-formed.
                // Place current node in its proper position.
                self.heapifyDown(cursor, index);
                // The heap rooted at index is now advanced and well-formed.
            }
        }


        /**
         * Advance the state of all inputs in the heap that are on equal position as the head and restore the heap
         * invariant.
         */
        private void advanceEqualAndRestoreHeap(AdvancingHeapOp action)
        {
            applyToEqualElementsInHeap(action, 0);
        }

        /**
         * Apply an operation to all elements on the heap that are equal to the head. Descends recursively in the heap
         * structure to all equal children and applies the operation on the way back.
         *
         * This operation can be something that does not change the cursor state (see {@link #content}) or an operation
         * that advances the cursor to a new state, wrapped in a {@link AdvancingHeapOp} ({@link #advance} or
         * {@link #skipChildren}). The latter interface takes care of pushing elements down in the heap after advancing
         * and restores the subheap state on return from each level of the recursion.
         */
        private void applyToEqualElementsInHeap(HeapOp action, int index)
        {
            if (index >= heap.length)
                return;
            Cursor item = heap[index];
            if (!equalCursor(item, head))
                return;

            // If the children are at the same position, they also need advancing and their subheap
            // invariant to be restored.
            applyToEqualElementsInHeap(action, index * 2 + 1);
            applyToEqualElementsInHeap(action, index * 2 + 2);

            // Apply the action. This is done on the reverse direction to give the action a chance to form proper
            // subheaps and combine them on processing the parent.
            action.apply(this, item, index);
        }

        /**
         * Push the given state down in the heap from the given index until it finds its proper place among
         * the subheap rooted at that position.
         */
        private void heapifyDown(Cursor item, int index)
        {
            while (true)
            {
                int next = index * 2 + 1;
                if (next >= heap.length)
                    break;
                // Select the smaller of the two children to push down to.
                if (next + 1 < heap.length && greaterCursor(heap[next], heap[next + 1]))
                    ++next;
                // If the child is greater or equal, the invariant has been restored.
                if (!greaterCursor(item, heap[next]))
                    break;
                heap[index] = heap[next];
                index = next;
            }
            heap[index] = item;
        }

        /**
         * Check if the head is greater than the top element in the heap, and if so, swap them and push down the new
         * top until its proper place.
         * @param headDepth the depth of the head cursor (as returned by e.g. advance).
         * @return the new head element's depth
         */
        private int maybeSwapHead(int headDepth)
        {
            int heap0Depth = heap[0].depth();
            if (headDepth > heap0Depth ||
                (headDepth == heap0Depth && head.incomingTransition() <= heap[0].incomingTransition()))
                return headDepth;   // head is still smallest

            // otherwise we need to swap heap and heap[0]
            Cursor newHeap0 = head;
            head = heap[0];
            heapifyDown(newHeap0, 0);
            return heap0Depth;
        }

        @Override
        public int advance()
        {
            advanceEqualAndRestoreHeap(Cursor::advance);
            return maybeSwapHead(head.advance());
        }

        @Override
        public int advanceMultiple(TransitionsReceiver receiver)
        {
            // If the current position is present in just one cursor, we can safely descend multiple levels within
            // its branch as no one of the other tries has content for it.
            if (equalCursor(heap[0], head))
                return advance();   // More than one source at current position, do single-step advance.

            // If there are no children, i.e. the cursor ascends, we have to check if it's become larger than some
            // other candidate.
            return maybeSwapHead(head.advanceMultiple(receiver));
        }

        @Override
        public int skipChildren()
        {
            advanceEqualAndRestoreHeap(Cursor::skipChildren);
            return maybeSwapHead(head.skipChildren());
        }

        @Override
        public int depth()
        {
            return head.depth();
        }

        @Override
        public int incomingTransition()
        {
            return head.incomingTransition();
        }

        @Override
        public T content()
        {
            applyToEqualOnHeap(CollectionMergeCursor::collectContent);
            collectContent(head, -1);

            T toReturn;
            switch (contents.size())
            {
                case 0:
                    toReturn = null;
                    break;
                case 1:
                    toReturn = contents.get(0);
                    break;
                default:
                    toReturn = resolver.resolve(contents);
                    break;
            }
            contents.clear();
            return toReturn;
        }

        private void collectContent(Cursor item, int index)
        {
            T itemContent = item.content();
            if (itemContent != null)
                contents.add(itemContent);
        }
    }

    /**
     * Special instance for sources that are guaranteed distinct. The main difference is that we can form unordered
     * value list by concatenating sources.
     */
    static class Distinct extends CollectionMergeTrie
    {
        Distinct(Collection> inputs)
        {
            super(inputs, throwingResolver());
        }

        @Override
        public Iterable valuesUnordered()
        {
            return Iterables.concat(Iterables.transform(inputs, Trie::valuesUnordered));
        }
    }
}