All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.cassandra.utils.btree.BTree Maven / Gradle / Ivy

Go to download

The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.

There is a newer version: 5.0-rc1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.cassandra.utils.btree;

import java.util.*;
import java.util.function.BiConsumer;
import java.util.function.Consumer;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators;
import com.google.common.collect.Ordering;

import org.apache.cassandra.utils.BiLongAccumulator;
import org.apache.cassandra.utils.LongAccumulator;
import org.apache.cassandra.utils.ObjectSizes;

import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.filter;
import static com.google.common.collect.Iterables.transform;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.util.Comparator.naturalOrder;

public class BTree
{
    /**
     * Leaf Nodes are a raw array of values: Object[V1, V1, ...,].
     *
     * Branch Nodes: Object[V1, V2, ..., child[<V1.key], child[<V2.key], ..., child[< Inf], size], where
     * each child is another node, i.e., an Object[].  Thus, the value elements in a branch node are the
     * first half of the array (minus one).  In our implementation, each value must include its own key;
     * we access these via Comparator, rather than directly. 
     *
     * So we can quickly distinguish between leaves and branches, we require that leaf nodes are always an odd number
     * of elements (padded with a null, if necessary), and branches are always an even number of elements.
     *
     * BTrees are immutable; updating one returns a new tree that reuses unmodified nodes.
     *
     * There are no references back to a parent node from its children.  (This would make it impossible to re-use
     * subtrees when modifying the tree, since the modified tree would need new parent references.)
     * Instead, we store these references in a Path as needed when navigating the tree.
     */

    // The maximum fan factor used for B-Trees
    static final int FAN_SHIFT;

    // The maximun tree size for certain heigth of tree
    static final int[] TREE_SIZE;

    // NB we encode Path indexes as Bytes, so this needs to be less than Byte.MAX_VALUE / 2
    static final int FAN_FACTOR;

    static final int MAX_TREE_SIZE = Integer.MAX_VALUE;

    static
    {
        int fanfactor = Integer.parseInt(System.getProperty("cassandra.btree.fanfactor", "32"));
        assert fanfactor >= 2 : "the minimal btree fanfactor is 2";
        int shift = 1;
        while (1 << shift < fanfactor)
            shift += 1;

        FAN_SHIFT = shift;
        FAN_FACTOR = 1 << FAN_SHIFT;

        // For current FAN_FACTOR, calculate the maximum height of the tree we could build
        int maxHeight = 0;
        for (long maxSize = 0; maxSize < MAX_TREE_SIZE; maxHeight++)
            // each tree node could have (FAN_FACTOR + 1) children,
            // plus current node could have FAN_FACTOR number of values
            maxSize = maxSize * (FAN_FACTOR + 1) + FAN_FACTOR;

        TREE_SIZE = new int[maxHeight];

        TREE_SIZE[0] = FAN_FACTOR;
        for (int i = 1; i < maxHeight - 1; i++)
            TREE_SIZE[i] = TREE_SIZE[i - 1] * (FAN_FACTOR + 1) + FAN_FACTOR;

        TREE_SIZE[maxHeight - 1] = MAX_TREE_SIZE;
    }


    static final int MINIMAL_NODE_SIZE = FAN_FACTOR >> 1;

    // An empty BTree Leaf - which is the same as an empty BTree
    static final Object[] EMPTY_LEAF = new Object[1];

    // An empty BTree branch - used only for internal purposes in Modifier
    static final Object[] EMPTY_BRANCH = new Object[] { null, new int[0] };

    // direction of iteration
    public static enum Dir
    {
        ASC, DESC;
        public Dir invert() { return this == ASC ? DESC : ASC; }
        public static Dir asc(boolean asc) { return asc ? ASC : DESC; }
        public static Dir desc(boolean desc) { return desc ? DESC : ASC; }
    }

    /**
     * Enables methods to consume the contents of iterators, collections, or arrays without duplicating code or
     * allocating intermediate objects. Instead of taking an argument that implements an interface, a method takes
     * an opaque object as the input, and a singleton helper object it uses as an intermediary to access it's contents.
     * The purpose of doing things this way is to avoid memory allocations on hot paths.
     */
    private interface IteratingFunction
    {
        /**
         * Returns the next object at the given index. This method  must be called with sequentially increasing index
         * values, starting at 0, and must only be called once per index value. The results of calling this method
         * without following these rules are undefined.
         */
         K nextAt(T input, int idx);
    }

    private static final IteratingFunction ITERATOR_FUNCTION = new IteratingFunction()
    {
        public  K nextAt(Iterator input, int idx)
        {
            return (K) input.next();
        }
    };

    private static final IteratingFunction ARRAY_FUNCTION = new IteratingFunction()
    {
        public  K nextAt(Object[] input, int idx)
        {
            return (K) input[idx];
        }
    };

    public static Object[] empty()
    {
        return EMPTY_LEAF;
    }

    public static Object[] singleton(Object value)
    {
        return new Object[] { value };
    }

    public static  Object[] build(Collection source, UpdateFunction updateF)
    {
        return buildInternal(source.iterator(), ITERATOR_FUNCTION, source.size(), updateF);
    }

    /**
     * Creates a BTree containing all of the objects in the provided collection
     *
     * @param source  the items to build the tree with. MUST BE IN STRICTLY ASCENDING ORDER.
     * @param size    the size of the source iterable
     * @return        a btree representing the contents of the provided iterable
     */
    public static  Object[] build(Iterable source, int size, UpdateFunction updateF)
    {
        if (size < 0)
            throw new IllegalArgumentException(Integer.toString(size));
        return buildInternal(source.iterator(), ITERATOR_FUNCTION, size, updateF);
    }

    public static  Object[] build(Object[] source, int size, UpdateFunction updateF)
    {
        if (size < 0)
            throw new IllegalArgumentException(Integer.toString(size));
        return buildInternal(source, ARRAY_FUNCTION, size, updateF);
    }

    private static  Object[] buildLeaf(S source, IteratingFunction iterFunc, int size, int startIdx, UpdateFunction updateF)
    {
        V[] values = (V[]) new Object[size | 1];

        int idx = startIdx;
        for (int i = 0; i < size; i++)
        {
            K k = iterFunc.nextAt(source, idx);
            values[i] = updateF.apply(k);
            idx++;
        }
        if (updateF != UpdateFunction.noOp())
            updateF.allocated(ObjectSizes.sizeOfArray(values));
        return values;
    }

    private static  Object[] buildInternal(S source, IteratingFunction iterFunc, int size, int level, int startIdx, UpdateFunction updateF)
    {
        assert size > 0;
        assert level >= 0;
        if (level == 0)
            return buildLeaf(source, iterFunc, size, startIdx, updateF);

        // calcuate child num: (size - (childNum - 1)) / maxChildSize <= childNum
        int childNum = size / (TREE_SIZE[level - 1] + 1) + 1;

        V[] values = (V[]) new Object[childNum * 2];
        if (updateF != UpdateFunction.noOp())
            updateF.allocated(ObjectSizes.sizeOfArray(values));

        int[] indexOffsets = new int[childNum];
        int childPos = childNum - 1;

        int index = 0;
        for (int i = 0; i < childNum - 1; i++)
        {
            // Calculate the next childSize by splitting the remaining values to the remaining child nodes.
            // The performance could be improved by pre-compute the childSize (see #9989 comments).
            int childSize = (size - index) / (childNum - i);
            // Build the tree with inorder traversal
            values[childPos + i] = (V) buildInternal(source, iterFunc, childSize, level - 1, startIdx + index, updateF);
            index += childSize;
            indexOffsets[i] = index;

            K k = iterFunc.nextAt(source, startIdx + index);
            values[i] = updateF.apply(k);
            index++;
        }

        values[childPos + childNum - 1] = (V) buildInternal(source, iterFunc, size - index, level - 1, startIdx + index, updateF);
        indexOffsets[childNum - 1] = size;

        values[childPos + childNum] = (V) indexOffsets;

        return values;
    }

    private static  Object[] buildInternal(S source, IteratingFunction iterFunc, int size, UpdateFunction updateF)
    {
        assert size >= 0;
        if (size == 0)
            return EMPTY_LEAF;

        // find out the height of the tree
        int level = 0;
        while (size > TREE_SIZE[level])
            level++;
        return buildInternal(source, iterFunc, size, level, 0, updateF);
    }

    public static  Object[] update(Object[] btree,
                                                                Comparator comparator,
                                                                Collection updateWith,
                                                                UpdateFunction updateF)
    {
        return update(btree, comparator, updateWith, updateWith.size(), updateF);
    }

    /**
     * Returns a new BTree with the provided collection inserting/replacing as necessary any equal items
     *
     * @param btree              the tree to update
     * @param comparator         the comparator that defines the ordering over the items in the tree
     * @param updateWith         the items to either insert / update. MUST BE IN STRICTLY ASCENDING ORDER.
     * @param updateWithLength   then number of elements in updateWith
     * @param updateF            the update function to apply to any pairs we are swapping, and maybe abort early
     * @param 
     * @return
     */
    public static  Object[] update(Object[] btree,
                                                                Comparator comparator,
                                                                Iterable updateWith,
                                                                int updateWithLength,
                                                                UpdateFunction updateF)
    {
        if (isEmpty(btree))
            return build(updateWith, updateWithLength, updateF);


        TreeBuilder builder = TreeBuilder.newInstance();
        btree = builder.update(btree, comparator, updateWith, updateF);
        return btree;
    }

    public static  Object[] merge(Object[] tree1, Object[] tree2, Comparator comparator, UpdateFunction updateF)
    {
        if (size(tree1) < size(tree2))
        {
            Object[] tmp = tree1;
            tree1 = tree2;
            tree2 = tmp;
        }
        return update(tree1, comparator, new BTreeSet<>(tree2, comparator), updateF);
    }

    public static  Iterator iterator(Object[] btree)
    {
        return iterator(btree, Dir.ASC);
    }

    public static  Iterator iterator(Object[] btree, Dir dir)
    {
        return isLeaf(btree) ? new LeafBTreeSearchIterator<>(btree, null, dir)
                             : new FullBTreeSearchIterator<>(btree, null, dir);
    }

    public static  Iterator iterator(Object[] btree, int lb, int ub, Dir dir)
    {
        return isLeaf(btree) ? new LeafBTreeSearchIterator<>(btree, null, dir, lb, ub)
                             : new FullBTreeSearchIterator<>(btree, null, dir, lb, ub);
    }

    public static  Iterable iterable(Object[] btree)
    {
        return iterable(btree, Dir.ASC);
    }

    public static  Iterable iterable(Object[] btree, Dir dir)
    {
        return () -> iterator(btree, dir);
    }

    public static  Iterable iterable(Object[] btree, int lb, int ub, Dir dir)
    {
        return () -> iterator(btree, lb, ub, dir);
    }

    /**
     * Returns an Iterator over the entire tree
     *
     * @param btree  the tree to iterate over
     * @param dir    direction of iteration
     * @param 
     * @return
     */
    public static  BTreeSearchIterator slice(Object[] btree, Comparator comparator, Dir dir)
    {
        return isLeaf(btree) ? new LeafBTreeSearchIterator<>(btree, comparator, dir)
                             : new FullBTreeSearchIterator<>(btree, comparator, dir);
    }

    /**
     * @param btree      the tree to iterate over
     * @param comparator the comparator that defines the ordering over the items in the tree
     * @param start      the beginning of the range to return, inclusive (in ascending order)
     * @param end        the end of the range to return, exclusive (in ascending order)
     * @param dir   if false, the iterator will start at the last item and move backwards
     * @return           an Iterator over the defined sub-range of the tree
     */
    public static  BTreeSearchIterator slice(Object[] btree, Comparator comparator, K start, K end, Dir dir)
    {
        return slice(btree, comparator, start, true, end, false, dir);
    }

    /**
     * @param btree      the tree to iterate over
     * @param comparator the comparator that defines the ordering over the items in the tree
     * @param startIndex      the start index of the range to return, inclusive
     * @param endIndex        the end index of the range to return, inclusive
     * @param dir   if false, the iterator will start at the last item and move backwards
     * @return           an Iterator over the defined sub-range of the tree
     */
    public static  BTreeSearchIterator slice(Object[] btree, Comparator comparator, int startIndex, int endIndex, Dir dir)
    {
        return isLeaf(btree) ? new LeafBTreeSearchIterator<>(btree, comparator, dir, startIndex, endIndex)
                             : new FullBTreeSearchIterator<>(btree, comparator, dir, startIndex, endIndex);
    }

    /**
     * @param btree          the tree to iterate over
     * @param comparator     the comparator that defines the ordering over the items in the tree
     * @param start          low bound of the range
     * @param startInclusive inclusivity of lower bound
     * @param end            high bound of the range
     * @param endInclusive   inclusivity of higher bound
     * @param dir            direction of iteration
     * @return               an Iterator over the defined sub-range of the tree
     */
    public static  BTreeSearchIterator slice(Object[] btree, Comparator comparator, K start, boolean startInclusive, K end, boolean endInclusive, Dir dir)
    {
        int inclusiveLowerBound = max(0,
                                      start == null ? Integer.MIN_VALUE
                                                    : startInclusive ? ceilIndex(btree, comparator, start)
                                                                     : higherIndex(btree, comparator, start));
        int inclusiveUpperBound = min(size(btree) - 1,
                                      end == null ? Integer.MAX_VALUE
                                                  : endInclusive ? floorIndex(btree, comparator, end)
                                                                 : lowerIndex(btree, comparator, end));
        return isLeaf(btree) ? new LeafBTreeSearchIterator<>(btree, comparator, dir, inclusiveLowerBound, inclusiveUpperBound)
                             : new FullBTreeSearchIterator<>(btree, comparator, dir, inclusiveLowerBound, inclusiveUpperBound);
    }

    /**
     * @return the item in the tree that sorts as equal to the search argument, or null if no such item
     */
    public static  V find(Object[] node, Comparator comparator, V find)
    {
        while (true)
        {
            int keyEnd = getKeyEnd(node);
            int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator);

            if (i >= 0)
                return (V) node[i];

            if (isLeaf(node))
                return null;

            i = -1 - i;
            node = (Object[]) node[keyEnd + i];
        }
    }

    /**
     * Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable.
     * Finds and replaces the item provided by index in the tree.
     */
    public static  void replaceInSitu(Object[] tree, int index, V replace)
    {
        // WARNING: if semantics change, see also InternalCursor.seekTo, which mirrors this implementation
        if ((index < 0) | (index >= size(tree)))
            throw new IndexOutOfBoundsException(index + " not in range [0.." + size(tree) + ")");

        while (!isLeaf(tree))
        {
            final int[] sizeMap = getSizeMap(tree);
            int boundary = Arrays.binarySearch(sizeMap, index);
            if (boundary >= 0)
            {
                // exact match, in this branch node
                assert boundary < sizeMap.length - 1;
                tree[boundary] = replace;
                return;
            }

            boundary = -1 -boundary;
            if (boundary > 0)
            {
                assert boundary < sizeMap.length;
                index -= (1 + sizeMap[boundary - 1]);
            }
            tree = (Object[]) tree[getChildStart(tree) + boundary];
        }
        assert index < getLeafKeyEnd(tree);
        tree[index] = replace;
    }

    /**
     * Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable.
     * Finds and replaces the provided item in the tree. Both should sort as equal to each other (although this is not enforced)
     */
    public static  void replaceInSitu(Object[] node, Comparator comparator, V find, V replace)
    {
        while (true)
        {
            int keyEnd = getKeyEnd(node);
            int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator);

            if (i >= 0)
            {
                assert find == node[i];
                node[i] = replace;
                return;
            }

            if (isLeaf(node))
                throw new NoSuchElementException();

            i = -1 - i;
            node = (Object[]) node[keyEnd + i];
        }
    }

    /**
     * Honours result semantics of {@link Arrays#binarySearch}, as though it were performed on the tree flattened into an array
     * @return index of item in tree, or (-(insertion point) - 1) if not present
     */
    public static  int findIndex(Object[] node, Comparator comparator, V find)
    {
        int lb = 0;
        while (true)
        {
            int keyEnd = getKeyEnd(node);
            int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator);
            boolean exact = i >= 0;

            if (isLeaf(node))
                return exact ? lb + i : i - lb;

            if (!exact)
                i = -1 - i;

            int[] sizeMap = getSizeMap(node);
            if (exact)
                return lb + sizeMap[i];
            else if (i > 0)
                lb += sizeMap[i - 1] + 1;

            node = (Object[]) node[keyEnd + i];
        }
    }

    /**
     * @return the value at the index'th position in the tree, in tree order
     */
    public static  V findByIndex(Object[] tree, int index)
    {
        // WARNING: if semantics change, see also InternalCursor.seekTo, which mirrors this implementation
        if ((index < 0) | (index >= size(tree)))
            throw new IndexOutOfBoundsException(index + " not in range [0.." + size(tree) + ")");

        Object[] node = tree;
        while (true)
        {
            if (isLeaf(node))
            {
                int keyEnd = getLeafKeyEnd(node);
                assert index < keyEnd;
                return (V) node[index];
            }

            int[] sizeMap = getSizeMap(node);
            int boundary = Arrays.binarySearch(sizeMap, index);
            if (boundary >= 0)
            {
                // exact match, in this branch node
                assert boundary < sizeMap.length - 1;
                return (V) node[boundary];
            }

            boundary = -1 -boundary;
            if (boundary > 0)
            {
                assert boundary < sizeMap.length;
                index -= (1 + sizeMap[boundary - 1]);
            }
            node = (Object[]) node[getChildStart(node) + boundary];
        }
    }

    /* since we have access to binarySearch semantics within indexOf(), we can use this to implement
     * lower/upper/floor/higher very trivially
     *
     * this implementation is *not* optimal; it requires two logarithmic traversals, although the second is much cheaper
     * (having less height, and operating over only primitive arrays), and the clarity is compelling
     */

    public static  int lowerIndex(Object[] btree, Comparator comparator, V find)
    {
        int i = findIndex(btree, comparator, find);
        if (i < 0)
            i = -1 -i;
        return i - 1;
    }

    public static  V lower(Object[] btree, Comparator comparator, V find)
    {
        int i = lowerIndex(btree, comparator, find);
        return i >= 0 ? findByIndex(btree, i) : null;
    }

    public static  int floorIndex(Object[] btree, Comparator comparator, V find)
    {
        int i = findIndex(btree, comparator, find);
        if (i < 0)
            i = -2 -i;
        return i;
    }

    public static  V floor(Object[] btree, Comparator comparator, V find)
    {
        int i = floorIndex(btree, comparator, find);
        return i >= 0 ? findByIndex(btree, i) : null;
    }

    public static  int higherIndex(Object[] btree, Comparator comparator, V find)
    {
        int i = findIndex(btree, comparator, find);
        if (i < 0) i = -1 -i;
        else i++;
        return i;
    }

    public static  V higher(Object[] btree, Comparator comparator, V find)
    {
        int i = higherIndex(btree, comparator, find);
        return i < size(btree) ? findByIndex(btree, i) : null;
    }

    public static  int ceilIndex(Object[] btree, Comparator comparator, V find)
    {
        int i = findIndex(btree, comparator, find);
        if (i < 0)
            i = -1 -i;
        return i;
    }

    public static  V ceil(Object[] btree, Comparator comparator, V find)
    {
        int i = ceilIndex(btree, comparator, find);
        return i < size(btree) ? findByIndex(btree, i) : null;
    }

    // UTILITY METHODS

    // get the upper bound we should search in for keys in the node
    static int getKeyEnd(Object[] node)
    {
        if (isLeaf(node))
            return getLeafKeyEnd(node);
        else
            return getBranchKeyEnd(node);
    }

    // get the last index that is non-null in the leaf node
    static int getLeafKeyEnd(Object[] node)
    {
        int len = node.length;
        return node[len - 1] == null ? len - 1 : len;
    }

    // return the boundary position between keys/children for the branch node
    // == number of keys, as they are indexed from zero
    static int getBranchKeyEnd(Object[] branchNode)
    {
        return (branchNode.length / 2) - 1;
    }

    /**
     * @return first index in a branch node containing child nodes
     */
    static int getChildStart(Object[] branchNode)
    {
        return getBranchKeyEnd(branchNode);
    }

    /**
     * @return last index + 1 in a branch node containing child nodes
     */
    static int getChildEnd(Object[] branchNode)
    {
        return branchNode.length - 1;
    }

    /**
     * @return number of children in a branch node
     */
    static int getChildCount(Object[] branchNode)
    {
        return branchNode.length / 2;
    }

    /**
     * @return the size map for the branch node
     */
    static int[] getSizeMap(Object[] branchNode)
    {
        return (int[]) branchNode[getChildEnd(branchNode)];
    }

    /**
     * @return the size map for the branch node
     */
    static int lookupSizeMap(Object[] branchNode, int index)
    {
        return getSizeMap(branchNode)[index];
    }

    // get the size from the btree's index (fails if not present)
    public static int size(Object[] tree)
    {
        if (isLeaf(tree))
            return getLeafKeyEnd(tree);
        int length = tree.length;
        // length - 1 == getChildEnd == getPositionOfSizeMap
        // (length / 2) - 1 == getChildCount - 1 == position of full tree size
        // hard code this, as will be used often;
        return ((int[]) tree[length - 1])[(length / 2) - 1];
    }

    public static long sizeOfStructureOnHeap(Object[] tree)
    {
        long size = ObjectSizes.sizeOfArray(tree);
        if (isLeaf(tree))
            return size;
        for (int i = getChildStart(tree) ; i < getChildEnd(tree) ; i++)
            size += sizeOfStructureOnHeap((Object[]) tree[i]);
        return size;
    }

    // returns true if the provided node is a leaf, false if it is a branch
    public static boolean isLeaf(Object[] node)
    {
        return (node.length & 1) == 1;
    }

    public static boolean isEmpty(Object[] tree)
    {
        return tree == EMPTY_LEAF;
    }

    public static int depth(Object[] tree)
    {
        int depth = 1;
        while (!isLeaf(tree))
        {
            depth++;
            tree = (Object[]) tree[getKeyEnd(tree)];
        }
        return depth;
    }

    /**
     * Fill the target array with the contents of the provided subtree, in ascending order, starting at targetOffset
     * @param tree source
     * @param target array
     * @param targetOffset offset in target array
     * @return number of items copied (size of tree)
     */
    public static int toArray(Object[] tree, Object[] target, int targetOffset)
    {
        return toArray(tree, 0, size(tree), target, targetOffset);
    }
    public static int toArray(Object[] tree, int treeStart, int treeEnd, Object[] target, int targetOffset)
    {
        if (isLeaf(tree))
        {
            int count = treeEnd - treeStart;
            System.arraycopy(tree, treeStart, target, targetOffset, count);
            return count;
        }

        int newTargetOffset = targetOffset;
        int childCount = getChildCount(tree);
        int childOffset = getChildStart(tree);
        for (int i = 0 ; i < childCount ; i++)
        {
            int childStart = treeIndexOffsetOfChild(tree, i);
            int childEnd = treeIndexOfBranchKey(tree, i);
            if (childStart <= treeEnd && childEnd >= treeStart)
            {
                newTargetOffset += toArray((Object[]) tree[childOffset + i], max(0, treeStart - childStart), min(childEnd, treeEnd) - childStart,
                                           target, newTargetOffset);
                if (treeStart <= childEnd && treeEnd > childEnd) // this check will always fail for the non-existent key
                    target[newTargetOffset++] = tree[i];
            }
        }
        return newTargetOffset - targetOffset;
    }

    // simple class for avoiding duplicate transformation work
    private static class FiltrationTracker implements Function
    {
        final Function wrapped;
        int index;
        boolean failed;

        private FiltrationTracker(Function wrapped)
        {
            this.wrapped = wrapped;
        }

        public V apply(V i)
        {
            V o = wrapped.apply(i);
            if (o != null) index++;
            else failed = true;
            return o;
        }
    }

    /**
     * Takes a btree and transforms it using the provided function, filtering out any null results.
     * The result of any transformation must sort identically wrt the other results as their originals
     */
    public static  Object[] transformAndFilter(Object[] btree, Function function)
    {
        if (isEmpty(btree))
            return btree;

        // TODO: can be made more efficient
        FiltrationTracker wrapped = new FiltrationTracker<>(function);
        Object[] result = transformAndFilter(btree, wrapped);
        if (!wrapped.failed)
            return result;

        // take the already transformed bits from the head of the partial result
        Iterable head = iterable(result, 0, wrapped.index - 1, Dir.ASC);
        // and concatenate with remainder of original tree, with transformation applied
        Iterable remainder = iterable(btree, wrapped.index + 1, size(btree) - 1, Dir.ASC);
        remainder = filter(transform(remainder, function), (x) -> x != null);
        Iterable build = concat(head, remainder);

        Builder builder = builder((Comparator) naturalOrder());
        builder.auto(false);
        for (V v : build)
            builder.add(v);
        return builder.build();
    }

    private static  Object[] transformAndFilter(Object[] btree, FiltrationTracker function)
    {
        Object[] result = btree;
        boolean isLeaf = isLeaf(btree);
        int childOffset = isLeaf ? Integer.MAX_VALUE : getChildStart(btree);
        int limit = isLeaf ? getLeafKeyEnd(btree) : btree.length - 1;
        for (int i = 0 ; i < limit ; i++)
        {
            // we want to visit in iteration order, so we visit our key nodes inbetween our children
            int idx = isLeaf ? i : (i / 2) + (i % 2 == 0 ? childOffset : 0);
            Object current = btree[idx];
            Object updated = idx < childOffset ? function.apply((V) current) : transformAndFilter((Object[]) current, function);
            if (updated != current)
            {
                if (result == btree)
                    result = btree.clone();
                result[idx] = updated;
            }
            if (function.failed)
                return result;
        }
        return result;
    }

    public static boolean equals(Object[] a, Object[] b)
    {
        return size(a) == size(b) && Iterators.elementsEqual(iterator(a), iterator(b));
    }

    public static int hashCode(Object[] btree)
    {
        // we can't just delegate to Arrays.deepHashCode(),
        // because two equivalent trees may be represented by differently shaped trees
        int result = 1;
        for (Object v : iterable(btree))
            result = 31 * result + Objects.hashCode(v);
        return result;

    }

    /**
     * tree index => index of key wrt all items in the tree laid out serially
     *
     * This version of the method permits requesting out-of-bounds indexes, -1 and size
     * @param root to calculate tree index within
     * @param keyIndex root-local index of key to calculate tree-index
     * @return the number of items preceding the key in the whole tree of root
     */
    public static int treeIndexOfKey(Object[] root, int keyIndex)
    {
        if (isLeaf(root))
            return keyIndex;
        int[] sizeMap = getSizeMap(root);
        if ((keyIndex >= 0) & (keyIndex < sizeMap.length))
            return sizeMap[keyIndex];
        // we support asking for -1 or size, so that we can easily use this for iterator bounds checking
        if (keyIndex < 0)
            return -1;
        return sizeMap[keyIndex - 1] + 1;
    }

    /**
     * @param keyIndex node-local index of the key to calculate index of
     * @return keyIndex; this method is here only for symmetry and clarity
     */
    public static int treeIndexOfLeafKey(int keyIndex)
    {
        return keyIndex;
    }

    /**
     * @param root to calculate tree-index within
     * @param keyIndex root-local index of key to calculate tree-index of
     * @return the number of items preceding the key in the whole tree of root
     */
    public static int treeIndexOfBranchKey(Object[] root, int keyIndex)
    {
        return lookupSizeMap(root, keyIndex);
    }

    /**
     * @param root to calculate tree-index within
     * @param childIndex root-local index of *child* to calculate tree-index of
     * @return the number of items preceding the child in the whole tree of root
     */
    public static int treeIndexOffsetOfChild(Object[] root, int childIndex)
    {
        if (childIndex == 0)
            return 0;
        return 1 + lookupSizeMap(root, childIndex - 1);
    }

    public static  Builder builder(Comparator comparator)
    {
        return new Builder<>(comparator);
    }

    public static  Builder builder(Comparator comparator, int initialCapacity)
    {
        return new Builder<>(comparator, initialCapacity);
    }

    public static class Builder
    {
        // a user-defined bulk resolution, to be applied manually via resolve()
        public static interface Resolver
        {
            // can return a different output type to input, so long as sort order is maintained
            // if a resolver is present, this method will be called for every sequence of equal inputs
            // even those with only one item
            Object resolve(Object[] array, int lb, int ub);
        }

        // a user-defined resolver that is applied automatically on encountering two duplicate values
        public static interface QuickResolver
        {
            // can return a different output type to input, so long as sort order is maintained
            // if a resolver is present, this method will be called for every sequence of equal inputs
            // even those with only one item
            V resolve(V a, V b);
        }

        Comparator comparator;
        Object[] values;
        int count;
        boolean detected = true; // true if we have managed to cheaply ensure sorted (+ filtered, if resolver == null) as we have added
        boolean auto = true; // false if the user has promised to enforce the sort order and resolve any duplicates
        QuickResolver quickResolver;

        protected Builder(Comparator comparator)
        {
            this(comparator, 16);
        }

        protected Builder(Comparator comparator, int initialCapacity)
        {
            if (initialCapacity == 0)
                initialCapacity = 16;
            this.comparator = comparator;
            this.values = new Object[initialCapacity];
        }

        @VisibleForTesting
        public Builder()
        {
            this.values = new Object[16];
        }

        private Builder(Builder builder)
        {
            this.comparator = builder.comparator;
            this.values = Arrays.copyOf(builder.values, builder.values.length);
            this.count = builder.count;
            this.detected = builder.detected;
            this.auto = builder.auto;
            this.quickResolver = builder.quickResolver;
        }

        /**
         * Creates a copy of this {@code Builder}.
         * @return a copy of this {@code Builder}.
         */
        public Builder copy()
        {
            return new Builder<>(this);
        }

        public Builder setQuickResolver(QuickResolver quickResolver)
        {
            this.quickResolver = quickResolver;
            return this;
        }

        public void reuse()
        {
            reuse(comparator);
        }

        public void reuse(Comparator comparator)
        {
            this.comparator = comparator;
            Arrays.fill(values, null);
            count = 0;
            detected = true;
        }

        public Builder auto(boolean auto)
        {
            this.auto = auto;
            return this;
        }

        public Builder add(V v)
        {
            if (count == values.length)
                values = Arrays.copyOf(values, count * 2);

            Object[] values = this.values;
            int prevCount = this.count++;
            values[prevCount] = v;

            if (auto && detected && prevCount > 0)
            {
                V prev = (V) values[prevCount - 1];
                int c = comparator.compare(prev, v);
                if (c == 0 && auto)
                {
                    count = prevCount;
                    if (quickResolver != null)
                        values[prevCount - 1] = quickResolver.resolve(prev, v);
                }
                else if (c > 0)
                {
                    detected = false;
                }
            }

            return this;
        }

        public Builder addAll(Collection add)
        {
            if (auto && add instanceof SortedSet && equalComparators(comparator, ((SortedSet) add).comparator()))
            {
                // if we're a SortedSet, permit quick order-preserving addition of items
                // if we collect all duplicates, don't bother as merge will necessarily be more expensive than sorting at end
                return mergeAll(add, add.size());
            }
            detected = false;
            if (values.length < count + add.size())
                values = Arrays.copyOf(values, max(count + add.size(), count * 2));
            for (V v : add)
                values[count++] = v;
            return this;
        }

        private static boolean equalComparators(Comparator a, Comparator b)
        {
            return a == b || (isNaturalComparator(a) && isNaturalComparator(b));
        }

        private static boolean isNaturalComparator(Comparator a)
        {
            return a == null || a == Comparator.naturalOrder() || a == Ordering.natural();
        }

        // iter must be in sorted order!
        private Builder mergeAll(Iterable add, int addCount)
        {
            assert auto;
            // ensure the existing contents are in order
            autoEnforce();

            int curCount = count;
            // we make room for curCount * 2 + addCount, so that we can copy the current values to the end
            // if necessary for continuing the merge, and have the new values directly after the current value range
            if (values.length < curCount * 2 + addCount)
                values = Arrays.copyOf(values, max(curCount * 2 + addCount, curCount * 3));

            if (add instanceof BTreeSet)
            {
                // use btree set's fast toArray method, to append directly
                ((BTreeSet) add).toArray(values, curCount);
            }
            else
            {
                // consider calling toArray() and System.arraycopy
                int i = curCount;
                for (V v : add)
                    values[i++] = v;
            }
            return mergeAll(addCount);
        }

        private Builder mergeAll(int addCount)
        {
            Object[] a = values;
            int addOffset = count;

            int i = 0, j = addOffset;
            int curEnd = addOffset, addEnd = addOffset + addCount;

            // save time in cases where we already have a subset, by skipping dir
            while (i < curEnd && j < addEnd)
            {
                V ai = (V) a[i], aj = (V) a[j];
                // in some cases, such as Columns, we may have identity supersets, so perform a cheap object-identity check
                int c = ai == aj ? 0 : comparator.compare(ai, aj);
                if (c > 0)
                    break;
                else if (c == 0)
                {
                    if (quickResolver != null)
                        a[i] = quickResolver.resolve(ai, aj);
                    j++;
                }
                i++;
            }

            if (j == addEnd)
                return this; // already a superset of the new values

            // otherwise, copy the remaining existing values to the very end, freeing up space for merge result
            int newCount = i;
            System.arraycopy(a, i, a, addEnd, count - i);
            curEnd = addEnd + (count - i);
            i = addEnd;

            while (i < curEnd && j < addEnd)
            {
                V ai = (V) a[i];
                V aj = (V) a[j];
                // could avoid one comparison if we cared, but would make this ugly
                int c = comparator.compare(ai, aj);
                if (c == 0)
                {
                    Object newValue = quickResolver == null ? ai : quickResolver.resolve(ai, aj);
                    a[newCount++] = newValue;
                    i++;
                    j++;
                }
                else
                {
                    a[newCount++] =  c < 0 ? a[i++] : a[j++];
                }
            }

            // exhausted one of the inputs; fill in remainder of the other
            if (i < curEnd)
            {
                System.arraycopy(a, i, a, newCount, curEnd - i);
                newCount += curEnd - i;
            }
            else if (j < addEnd)
            {
                if (j != newCount)
                    System.arraycopy(a, j, a, newCount, addEnd - j);
                newCount += addEnd - j;
            }
            count = newCount;
            return this;
        }

        public boolean isEmpty()
        {
            return count == 0;
        }

        public Builder reverse()
        {
            assert !auto;
            int mid = count / 2;
            for (int i = 0 ; i < mid ; i++)
            {
                Object t = values[i];
                values[i] = values[count - (1 + i)];
                values[count - (1 + i)] = t;
            }
            return this;
        }

        public Builder sort()
        {
            Arrays.sort((V[]) values, 0, count, comparator);
            return this;
        }

        // automatically enforce sorted+filtered
        private void autoEnforce()
        {
            if (!detected && count > 1)
            {
                sort();
                int prevIdx = 0;
                V prev = (V) values[0];
                for (int i = 1 ; i < count ; i++)
                {
                    V next = (V) values[i];
                    if (comparator.compare(prev, next) != 0)
                        values[++prevIdx] = prev = next;
                    else if (quickResolver != null)
                        values[prevIdx] = prev = quickResolver.resolve(prev, next);
                }
                count = prevIdx + 1;
            }
            detected = true;
        }

        public Builder resolve(Resolver resolver)
        {
            if (count > 0)
            {
                int c = 0;
                int prev = 0;
                for (int i = 1 ; i < count ; i++)
                {
                    if (comparator.compare((V) values[i], (V) values[prev]) != 0)
                    {
                        values[c++] = resolver.resolve((V[]) values, prev, i);
                        prev = i;
                    }
                }
                values[c++] = resolver.resolve((V[]) values, prev, count);
                count = c;
            }
            return this;
        }

        public Object[] build()
        {
            if (auto)
                autoEnforce();
            return BTree.build(values, count, UpdateFunction.noOp());
        }
    }

    /** simple static wrapper to calls to cmp.compare() which checks if either a or b are Special (i.e. represent an infinity) */
    static  int compare(Comparator cmp, Object a, Object b)
    {
        if (a == b)
            return 0;
        if (a == NEGATIVE_INFINITY | b == POSITIVE_INFINITY)
            return -1;
        if (b == NEGATIVE_INFINITY | a == POSITIVE_INFINITY)
            return 1;
        return cmp.compare((V) a, (V) b);
    }

    static Object POSITIVE_INFINITY = new Object();
    static Object NEGATIVE_INFINITY = new Object();

    public static boolean isWellFormed(Object[] btree, Comparator cmp)
    {
        return isWellFormed(cmp, btree, true, NEGATIVE_INFINITY, POSITIVE_INFINITY);
    }

    private static boolean isWellFormed(Comparator cmp, Object[] node, boolean isRoot, Object min, Object max)
    {
        if (cmp != null && !isNodeWellFormed(cmp, node, min, max))
            return false;

        if (isLeaf(node))
        {
            if (isRoot)
                return node.length <= FAN_FACTOR + 1;
            return node.length >= FAN_FACTOR / 2 && node.length <= FAN_FACTOR + 1;
        }

        final int keyCount = getBranchKeyEnd(node);
        if ((!isRoot && keyCount < FAN_FACTOR / 2) || keyCount > FAN_FACTOR + 1)
            return false;

        int type = 0;
        int size = -1;
        int[] sizeMap = getSizeMap(node);
        // compare each child node with the branch element at the head of this node it corresponds with
        for (int i = getChildStart(node); i < getChildEnd(node) ; i++)
        {
            Object[] child = (Object[]) node[i];
            size += size(child) + 1;
            if (sizeMap[i - getChildStart(node)] != size)
                return false;
            Object localmax = i < node.length - 2 ? node[i - getChildStart(node)] : max;
            if (!isWellFormed(cmp, child, false, min, localmax))
                return false;
            type |= isLeaf(child) ? 1 : 2;
            min = localmax;
        }
        return type < 3; // either all leaves or all branches but not a mix
    }

    private static boolean isNodeWellFormed(Comparator cmp, Object[] node, Object min, Object max)
    {
        Object previous = min;
        int end = getKeyEnd(node);
        for (int i = 0; i < end; i++)
        {
            Object current = node[i];
            if (compare(cmp, previous, current) >= 0)
                return false;
            previous = current;
        }
        return compare(cmp, previous, max) < 0;
    }

    private static  void applyValue(V value, BiConsumer function, A argument)
    {
        function.accept(argument, value);
    }

    public static  void applyLeaf(Object[] btree, BiConsumer function, A argument)
    {
        Preconditions.checkArgument(isLeaf(btree));
        int limit = getLeafKeyEnd(btree);
        for (int i=0; i void apply(Object[] btree, BiConsumer function, A argument)
    {
        if (isLeaf(btree))
        {
            applyLeaf(btree, function, argument);
            return;
        }

        int childOffset = getChildStart(btree);
        int limit = btree.length - 1 - childOffset;
        for (int i = 0 ; i < limit ; i++)
        {

            apply((Object[]) btree[childOffset + i], function, argument);

            if (i < childOffset)
                applyValue((V) btree[i], function, argument);
        }
    }

    /**
     * Simple method to walk the btree forwards and apply a function till a stop condition is reached
     *
     * Private method
     *
     * @param btree
     * @param function
     */
    public static  void apply(Object[] btree, Consumer function)
    {
        BTree.>apply(btree, Consumer::accept, function);
    }

    private static  int find(Object[] btree, V from, Comparator comparator)
    {
        // find the start index in iteration order
        Preconditions.checkNotNull(comparator);
        int keyEnd = getKeyEnd(btree);
        return Arrays.binarySearch((V[]) btree, 0, keyEnd, from, comparator);
    }

    private static boolean isStopSentinel(long v)
    {
        return v == Long.MAX_VALUE;
    }

    private static  long accumulateLeaf(Object[] btree, BiLongAccumulator accumulator, A arg, Comparator comparator, V from, long initialValue)
    {
        Preconditions.checkArgument(isLeaf(btree));
        long value = initialValue;
        int limit = getLeafKeyEnd(btree);

        int startIdx = 0;
        if (from != null)
        {
            int i = find(btree, from, comparator);
            boolean isExact = i >= 0;
            startIdx = isExact ? i : (-1 - i);
        }

        for (int i = startIdx; i < limit; i++)
        {
            value = accumulator.apply(arg, (V) btree[i], value);

            if (isStopSentinel(value))
                break;
        }
        return value;
    }

    /**
     * Walk the btree and accumulate a long value using the supplied accumulator function. Iteration will stop if the
     * accumulator function returns the sentinel values Long.MIN_VALUE or Long.MAX_VALUE
     *
     * If the optional from argument is not null, iteration will start from that value (or the one after it's insertion
     * point if an exact match isn't found)
     */
    public static  long accumulate(Object[] btree, BiLongAccumulator accumulator, A arg, Comparator comparator, V from, long initialValue)
    {
        if (isLeaf(btree))
            return accumulateLeaf(btree, accumulator, arg, comparator, from, initialValue);

        long value = initialValue;
        int childOffset = getChildStart(btree);

        int startChild = 0;
        if (from != null)
        {
            int i = find(btree, from, comparator);
            boolean isExact = i >= 0;

            startChild = isExact ? i + 1 : -1 - i;

            if (isExact)
            {
                value = accumulator.apply(arg, (V) btree[i], value);
                if (isStopSentinel(value))
                    return value;
                from = null;
            }
        }

        int limit = btree.length - 1 - childOffset;
        for (int i=startChild; i long accumulate(Object[] btree, LongAccumulator accumulator, Comparator comparator, V from, long initialValue)
    {
        return accumulate(btree, LongAccumulator::apply, accumulator, comparator, from, initialValue);
    }

    public static  long accumulate(Object[] btree, LongAccumulator accumulator, long initialValue)
    {
        return accumulate(btree, accumulator, null, null, initialValue);
    }

    public static  long accumulate(Object[] btree, BiLongAccumulator accumulator, A arg, long initialValue)
    {
        return accumulate(btree, accumulator, arg, null, null, initialValue);
    }
}