org.apache.cassandra.utils.btree.BTree Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
Show all versions of cassandra-all Show documentation
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cassandra.utils.btree;
import java.util.*;
import java.util.function.Consumer;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators;
import com.google.common.collect.Ordering;
import org.apache.cassandra.utils.ObjectSizes;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.filter;
import static com.google.common.collect.Iterables.transform;
import static java.lang.Math.max;
import static java.lang.Math.min;
public class BTree
{
/**
* Leaf Nodes are a raw array of values: Object[V1, V1, ...,].
*
* Branch Nodes: Object[V1, V2, ..., child[<V1.key], child[<V2.key], ..., child[< Inf], size], where
* each child is another node, i.e., an Object[]. Thus, the value elements in a branch node are the
* first half of the array (minus one). In our implementation, each value must include its own key;
* we access these via Comparator, rather than directly.
*
* So we can quickly distinguish between leaves and branches, we require that leaf nodes are always an odd number
* of elements (padded with a null, if necessary), and branches are always an even number of elements.
*
* BTrees are immutable; updating one returns a new tree that reuses unmodified nodes.
*
* There are no references back to a parent node from its children. (This would make it impossible to re-use
* subtrees when modifying the tree, since the modified tree would need new parent references.)
* Instead, we store these references in a Path as needed when navigating the tree.
*/
// The maximum fan factor used for B-Trees
static final int FAN_SHIFT;
static
{
int fanfactor = 32;
if (System.getProperty("cassandra.btree.fanfactor") != null)
fanfactor = Integer.parseInt(System.getProperty("cassandra.btree.fanfactor"));
int shift = 1;
while (1 << shift < fanfactor)
shift += 1;
FAN_SHIFT = shift;
}
// NB we encode Path indexes as Bytes, so this needs to be less than Byte.MAX_VALUE / 2
static final int FAN_FACTOR = 1 << FAN_SHIFT;
static final int MINIMAL_NODE_SIZE = FAN_FACTOR >> 1;
// An empty BTree Leaf - which is the same as an empty BTree
static final Object[] EMPTY_LEAF = new Object[1];
// An empty BTree branch - used only for internal purposes in Modifier
static final Object[] EMPTY_BRANCH = new Object[] { null, new int[0] };
// direction of iteration
public static enum Dir
{
ASC, DESC;
public Dir invert() { return this == ASC ? DESC : ASC; }
public static Dir asc(boolean asc) { return asc ? ASC : DESC; }
public static Dir desc(boolean desc) { return desc ? DESC : ASC; }
}
public static Object[] empty()
{
return EMPTY_LEAF;
}
public static Object[] singleton(Object value)
{
return new Object[] { value };
}
public static Object[] build(K[] source, int size, UpdateFunction updateF)
{
return buildInternal(source, size, updateF);
}
public static Object[] build(Collection source, UpdateFunction updateF)
{
return buildInternal(source, source.size(), updateF);
}
public static Object[] build(Iterable source, UpdateFunction updateF)
{
return buildInternal(source, -1, updateF);
}
/**
* Creates a BTree containing all of the objects in the provided collection
*
* @param source the items to build the tree with. MUST BE IN STRICTLY ASCENDING ORDER.
* @param size the size of the source iterable
* @return a btree representing the contents of the provided iterable
*/
public static Object[] build(Iterable source, int size, UpdateFunction updateF)
{
if (size < 0)
throw new IllegalArgumentException(Integer.toString(size));
return buildInternal(source, size, updateF);
}
/**
* As build(), except:
* @param size < 0 if size is unknown
*/
private static Object[] buildInternal(Iterable source, int size, UpdateFunction updateF)
{
if ((size >= 0) & (size < FAN_FACTOR))
{
if (size == 0)
return EMPTY_LEAF;
// pad to odd length to match contract that all leaf nodes are odd
V[] values = (V[]) new Object[size | 1];
{
int i = 0;
for (K k : source)
values[i++] = updateF.apply(k);
}
if (updateF != UpdateFunction.noOp())
updateF.allocated(ObjectSizes.sizeOfArray(values));
return values;
}
TreeBuilder builder = TreeBuilder.newInstance();
Object[] btree = builder.build(source, updateF, size);
return btree;
}
/**
* As build(), except:
* @param size < 0 if size is unknown
*/
private static Object[] buildInternal(K[] source, int size, UpdateFunction updateF)
{
if ((size >= 0) & (size < FAN_FACTOR))
{
if (size == 0)
return EMPTY_LEAF;
// pad to odd length to match contract that all leaf nodes are odd
V[] values = (V[]) new Object[size | 1];
{
for (int i = 0; i < size; i++)
values[i] = updateF.apply(source[i]);
}
if (updateF != UpdateFunction.noOp())
updateF.allocated(ObjectSizes.sizeOfArray(values));
return values;
}
TreeBuilder builder = TreeBuilder.newInstance();
Object[] btree = builder.build(source, updateF, size);
return btree;
}
public static Object[] update(Object[] btree,
Comparator comparator,
Collection updateWith,
UpdateFunction updateF)
{
return update(btree, comparator, updateWith, updateWith.size(), updateF);
}
/**
* Returns a new BTree with the provided collection inserting/replacing as necessary any equal items
*
* @param btree the tree to update
* @param comparator the comparator that defines the ordering over the items in the tree
* @param updateWith the items to either insert / update. MUST BE IN STRICTLY ASCENDING ORDER.
* @param updateWithLength then number of elements in updateWith
* @param updateF the update function to apply to any pairs we are swapping, and maybe abort early
* @param
* @return
*/
public static Object[] update(Object[] btree,
Comparator comparator,
Iterable updateWith,
int updateWithLength,
UpdateFunction updateF)
{
if (isEmpty(btree))
return build(updateWith, updateWithLength, updateF);
TreeBuilder builder = TreeBuilder.newInstance();
btree = builder.update(btree, comparator, updateWith, updateF);
return btree;
}
public static Object[] merge(Object[] tree1, Object[] tree2, Comparator super K> comparator, UpdateFunction updateF)
{
if (size(tree1) < size(tree2))
{
Object[] tmp = tree1;
tree1 = tree2;
tree2 = tmp;
}
return update(tree1, comparator, new BTreeSet<>(tree2, comparator), updateF);
}
public static Iterator iterator(Object[] btree)
{
return iterator(btree, Dir.ASC);
}
public static Iterator iterator(Object[] btree, Dir dir)
{
return new BTreeSearchIterator<>(btree, null, dir);
}
public static Iterator iterator(Object[] btree, int lb, int ub, Dir dir)
{
return new BTreeSearchIterator<>(btree, null, dir, lb, ub);
}
public static Iterable iterable(Object[] btree)
{
return iterable(btree, Dir.ASC);
}
public static Iterable iterable(Object[] btree, Dir dir)
{
return () -> iterator(btree, dir);
}
public static Iterable iterable(Object[] btree, int lb, int ub, Dir dir)
{
return () -> iterator(btree, lb, ub, dir);
}
/**
* Returns an Iterator over the entire tree
*
* @param btree the tree to iterate over
* @param dir direction of iteration
* @param
* @return
*/
public static BTreeSearchIterator slice(Object[] btree, Comparator super K> comparator, Dir dir)
{
return new BTreeSearchIterator<>(btree, comparator, dir);
}
/**
* @param btree the tree to iterate over
* @param comparator the comparator that defines the ordering over the items in the tree
* @param start the beginning of the range to return, inclusive (in ascending order)
* @param end the end of the range to return, exclusive (in ascending order)
* @param dir if false, the iterator will start at the last item and move backwards
* @return an Iterator over the defined sub-range of the tree
*/
public static BTreeSearchIterator slice(Object[] btree, Comparator super K> comparator, K start, K end, Dir dir)
{
return slice(btree, comparator, start, true, end, false, dir);
}
/**
* @param btree the tree to iterate over
* @param comparator the comparator that defines the ordering over the items in the tree
* @param start low bound of the range
* @param startInclusive inclusivity of lower bound
* @param end high bound of the range
* @param endInclusive inclusivity of higher bound
* @param dir direction of iteration
* @return an Iterator over the defined sub-range of the tree
*/
public static BTreeSearchIterator slice(Object[] btree, Comparator super K> comparator, K start, boolean startInclusive, K end, boolean endInclusive, Dir dir)
{
int inclusiveLowerBound = max(0,
start == null ? Integer.MIN_VALUE
: startInclusive ? ceilIndex(btree, comparator, start)
: higherIndex(btree, comparator, start));
int inclusiveUpperBound = min(size(btree) - 1,
end == null ? Integer.MAX_VALUE
: endInclusive ? floorIndex(btree, comparator, end)
: lowerIndex(btree, comparator, end));
return new BTreeSearchIterator<>(btree, comparator, dir, inclusiveLowerBound, inclusiveUpperBound);
}
/**
* @return the item in the tree that sorts as equal to the search argument, or null if no such item
*/
public static V find(Object[] node, Comparator super V> comparator, V find)
{
while (true)
{
int keyEnd = getKeyEnd(node);
int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator);
if (i >= 0)
return (V) node[i];
if (isLeaf(node))
return null;
i = -1 - i;
node = (Object[]) node[keyEnd + i];
}
}
/**
* Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable.
* Finds and replaces the item provided by index in the tree.
*/
public static void replaceInSitu(Object[] tree, int index, V replace)
{
// WARNING: if semantics change, see also InternalCursor.seekTo, which mirrors this implementation
if ((index < 0) | (index >= size(tree)))
throw new IndexOutOfBoundsException(index + " not in range [0.." + size(tree) + ")");
while (!isLeaf(tree))
{
final int[] sizeMap = getSizeMap(tree);
int boundary = Arrays.binarySearch(sizeMap, index);
if (boundary >= 0)
{
// exact match, in this branch node
assert boundary < sizeMap.length - 1;
tree[boundary] = replace;
return;
}
boundary = -1 -boundary;
if (boundary > 0)
{
assert boundary < sizeMap.length;
index -= (1 + sizeMap[boundary - 1]);
}
tree = (Object[]) tree[getChildStart(tree) + boundary];
}
assert index < getLeafKeyEnd(tree);
tree[index] = replace;
}
/**
* Modifies the provided btree directly. THIS SHOULD NOT BE USED WITHOUT EXTREME CARE as BTrees are meant to be immutable.
* Finds and replaces the provided item in the tree. Both should sort as equal to each other (although this is not enforced)
*/
public static void replaceInSitu(Object[] node, Comparator super V> comparator, V find, V replace)
{
while (true)
{
int keyEnd = getKeyEnd(node);
int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator);
if (i >= 0)
{
assert find == node[i];
node[i] = replace;
return;
}
if (isLeaf(node))
throw new NoSuchElementException();
i = -1 - i;
node = (Object[]) node[keyEnd + i];
}
}
/**
* Honours result semantics of {@link Arrays#binarySearch}, as though it were performed on the tree flattened into an array
* @return index of item in tree, or (-(insertion point) - 1) if not present
*/
public static int findIndex(Object[] node, Comparator super V> comparator, V find)
{
int lb = 0;
while (true)
{
int keyEnd = getKeyEnd(node);
int i = Arrays.binarySearch((V[]) node, 0, keyEnd, find, comparator);
boolean exact = i >= 0;
if (isLeaf(node))
return exact ? lb + i : i - lb;
if (!exact)
i = -1 - i;
int[] sizeMap = getSizeMap(node);
if (exact)
return lb + sizeMap[i];
else if (i > 0)
lb += sizeMap[i - 1] + 1;
node = (Object[]) node[keyEnd + i];
}
}
/**
* @return the value at the index'th position in the tree, in tree order
*/
public static V findByIndex(Object[] tree, int index)
{
// WARNING: if semantics change, see also InternalCursor.seekTo, which mirrors this implementation
if ((index < 0) | (index >= size(tree)))
throw new IndexOutOfBoundsException(index + " not in range [0.." + size(tree) + ")");
Object[] node = tree;
while (true)
{
if (isLeaf(node))
{
int keyEnd = getLeafKeyEnd(node);
assert index < keyEnd;
return (V) node[index];
}
int[] sizeMap = getSizeMap(node);
int boundary = Arrays.binarySearch(sizeMap, index);
if (boundary >= 0)
{
// exact match, in this branch node
assert boundary < sizeMap.length - 1;
return (V) node[boundary];
}
boundary = -1 -boundary;
if (boundary > 0)
{
assert boundary < sizeMap.length;
index -= (1 + sizeMap[boundary - 1]);
}
node = (Object[]) node[getChildStart(node) + boundary];
}
}
/* since we have access to binarySearch semantics within indexOf(), we can use this to implement
* lower/upper/floor/higher very trivially
*
* this implementation is *not* optimal; it requires two logarithmic traversals, although the second is much cheaper
* (having less height, and operating over only primitive arrays), and the clarity is compelling
*/
public static int lowerIndex(Object[] btree, Comparator super V> comparator, V find)
{
int i = findIndex(btree, comparator, find);
if (i < 0)
i = -1 -i;
return i - 1;
}
public static V lower(Object[] btree, Comparator super V> comparator, V find)
{
int i = lowerIndex(btree, comparator, find);
return i >= 0 ? findByIndex(btree, i) : null;
}
public static int floorIndex(Object[] btree, Comparator super V> comparator, V find)
{
int i = findIndex(btree, comparator, find);
if (i < 0)
i = -2 -i;
return i;
}
public static V floor(Object[] btree, Comparator super V> comparator, V find)
{
int i = floorIndex(btree, comparator, find);
return i >= 0 ? findByIndex(btree, i) : null;
}
public static int higherIndex(Object[] btree, Comparator super V> comparator, V find)
{
int i = findIndex(btree, comparator, find);
if (i < 0) i = -1 -i;
else i++;
return i;
}
public static V higher(Object[] btree, Comparator super V> comparator, V find)
{
int i = higherIndex(btree, comparator, find);
return i < size(btree) ? findByIndex(btree, i) : null;
}
public static int ceilIndex(Object[] btree, Comparator super V> comparator, V find)
{
int i = findIndex(btree, comparator, find);
if (i < 0)
i = -1 -i;
return i;
}
public static V ceil(Object[] btree, Comparator super V> comparator, V find)
{
int i = ceilIndex(btree, comparator, find);
return i < size(btree) ? findByIndex(btree, i) : null;
}
// UTILITY METHODS
// get the upper bound we should search in for keys in the node
static int getKeyEnd(Object[] node)
{
if (isLeaf(node))
return getLeafKeyEnd(node);
else
return getBranchKeyEnd(node);
}
// get the last index that is non-null in the leaf node
static int getLeafKeyEnd(Object[] node)
{
int len = node.length;
return node[len - 1] == null ? len - 1 : len;
}
// return the boundary position between keys/children for the branch node
// == number of keys, as they are indexed from zero
static int getBranchKeyEnd(Object[] branchNode)
{
return (branchNode.length / 2) - 1;
}
/**
* @return first index in a branch node containing child nodes
*/
static int getChildStart(Object[] branchNode)
{
return getBranchKeyEnd(branchNode);
}
/**
* @return last index + 1 in a branch node containing child nodes
*/
static int getChildEnd(Object[] branchNode)
{
return branchNode.length - 1;
}
/**
* @return number of children in a branch node
*/
static int getChildCount(Object[] branchNode)
{
return branchNode.length / 2;
}
/**
* @return the size map for the branch node
*/
static int[] getSizeMap(Object[] branchNode)
{
return (int[]) branchNode[getChildEnd(branchNode)];
}
/**
* @return the size map for the branch node
*/
static int lookupSizeMap(Object[] branchNode, int index)
{
return getSizeMap(branchNode)[index];
}
// get the size from the btree's index (fails if not present)
public static int size(Object[] tree)
{
if (isLeaf(tree))
return getLeafKeyEnd(tree);
int length = tree.length;
// length - 1 == getChildEnd == getPositionOfSizeMap
// (length / 2) - 1 == getChildCount - 1 == position of full tree size
// hard code this, as will be used often;
return ((int[]) tree[length - 1])[(length / 2) - 1];
}
public static long sizeOfStructureOnHeap(Object[] tree)
{
long size = ObjectSizes.sizeOfArray(tree);
if (isLeaf(tree))
return size;
for (int i = getChildStart(tree) ; i < getChildEnd(tree) ; i++)
size += sizeOfStructureOnHeap((Object[]) tree[i]);
return size;
}
// returns true if the provided node is a leaf, false if it is a branch
static boolean isLeaf(Object[] node)
{
return (node.length & 1) == 1;
}
public static boolean isEmpty(Object[] tree)
{
return tree == EMPTY_LEAF;
}
public static int depth(Object[] tree)
{
int depth = 1;
while (!isLeaf(tree))
{
depth++;
tree = (Object[]) tree[getKeyEnd(tree)];
}
return depth;
}
/**
* Fill the target array with the contents of the provided subtree, in ascending order, starting at targetOffset
* @param tree source
* @param target array
* @param targetOffset offset in target array
* @return number of items copied (size of tree)
*/
public static int toArray(Object[] tree, Object[] target, int targetOffset)
{
return toArray(tree, 0, size(tree), target, targetOffset);
}
public static int toArray(Object[] tree, int treeStart, int treeEnd, Object[] target, int targetOffset)
{
if (isLeaf(tree))
{
int count = treeEnd - treeStart;
System.arraycopy(tree, treeStart, target, targetOffset, count);
return count;
}
int newTargetOffset = targetOffset;
int childCount = getChildCount(tree);
int childOffset = getChildStart(tree);
for (int i = 0 ; i < childCount ; i++)
{
int childStart = treeIndexOffsetOfChild(tree, i);
int childEnd = treeIndexOfBranchKey(tree, i);
if (childStart <= treeEnd && childEnd >= treeStart)
{
newTargetOffset += toArray((Object[]) tree[childOffset + i], max(0, treeStart - childStart), min(childEnd, treeEnd) - childStart,
target, newTargetOffset);
if (treeStart <= childEnd && treeEnd > childEnd) // this check will always fail for the non-existent key
target[newTargetOffset++] = tree[i];
}
}
return newTargetOffset - targetOffset;
}
// simple class for avoiding duplicate transformation work
private static class FiltrationTracker implements Function
{
final Function super V, ? extends V> wrapped;
int index;
boolean failed;
private FiltrationTracker(Function super V, ? extends V> wrapped)
{
this.wrapped = wrapped;
}
public V apply(V i)
{
V o = wrapped.apply(i);
if (o != null) index++;
else failed = true;
return o;
}
}
/**
* Takes a btree and transforms it using the provided function, filtering out any null results.
* The result of any transformation must sort identically wrt the other results as their originals
*/
public static Object[] transformAndFilter(Object[] btree, Function super V, ? extends V> function)
{
if (isEmpty(btree))
return btree;
// TODO: can be made more efficient
FiltrationTracker wrapped = new FiltrationTracker<>(function);
Object[] result = transformAndFilter(btree, wrapped);
if (!wrapped.failed)
return result;
// take the already transformed bits from the head of the partial result
Iterable head = iterable(result, 0, wrapped.index - 1, Dir.ASC);
// and concatenate with remainder of original tree, with transformation applied
Iterable remainder = iterable(btree, wrapped.index + 1, size(btree) - 1, Dir.ASC);
remainder = filter(transform(remainder, function), (x) -> x != null);
Iterable build = concat(head, remainder);
return buildInternal(build, -1, UpdateFunction.noOp());
}
private static Object[] transformAndFilter(Object[] btree, FiltrationTracker function)
{
Object[] result = btree;
boolean isLeaf = isLeaf(btree);
int childOffset = isLeaf ? Integer.MAX_VALUE : getChildStart(btree);
int limit = isLeaf ? getLeafKeyEnd(btree) : btree.length - 1;
for (int i = 0 ; i < limit ; i++)
{
// we want to visit in iteration order, so we visit our key nodes inbetween our children
int idx = isLeaf ? i : (i / 2) + (i % 2 == 0 ? childOffset : 0);
Object current = btree[idx];
Object updated = idx < childOffset ? function.apply((V) current) : transformAndFilter((Object[]) current, function);
if (updated != current)
{
if (result == btree)
result = btree.clone();
result[idx] = updated;
}
if (function.failed)
return result;
}
return result;
}
public static boolean equals(Object[] a, Object[] b)
{
return size(a) == size(b) && Iterators.elementsEqual(iterator(a), iterator(b));
}
public static int hashCode(Object[] btree)
{
// we can't just delegate to Arrays.deepHashCode(),
// because two equivalent trees may be represented by differently shaped trees
int result = 1;
for (Object v : iterable(btree))
result = 31 * result + Objects.hashCode(v);
return result;
}
/**
* tree index => index of key wrt all items in the tree laid out serially
*
* This version of the method permits requesting out-of-bounds indexes, -1 and size
* @param root to calculate tree index within
* @param keyIndex root-local index of key to calculate tree-index
* @return the number of items preceding the key in the whole tree of root
*/
public static int treeIndexOfKey(Object[] root, int keyIndex)
{
if (isLeaf(root))
return keyIndex;
int[] sizeMap = getSizeMap(root);
if ((keyIndex >= 0) & (keyIndex < sizeMap.length))
return sizeMap[keyIndex];
// we support asking for -1 or size, so that we can easily use this for iterator bounds checking
if (keyIndex < 0)
return -1;
return sizeMap[keyIndex - 1] + 1;
}
/**
* @param keyIndex node-local index of the key to calculate index of
* @return keyIndex; this method is here only for symmetry and clarity
*/
public static int treeIndexOfLeafKey(int keyIndex)
{
return keyIndex;
}
/**
* @param root to calculate tree-index within
* @param keyIndex root-local index of key to calculate tree-index of
* @return the number of items preceding the key in the whole tree of root
*/
public static int treeIndexOfBranchKey(Object[] root, int keyIndex)
{
return lookupSizeMap(root, keyIndex);
}
/**
* @param root to calculate tree-index within
* @param childIndex root-local index of *child* to calculate tree-index of
* @return the number of items preceding the child in the whole tree of root
*/
public static int treeIndexOffsetOfChild(Object[] root, int childIndex)
{
if (childIndex == 0)
return 0;
return 1 + lookupSizeMap(root, childIndex - 1);
}
public static Builder builder(Comparator super V> comparator)
{
return new Builder<>(comparator);
}
public static Builder builder(Comparator super V> comparator, int initialCapacity)
{
return new Builder<>(comparator, initialCapacity);
}
public static class Builder
{
// a user-defined bulk resolution, to be applied manually via resolve()
public static interface Resolver
{
// can return a different output type to input, so long as sort order is maintained
// if a resolver is present, this method will be called for every sequence of equal inputs
// even those with only one item
Object resolve(Object[] array, int lb, int ub);
}
// a user-defined resolver that is applied automatically on encountering two duplicate values
public static interface QuickResolver
{
// can return a different output type to input, so long as sort order is maintained
// if a resolver is present, this method will be called for every sequence of equal inputs
// even those with only one item
V resolve(V a, V b);
}
Comparator super V> comparator;
Object[] values;
int count;
boolean detected = true; // true if we have managed to cheaply ensure sorted (+ filtered, if resolver == null) as we have added
boolean auto = true; // false if the user has promised to enforce the sort order and resolve any duplicates
QuickResolver quickResolver;
protected Builder(Comparator super V> comparator)
{
this(comparator, 16);
}
protected Builder(Comparator super V> comparator, int initialCapacity)
{
if (initialCapacity == 0)
initialCapacity = 16;
this.comparator = comparator;
this.values = new Object[initialCapacity];
}
@VisibleForTesting
public Builder()
{
this.values = new Object[16];
}
private Builder(Builder builder)
{
this.comparator = builder.comparator;
this.values = Arrays.copyOf(builder.values, builder.values.length);
this.count = builder.count;
this.detected = builder.detected;
this.auto = builder.auto;
this.quickResolver = builder.quickResolver;
}
/**
* Creates a copy of this {@code Builder}.
* @return a copy of this {@code Builder}.
*/
public Builder copy()
{
return new Builder<>(this);
}
public Builder setQuickResolver(QuickResolver quickResolver)
{
this.quickResolver = quickResolver;
return this;
}
public void reuse()
{
reuse(comparator);
}
public void reuse(Comparator super V> comparator)
{
this.comparator = comparator;
Arrays.fill(values, null);
count = 0;
detected = true;
}
public Builder auto(boolean auto)
{
this.auto = auto;
return this;
}
public Builder add(V v)
{
if (count == values.length)
values = Arrays.copyOf(values, count * 2);
Object[] values = this.values;
int prevCount = this.count++;
values[prevCount] = v;
if (auto && detected && prevCount > 0)
{
V prev = (V) values[prevCount - 1];
int c = comparator.compare(prev, v);
if (c == 0 && auto)
{
count = prevCount;
if (quickResolver != null)
values[prevCount - 1] = quickResolver.resolve(prev, v);
}
else if (c > 0)
{
detected = false;
}
}
return this;
}
public Builder addAll(Collection add)
{
if (auto && add instanceof SortedSet && equalComparators(comparator, ((SortedSet) add).comparator()))
{
// if we're a SortedSet, permit quick order-preserving addition of items
// if we collect all duplicates, don't bother as merge will necessarily be more expensive than sorting at end
return mergeAll(add, add.size());
}
detected = false;
if (values.length < count + add.size())
values = Arrays.copyOf(values, max(count + add.size(), count * 2));
for (V v : add)
values[count++] = v;
return this;
}
private static boolean equalComparators(Comparator> a, Comparator> b)
{
return a == b || (isNaturalComparator(a) && isNaturalComparator(b));
}
private static boolean isNaturalComparator(Comparator> a)
{
return a == null || a == Comparator.naturalOrder() || a == Ordering.natural();
}
// iter must be in sorted order!
private Builder mergeAll(Iterable add, int addCount)
{
assert auto;
// ensure the existing contents are in order
autoEnforce();
int curCount = count;
// we make room for curCount * 2 + addCount, so that we can copy the current values to the end
// if necessary for continuing the merge, and have the new values directly after the current value range
if (values.length < curCount * 2 + addCount)
values = Arrays.copyOf(values, max(curCount * 2 + addCount, curCount * 3));
if (add instanceof BTreeSet)
{
// use btree set's fast toArray method, to append directly
((BTreeSet) add).toArray(values, curCount);
}
else
{
// consider calling toArray() and System.arraycopy
int i = curCount;
for (V v : add)
values[i++] = v;
}
return mergeAll(addCount);
}
private Builder mergeAll(int addCount)
{
Object[] a = values;
int addOffset = count;
int i = 0, j = addOffset;
int curEnd = addOffset, addEnd = addOffset + addCount;
// save time in cases where we already have a subset, by skipping dir
while (i < curEnd && j < addEnd)
{
V ai = (V) a[i], aj = (V) a[j];
// in some cases, such as Columns, we may have identity supersets, so perform a cheap object-identity check
int c = ai == aj ? 0 : comparator.compare(ai, aj);
if (c > 0)
break;
else if (c == 0)
{
if (quickResolver != null)
a[i] = quickResolver.resolve(ai, aj);
j++;
}
i++;
}
if (j == addEnd)
return this; // already a superset of the new values
// otherwise, copy the remaining existing values to the very end, freeing up space for merge result
int newCount = i;
System.arraycopy(a, i, a, addEnd, count - i);
curEnd = addEnd + (count - i);
i = addEnd;
while (i < curEnd && j < addEnd)
{
V ai = (V) a[i];
V aj = (V) a[j];
// could avoid one comparison if we cared, but would make this ugly
int c = comparator.compare(ai, aj);
if (c == 0)
{
Object newValue = quickResolver == null ? ai : quickResolver.resolve(ai, aj);
a[newCount++] = newValue;
i++;
j++;
}
else
{
a[newCount++] = c < 0 ? a[i++] : a[j++];
}
}
// exhausted one of the inputs; fill in remainder of the other
if (i < curEnd)
{
System.arraycopy(a, i, a, newCount, curEnd - i);
newCount += curEnd - i;
}
else if (j < addEnd)
{
if (j != newCount)
System.arraycopy(a, j, a, newCount, addEnd - j);
newCount += addEnd - j;
}
count = newCount;
return this;
}
public boolean isEmpty()
{
return count == 0;
}
public Builder reverse()
{
assert !auto;
int mid = count / 2;
for (int i = 0 ; i < mid ; i++)
{
Object t = values[i];
values[i] = values[count - (1 + i)];
values[count - (1 + i)] = t;
}
return this;
}
public Builder sort()
{
Arrays.sort((V[]) values, 0, count, comparator);
return this;
}
// automatically enforce sorted+filtered
private void autoEnforce()
{
if (!detected && count > 1)
{
sort();
int prevIdx = 0;
V prev = (V) values[0];
for (int i = 1 ; i < count ; i++)
{
V next = (V) values[i];
if (comparator.compare(prev, next) != 0)
values[++prevIdx] = prev = next;
else if (quickResolver != null)
values[prevIdx] = prev = quickResolver.resolve(prev, next);
}
count = prevIdx + 1;
}
detected = true;
}
public Builder resolve(Resolver resolver)
{
if (count > 0)
{
int c = 0;
int prev = 0;
for (int i = 1 ; i < count ; i++)
{
if (comparator.compare((V) values[i], (V) values[prev]) != 0)
{
values[c++] = resolver.resolve((V[]) values, prev, i);
prev = i;
}
}
values[c++] = resolver.resolve((V[]) values, prev, count);
count = c;
}
return this;
}
public Object[] build()
{
if (auto)
autoEnforce();
return BTree.build(values, count, UpdateFunction.noOp());
}
}
/** simple static wrapper to calls to cmp.compare() which checks if either a or b are Special (i.e. represent an infinity) */
static int compare(Comparator cmp, Object a, Object b)
{
if (a == b)
return 0;
if (a == NEGATIVE_INFINITY | b == POSITIVE_INFINITY)
return -1;
if (b == NEGATIVE_INFINITY | a == POSITIVE_INFINITY)
return 1;
return cmp.compare((V) a, (V) b);
}
static Object POSITIVE_INFINITY = new Object();
static Object NEGATIVE_INFINITY = new Object();
public static boolean isWellFormed(Object[] btree, Comparator extends Object> cmp)
{
return isWellFormed(cmp, btree, true, NEGATIVE_INFINITY, POSITIVE_INFINITY);
}
private static boolean isWellFormed(Comparator> cmp, Object[] node, boolean isRoot, Object min, Object max)
{
if (cmp != null && !isNodeWellFormed(cmp, node, min, max))
return false;
if (isLeaf(node))
{
if (isRoot)
return node.length <= FAN_FACTOR + 1;
return node.length >= FAN_FACTOR / 2 && node.length <= FAN_FACTOR + 1;
}
final int keyCount = getBranchKeyEnd(node);
if ((!isRoot && keyCount < FAN_FACTOR / 2) || keyCount > FAN_FACTOR + 1)
return false;
int type = 0;
int size = -1;
int[] sizeMap = getSizeMap(node);
// compare each child node with the branch element at the head of this node it corresponds with
for (int i = getChildStart(node); i < getChildEnd(node) ; i++)
{
Object[] child = (Object[]) node[i];
size += size(child) + 1;
if (sizeMap[i - getChildStart(node)] != size)
return false;
Object localmax = i < node.length - 2 ? node[i - getChildStart(node)] : max;
if (!isWellFormed(cmp, child, false, min, localmax))
return false;
type |= isLeaf(child) ? 1 : 2;
min = localmax;
}
return type < 3; // either all leaves or all branches but not a mix
}
private static boolean isNodeWellFormed(Comparator> cmp, Object[] node, Object min, Object max)
{
Object previous = min;
int end = getKeyEnd(node);
for (int i = 0; i < end; i++)
{
Object current = node[i];
if (compare(cmp, previous, current) >= 0)
return false;
previous = current;
}
return compare(cmp, previous, max) < 0;
}
/**
* Simple method to walk the btree forwards or reversed and apply a function to each element
*
* Public method
*
*/
public static void apply(Object[] btree, Consumer function, boolean reversed)
{
if (reversed)
applyReverse(btree, function, null);
else
applyForwards(btree, function, null);
}
/**
* Simple method to walk the btree forwards or reversed and apply a function till a stop condition is reached
*
* Public method
*
*/
public static void apply(Object[] btree, Consumer function, Predicate stopCondition, boolean reversed)
{
if (reversed)
applyReverse(btree, function, stopCondition);
else
applyForwards(btree, function, stopCondition);
}
/**
* Simple method to walk the btree forwards and apply a function till a stop condition is reached
*
* Private method
*
* @param btree
* @param function
* @param stopCondition
*/
private static boolean applyForwards(Object[] btree, Consumer function, Predicate stopCondition)
{
boolean isLeaf = isLeaf(btree);
int childOffset = isLeaf ? Integer.MAX_VALUE : getChildStart(btree);
int limit = isLeaf ? getLeafKeyEnd(btree) : btree.length - 1;
for (int i = 0 ; i < limit ; i++)
{
// we want to visit in iteration order, so we visit our key nodes inbetween our children
int idx = isLeaf ? i : (i / 2) + (i % 2 == 0 ? childOffset : 0);
Object current = btree[idx];
if (idx < childOffset)
{
V castedCurrent = (V) current;
if (stopCondition != null && stopCondition.apply(castedCurrent))
return true;
function.accept(castedCurrent);
}
else
{
if (applyForwards((Object[]) current, function, stopCondition))
return true;
}
}
return false;
}
/**
* Simple method to walk the btree in reverse and apply a function till a stop condition is reached
*
* Private method
*
* @param btree
* @param function
* @param stopCondition
*/
private static boolean applyReverse(Object[] btree, Consumer function, Predicate stopCondition)
{
boolean isLeaf = isLeaf(btree);
int childOffset = isLeaf ? 0 : getChildStart(btree);
int limit = isLeaf ? getLeafKeyEnd(btree) : btree.length - 1;
for (int i = limit - 1, visited = 0; i >= 0 ; i--, visited++)
{
int idx = i;
// we want to visit in reverse iteration order, so we visit our children nodes inbetween our keys
if (!isLeaf)
{
int typeOffset = visited / 2;
if (i % 2 == 0)
{
// This is a child branch. Since children are in the second half of the array, we must
// adjust for the key's we've visited along the way
idx += typeOffset;
}
else
{
// This is a key. Since the keys are in the first half of the array and we are iterating
// in reverse we subtract the childOffset and adjust for children we've walked so far
idx = i - childOffset + typeOffset;
}
}
Object current = btree[idx];
if (isLeaf || idx < childOffset)
{
V castedCurrent = (V) current;
if (stopCondition != null && stopCondition.apply(castedCurrent))
return true;
function.accept(castedCurrent);
}
else
{
if (applyReverse((Object[]) current, function, stopCondition))
return true;
}
}
return false;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy