/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Nov 15, 2006
*/
package com.bigdata.btree;
import java.io.PrintStream;
import java.lang.ref.Reference;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.concurrent.FutureTask;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.atomic.AtomicReferenceArray;
import org.apache.log4j.Level;
import com.bigdata.BigdataStatics;
import com.bigdata.btree.AbstractBTree.ChildMemoizer;
import com.bigdata.btree.AbstractBTree.LoadChildRequest;
import com.bigdata.btree.data.DefaultNodeCoder;
import com.bigdata.btree.data.INodeData;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.btree.raba.MutableKeyBuffer;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.journal.Journal;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.concurrent.LatchedExecutor;
import com.bigdata.util.concurrent.Memoizer;
import cutthecrap.utils.striterators.EmptyIterator;
import cutthecrap.utils.striterators.Expander;
import cutthecrap.utils.striterators.SingleValueIterator;
import cutthecrap.utils.striterators.Striterator;
/**
*
* A non-leaf node.
*
* Per-child min/max revision timestamps and timestamp revision filtering
*
* In order to track the min/max timestamp on the {@link Node} we must also
* track the min/max timestamp for each direct child of that {@link Node}. While
* this inflates the size of the {@link INodeData} data record considerably, we
* are required to track those per-child data in order to avoid a scan of the
* children when we need to recompute the min/max timestamp for the {@link Node}
* . The IO latency costs of that scan are simply not acceptable, especially for
* large branching factors. The min/max timestamp on the {@link Node} is ONLY
* used for filtering iterators based on a desired tuple revision range. This is
* why the choice to support tuple revision filters is its own configuration
* option.
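*
* A hedged sketch of the pruning test (the per-child accessor names below
* are hypothetical; the actual values live in the {@link INodeData}
* record). An iterator constrained to the half-open revision range
* [fromRevision:toRevision) can skip an entire subtree when the child's
* timestamp range cannot overlap the requested range:
*
* <pre>{@code
* // Hypothetical per-child accessors; prune the subtree iff its
* // revision range cannot intersect [fromRevision:toRevision).
* final long cmin = getChildMinimumVersionTimestamp(childIndex);
* final long cmax = getChildMaximumVersionTimestamp(childIndex);
* if (cmax < fromRevision || cmin >= toRevision) {
*     continue; // do not materialize this child.
* }
* }</pre>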
*
* FIXME An alternative to per-child min/max tuple revision timestamps would be
* the concurrent materialization of the direct children. These data are only
* mutable for B+Tree instances with relatively small branching factors. They
* are immutable for the {@link IndexSegment}. However, the per-{@link Node}
* min/max timestamps also make the tuple revision filtering more efficient
* we can prune the search before we materialize the child.
*
* @author Bryan Thompson
*/
public class Node extends AbstractNode<Node> implements INodeData {
/**
* The data record. {@link MutableNodeData} is used for all mutation
* operations. {@link ReadOnlyNodeData} is used when the {@link Node} is
* made persistent. A read-only data record is automatically converted into
* a {@link MutableNodeData} record when a mutation operation is requested.
*
* Note: This is package private in order to expose it to {@link Leaf}.
*
* @todo consider volatile and private for {@link Node#data} and
* {@link Leaf#data} with accessors and setters at package private
* where necessary.
*/
INodeData data;
/**
*
* Weak references to child nodes (may be nodes or leaves). The capacity of
* this array is m, where m is the {@link #branchingFactor}. Valid indices
* are in [0:nkeys+1] since nchildren := nkeys+1 for a {@link Node}.
*
*
* This array is dimensioned to one more than the maximum capacity so that
* the child reference corresponding to the key that causes overflow and
* forces the split may be inserted. This greatly simplifies the logic for
* computing the split point and performing the split.
*
*
* Note: This should not be marked as volatile. Volatile does not make the
* elements of the array volatile, only the array reference itself. The
* field would be final except that we clear the reference when stealing the
* array or deleting the node.
*
*
* @todo document why package private (AbstractBTree.loadChild uses this but
* maybe that method could be moved to Node).
*/
transient/* volatile */Reference<AbstractNode<?>>[] childRefs;
// /**
// * An array of objects used to provide a per-child lock in order to allow
// * maximum concurrency in {@link #getChild(int)}.
// *
// * Note: this array is not allocated for a mutable btree since the caller
// * will be single threaded and locking is therefore not required in
// * {@link #getChild(int)}. We only need locking for read-only btrees since
// * they allow concurrent readers.
// *
// * @todo There is a LOT of overhead to creating all these objects. They
// * are only really useful in high concurrent read scenarios such as highly
// * concurrent query against an index. However, those situations
// * typically have a lot of buffered B+Tree nodes so the in-memory
// * footprint for this array is not really worth it.
// *
// * This might be viable if we had an AtomicBitVector class since we
// * could typically get by with 1-2 longs of data in that case.
// */
// transient private Object[] childLocks;
/**
* Return <code>((branchingFactor + 1) >> 1) - 1</code>, which is the
* minimum #of keys for a {@link Node}.
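*
* For example, with branchingFactor := 32 the minimum #of children is
* (32 + 1) >> 1 == 16, so minKeys() == 15 while maxKeys() == 31.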
*/
protected final int minKeys() {
// /*
// * Compute the minimum #of children/values. This is the same whether
// * this is a Node or a Leaf.
// */
// final int minChildren = (btree.branchingFactor + 1) >> 1;
//
// // this.minKeys = isLeaf() ? minChildren : minChildren - 1;
//
// return minChildren - 1;
return btree.minChildren - 1;
}
/**
* Return <code>branchingFactor - 1</code>, which is the maximum #of keys
* for a {@link Node}.
*/
protected final int maxKeys() {
// // The maximum #of keys is easy to compute.
// this.maxKeys = isLeaf() ? branchingFactor : branchingFactor - 1;
return btree.branchingFactor - 1;
}
/**
* Range check a child index.
*
* @param index
* The index of a child in [0:nkeys+1].
* @return <code>true</code>
*
* @throws IndexOutOfBoundsException
* if the index is not in the legal range.
*/
final protected boolean rangeCheckChildIndex(final int index) {
if (index < 0 || index > data.getKeyCount() + 1)
throw new IndexOutOfBoundsException();
return true;
}
/**
* Return the {@link Reference} for the child. This is part of the internal
* API.
*
* @param index
* The index of the child.
*
* @return The {@link Reference} for that child. This will be
*         <code>null</code> if the child is not buffered. Note that a
*         non-<code>null</code> return MAY still return <code>null</code>
*         from {@link Reference#get()}.
*/
public final Reference<AbstractNode<?>> getChildRef(final int index) {
assert rangeCheckChildIndex(index);
return childRefs[index];
}
final public INodeData getDelegate() {
return data;
}
/*
* INodeData.
*/
/**
* Always returns <code>false</code>.
*/
final public boolean isLeaf() {
return false;
}
/**
* The result depends on the backing {@link INodeData} implementation. The
* {@link Node} will be mutable when it is first created and is made
* immutable when it is persisted. If there is a mutation operation, the
* backing {@link INodeData} is automatically converted into a mutable
* instance.
*/
final public boolean isReadOnly() {
return data.isReadOnly();
}
final public boolean isCoded() {
return data.isCoded();
}
final public AbstractFixedByteArrayBuffer data() {
return data.data();
}
final public int getKeyCount() {
return data.getKeyCount();
}
final public IRaba getKeys() {
return data.getKeys();
}
final public int getChildCount() {
return data.getChildCount();
}
public final long getSpannedTupleCount() {
return data.getSpannedTupleCount();
}
public final long getChildAddr(final int index) {
return data.getChildAddr(index);
}
final public long getChildEntryCount(final int index) {
return data.getChildEntryCount(index);
}
final public long getMaximumVersionTimestamp() {
return data.getMaximumVersionTimestamp();
}
public long getMinimumVersionTimestamp() {
return data.getMinimumVersionTimestamp();
}
final public boolean hasVersionTimestamps() {
return data.hasVersionTimestamps();
}
/**
* Apply the delta to the per-child count for this node and then recursively
* ascend up the tree applying the delta to all ancestors of this node. This
* is invoked solely by the methods that add and remove entries from a leaf
* as those are the only methods that change the #of entries spanned by a
* parent node. Methods that split, merge, or redistribute keys have a net
* zero effect on the #of entries spanned by the parent.
*
* This also updates {@link #getMinimumVersionTimestamp()} and
* {@link #getMaximumVersionTimestamp()} iff version timestamps are enabled
* and the child has a minimum (maximum) version timestamp LT the minimum
* (GT the maximum) version timestamp of this node.
*
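* A hedged sketch of the bottom-up propagation (the counts are
* hypothetical; the mutable fields live in {@link MutableNodeData}):
*
* <pre>{@code
* // Before: this node spans 10 tuples, childEntryCounts = [4, 6].
* // A leaf under child[1] gains one tuple, so delta == +1:
* updateEntryCount(child, 1);
* // After: childEntryCounts = [4, 7], nentries = 11, and the same
* // +1 has been applied recursively to every ancestor of this node.
* }</pre>
*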
* @param child
* The direct child.
* @param delta
* The change in the #of spanned children.
*/
final protected void updateEntryCount(final AbstractNode<?> child,
final long delta) {
final int index = getIndexOf(child);
assert !isReadOnly();
final MutableNodeData data = (MutableNodeData) this.data;
data.childEntryCounts[index] += delta;
data.nentries += delta;
if (data.childEntryCounts[index] <= 0) {
// There must be at least one tuple spanned by the child.
throw new RuntimeException();
}
if (data.nentries <= 0) {
// There must be at least one tuple spanned by this node.
throw new RuntimeException();
}
if (child.hasVersionTimestamps()) {
final long cmin = child.getMinimumVersionTimestamp();
final long cmax = child.getMaximumVersionTimestamp();
if (cmin < data.minimumVersionTimestamp)
data.minimumVersionTimestamp = cmin;
if (cmax > data.maximumVersionTimestamp)
data.maximumVersionTimestamp = cmax;
}
if (parent != null) {
parent.get().updateEntryCount(this, delta);
}
}
/**
* Update the {@link #getMinimumVersionTimestamp()} and
* {@link #getMaximumVersionTimestamp()}. This is invoked when the min/max
* in the child has changed without a corresponding change to the #of
* spanned tuples. E.g., when an insert() causes a tuple to be updated
* rather than added.
*
* @param child
* The direct child.
*/
final protected void updateMinMaxVersionTimestamp(
final AbstractNode<?> child) {
assert !isReadOnly();
final MutableNodeData data = (MutableNodeData) this.data;
final long cmin = child.getMinimumVersionTimestamp();
final long cmax = child.getMaximumVersionTimestamp();
if (cmin < data.minimumVersionTimestamp)
data.minimumVersionTimestamp = cmin;
if (cmax > data.maximumVersionTimestamp)
data.maximumVersionTimestamp = cmax;
if (parent != null) {
parent.get().updateMinMaxVersionTimestamp(child);
}
}
/**
* De-serialization constructor.
*
* Note: The de-serialization constructor (and ONLY the de-serialization
* constructor) ALWAYS creates a clean node. Therefore the {@link PO#dirty}
* flag passed up from this constructor has the value <code>false</code>.
*
* @param btree
* The tree to which the node belongs.
* @param addr
* The persistent identity of the node.
* @param data
* The data record.
*/
@SuppressWarnings("unchecked")
protected Node(final AbstractBTree btree, final long addr,
final INodeData data) {
super(btree, false /* The node is NOT dirty */);
final int branchingFactor = btree.branchingFactor;
// assert branchingFactor >= Options.MIN_BRANCHING_FACTOR;
//
// assert nkeys < branchingFactor;
assert data != null;
// assert childAddr.length == branchingFactor + 1;
//
// assert childEntryCounts.length == branchingFactor + 1;
setIdentity(addr);
this.data = data;
// this.nentries = nentries;
//
// // this.nkeys = keys.size();
//
// this.keys = keys; // steal reference.
//
// this.childAddr = childAddr;
//
// this.childEntryCounts = childEntryCounts;
childRefs = new Reference[branchingFactor + 1];
// childLocks = newChildLocks(btree, data.getKeys().size());
// // must clear the dirty flag since we just de-serialized this node.
// setDirty(false);
}
/**
* Used to create a new node when a node is split.
*/
@SuppressWarnings("unchecked")
protected Node(final BTree btree) {
super(btree, true /* dirty */);
final int branchingFactor = btree.branchingFactor;
data = new MutableNodeData(branchingFactor, btree.getIndexMetadata()
.getVersionTimestamps());
childRefs = new Reference[branchingFactor + 1];
// childLocks = newChildLocks(btree, 0/* nkeys */);
// nentries = 0;
//
// keys = new MutableKeyBuffer(branchingFactor);
//
// childAddr = new long[branchingFactor + 1];
//
// childEntryCounts = new int[branchingFactor + 1];
}
/**
* This constructor is used when splitting a root {@link Leaf} or a root
* {@link Node}. The resulting node has a single child reference and NO
* keys. The #of entries allocated to the child is the #of remaining in that
* child after the split.
*
* @param btree
* A mutable btree.
* @param oldRoot
* The node that was previously the root of the tree (either a
* node or a leaf).
* @param nentries
* The #of entries spanned by the oldRoot before the
* split.
*/
@SuppressWarnings("unchecked")
protected Node(final BTree btree, final AbstractNode oldRoot,
final long nentries) {
super(btree, true /* dirty */);
// Verify that this is the root.
assert oldRoot == btree.root;
// The old root must be dirty when it is being split.
assert oldRoot.isDirty();
final int branchingFactor = btree.branchingFactor;
childRefs = new Reference[branchingFactor + 1];
// Note: child locks are only for read-only btrees and this ctor is only
// for a split of the root leaf so we never use child locks for this
// case.
assert !btree.isReadOnly();
// childLocks = null; // newChildLocks(btree);
final MutableNodeData data;
this.data = data = new MutableNodeData(branchingFactor, btree
.getIndexMetadata().getVersionTimestamps());
// #of entries spanned by the old root _before_ this split.
data.nentries = nentries;
// keys = new MutableKeyBuffer(branchingFactor);
//
// childAddr = new long[branchingFactor + 1];
//
// childEntryCounts = new int[branchingFactor + 1];
/*
* Replace the root node on the tree.
*/
final boolean wasDirty = btree.root.dirty;
btree.root = this;
if (!wasDirty) {
btree.fireDirtyEvent();
}
/*
* Attach the old root to this node.
*/
childRefs[0] = oldRoot.self;
// childRefs[0] = btree.newRef(oldRoot);
// #of entries from the old root _after_ the split.
data.childEntryCounts[0] = (oldRoot.isLeaf() ? ((Leaf) oldRoot)
.getKeyCount() : ((Node) oldRoot).getSpannedTupleCount());
// dirtyChildren.add(oldRoot);
oldRoot.parent = this.self;
// oldRoot.parent = btree.newRef(this);
/*
* The tree is deeper since we just split the root node.
*/
btree.height++;
final int requiredQueueCapacity = 2 * (btree.height + 2);
if (requiredQueueCapacity > btree.writeRetentionQueue.capacity()) {
/*
* FIXME Automatically extend the hard reference queue capacity such
* that (a) this constraint described below is never violated; and
* (b) the percentage of distinct nodes (or nodes and leaves) on the
* queue compared to the nodes (or nodes and leaves) in the tree
* either remains constant or does not degrade overly quickly.
*
* Constraint: The capacity of the hard reference queue needs to be
* at least the height of the tree + 2 (or twice that if we touch
* nodes on the way down and then on the way up again) so that a
* split can not cause any dirty node in the path to the leaf or its
* sibling to be evicted while we are processing that split. Note
* that there is no chance of the nodes being swept by the JVM since
* there are hard references to them on the stack, but if they are
* evicted then the copy-on-write mechanism could be defeated since
* a dirty parent could be evicted, forcing the child to become
* immutable right when we are trying to operate on it.
*
* Performance: Given a constant capacity for the hard reference
* queue the percentage of distinct nodes on the queue out of the
* total #of nodes in the tree will drop as the tree grows. This
* translates directly into more (de-)serialization of nodes and
* more disk seeks (if the store is not fully buffered).
*
* However, simply increasing the queue capacity will cause more
* data to be buffered pending serialization and therefore will
* increase the commit latency. Instead, we should probably
* introduce a secondary hard reference retention mechanism based
* more directly on the #of nodes that we want to retain in memory.
* One approach is to say N-1 or N-2 levels of the tree, and that
* might be a good heuristic. However, the selection of the specific
* nodes that are retained should probably be somewhat dynamic so
* that we do not force the JVM to hold onto references for nodes
* that we are never or only rarely visiting given the actual access
* patterns in the application.
*/
throw new UnsupportedOperationException(
"writeRetentionQueue: capacity="
+ btree.writeRetentionQueue.capacity()
+ ", but height=" + btree.height);
}
btree.nnodes++;
btree.getBtreeCounters().rootsSplit++;
if (BTree.INFO || BigdataStatics.debug) {
// Note: nnodes and nleaves might not reflect rightSibling yet.
final String msg = "BTree: increasing height: name="
+ btree.metadata.getName() + ", height=" + btree.height
+ ", m=" + btree.getBranchingFactor() + ", nentries="
+ btree.nentries;
if (BTree.INFO)
BTree.log.info(msg);
if (BigdataStatics.debug)
System.err.println(msg);
}
}
/**
* Copy constructor.
*
* @param src
* The source node (must be immutable).
*
* @param triggeredByChildId
* The persistent identity of the child that triggered the copy
* constructor. This should be the immutable child NOT the one
* that was already cloned. This information is used to avoid
* stealing the original child since we already made a copy of
* it. It is {@link #NULL} when this information is not
* available, e.g., when the copyOnWrite action is triggered by a
* join() and we are cloning the sibling before we redistribute a
* key to the node/leaf on which the join was invoked.
*
* @todo We could perhaps replace this with the conversion of the
* INodeData:data field to a mutable field since the code which
* invokes copyOnWrite() no longer needs to operate on a new Node
* reference. However, I need to verify that nothing else depends on
* the new Node, e.g., the dirty flag, addr, etc.
*
* @todo Can't we just test to see if the child already has this node as its
* parent reference and then skip it? If so, then that would remove a
* troublesome parameter from the API.
*/
protected Node(final Node src, final long triggeredByChildId) {
super(src);
assert !src.isDirty();
assert src.isReadOnly();
// assert src.isPersistent();
/*
* Steal/clone the data record.
*
* Note: The copy constructor is invoked when we need to begin mutation
* operations on an immutable node or leaf, so make sure that the data
* record is mutable.
*/
assert src.data != null;
this.data = src.isReadOnly() ? new MutableNodeData(src
.getBranchingFactor(), src.data) : src.data;
assert this.data != null;
// clear reference on source.
src.data = null;
/*
* Steal strongly reachable unmodified children by setting their parent
* fields to the new node. Stealing the child means that it MUST NOT be
* used by its previous ancestor (our source node for this copy).
*/
childRefs = src.childRefs;
src.childRefs = null;
// childLocks = src.childLocks; src.childLocks = null;
final int nkeys = data.getKeyCount();
for (int i = 0; i <= nkeys; i++) {
final AbstractNode child = childRefs[i] == null ? null
: childRefs[i].get();
/*
* Note: Both child.identity and triggeredByChildId will always be
* 0L for a transient B+Tree since we never assign persistent
* identity to the nodes and leaves. Therefore [child.identity !=
* triggeredByChildId] will fail for ALL children, including the
* trigger, and therefore fail to set the parent on any of them. The
* [btree.store==null] test handles this condition and always steals
* the child, setting its parent to this new node.
*
* FIXME It is clear that testing on child.identity is broken in
* some other places for the transient store.
*/
if (child != null
&& (btree.store == null || child.identity != triggeredByChildId)) {
/*
* Copy on write should never trigger for a dirty node and only
* a dirty node can have dirty children.
*/
assert !child.isDirty();
// Steal the child.
child.parent = this.self;
// child.parent = btree.newRef(this);
// // Keep a reference to the clean child.
// childRefs[i] = new WeakReference(child);
}
}
}
@Override
public void delete() {
super.delete();
// clear state.
childRefs = null;
// childLocks = null;
data = null;
}
/**
* This method must be invoked on a parent to notify the parent that the
* child has become persistent. The method scans the weak references for the
* children, finds the index for the specified child, and then sets the
* corresponding index in the array of child keys. The child is then removed
* from the dirty list for this node.
*
* @param child
* The child.
*
* @exception IllegalStateException
* if the child is not persistent.
* @exception IllegalArgumentException
* if the child is not a child of this node.
*/
void setChildAddr(final AbstractNode> child) {
if (!child.isPersistent()) {
// The child does not have persistent identity.
throw new IllegalStateException();
}
final int i = getIndexOf(child);
assert !isReadOnly();
((MutableNodeData) data).childAddr[i] = child.getIdentity();
// if (!dirtyChildren.remove(child)) {
//
// throw new AssertionError("Child was not on dirty list.");
//
// }
}
/**
* Invoked by {@link #copyOnWrite()} to clear the persistent address for a
* child on a cloned parent and set the reference to the cloned child.
*
* @param oldChildAddr
* The persistent address of the old child. The entries to be
* updated are located based on this argument. It is an error if
* this address is not found in the list of child addresses for
* this {@link Node}.
* @param newChild
* The reference to the new child.
*/
void replaceChildRef(final long oldChildAddr, final AbstractNode newChild) {
assert oldChildAddr != NULL || btree.store == null;
assert newChild != null;
// This node MUST have been cloned as a pre-condition, so it can not
// be persistent.
assert !isPersistent();
assert !isReadOnly();
// The newChild MUST have been cloned and therefore MUST NOT be
// persistent.
assert !newChild.isPersistent();
assert !newChild.isReadOnly();
final MutableNodeData data = (MutableNodeData) this.data;
final int nkeys = getKeyCount();
// Scan for location in weak references.
for (int i = 0; i <= nkeys; i++) {
if (data.childAddr[i] == oldChildAddr) {
/*
* Note: We can not check anything which depends on
* oldChild.data since that field was cleared when we cloned the
* oldChild to obtain a mutable node/leaf. Since
* oldChild.isPersistent() does not capture the correct
* semantics for a transient B+Tree (we are interested if the
* node was read-only), this entire section has been commented
* out.
*/
// if (true) {
//
// /*
// * Do some paranoia checks.
// */
//
// final AbstractNode oldChild = childRefs[i] != null ?
// childRefs[i]
// .get() : null;
//
// if (oldChild != null) {
//
// // assert oldChild.isPersistent();
//
// // assert !dirtyChildren.contains(oldChild);
//
// }
//
// }
// Clear the old key.
data.childAddr[i] = NULL;
// remove from cache and free the oldChildAddr if the Strategy
// supports it
if (btree.storeCache!=null) {
// remove from cache.
btree.storeCache.remove(oldChildAddr);
}
// free the oldChildAddr if the Strategy supports it
btree.deleteNodeOrLeaf(oldChildAddr);
// System.out.println("Deleting " + oldChildAddr);
// Stash reference to the new child.
// childRefs[i] = btree.newRef(newChild);
childRefs[i] = newChild.self;
// // Add the new child to the dirty list.
// dirtyChildren.add(newChild);
// Set the parent on the new child.
// newChild.parent = btree.newRef(this);
newChild.parent = this.self;
return;
}
}
// System.err.println("this: "); dump(Level.DEBUG,System.err);
// System.err.println("newChild: ");
// newChild.dump(Level.DEBUG,System.err);
throw new IllegalArgumentException("Not our child : oldChildAddr="
+ oldChildAddr);
}
@Override
public Tuple insert(final byte[] key, final byte[] value,
final boolean delete, final boolean putIfAbsent, final long timestamp, final Tuple tuple) {
assert !deleted;
if (btree.debug)
assertInvariants();
btree.touch(this);
final int childIndex = findChild(key);
final AbstractNode<?> child = getChild(childIndex);
return child.insert(key, value, delete, putIfAbsent, timestamp, tuple);
}
@Override
public Tuple lookup(final byte[] key, final Tuple tuple) {
assert !deleted;
if (btree.debug)
assertInvariants();
btree.touch(this);
final int childIndex = findChild(key);
final AbstractNode<?> child = getChild(childIndex);
return child.lookup(key, tuple);
}
@Override
public Tuple remove(final byte[] key, final Tuple tuple) {
assert !deleted;
if (btree.debug)
assertInvariants();
btree.touch(this);
final int childIndex = findChild(key);
final AbstractNode<?> child = getChild(childIndex);
return child.remove(key, tuple);
}
@Override
public long indexOf(final byte[] key) {
assert !deleted;
btree.touch(this);
final int childIndex = findChild(key);
final AbstractNode<?> child = getChild(childIndex);
/*
 * Compute the running total of the #of entries spanned by the children
 * before [childIndex]. The recursive result from the child is then
 * adjusted by this offset.
 */
long offset = 0;
for (int i = 0; i < childIndex; i++) {
offset += getChildEntryCount(i);
}
// recursive invocation, eventually grounds out on a leaf.
long ret = child.indexOf(key);
if (ret < 0) {
// obtain "insert position".
ret = -ret - 1;
// add in the offset.
ret += offset;
// convert back to the "not found" form.
return (-(ret) - 1);
}
// add in the offset.
ret += offset;
// return the index position of the key relative to this node.
return ret;
}
/**
* Range check an index into the keys of the node.
*
* @param entryIndex
* The key index.
*
* @return <code>true</code>
*
* @throws IndexOutOfBoundsException
* if the index is LT ZERO (0) -or- GTE the
* {@link #getSpannedTupleCount()}
*/
protected boolean rangeCheckSpannedTupleIndex(final long entryIndex) {
final long nentries = data.getSpannedTupleCount();
if (entryIndex < 0)
throw new IndexOutOfBoundsException("negative: " + entryIndex);
if (entryIndex >= nentries) {
throw new IndexOutOfBoundsException("too large: entryIndex="
+ entryIndex + ", but nentries=" + nentries);
}
return true;
}
/**
* Recursive search for the key at the specified entry index.
*
* @param entryIndex
* The index of the entry (relative to the first entry spanned by
* this node).
*
* @return The key at that entry index.
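*
* For example, with childEntryCounts = [ 4, 6 ], keyAt(5) skips the
* four entries spanned by child[0] and recurses into child[1] with
* remaining := 1.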
*/
@Override
final public byte[] keyAt(final long entryIndex) {
/* assert */rangeCheckSpannedTupleIndex(entryIndex);
// index of the child that spans the desired entry.
int childIndex = 0;
// corrects the #of spanned entries by #skipped over.
long remaining = entryIndex;
final int nkeys = getKeyCount();
// search for child spanning the desired entry index.
for (; childIndex <= nkeys; childIndex++) {
final long nspanned = getChildEntryCount(childIndex);
if (remaining < nspanned) {
// found the child index spanning the desired entry.
break;
}
remaining -= nspanned;
assert remaining >= 0;
}
final AbstractNode<?> child = getChild(childIndex);
return child.keyAt(remaining);
}
/**
* Recursive search for the value at the specified entry index.
*
* @param entryIndex
* The index of the entry (relative to the first entry spanned by
* this node).
*
* @return The value at that entry index.
*/
@Override
final public void valueAt(final long entryIndex, final Tuple tuple) {
// Note: Made non-conditional since unit tests verify this.
/* assert */rangeCheckSpannedTupleIndex(entryIndex);
// index of the child that spans the desired entry.
int childIndex = 0;
// corrects the #of spanned entries by #skipped over.
long remaining = entryIndex;
final int nkeys = getKeyCount();
// search for child spanning the desired entry index.
for (; childIndex <= nkeys; childIndex++) {
final long nspanned = getChildEntryCount(childIndex);
if (remaining < nspanned) {
// found the child index spanning the desired entry.
break;
}
remaining -= nspanned;
assert remaining >= 0;
}
final AbstractNode<?> child = getChild(childIndex);
child.valueAt(remaining, tuple);
}
/**
* Return the index of the child to be searched.
*
* The interpretation of the key index for a node is as follows. When
* searching nodes of the tree, we search for the index in keys[] of the
* first key value greater than or equal (GTE) to the probe key. If the
* match is equal, then we choose the child at index + 1. Otherwise we
* choose the child having the same index as the GTE key match. For example,
*
* <pre>
* keys[]  : [ 5  9 12 ]
* child[] : [ a  b  c  d ]
* </pre>
*
* A probe with keys up to <code>4</code> matches at index zero (0) and we
* choose the 1st child, a, which is at index zero (0).
*
* A probe whose key is <code>5</code> matches at index zero (0) exactly
* and we choose the child at <code>index + 1</code>, b, which is at index
* one (1).
*
* A probe with keys in [6:8] matches at index one (1) and we choose the
* 2nd child, b, which is at index one (1). A probe with <code>9</code>
* also matches at index one (1), but we choose <code>index+1</code>
* equals two (2) since this is an exact key match.
*
* A probe with keys in [10:11] matches at index two (2) and we choose the
* 3rd child, c, which is at index two (2). A probe with <code>12</code>
* also matches at index two (2), but we choose <code>index+1</code>
* equals three (3) since this is an exact key match.
*
* A probe with keys greater than 12 exceeds all keys in the node and always
* matches the last child in that node. In this case, d, which is at index
* three (3).
*
* Note that we never stop a search on a node, even when there is an exact
* match on a key. All values are stored in the leaves and we always descend
* until we reach the leaf in which a value for the key would be stored. A
* test on the keys of that leaf is then conclusive - either a value is
* stored in the leaf for that key or it is not stored in the tree.
*
* @param searchKey
* The probe key.
*
* @return The child to be searched next for that key.
*/
final protected int findChild(final byte[] searchKey) {
int childIndex = this.getKeys().search(searchKey);
if (childIndex >= 0) {
/*
* exact match - use the next child.
*/
return childIndex + 1;
} else {
/*
* There is no exact match on the key, so we convert the search
* result to find the insertion point. The insertion point is always
* an index whose current key (iff it is defined) is greater than
* the probe key.
*
* keys[] : [ 5 9 12 ]
*
* The insertion point for key == 4 is zero.
*
* The insertion point for key == 6 is one.
*
* etc.
*
* When the probe key is greater than any existing key, then the
* insertion point is nkeys. E.g., the insertion point for key == 20
* is 3.
*/
/*
* Convert the return by search to obtain the index of the child
* that covers this key (a non-negative integer).
*/
childIndex = -childIndex - 1;
return childIndex;
}
}
/**
*
* Split an over-capacity node (a node with <code>maxKeys+1</code> keys),
* creating a new rightSibling. The splitIndex is
* <code>(maxKeys+1)/2</code>. The key at the splitIndex is the
* separatorKey. Unlike when we split a {@link Leaf}, the separatorKey is
* lifted into the parent and does not appear in either this node or the
* rightSibling after the split. All keys and child references from
* <code>splitIndex+1</code> (inclusive) are moved to the new rightSibling.
* The child reference at <code>splitIndex</code> remains in this node.
*
*
* If this node is the root of the tree (no parent), then a new root
* {@link Node} is created without any keys and is made the parent of this
* node.
*
*
* In any case, we then insert( separatorKey, rightSibling ) into the parent
* node, which may cause the parent node itself to split.
*
*
* Note: splitting a node causes the entry counts for the relocated
* children to be reassigned to the new rightSibling, but it does NOT
* change the #of entries on the parent.
*
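* A hedged sketch with branchingFactor := 4 (so maxKeys() == 3 and the
* node is over capacity with 4 keys; the key values are hypothetical):
*
* <pre>
* before: keys [ 3 5 7 9 ]   children [ a b c d e ]   splitIndex == 2
* after : this         keys [ 3 5 ]   children [ a b c ]
*         rightSibling keys [ 9 ]     children [ d e ]
*         separatorKey 7 is inserted into the parent.
* </pre>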
*/
@Override
protected IAbstractNode split() {
assert isDirty(); // MUST be mutable.
assert getKeyCount() == maxKeys() + 1; // MUST be over capacity by one.
// cast to mutable implementation class.
final BTree btree = (BTree) this.btree;
btree.getBtreeCounters().nodesSplit++;
/*
* The #of entries spanned by this node _before_ the split.
*/
final long nentriesBeforeSplit = getSpannedTupleCount();
/*
* The #of child references. (this is +1 since the node is over capacity
* by one).
*/
final int nchildren = btree.branchingFactor + 1;
/*
* Index at which to split the leaf. This is (maxKeys+1)/2, but that can
* be simplified to branchingFactor/2 for a Node.
*/
// final int splitIndex = (maxKeys() + 1) / 2;
final int splitIndex = btree.branchingFactor >>> 1;
/*
* The key at that index, which becomes the separator key in the parent.
*
* Note: Unlike a leaf, we are not free to choose the shortest separator
* key in a node. This is because separator keys describe separations
* among the leaves of the tree. This issue is covered by Bayer's
* article on prefix trees.
*/
final byte[] separatorKey = getKeys().get(splitIndex);
// Create the new rightSibling node. It will be mutable.
final Node rightSibling = new Node(btree);
// Tunnel through to the mutable objects.
final MutableNodeData data = (MutableNodeData) this.data;
final MutableNodeData sdata = (MutableNodeData) rightSibling.data;
final MutableKeyBuffer keys = data.keys;
final MutableKeyBuffer skeys = sdata.keys;
if (DEBUG) {
log.debug("this=" + this + ", nkeys=" + getKeyCount()
+ ", splitIndex=" + splitIndex + ", separatorKey="
+ keyAsString(separatorKey));
// if(DEBUG) dump(Level.DEBUG,System.err);
}
/*
* copy keys and values to the new rightSibling.
*/
int j = 0;
// // #of spanned entries being moved to the new rightSibling.
// int nentriesMoved = 0;
for (int i = splitIndex + 1; i < nchildren; i++, j++) {
if (i + 1 < nchildren) {
/*
* Note: keys[nchildren-1] is undefined.
*/
// rightSibling.setKey(j, getKey(i));
rightSibling.copyKey(j, this.getKeys(), i);
}
rightSibling.childRefs[j] = childRefs[i];
sdata.childAddr[j] = data.childAddr[i];
final long childEntryCount = data.childEntryCounts[i];
sdata.childEntryCounts[j] = childEntryCount;
sdata.nentries += childEntryCount;
data.nentries -= childEntryCount;
// nentriesMoved += childEntryCounts[i];
final AbstractNode tmp = (childRefs[i] == null ? null
: childRefs[i].get());
if (tmp != null) {
/*
* The child node is in memory.
*
* Update its parent reference.
*/
// tmp.parent = btree.newRef(rightSibling);
tmp.parent = rightSibling.self;
}
/*
* Clear out the old keys and values, including keys[splitIndex]
* which is being moved to the parent.
*/
if (i + 1 < nchildren) {
keys.keys[i] = null;
/* nkeys--; */keys.nkeys--; // one less key here.
/* rightSibling.nkeys++; */skeys.nkeys++; // one more key there.
}
childRefs[i] = null;
data.childAddr[i] = NULL;
data.childEntryCounts[i] = 0;
}
/*
* Clear the key that is being move into the parent.
*/
keys.keys[splitIndex] = null;
/* nkeys--; */keys.nkeys--;
Node p = getParent();
if (p == null) {
/*
* Use a special constructor to split the root. The result is a new
* node with zero keys and one child (this node).
*/
p = new Node(btree, this, nentriesBeforeSplit);
} else {
assert !p.isReadOnly();
// this node now has fewer entries
((MutableNodeData) p.data).childEntryCounts[p.getIndexOf(this)] -= sdata.nentries;
}
/*
* insert(separatorKey,rightSibling) into the parent node. This may
* cause the parent node itself to split.
*/
p.insertChild(separatorKey, rightSibling);
btree.nnodes++;
// Return the high node.
return rightSibling;
}
/**
* Redistributes a key from the specified sibling into this node in order to
* bring this node up to the minimum #of keys. This also updates a separator
* key in the parent for the right most of (this, sibling).
*
* When a key is redistributed from a sibling, the key in the sibling is
* rotated into the parent where it replaces the current separatorKey and
* that separatorKey is brought down into this node. The child corresponding
* to the key is simply moved from the sibling into this node (rather than
* rotating it through the parent).
*
* While redistribution changes the #of entries spanned by the node and the
* sibling and therefore must update {@link #childEntryCounts} on the shared
* parent, it does not change the #of entries spanned by the parent.
*
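* A hedged sketch of a rotation from a right sibling (the keys are
* hypothetical; the separatorKey in the parent is 7):
*
* <pre>
* before: this [ 3 ]     parent [ .. 7 .. ]   rightSibling [ 9 12 ]
* after : this [ 3 7 ]   parent [ .. 9 .. ]   rightSibling [ 12 ]
* (the first child of the rightSibling moves to this node)
* </pre>
*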
* @param sibling
* A direct sibling of this node (either the left or right
* sibling). The sibling MUST be mutable.
*
* @todo change to redistribute keys until the node and the sibling have an
* equal number of keys. if the other sibling exists and is also
* materialized then redistribute keys with it as well? This is the
* B*-Tree variation - it has strengths and weaknesses. I do not think
* that it will be a big win here since the expected scenario is heavy
* writes, good retention of nodes, and de-serialization of nodes from
* a fully buffered journal (hence, no IOs even when we need to
* materialize a sibling).
*/
@Override
protected void redistributeKeys(final AbstractNode sibling,
final boolean isRightSibling) {
// the sibling of a Node must be a Node.
final Node s = (Node) sibling;
assert s != null;
final int nkeys = getKeyCount();
final int snkeys = s.getKeyCount();
assert dirty;
assert !deleted;
assert !isPersistent();
// verify that this node is deficient.
assert nkeys < minKeys();
// verify that this node is under minimum capacity by one key.
assert nkeys == minKeys() - 1;
// the sibling MUST be _OVER_ the minimum #of keys/values.
assert snkeys > minKeys();
assert s.dirty;
assert !s.deleted;
assert !s.isPersistent();
final Node p = getParent();
// children of the same node.
assert s.getParent() == p;
if (DEBUG) {
log.debug("this=" + this + ", sibling=" + sibling
+ ", rightSibling=" + isRightSibling);
// if(DEBUG) {
// System.err.println("this"); dump(Level.DEBUG,System.err);
// System.err.println("sibling");
// sibling.dump(Level.DEBUG,System.err);
// System.err.println("parent"); p.dump(Level.DEBUG,System.err);
// }
}
/*
* The index of this node in its parent. We note this before we start
* mucking with the keys.
*/
final int index = p.getIndexOf(this);
// Tunnel through to the mutable keys object.
final MutableKeyBuffer keys = (MutableKeyBuffer) this.getKeys();
final MutableKeyBuffer skeys = (MutableKeyBuffer) s.getKeys();
// Tunnel through to the mutable data records.
final MutableNodeData data = (MutableNodeData) this.data;
final MutableNodeData sdata = (MutableNodeData) s.data;
final MutableNodeData pdata = (MutableNodeData) p.data;
/*
* determine which node is earlier in the key ordering and get the index
* of the sibling.
*/
if (isRightSibling/* keys[nkeys-1] < s.keys[0] */) {
/*
* redistributeKeys(this,rightSibling). all we have to do is replace
* the separatorKey in the parent with the first key from the
* rightSibling and copy the old separatorKey from the parent to the
* end of the keys in this node. we then close up the hole that this
* left at index 0 in the rightSibling.
*/
// Move the first key/child from the rightSibling.
// setKey(nkeys, p.getKey(index)); // copy the separatorKey from the
// parent.
copyKey(nkeys, p.getKeys(), index); // copy the separatorKey from
// the parent.
// p.setKey(index, s.getKey(0)); // update the separatorKey from the
// rightSibling.
p.copyKey(index, s.getKeys(), 0); // update the separatorKey from
// the rightSibling.
childRefs[nkeys + 1] = s.childRefs[0]; // copy the child from the
// rightSibling.
data.childAddr[nkeys + 1] = sdata.childAddr[0];
// #of spanned entries being moved.
final long siblingChildCount = sdata.childEntryCounts[0];
data.childEntryCounts[nkeys + 1] = siblingChildCount;
final AbstractNode child = childRefs[nkeys + 1] == null ? null
: childRefs[nkeys + 1].get();
if (child != null) {
child.parent = this.self;
// child.parent = btree.newRef(this);
// if( child.isDirty() ) {
// if(!s.dirtyChildren.remove(child)) throw new
// AssertionError();
// if(!dirtyChildren.add(child)) throw new AssertionError();
// }
}
// copy down the keys on the right sibling to cover up the hole.
System.arraycopy(skeys.keys, 1, skeys.keys, 0, snkeys - 1);
System.arraycopy(s.childRefs, 1, s.childRefs, 0, snkeys);
System.arraycopy(sdata.childAddr, 1, sdata.childAddr, 0, snkeys);
System.arraycopy(sdata.childEntryCounts, 1, sdata.childEntryCounts,
0, snkeys);
// erase exposed key/value on rightSibling that is no longer
// defined.
skeys.keys[snkeys - 1] = null;
s.childRefs[snkeys] = null;
sdata.childAddr[snkeys] = NULL;
sdata.childEntryCounts[snkeys] = 0;
// update parent : N more entries spanned by this child.
pdata.childEntryCounts[index] += siblingChildCount;
// update parent : N fewer entries spanned by our right sibling.
pdata.childEntryCounts[index + 1] -= siblingChildCount;
// update #of entries spanned by this node.
data.nentries += siblingChildCount;
// update #of entries spanned by our rightSibling.
sdata.nentries -= siblingChildCount;
/* s.nkeys--; */skeys.nkeys--;
/* this.nkeys++; */keys.nkeys++;
if (btree.debug) {
assertInvariants();
s.assertInvariants();
}
} else {
/*
* redistributeKeys(leftSibling,this). all we have to do is copy
* down the keys in this node by one position, copy the separatorKey
* from the parent into the hole that we just opened up, copy the
* child data across, and update the separatorKey in the parent with
* the last key from the leftSibling.
*/
// copy down by one.
System.arraycopy(keys.keys, 0, keys.keys, 1, nkeys);
System.arraycopy(childRefs, 0, childRefs, 1, nkeys + 1);
System.arraycopy(data.childAddr, 0, data.childAddr, 1, nkeys + 1);
System.arraycopy(data.childEntryCounts, 0, data.childEntryCounts,
1, nkeys + 1);
// move the last key/child from the leftSibling to this node.
// setKey(0, p.getKey(index-1)); // copy the separatorKey from the
// parent.
copyKey(0, p.getKeys(), index - 1); // copy the separatorKey from
// the parent.
// p.setKey(index-1, s.getKey(s.nkeys-1)); // update the
// separatorKey
p.copyKey(index - 1, s.getKeys(), snkeys - 1); // update the
// separatorKey
childRefs[0] = s.childRefs[snkeys];
data.childAddr[0] = sdata.childAddr[snkeys];
final long siblingChildCount = sdata.childEntryCounts[snkeys];
data.childEntryCounts[0] = siblingChildCount;
final AbstractNode child = childRefs[0] == null ? null
: childRefs[0].get();
if (child != null) {
child.parent = this.self;
// child.parent = btree.newRef(this);
// if(child.isDirty()) {
// if(!s.dirtyChildren.remove(child)) throw new
// AssertionError();
// if(!dirtyChildren.add(child)) throw new AssertionError();
// }
}
skeys.keys[snkeys - 1] = null;
s.childRefs[snkeys] = null;
sdata.childAddr[snkeys] = NULL;
sdata.childEntryCounts[snkeys] = 0;
/* s.nkeys--; */skeys.nkeys--;
/* this.nkeys++; */keys.nkeys++;
// update parent : N more entries spanned by this child.
pdata.childEntryCounts[index] += siblingChildCount;
// update parent : N fewer entries spanned by our leftSibling.
pdata.childEntryCounts[index - 1] -= siblingChildCount;
// update #of entries spanned by this node.
data.nentries += siblingChildCount;
// update #of entries spanned by our leftSibling.
sdata.nentries -= siblingChildCount;
if (btree.debug) {
assertInvariants();
s.assertInvariants();
}
}
}
/**
* Merge the keys and values from the sibling into this node, delete the
* sibling from the store and remove the sibling from the parent. This will
* trigger recursive {@link AbstractNode#join()} if the parent node is now
* deficient. While this changes the #of entries spanned by the current node
* it does NOT affect the #of entries spanned by the parent.
*
* @param sibling
* A direct sibling of this node (does NOT need to be mutable).
* The sibling MUST have exactly the minimum #of keys.
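*
* A hedged sketch of merge(this, rightSibling) with hypothetical keys
* (the separatorKey in the parent is 7):
*
* <pre>
* before: this [ 3 ]       parent [ .. 7 .. ]   rightSibling [ 9 ]
* after : this [ 3 7 9 ]   (7 is pulled down; the rightSibling and its
*                           separatorKey are removed from the parent)
* </pre>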
*/
@Override
protected void merge(final AbstractNode sibling,
final boolean isRightSibling) {
// The sibling of a Node must be a Node.
final Node s = (Node) sibling;
assert s != null;
assert !s.deleted;
// Note: local var is updated within this method!
int nkeys = getKeyCount();
final int snkeys = s.getKeyCount();
// verify that this node is deficient.
assert nkeys < minKeys();
// verify that this node is under minimum capacity by one key.
assert nkeys == minKeys() - 1;
// the sibling MUST be at the minimum #of keys/values.
assert snkeys == s.minKeys();
final Node p = getParent();
// children of the same node.
assert s.getParent() == p;
if (DEBUG) {
log.debug("this=" + this + ", sibling=" + sibling
+ ", rightSibling=" + isRightSibling);
// if(DEBUG) {
// System.err.println("this"); dump(Level.DEBUG,System.err);
// System.err.println("sibling");
// sibling.dump(Level.DEBUG,System.err);
// System.err.println("parent"); p.dump(Level.DEBUG,System.err);
// }
}
final long siblingEntryCount = s.getSpannedTupleCount();
/*
* The index of this node in its parent. we note this before we start
* mucking with the keys.
*/
final int index = p.getIndexOf(this);
/*
* Tunnel through to the mutable data records.
*
* Note: We do not require the sibling to be mutable. If it is not, then
* we create a mutable copy of the sibling for use during this method.
*/
final MutableNodeData data = (MutableNodeData) this.data;
final MutableNodeData sdata = s.isReadOnly() ? new MutableNodeData(
getBranchingFactor(), s.data) : (MutableNodeData) s.data;
final MutableNodeData pdata = (MutableNodeData) p.data;
// Tunnel through to the mutable keys objects.
final MutableKeyBuffer keys = data.keys;
final MutableKeyBuffer skeys = sdata.keys;
// /*
// * Tunnel through to the mutable keys object.
// *
// * Note: since we do not require the sibling to be mutable we have to
// * test and convert the key buffer for the sibling to a mutable key
// * buffer if the sibling is immutable. Also note that the sibling MUST
// * have the minimum #of keys for a merge so we set the capacity of the
// * mutable key buffer to that when we have to convert the siblings
// keys
// * into mutable form in order to perform the merge operation.
// */
// final MutableKeyBuffer keys = (MutableKeyBuffer) this.getKeys();
// final MutableKeyBuffer skeys = (s.getKeys() instanceof
// MutableKeyBuffer ? (MutableKeyBuffer) s.getKeys()
// : new MutableKeyBuffer(getBranchingFactor(), s.getKeys()));
/*
* determine which node is earlier in the key ordering so that we know
* whether the sibling's keys will be inserted at the front of this
* nodes's keys or appended to this nodes's keys.
*/
if (isRightSibling/* keys[nkeys-1] < s.keys[0] */) {
/*
* merge( this, rightSibling ). the keys and values from this node
* will appear in their current position, the separatorKey from the
* parent is appended after the last key in this node, and the keys
* and children from the rightSibling are then appended as well.
*/
/*
* Get the separator key in the parent and append it to the keys in
* this node.
*/
// this.setKey(nkeys++, p.getKey(index));
this.copyKey(nkeys, p.getKeys(), index);
nkeys++; // update local var!
keys.nkeys++;
/*
* Copy in the keys and children from the sibling. Note that the
* children are copied to the position nkeys NOT nkeys+1 since the
* first child needs to appear at the same position as the
* separatorKey that we copied from the parent.
*/
System.arraycopy(skeys.keys, 0, keys.keys, nkeys, snkeys);
System.arraycopy(s.childRefs, 0, this.childRefs, nkeys, snkeys + 1);
System.arraycopy(sdata.childAddr, 0, data.childAddr, nkeys,
snkeys + 1);
System.arraycopy(sdata.childEntryCounts, 0, data.childEntryCounts,
nkeys, snkeys + 1);
// update parent on children
final Reference<Node> ref = (Reference<Node>) this.self;
// final Reference weakRef = btree.newRef(this);
for (int i = 0; i < snkeys + 1; i++) {
final AbstractNode child = s.childRefs[i] == null ? null
: s.childRefs[i].get();
if (child != null) {
child.parent = ref;
// if( child.isDirty() ) {
// // record hard references for dirty children.
// dirtyChildren.add(child);
// }
}
}
/*
* Adjust the #of keys in this leaf.
*/
/* this.nkeys += s.nkeys; */keys.nkeys += snkeys;
/*
* Note: in this case we have to replace the separator key for this
* node with the separator key for its right sibling.
*
* Note: This temporarily causes the duplication of a separator key
* in the parent. However, the separator key for the right sibling
* will be deleted when the sibling is removed from the parent
* below.
*/
// p.setKey(index, p.getKey(index+1));
p.copyKey(index, p.getKeys(), index + 1);
// reallocate spanned entries from the sibling to this node.
pdata.childEntryCounts[index] += siblingEntryCount;
data.nentries += siblingEntryCount;
if (btree.debug)
assertInvariants();
} else {
/*
* merge( leftSibling, this ). the keys and values from this node
* will be move down by sibling.nkeys+1 positions, the keys and
* values from the sibling will be copied into this node starting at
* index zero(0), and finally the separatorKey from the parent will
* be copied into the position after the last sibling key and before
* the position of the first key copied down in this node to avoid
* overwrite (that is, we copy the separatorKey from the parent to
* this.keys[s.nkeys]).
*
* Note: we do not update the separator key in the parent because
* the separatorKey will be removed when we remove the leftSibling
* from the parent at the end of this method.
*/
// move keys and children down by sibling.nkeys+1 positions.
System.arraycopy(keys.keys, 0, keys.keys, snkeys + 1, nkeys);
System.arraycopy(this.childRefs, 0, this.childRefs, snkeys + 1,
nkeys + 1);
System.arraycopy(data.childAddr, 0, data.childAddr, snkeys + 1,
nkeys + 1);
System.arraycopy(data.childEntryCounts, 0, data.childEntryCounts,
snkeys + 1, nkeys + 1);
// copy keys and values from the sibling to index 0 of this leaf.
System.arraycopy(skeys.keys, 0, keys.keys, 0, snkeys);
System.arraycopy(s.childRefs, 0, this.childRefs, 0, snkeys + 1);
System.arraycopy(sdata.childAddr, 0, data.childAddr, 0, snkeys + 1);
System.arraycopy(sdata.childEntryCounts, 0, data.childEntryCounts,
0, snkeys + 1);
// copy the separatorKey from the parent.
// this.setKey(s.nkeys, p.getKey(index - 1));
this.copyKey(snkeys, p.getKeys(), index - 1);
// update parent on children.
final Reference<Node> ref = (Reference<Node>) this.self;
// final Reference weakRef = btree.newRef(this);
for (int i = 0; i < snkeys + 1; i++) {
final AbstractNode child = s.childRefs[i] == null ? null
: s.childRefs[i].get();
if (child != null) {
child.parent = ref;
// if( child.isDirty() ) {
// // record hard references for dirty children.
// dirtyChildren.add(child);
// }
}
}
// we gain nkeys from the sibling and one key from the parent.
/* this.nkeys += s.nkeys + 1; */keys.nkeys += snkeys + 1;
// reallocate spanned entries from the sibling to this node.
pdata.childEntryCounts[index] += s.getSpannedTupleCount();
data.nentries += siblingEntryCount;
if (btree.debug)
assertInvariants();
}
/*
* The sibling is now empty. We need to detach the sibling from its
* parent node and then delete the sibling from the store.
*/
p.removeChild(s);
}
/**
* Invoked by {@link AbstractNode#split()} to insert a key and reference for
* a child created when another child of this node is split. This method has
* no effect on the #of entries spanned by the parent.
*
* Note: This operation is invoked only when a node or leaf is split. As
* such, it can not cause the min/max tuple revision timestamp on this
* {@link Node} to change since no tuples have been added or removed.
* However, this method does need to record the min/max for the new
* rightSibling.
*
* @param key
* The key on which the old node was split.
* @param child
* The new node.
*
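* A hedged sketch with hypothetical keys: given this node with keys
* [ 3 9 ] and a child that was split on separatorKey 7, invoking
* insertChild(7, rightSibling) yields keys [ 3 7 9 ] with the new
* child reference inserted at childRefs[2].
*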
* FIXME set min/max for the new child.
*/
protected void insertChild(final byte[] key, final AbstractNode child) {
if (btree.debug)
assertInvariants();
// assert key > IIndex.NEGINF && key < IIndex.POSINF;
assert child != null;
assert child.isDirty() : "child not dirty"; // always dirty since it was
// just created.
assert isDirty() : "not dirty"; // must be dirty to permit mutation.
/*
* Find the location where this key belongs. When a new node is created,
* the constructor sticks the child that demanded the split into
* childRef[0]. So, when nkeys == 0, we have nchildren == 1 and the key
* goes into keys[0], while the new child is inserted at childRefs[1] so
* that it does not step on the existing child.
*/
int childIndex = this.getKeys().search(key);
if (childIndex >= 0) {
/*
* The key is already present. This is an error.
*/
// btree.dump(Level.DEBUG,System.err);
throw new AssertionError("Split on existing key: childIndex="
+ childIndex + ", key=" + keyAsString(key) + "\nthis="
+ this + "\nchild=" + child);
}
final int nkeys = getKeyCount();
// Convert the position to obtain the insertion point.
childIndex = -childIndex - 1;
assert childIndex >= 0 && childIndex <= nkeys;
/*
* copy down per-key data.
*/
assert !isReadOnly();
final MutableNodeData data = (MutableNodeData) this.data;
final MutableKeyBuffer keys = (MutableKeyBuffer) this.getKeys();
final int length = nkeys - childIndex;
if (length > 0) {
System.arraycopy(keys.keys, childIndex, keys.keys,
(childIndex + 1), length);
}
/*
* copy down per-child data. #children == nkeys+1. child[0] is always
* defined.
*/
System.arraycopy(childRefs, childIndex + 1, childRefs, childIndex + 2,
length);
System.arraycopy(data.childAddr, childIndex + 1, data.childAddr,
childIndex + 2, length);
System.arraycopy(data.childEntryCounts, childIndex + 1,
data.childEntryCounts, childIndex + 2, length);
/*
* Insert key at index.
*/
// setKey(childIndex, key);
keys.keys[childIndex] = key;
// System.arraycopy(key, 0, keys.keys, childIndex, 1);
/*
* Insert child at index+1.
*/
childRefs[childIndex + 1] = child.self;
// childRefs[childIndex + 1] = btree.newRef(child);
data.childAddr[childIndex + 1] = NULL;
final long childEntryCount = // child.getSpannedTupleCount();
(child.isLeaf() ? ((Leaf) child).getKeyCount() : ((Node) child)
.getSpannedTupleCount());
data.childEntryCounts[childIndex + 1] = childEntryCount;
// if( parent != null ) {
//
// parent.get().updateEntryCount(this, childEntryCount);
//
// }
// nentries += childEntryCount;
// dirtyChildren.add(child);
child.parent = this.self;
// child.parent = btree.newRef(this);
/* nkeys++; */keys.nkeys++;
// Note: this tests the post-condition of the split.
if (keys.nkeys == maxKeys() + 1) {
/*
* The node is over capacity so we split the node, creating a new
* rightSibling and insert( separatorKey, rightSibling ) into the
* parent.
*/
final Node rightSibling = (Node) split();
// assert additional post-split invariants.
if (btree.debug) {
getParent().assertInvariants();
rightSibling.assertInvariants();
}
return;
}
if (btree.debug)
assertInvariants();
}
/**
* Return the left sibling. This is used by implementations of
* {@link AbstractNode#join()} to explore their left sibling.
*
* @param child
* The child (must be dirty).
* @param materialize
* When true, the left sibling will be materialized if it exists
* but is not resident.
*
* @return The left sibling or <code>null</code> if it does not exist -or-
*         if it is not materialized and <code>materialize == false</code>.
*         If the sibling is returned, then it is NOT guaranteed to be
*         mutable and the caller MUST invoke copy-on-write before
*         attempting to modify the returned sibling.
*/
protected AbstractNode getLeftSibling(final AbstractNode child,
final boolean materialize) {
final int i = getIndexOf(child);
if (i == 0) {
/*
* There is no left sibling for this child that is a child of the
* same parent.
*/
return null;
} else {
final int index = i - 1;
AbstractNode sibling = childRefs[index] == null ? null
: childRefs[index].get();
if (sibling == null) {
if (materialize) {
sibling = getChild(index);
}
} else {
btree.touch(sibling);
}
return sibling;
}
}
/**
* Return the right sibling of the specified child of a common parent. This
* method is invoked on the parent, passing in one child and returning its
* right sibling under that common parent (if any). This is used by
* implementations of {@link AbstractNode#join()} to explore their right
* sibling.
*
* @param child
* The child (must be dirty).
* @param materialize
* When true, the right sibling will be materialized if it
* exists but is not resident.
*
* @return The right sibling or <code>null</code> if it does not exist -or-
*         if it is not materialized and <code>materialize == false</code>.
*         If the sibling is returned, then it is NOT guaranteed to be
*         mutable and the caller MUST invoke copy-on-write before
*         attempting to modify the returned sibling.
*/
protected AbstractNode getRightSibling(final AbstractNode child,
final boolean materialize) {
final int i = getIndexOf(child);
if (i == getKeyCount()) {
/*
* There is no right sibling for this child that is a child of the
* same parent.
*/
return null;
} else {
final int index = i + 1;
AbstractNode sibling = childRefs[index] == null ? null
: childRefs[index].get();
if (sibling == null) {
if (materialize) {
sibling = getChild(index);
}
} else {
btree.touch(sibling);
}
return sibling;
}
}
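/*
 * Usage sketch (hypothetical caller, in the style of join()): a
 * deficient node first probes for a sibling that is already resident
 * before forcing IO to materialize one.
 *
 * final Node p = getParent();
 * AbstractNode s = p.getLeftSibling(this, false);  // no IO.
 * if (s == null)
 *     s = p.getRightSibling(this, false);          // no IO.
 * if (s == null)
 *     s = p.getRightSibling(this, true);           // forces IO.
 * // Note: [s] is NOT guaranteed to be mutable - the caller must use
 * // copy-on-write before modifying it.
 */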
/**
* Return the index of the child among the direct children of this node.
*
* @param child
* The child.
*
* @return The index in {@link #childRefs} where that child is found. This
* may also be used as an index into {@link #childAddr} and
* {@link #childEntryCounts}.
*
* @exception IllegalArgumentException
* iff child is not a child of this node.
*/
protected int getIndexOf(final AbstractNode child) {
assert child != null;
assert child.parent.get() == this;
/*
* Scan for location in weak references.
*
* Note: during reads, this method is used for range counts. During
* writes it is used to update the entry counts on which the range
* counts are based.
*
* @todo Can this be made more efficient by considering the last key on
* the child and searching the parent for the index that must correspond
* to that child? Note that when merging two children the keys in the
* parent might not be coherent depending on exactly when this method is
* called - for things to be coherent you would have to discover the
* index of the children before modifying their keys.
*
* @todo for writes, 85% of the use of this method is
* updateEntryCount(). Since that method is only called on update, we
* would do well to buffer hard references during descent and test the
* buffer in this method before performing a full search. Since
* concurrent writers are not allowed, we only need a single buffer
* whose height is the height of the tree. This should prove especially
* beneficial for larger branching factors. For smaller branching
* factors the cost might be so small as to be ignorable.
*
* @see Leaf#merge(Leaf sibling,boolean isRightSibling)
*/
final int nkeys = getKeyCount();
for (int i = 0; i <= nkeys; i++) {
if (childRefs[i] != null && childRefs[i].get() == child) {
return i;
}
}
throw new IllegalArgumentException("Not our child : child=" + child);
}
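/*
 * Note (illustrative): the scan above deliberately uses reference
 * identity (==) rather than key search, so it locates the child even
 * while the separator keys are transiently incoherent during a merge.
 * E.g., for children [ c0 c1 c2 ], getIndexOf(c1) returns 1 regardless
 * of the current keys, provided c1 is strongly reachable via
 * childRefs[].
 */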
/**
* Invoked when a non-root node or leaf has no more keys, in order to detach
* that child from its parent (this node). If this node becomes deficient,
* then it is joined with one of its immediate siblings. If this node is the
* root of the tree, then the root of the tree is also updated. The child is
* deleted as a post-condition.
*
* @param child
* The child (does NOT need to be mutable).
*
* @todo I am a bit suspicious of this method. It appears to be removing the
* key and child at the same index rather than the key at index-1 and
* the child at index. This interacts with how the separator key gets
* updated (or appears to get updated) when a child is removed. That
* logic occurs in {@link Leaf#merge(AbstractNode, boolean)} and in
* {@link Node#merge(AbstractNode, boolean)}. It may be that I can
* simplify things a bit further by making this adjustment here and in
* those merge() methods.
*
* FIXME This should clear the min/max for the child in this node's
* data record. This is invoked by merge() on a leaf, then recursively
* if we need to join nodes. The caller should have already updated
* the min/max for the leaf's own data record and on this node's data
* record for that leaf. This method only needs to clear the min/max
* entry associated with the child that is being removed.
*/
protected void removeChild(final AbstractNode child) {
assert child != null;
assert !child.deleted;
assert child.parent.get() == this;
assert dirty;
assert !deleted;
assert !isPersistent();
assert !isReadOnly();
// cast to mutable implementation class.
final BTree btree = (BTree) this.btree;
if (btree.debug)
assertInvariants();
if (DEBUG) {
log.debug("this=" + this + ", child=" + child);
/*
* Note: dumping [this] or the [child] will throw false exceptions
* at this point - they are in an intermediate state.
*/
// if(DEBUG) {
// System.err.println("this"); dump(Level.DEBUG,System.err);
// System.err.println("child"); child.dump(Level.DEBUG,System.err);
// }
}
final int i = getIndexOf(child);
/*
 * Note: these comments may be dated and need review.
 *
 * Copy over the hole created when the child is removed from the node.
 *
 * Given:
 *                 v-- remove @ index = 0
 *        index:    0  1  2
 * root  keys : [ 21 24  0 ]
 *                                 index
 * leaf1 keys : [  1  2  7  - ]    0  <-- remove @ index = 0
 * leaf2 keys : [ 21 22 23  - ]    1
 * leaf4 keys : [ 24 31  -  - ]    2
 *
 * This can also be represented as
 *
 * ( leaf1, 21, leaf2, 24, leaf4 )
 *
 * and we remove the sequence ( leaf1, 21 ) leaving a well-formed node.
 *
 * Remove(leaf1):
 *   index        := 0
 *   nkeys         = 2
 *   nchildren    := nkeys(2) + 1 = 3
 *   lenChildCopy := #children(3) - index(0) - 1 = 2
 *   lenKeyCopy   := lenChildCopy - 1 = 1
 *   copyChildren from index+1(1) to index(0) lenChildCopy(2)
 *   copyKeys     from index+1(1) to index(0) lenKeyCopy(1)
 *   erase keys[ nkeys - 1 = 1 ]
 *   erase children[ nkeys = 2 ]
 *
 * post-condition:
 *        index:    0  1  2
 * root  keys : [ 24  0  0 ]
 *                                 index
 * leaf2 keys : [ 21 22 23  - ]    0
 * leaf4 keys : [ 24 31  -  - ]    1
 */
{
/*
* Copy down to cover up the hole.
*/
final int index = i;
final int nkeys = getKeyCount();
// #of children to copy (equivalent to nchildren - index - 1)
final int lengthChildCopy = nkeys - index;
// #of keys to copy.
final int lengthKeyCopy = lengthChildCopy - 1;
// Tunnel through to the mutable keys object.
final MutableKeyBuffer keys = (MutableKeyBuffer) this.getKeys();
final MutableNodeData data = (MutableNodeData) this.data;
// check for persistent storage to be recycled for the removed child
if (data.childAddr[index] != 0) {
btree.recycle(data.childAddr[index]);
}
if (lengthKeyCopy > 0) {
System.arraycopy(keys.keys, index + 1, keys.keys, index,
lengthKeyCopy);
}
if (lengthChildCopy > 0) {
System.arraycopy(childRefs, index + 1, childRefs, index,
lengthChildCopy);
System.arraycopy(data.childAddr, index + 1, data.childAddr,
index, lengthChildCopy);
System.arraycopy(data.childEntryCounts, index + 1,
data.childEntryCounts, index, lengthChildCopy);
}
/*
* Erase the data that were exposed by this operation. Note that
* there is one fewer key than there are children, so the last key
* and the last child positions are cleared separately below.
*/
if (nkeys > 0) {
// erase the last key position.
keys.keys[nkeys - 1] = null;
}
// erase the last child position.
childRefs[nkeys] = null;
data.childAddr[nkeys] = NULL;
data.childEntryCounts[nkeys] = 0;
// Clear the parent on the old child.
child.parent = null;
// one fewer key in this node.
/* nkeys--; */keys.nkeys--;
if (child.isLeaf()) {
btree.nleaves--;
} else {
btree.nnodes--;
}
// Deallocate the child.
child.delete();
}
if (btree.root == this) {
/*
* The root node is allowed to become deficient, but once we are
* reduced to having no more keys in the root node it is replaced by
* the last remaining child.
*/
if (getKeyCount() == 0 && !isLeaf()) {
final AbstractNode<?> lastChild = getChild(0);
if (btree.debug)
lastChild.assertInvariants();
if (DEBUG) {
log.debug("replacing root: root=" + btree.root + ", node="
+ this + ", lastChild=" + lastChild);
// System.err.println("root");
// btree.root.dump(Level.DEBUG,System.err);
// System.err.println("this");
// this.dump(Level.DEBUG,System.err);
// System.err.println("lastChild");
// lastChild.dump(Level.DEBUG,System.err);
}
final boolean wasDirty = btree.root.dirty;
assert lastChild != null;
// replace the root node with a root leaf.
btree.root = lastChild;
if (!wasDirty) {
btree.fireDirtyEvent();
}
// clear the parent reference since this is now the root.
lastChild.parent = null;
// one less level in the btree.
btree.height--;
// deallocate this node.
this.delete();
// one less node in the tree.
btree.nnodes--;
if (BTree.INFO) {
BTree.log.info("reduced tree height: height="
+ btree.height + ", newRoot=" + btree.root);
}
btree.getBtreeCounters().rootsJoined++;
}
} else {
/*
* If a non-root node becomes deficient then it is joined with a
* direct sibling. If this forces a merge with a sibling, then the
* merged sibling will be removed from the parent which may force
* the parent to become deficient in turn, and thereby trigger a
* join() of the parent.
*/
if (data.getKeyCount() < minKeys()) {
join();
}
}
}
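/*
 * Worked example of the root collapse above (hypothetical values): in a
 * tree of height 2, suppose removeChild() leaves the root with zero
 * keys and a single remaining child [n0]:
 *
 *   root keys : [ - ]   children : [ n0 ]
 *
 * Then btree.root := n0, n0.parent := null, btree.height drops from 2
 * to 1, this node is deleted, and rootsJoined is incremented in the
 * BTreeCounters.
 */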
// /**
// * This is invoked by {@link #removeChild(AbstractNode)} when the node is
// * reduced to a single child in order to replace the reference to the node
// * on its parent with the reference to the node's sole remaining child.
// *
// * @param oldChild
// * The node.
// * @param newChild
// * The node's sole remaining child. This MAY be persistent since
// * this operation does NOT change the persistent state of the
// * newChild but only updates its transient state (e.g., its
// * parent reference).
// */
// protected void replaceChild(AbstractNode oldChild,AbstractNode newChild)
// {
//
// assert oldChild != null;
// assert !oldChild.isDeleted();
// assert !oldChild.isPersistent();
// assert oldChild.parent.get() == this;
// assert oldChild.nkeys == 0;
// assertInvariants();
// oldChild.assertInvariants();
// newChild.assertInvariants();
//
// assert newChild != null;
// assert !newChild.isDeleted();
// // assert !newChild.isPersistent(); // MAY be persistent - does not
// matter.
// assert newChild.parent.get() == oldChild;
//
// assert oldChild != newChild;
//
// assert !isDeleted();
// assert !isPersistent();
//
// int i = getIndexOf( oldChild );
//
// // dirtyChildren.remove(oldChild);
// //
// // if (newChild.isDirty()) {
// //
// // dirtyChildren.add(newChild);
// //
// // }
//
// // set the persistent key for the new child.
// childAddr[i] = (newChild.isPersistent() ? newChild.getIdentity() : NULL);
//
// // set the reference to the new child.
// childRefs[i] = new WeakReference(newChild);
//
// // Reuse the weak reference from the oldChild.
// newChild.parent = oldChild.parent;
//
// }
/**
* Return the child node or leaf at the specified index in this node. If the
* node is not in memory then it is read from the store.
*
* Note: This implementation DOES NOT cause concurrent threads to block
* unless they are performing IO for the same child. A {@link Memoizer}
* pattern is used to assign each concurrent thread a {@link FutureTask} on
* which it waits for the result. Once the result is available, there is a
* small synchronized block during which the concurrent
* requests for a child will contend to update the appropriate element in
* {@link #childRefs}.
*
* I believe the contention to update {@link #childRefs} is unavoidable. If
* this object was made into an {@link AtomicReferenceArray} then we would
* have difficulty when inserting and removing tuples since the backing
* array is not visible. An array of {@link AtomicReference} objects would
* not help since it would not ensure "publication" when the element was
* changed from a null to an {@link AtomicReference}, only when
* {@link AtomicReference#compareAndSet(Object, Object)} was used. Thus it
* could only help if we pre-populated the array with
* {@link AtomicReference} objects, which seems wasteful.
*
* As always, the mutable B+Tree is single-threaded so there are no added
* synchronization costs. Concurrent readers can only arise for read-only
* {@link BTree}s and for {@link IndexSegment}s.
*
* @param index
* The index of the child to be read from the store (in
* [0:nkeys]).
*
* @return The child node or leaf and never null.
*
* @throws IndexOutOfBoundsException
* if index is negative.
* @throws IndexOutOfBoundsException
* if index is GT the #of keys in the node (there is one more
* child than keys in a node).
*/
final public AbstractNode getChild(final int index) {
// See BLZG-1657 (Add BTreeCounters for cache hit and cache miss)
btree.getBtreeCounters().cacheTests.increment();
/*
* I've taken out this test since it turns out to be relatively
* expensive. The interrupt status of the thread is now checked
* exclusively when reading on the store.
*/
// if(Thread.interrupted()) {
// /*
// * This method is called relatively often - it is used each time we
// * descend the tree. We check whether or not the thread has been
// * interrupted so that we can abort running tasks quickly.
// */
// throw new RuntimeException(new InterruptedException());
// }
if (index < 0 || index > data.getKeyCount()) {
throw new IndexOutOfBoundsException("index=" + index + ", nkeys="
+ data.getKeyCount());
}
// if (!btree.isReadOnly()) {
if (btree.memo == null) {
/*
* Optimization for the mutable B+Tree.
*
* Note: This optimization depends on the assumption that concurrent
* operations are never submitted to the mutable B+Tree. In fact,
* the UnisolatedReadWriteIndex *DOES* allow concurrent readers (it
* uses a ReentrantReadWriteLock). Therefore this code path is now
* expressed conditionally on whether or not the Memoizer object is
* initialized by AbstractBTree.
*
* Note: Since the caller is single-threaded for the mutable B+Tree
* we do not need to use the Memoizer, which just delegates to
* _getChild(index). This saves us some object creation and overhead
* for this case.
*/
// See BLZG-1657 (Add BTreeCounters for cache hit and cache miss).
btree.getBtreeCounters().cacheMisses.increment();
return _getChild(index, null/* req */);
}
/*
* If we can resolve a hard reference to the child then we do not need
* to look any further.
*/
// synchronized (childRefs)
{
/*
* Note: we need to synchronize here to ensure visibility for
* childRefs[index] (in case it was updated in another thread). This
* is true even for the mutable B+Tree since the caller could use
* different threads for different operations. However, this
* synchronization will never be contended for the mutable B+Tree.
*/
final Reference<AbstractNode> childRef = childRefs[index];
final AbstractNode child = childRef == null ? null : childRef.get();
if (child != null) {
// Already materialized.
return child;
}
}
/*
* Otherwise we need to go through the Memoizer pattern to achieve
* non-blocking access. It will wind up delegating to _getChild(int),
* which is immediately below. However, it will ensure that one and only
* one thread executes _getChild(int) for a given parent and child
* index. That thread will update childRefs[index]. Any concurrent
* requests for the same child will wait for the FutureTask inside of
* the Memoizer and then return the new value of childRefs[index].
*/
/*
* See BLZG-1657 (Add BTreeCounters for cache hit and cache miss)
*
* Note: This is done in the caller rather than _getChild() since the
* latter may be called from the memoizer, in which case only one thread
* will actually invoke _getChild() while the others will just obtain
* the child through the memoized Future.
*/
btree.getBtreeCounters().cacheMisses.increment();
return btree.loadChild(this, index);
}
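/*
 * Simplified sketch of the general memoizer pattern relied on above
 * (hypothetical field and accessor names; the actual logic lives in the
 * {@link Memoizer} and {@link ChildMemoizer} classes). Concurrent
 * callers for the same (parent, index) pair share a single FutureTask,
 * so at most one thread performs the IO while the others wait:
 *
 * ConcurrentHashMap<LoadChildRequest, FutureTask<AbstractNode>> cache;
 * ...
 * FutureTask<AbstractNode> ft = cache.get(req);
 * if (ft == null) {
 *     final FutureTask<AbstractNode> t = new FutureTask<AbstractNode>(
 *             new Callable<AbstractNode>() {
 *                 public AbstractNode call() {
 *                     // hypothetical accessors on the request object.
 *                     return req.parent._getChild(req.index, req);
 *                 }
 *             });
 *     ft = cache.putIfAbsent(req, t);
 *     if (ft == null) {
 *         ft = t;
 *         ft.run(); // only the winning thread executes the IO.
 *     }
 * }
 * return ft.get(); // losers block here until the child is resident.
 */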
/**
* Method conditionally reads the child at the specified index from the
* backing store and sets its reference on the appropriate element of
* {@link #childRefs}. This method assumes that external mechanisms
* guarantee that no other thread is requesting the same child via this
* method at the same time. For the mutable B+Tree, that guarantee is
* trivially given by its single-threaded constraint. For the read-only
* B+Tree, {@link AbstractBTree#loadChild(Node, int)} provides this
* guarantee using a {@link Memoizer} pattern. This method explicitly
* handshakes with the {@link ChildMemoizer} to clear the {@link FutureTask}
* from the memoizer's internal cache as soon as the reference to the child
* has been set on the appropriate element of {@link #childRefs}.
*
* @param index
* The index of the child.
* @param req
* The key we need to remove the request from the
* {@link ChildMemoizer} cache (and null if this
* method is not invoked by the memoizer pattern).
*
* @return The child and never null.
*/
AbstractNode _getChild(final int index, final LoadChildRequest req) {
/*
* Make sure that the child is not reachable. It could have been
* concurrently set even if the caller had tested this and we do not
* want to read through to the backing store unless we need to.
*
* Note: synchronizing on childRefs[] should not be necessary. For a
* read-only B+Tree, the synchronization is provided by the Memoizer
* pattern. For a mutable B+Tree, the synchronization is provided by the
* single-threaded contract for mutation and by the requirement to use a
* construct, such as a Queue or the UnisolatedReadWriteIndex, which
* imposes a memory barrier when passing a B+Tree instance between
* threads.
*
* See http://www.cs.umd.edu/~pugh/java/memoryModel/archive/1096.html
*/
AbstractNode child;
synchronized (childRefs) {
/*
* Note: we need to synchronize here to ensure visibility for
* childRefs[index] (in case it was updated in another thread).
*/
final Reference<AbstractNode> childRef = childRefs[index];
child = childRef == null ? null : childRef.get();
if (child != null) {
// Already materialized.
return child;
}
}
/*
* The child needs to be read from the backing store.
*/
// See BLZG-1657 (Add BTreeCounters for cache hit and cache miss)
btree.getBtreeCounters().cacheMisses.increment();
final long addr = data.getChildAddr(index);
if (addr == IRawStore.NULL) {
// dump(Level.DEBUG, System.err);
/*
* Note: It appears that this can be triggered by a full disk, but I
* am not quite certain how a full disk leads to this condition.
* Presumably the full disk would cause a write of the child to
* fail. In turn, that should cause the thread writing on the B+Tree
* to fail. If group commit is being used, the B+Tree should then be
* discarded and reloaded from its last commit point.
*/
throw new AssertionError(
"Child does not have persistent identity: this=" + this
+ ", index=" + index);
}
/*
* Read the child from the backing store (potentially reads through to
* the disk).
*
* Note: This is guaranteed to not do duplicate reads. There are two
* cases. (A) The mutable B+Tree. Since the mutable B+Tree is single
* threaded, this case is trivial. (B) The read-only B+Tree. Here our
* guarantee is that the caller is in ft.run() inside of the Memoizer,
* and that ensures that only one thread is executing for a given
* LoadChildRequest object (the input to the Computable). Note that
* LoadChildRequest MUST meet the criteria for a hash map key for this
* guarantee to hold.
*/
child = btree.readNodeOrLeaf(addr);
/*
* Update the childRefs[index] element.
*
* Note: This code block is synchronized in order to facilitate the safe
* publication of the change in childRefs[index] to other threads.
*/
synchronized (childRefs) {
/*
* Since the childRefs[index] element has not been updated we do so
* now while we are synchronized.
*
* Note: This paranoia test could be tripped if the caller allowed
* concurrent requests to enter this method for the same child. In
* that case childRefs[index] could have an uncleared reference to
* the child. This would indicate a breakdown in the guarantee we
* require of the caller.
*/
assert childRefs[index] == null || childRefs[index].get() == null : "Child is already set: this="
+ this + ", index=" + index;
// patch parent reference since loaded from store.
child.parent = this.self;
// patch the child reference.
childRefs[index] = child.self;
}
/*
* Clear the future task from the memoizer cache.
*
* Note: This is necessary in order to prevent the cache from retaining
* a hard reference to each child materialized for the B+Tree.
*
* Note: This does not depend on any additional synchronization. The
* Memoizer pattern guarantees that only one thread actually calls
* ft.run() and hence runs this code.
*/
if (req != null) {
btree.memo.removeFromCache(req);
}
return child;
}
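/*
 * Note (illustrative): the method above is a check / read / publish
 * sequence in which both synchronized(childRefs) blocks use the same
 * monitor, giving the necessary happens-before edge for the
 * childRefs[index] element:
 *
 * synchronized (childRefs) { ... return child if already resident ... }
 * child = btree.readNodeOrLeaf(addr); // IO performed outside the lock.
 * synchronized (childRefs) { childRefs[index] = child.self; }
 *
 * The monitor is deliberately NOT held across the read itself so that a
 * slow disk read for one child never blocks resolution of its siblings
 * under the same parent.
 */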
// /**
// * Static helper method allocates the per-child lock objects.
// *
// * Note that the mutable {@link BTree} imposes a single-threaded constraint
// * on its API so we do not need to do any locking for that case and this
// * method will therefore return null if the owning B+Tree is mutable.
// *
// * @param btree
// * The owning B+Tree.
// *
// * @param nkeys
// * The #of keys, which is used to dimension the array.
// *
// * @return The array of lock objects -or- null if the btree is mutable.
// *
// * @see #childLocks
// *
// * @todo Per-child locks will only be useful on nodes with a relatively
// high
// * probability of concurrent access. Therefore they should be
// * conditionally enabled only to a depth of 0 (for the root's direct
// * children) or 1 (for the children of the root's direct children).
// * There is just not going to be any utility to this beyond that
// * point, especially not on an {@link IndexSegment} with a relatively
// * high branching factor. We could directly compute the probability of
// * access to any given child based on the branching factor and the
// * depth of the node in the B+Tree and the assumption of a uniform
// * distribution of reads by concurrent threads [in fact, in many
// * benchmark situations we are more likely to contend for the same
// * child unless the queries are parameterized].
// */
// static private final Object[] newChildLocks(final AbstractBTree btree,
// final int nkeys) {
//
// /*
// * Note: Uncommenting this has the effect of disabling per-child
// * locking.
// */
// // if(true) return null;
//
// if (!btree.isReadOnly() || !btree.getIndexMetadata().getChildLocks()) {
//
// /*
// * Either The mutable B+Tree has a single threaded constraint so we
// * do not need to do any locking for that case and therefore we do
// * not allocate the per-child locks here -or- child locks were
// * disabled as a configuration option.
// */
//
// return null;
//
// }
//
// /*
// * Note: The array is dimensioned to [branchingFactor] and not
// * [branchingFactor+1]. The "overflow" slot of the various arrays are
// * only used during node overflow/underflow operations. Those operations
// * do not occur for a read-only B+Tree.
// */
// // final int n = btree.branchingFactor + 1;
//
// // Note: We only need locks for the child entries that exist!
// final int n = nkeys + 1;
//
// final Object[] a = new Object[n];
//
// for (int i = 0; i < n; i++) {
//
// a[i] = new Object();
//
// }
//
// return a;
//
// }
/**
* Return the right-most child of this node.
*
* @param nodesOnly
* when true the search will halt at the right-most
* non-leaf. Otherwise it will return the right-most leaf.
*
* @return The right-most child of this node.
*/
protected AbstractNode getRightMostChild(final boolean nodesOnly) {
final AbstractNode<?> child = getChild(getKeyCount());
assert child != null;
if (child.isLeaf()) {
if (nodesOnly) {
return this;
} else {
return child;
}
}
return ((Node) child).getRightMostChild(nodesOnly);
}
/**
* Iterator visits children, recursively expanding each child with a
* post-order traversal of its children and finally visits this node itself.
*/
@Override
@SuppressWarnings("unchecked")
public Iterator<AbstractNode> postOrderNodeIterator(
final boolean dirtyNodesOnly, final boolean nodesOnly) {
if (dirtyNodesOnly && !dirty) {
return EmptyIterator.DEFAULT;
}
/*
* Append this node to the iterator in the post-order position.
*/
return new Striterator(postOrderIterator1(dirtyNodesOnly,nodesOnly))
.append(new SingleValueIterator(this));
}
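/*
 * Usage sketch (hypothetical helper name): post-order is the natural
 * order for persisting a dirty subtree, since every child must be
 * assigned its address on the store before the parent's childAddr[]
 * entry can be written out.
 *
 * final Iterator<AbstractNode> itr = node.postOrderNodeIterator(
 *         true, false); // dirtyNodesOnly := true, nodesOnly := false
 * while (itr.hasNext()) {
 *     writeNodeOrLeaf(itr.next()); // children flushed before parents.
 * }
 */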
/**
* Iterator visits children in the specified half-open key range,
* recursively expanding each child with a post-order traversal of its
* children and finally visits this node itself.
*/
@Override
@SuppressWarnings("unchecked")
public Iterator<AbstractNode> postOrderIterator(final byte[] fromKey,
final byte[] toKey) {
/*
* Append this node to the iterator in the post-order position.
*/
return new Striterator(postOrderIterator2(fromKey, toKey))
.append(new SingleValueIterator(this));
}
/**
* Visits the children (recursively) using post-order traversal, but does
* NOT visit this node.
*/
@SuppressWarnings("unchecked")
private Iterator<AbstractNode> postOrderIterator1(
final boolean dirtyNodesOnly,final boolean nodesOnly) {
/*
* Iterator visits the direct children, expanding them in turn with a
* recursive application of the post-order iterator.
*
* When dirtyNodesOnly is true we use a child iterator that makes a best
* effort to only visit dirty nodes. In particular, the iterator MUST NOT
* force children to be loaded from disk if they are not resident since
* dirty nodes are always resident.
*
* The iterator must touch the node in order to guarantee that a node
* will still be dirty by the time that the caller visits it. This
* places the node onto the hard reference queue and increments its
* reference counter. Evictions do NOT cause IO when the reference is
* non-zero, so the node will not be made persistent as a result of
* other node touches. However, the node can still be made persistent if
* the caller explicitly writes the node onto the store.
*/
// BTree.log.debug("node: " + this);
return new Striterator(childIterator(dirtyNodesOnly))
.addFilter(new Expander() {
private static final long serialVersionUID = 1L;
/*
* Expand each child in turn.
*/
protected Iterator expand(final Object childObj) {
/*
* A child of this node.
*/
final AbstractNode child = (AbstractNode) childObj;
if (dirtyNodesOnly && !child.dirty) {
return EmptyIterator.DEFAULT;
}
if (child instanceof Node) {
/*
* The child is a Node (has children).
*/
// BTree.log.debug("child is node: " + child);
// visit the children (recursive post-order
// traversal).
final Striterator itr = new Striterator(
((Node) child).postOrderIterator1(
dirtyNodesOnly, nodesOnly));
// append this node in post-order position.
itr.append(new SingleValueIterator(child));
return itr;
} else {
/*
* The child is a leaf.
*/
// BTree.log.debug("child is leaf: " + child);
// Visit the leaf itself.
if (nodesOnly)
return EmptyIterator.DEFAULT;
return new SingleValueIterator(child);
}
}
});
}
/**
* Visits the children (recursively) using post-order traversal, but does
* NOT visit this node.
*/
@SuppressWarnings("unchecked")
private Iterator<AbstractNode> postOrderIterator2(final byte[] fromKey,
final byte[] toKey) {
/*
* Iterator visits the direct children, expanding them in turn with a
* recursive application of the post-order iterator.
*
* When dirtyNodesOnly is true we use a child iterator that makes a best
* effort to only visit dirty nodes. In particular, the iterator MUST NOT
* force children to be loaded from disk if they are not resident since
* dirty nodes are always resident.
*
* The iterator must touch the node in order to guarantee that a node
* will still be dirty by the time that the caller visits it. This
* places the node onto the hard reference queue and increments its
* reference counter. Evictions do NOT cause IO when the reference is
* non-zero, so the node will not be made persistent as a result of
* other node touches. However, the node can still be made persistent if
* the caller explicitly writes the node onto the store.
*/
// BTree.log.debug("node: " + this);
return new Striterator(childIterator(fromKey, toKey))
.addFilter(new Expander() {
private static final long serialVersionUID = 1L;
/*
* Expand each child in turn.
*/
protected Iterator expand(final Object childObj) {
/*
* A child of this node.
*/
final AbstractNode child = (AbstractNode) childObj;
if (child instanceof Node) {
/*
* The child is a Node (has children).
*
* Visit the children (recursive post-order
* traversal).
*/
// BTree.log.debug("child is node: " + child);
final Striterator itr = new Striterator(
((Node) child).postOrderIterator2(fromKey,
toKey));
// append this node in post-order position.
itr.append(new SingleValueIterator(child));
/*
* Note: getReadExecutor() is not defined for IJournal. If
* we want to support the read executor pre-fetch pattern
* then the code needs to be updated to use IJournal and
* IJournal needs to expose getReadExecutor.
*/
if ((btree.store instanceof Journal)
&& (((Journal) btree.store)
.getReadExecutor() != null)) {
/*
* Prefetch any child leaves we need to visit
* and prefetch the right sibling of the node we
* are about to visit if the iterator will span
* that node as well.
*/
prefetchChildLeaves((Node) child, fromKey,
toKey);
}
return itr;
} else {
/*
* The child is a leaf.
*/
// BTree.log.debug("child is leaf: " + child);
// Visit the leaf itself.
return new SingleValueIterator(child);
}
}
});
}
/**
* When we visit a node whose children are leaves, schedule memoization of
* those leaves whose separator key in the node is LT the toKey
* (non-blocking). If the rightSibling of the node would be visited by the
* iterator, then prefetch of the rightSibling is also scheduled.
*
* @param node
* A node whose children are leaves.
* @param fromKey
* The inclusive lower bound of some iterator -or- null if there
* is no lower bound.
* @param toKey
* The exclusive upper bound of some iterator.
*
* @todo All memoization should go through the same thread pool in order to
* bound the #of threads actually reading on the disk. Right now, the
* memoizer runs in the caller's thread. If we modified it to submit a
* task to the readService to handle the memoization then we would
* bound the #of threads involved.
*
* @todo PREFETCH : JSR 166 Fork/join would also be a good choice here.
*
* @todo PREFETCH : Prefetch will not break if the iterator is closed. This
* is not a bug per se, but it might be something we want to support
* in the future.
*
* @todo PREFETCH : Prefetch should be cancelled if the btree is closed. We
* could do this with an Executor wrapping the {@link LatchedExecutor}
* that kept track of the work queue for that executor and allowed us
* to cancel just the tasks submitted against it. This would also give
* us a means to report on the prefetch work queue length.
*
* @todo PREFETCH : Only journal is supported right now.
*
* @todo PREFETCH : The {@link IRangeQuery#CURSOR} mode is not supported yet.
*/
protected void prefetchChildLeaves(final Node node, final byte[] fromKey,
final byte[] toKey) {
final int nkeys = node.getKeyCount();
// figure out the first index to visit.
final int fromIndex;
{
int index;
if (fromKey != null) {
index = node.getKeys().search(fromKey);
if (index < 0) {
index = -index - 1;
}
} else {
index = 0;
}
fromIndex = index;
}
// figure out the first index to NOT visit.
final int toIndex;
{
int index;
if (toKey != null) {
index = node.getKeys().search(toKey);
if (index < 0) {
index = -index - 1;
}
} else {
index = nkeys;
}
toIndex = index;
}
/*
* Submit tasks which will materialize the children in
* [fromIndex,toIndex).
*
* Note: We do not track the futures of these tasks. The tasks have a
* side effect on the parent/child weak references among the nodes in
* the B+Tree, on the backing hard reference ring buffer, and on the
* cache of materialized disk records. That side effect is all that we
* are seeking.
*
* Note: If the B+Tree is concurrently closed, then these tasks will
* error out. That is fine.
*/
final Executor s = ((Journal) btree.store).getReadExecutor();
for (int i = fromIndex; i < toIndex; i++) {
final int index = i;
s.execute(new Runnable() {
public void run() {
if (!node.btree.isOpen()) {
// No longer open.
return;
}
// Materialize the child.
node.getChild(index);
}
});
}
if (toIndex == nkeys - 1) {
/*
* Prefetch the right sibling.
*/
prefetchRightSibling(node, toKey);
}
}
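/*
 * Worked example of the index computation above (hypothetical values):
 * IRaba.search follows the Arrays.binarySearch contract, returning the
 * index on a hit and (-(insertionPoint) - 1) on a miss. For separator
 * keys [ 10 20 30 ] and fromKey := 15, search returns -2, so
 * fromIndex = -(-2) - 1 = 1 and prefetch begins with the child at
 * index 1, which spans the half-open key range [10:20).
 */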
/**
* If the caller's toKey is GT the separator keys for the children of
* this node then the iterator will need to visit the rightSibling of the
* node and this method will schedule the memoization of the node's
* rightSibling in order to reduce the IO latency when the iterator visits
* that rightSibling (non-blocking).
*
* @param node
* A node.
* @param toKey
* The exclusive upper bound of some iterator.
*/
protected void prefetchRightSibling(final Node node, final byte[] toKey) {
final int nkeys = node.getKeyCount();
final byte[] lastSeparatorKey = node.getKeys().get(nkeys - 1);
if (BytesUtil.compareBytes(toKey, lastSeparatorKey) <= 0) {
/*
* Since the toKey is LTE to the lastSeparatorKey on this node the
* last tuple to be visited by the iterator is spanned by this node
* and we will not visit the node's rightSibling.
*
* @todo This test could be optimized if IRaba exposed a method for
* unsigned byte[] comparisons against the coded representation of
* the keys.
*/
return;
}
// The parent of this node.
final Node p = node.parent.get();
// /*
// * Test to see if the rightSibling is already materialized.
// */
//
// Note: Don't bother testing as we will test in the task below anyway.
//
// Node rightSibling = (Node) p.getRightSibling(node,
// false/* materialize */);
//
// if (rightSibling != null) {
//
// /*
// * The rightSibling is already materialized and getRightSibling()
// * touches the rightSibling as a side-effect so it will be retained
// * longer. Return now as there is nothing to do.
// */
//
// return;
//
// }
/*
* Submit a task which will materialize that right sibling.
*
* Note: This task will only materialize a rightSibling of a common
* parent. If [node] is the last child of the parent [p] then you would
* need to ascend to the parent of [p] and then descend again, which is
* not the specified behavior for getRightSibling(). Since this is just
* an optimization for IO scheduling, I think that it is fine as it is.
*
* Note: We do not track the future of this task. The task will have a
* side effect on the parent/child weak references among the nodes in
* the B+Tree, on the backing hard reference ring buffer, and on the
* cache of materialized disk records. That side effect is all that we
* are seeking.
*
* Note: If the B+Tree is concurrently closed, then this task will error
* out. That is fine.
*/
final Executor s = ((Journal) btree.store).getReadExecutor();
s.execute(new Runnable() {
public void run() {
if (!p.btree.isOpen()) {
// No longer open.
return;
}
// Materialize the right sibling.
p.getRightSibling(node, true/* materialize */);
}
});
}
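/*
 * Worked example (hypothetical values): for separator keys [ 10 20 30 ]
 * the lastSeparatorKey is 30. A toKey of 25 is LTE 30, so the last
 * tuple visited is spanned by this node and no prefetch is scheduled.
 * A toKey of 35 is GT 30, so the iterator will run off the end of this
 * node and the right sibling is scheduled for memoization.
 */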
/**
* Iterator visits the direct child nodes in the external key ordering.
*
* @param dirtyNodesOnly
* When true, only the direct dirty child nodes will be visited.
*/
public Iterator<AbstractNode> childIterator(final boolean dirtyNodesOnly) {
if (dirtyNodesOnly) {
return new DirtyChildIterator(this);
} else {
return new ChildIterator(this);
}
}
/**
* Iterator visits the direct child nodes in the external key ordering.
*/
public Iterator<AbstractNode> childIterator(final byte[] fromKey,
final byte[] toKey) {
return new ChildIterator(this, fromKey, toKey);
}
@Override
public boolean dump(final Level level, final PrintStream out,
final int height, final boolean recursive) {
// True iff we will write out the node structure.
final boolean debug = level.toInt() <= Level.DEBUG.toInt();
// Set true iff an inconsistency is detected.
boolean ok = true;
final int branchingFactor = this.getBranchingFactor();
final int nkeys = getKeyCount();
final int minKeys = this.minKeys();
final int maxKeys = this.maxKeys();
if (parent != null && nkeys < minKeys) {
// min keys failure.
out.println(indent(height) + "ERROR: too few keys: m="
+ branchingFactor + ", minKeys=" + minKeys + ", nkeys="
+ nkeys + ", isLeaf=" + isLeaf());
ok = false;
}
if (nkeys > maxKeys) {
// max keys failure.
out.println(indent(height) + "ERROR: too many keys: m="
+ branchingFactor + ", maxKeys=" + maxKeys + ", nkeys="
+ nkeys + ", isLeaf=" + isLeaf());
ok = false;
}
{ // nentries
if (this == btree.root) {
if (getSpannedTupleCount() != btree.getEntryCount()) {
out.println(indent(height)
+ "ERROR: root node has nentries="
+ getSpannedTupleCount()
+ ", but btree has nentries="
+ btree.getEntryCount());
ok = false;
}
}
{
int nentries = 0;
for (int i = 0; i <= nkeys; i++) {
nentries += getChildEntryCount(i);
if (nentries <= 0) {
out.println(indent(height) + "ERROR: childEntryCount["
+ i + "] is non-positive");
ok = false;
}
}
if (nentries != getSpannedTupleCount()) {
out.println(indent(height) + "ERROR: nentries("
+ getSpannedTupleCount()
+ ") does not agree with sum of per-child counts("
+ nentries + ")");
ok = false;
}
}
}
if (this == btree.root) {
if (parent != null) {
out
.println(indent(height)
+ "ERROR: this is the root, but the parent is not null.");
ok = false;
}
} else {
/*
* Note: there is a difference between having a parent reference and
* having the parent be strongly reachable. However, we actually want
* to maintain both -- a parent MUST always be strongly reachable
* ... UNLESS you are doing a fast forward or reverse leaf scan
* since the node hierarchy is not being traversed in that case.
*
* @todo Should we keep leaves using a fast forward or reverse scan
* out of the hard reference cache since their parents are not
* strongly reachable?
*/
if (parent == null) {
out
.println(indent(height)
+ "ERROR: the parent reference MUST be defined for a non-root node.");
ok = false;
} else if (parent.get() == null) {
out.println(indent(height)
+ "ERROR: the parent is not strongly reachable.");
ok = false;
}
}
// verify keys are monotonically increasing.
try {
assertKeysMonotonic();
} catch (AssertionError ex) {
out.println(indent(height) + " ERROR: " + ex);
ok = false;
}
if (debug) {
out.println(indent(height) + toString());
// out.println(indent(height) + " parent="
// + (parent == null ? null : parent.get()));
// out.println(indent(height) + " dirty=" + isDirty() + ", nkeys="
// + nkeys + ", nchildren=" + (nkeys + 1) + ", minKeys="
// + minKeys + ", maxKeys=" + maxKeys + ", branchingFactor="
// + branchingFactor+", #entries="+getSpannedTupleCount());
// out.println(indent(height) + " keys=" + getKeys());
// // out.println(indent(height) + " childAddr="
// // + Arrays.toString(childAddr));
// out.print(indent(height) + " childAddr/Refs=[");
// for (int i = 0; i <= nkeys + 1; i++) {
// if (i > 0)
// out.print(", ");
// out.print(getChildAddr(i));
// out.print('(');
// if (childRefs[i] == null) {
// out.print("null");
// } else {
// // Non-recursive print of the reference.
// final AbstractNode> child = childRefs[i].get();
// out.print(child.getClass().getName() + "@"
// + Integer.toHexString(child.hashCode()));
// }
// out.print(')');
// }
// out.println("]");
// out.print(indent(height) + " childEntryCounts=[");
// for (int i = 0; i <= nkeys; i++) {
// if (i > 0)
// out.print(", ");
// out.print(getChildEntryCount(i));
// }
// out.println("]");
// // + Arrays.toString(childEntryCounts));
}
/*
* Look for inconsistencies for children. A dirty child MUST NOT have an
* entry in childAddr[] since it is not persistent and MUST show up in
* dirtyChildren. Likewise if a child is NOT dirty, then it MUST have an
* entry in childAddr and MUST NOT show up in dirtyChildren.
*
* This also verifies that all entries beyond nchildren (nkeys+1) are
* unused.
*/
for (int i = 0; i < branchingFactor + 1; i++) {
if (i > nkeys) {
/*
* Scanning past the last valid child index.
*/
if (!isReadOnly()
&& ((MutableNodeData) data).childAddr[i] != NULL) {
out.println(indent(height) + " ERROR childAddr[" + i
+ "] should be " + NULL + ", not "
+ ((MutableNodeData) data).childAddr[i]);
ok = false;
}
if (childRefs[i] != null) {
out.println(indent(height) + " ERROR childRefs[" + i
+ "] should be null, not " + childRefs[i]);
ok = false;
}
} else {
/*
* Scanning a valid child index.
*
* Note: This is not fetching the child if it is not in memory
* -- perhaps it should using its persistent id?
*/
final AbstractNode<?> child = (childRefs[i] == null ? null
: childRefs[i].get());
if (child != null) {
if (child.parent == null || child.parent.get() == null) {
/*
* The reference to the parent MUST exist since we
* are the parent and therefore the parent is strongly
* reachable.
*/
out.println(indent(height) + " ERROR child[" + i
+ "] does not have parent reference.");
ok = false;
}
if (child.parent.get() != this) {
out.println(indent(height) + " ERROR child[" + i
+ "] has wrong parent.");
ok = false;
// // some extra stuff used to track down a bug.
if (!ok) {
if (level == Level.DEBUG) {
// dump the child also and exit.
System.err.println("child");
child.dump(Level.DEBUG, System.err);
throw new AssertionError();
} else {
// recursive call to get debug level dump.
System.err.println("this");
this.dump(Level.DEBUG, System.err);
}
}
}
final long childSpannedEntryCount = (child.isLeaf() ? ((Leaf) child)
.getKeyCount()
: ((Node) child).getSpannedTupleCount());
if (getChildEntryCount(i) != childSpannedEntryCount) {
out.println(indent(height) + " ERROR child[" + i
+ "] spans " + childSpannedEntryCount
+ " entries, but childEntryCount[" + i + "]="
+ getChildEntryCount(i));
ok = false;
}
if (child.isDirty()) {
/*
* Dirty child. The parent of a dirty child MUST also be
* dirty.
*/
if (!isDirty()) {
out.println(indent(height) + " ERROR child[" + i
+ "] is dirty, but its parent is clean");
ok = false;
}
if (childRefs[i] == null) {
out.println(indent(height) + " ERROR childRefs["
+ i + "] is null, but the child is dirty");
ok = false;
}
if (getChildAddr(i) != NULL) {
out.println(indent(height) + " ERROR childAddr["
+ i + "]=" + getChildAddr(i)
+ ", but MUST be " + NULL
+ " since the child is dirty");
ok = false;
}
// if (!dirtyChildren.contains(child)) {
// out
// .println(indent(height + 1)
// + " ERROR child at index="
// + i
// + " is dirty, but not on the dirty list: child="
// + child);
// ok = false;
// }
} else {
/*
* Clean child (i.e., persistent). The parent of a clean
* child may be either clean or dirty.
*/
if (getChildAddr(i) == NULL) {
out.println(indent(height) + " ERROR childKey["
+ i + "] is " + NULL
+ ", but child is not dirty");
ok = false;
}
// if (dirtyChildren.contains(child)) {
// out
// .println(indent(height)
// + " ERROR child at index="
// + i
// + " is not dirty, but is on the dirty list: child="
// + child);
// ok = false;
// }
}
}
}
}
if (!ok && !debug) {
// @todo show the node structure with the errors since we would not
// have seen it otherwise.
}
if (recursive) {
/*
* Dump children using pre-order traversal.
*/
final Set<AbstractNode<?>> dirty = new HashSet<AbstractNode<?>>();
for (int i = 0; i <= /* nkeys */branchingFactor; i++) {
if (childRefs[i] == null && !isReadOnly()
&& ((MutableNodeData) data).childAddr[i] == 0) {
if (i <= nkeys) {
/*
* This lets us dump a tree with some kinds of
* structural problems (missing child reference or key).
*/
out.println(indent(height + 1)
+ "ERROR can not find child at index=" + i
+ ", skipping this index.");
ok = false;
} else {
/*
* We expect null child entries beyond nkeys+1.
*/
}
continue;
}
/*
* Note: this works around the assert test for the index in
* getChild(index) but is not able/willing to follow a childKey
* to a child that is not memory resident.
*/
// AbstractNode child = getChild(i);
final AbstractNode<?> child = childRefs[i] == null ? null
: childRefs[i].get();
if (child != null) {
if (child.parent == null) {
out
.println(indent(height + 1)
+ "ERROR child does not have parent reference at index="
+ i);
ok = false;
}
if (child.parent.get() != this) {
out
.println(indent(height + 1)
+ "ERROR child has incorrect parent reference at index="
+ i);
ok = false;
}
// if (child.isDirty() && !dirtyChildren.contains(child)) {
//
// out
// .println(indent(height + 1)
// + "ERROR dirty child not in node's dirty list at index="
// + i);
//
// ok = false;
//
// }
//
// if (!child.isDirty() && dirtyChildren.contains(child)) {
//
// out
// .println(indent(height + 1)
// +
// "ERROR clear child found in node's dirty list at index="
// + i);
//
// ok = false;
//
// }
if (child.isDirty()) {
dirty.add(child);
}
if (i == 0) {
if (nkeys == 0) {
/*
* Note: a node with zero keys is valid. It MUST
* have a single child. Such nodes arise when
* splitting a node in a btree of order m := 3 when
* the splitIndex is computed as m/2-1 = 0. This is
* perfectly normal.
*/
} else {
/*
* Note: All keys on the first child MUST be LT the
* first key on this node.
*/
final byte[] k0 = getKeys().get(0);
final byte[] ck0 = child.getKeys().get(0);
if (BytesUtil.compareBytes(ck0, k0) >= 0) {
// if( child.compare(0,keys,0) >= 0 ) {
out
.println(indent(height + 1)
+ "ERROR first key on first child must be LT "
+ keyAsString(k0)
+ ", but found "
+ keyAsString(ck0));
ok = false;
}
if (child.getKeyCount() >= 1) {
final byte[] ckn = child.getKeys().get(
child.getKeyCount() - 1);
if (BytesUtil.compareBytes(ckn, k0) >= 0) {
// if (child.compare(child.nkeys-1, keys, 0)
// >= 0) {
out
.println(indent(height + 1)
+ "ERROR last key on first child must be LT "
+ keyAsString(k0)
+ ", but found "
+ keyAsString(ckn));
ok = false;
}
}
}
} else if (i < nkeys) {
// Note: The delete rule does not preserve this
// characteristic since we do not
// update the separatorKey for a leaf when removing its
// left most key.
//
// if (child.isLeaf() && keys[i - 1] != child.keys[0]) {
//
// /*
// * While each key in a node always is the first key of
// * some leaf, we are only testing the direct children
// * here. Therefore if the children are not leaves then
// * we can not cross check their first key with the
// keys
// * on this node.
// */
// out.println(indent(height + 1)
// + "ERROR first key on child leaf must be "
// + keys[i - 1] + ", not " + child.keys[0]
// + " at index=" + i);
//
// ok = false;
//
// }
} else {
/*
* While there is a child for the last index of a node,
* there is no key for that index.
*/
}
if (!child.dump(level, out, height + 1, true)) {
ok = false;
}
}
}
// if (dirty.size() != dirtyChildren.size()) {
//
// out.println(indent(height + 1) + "ERROR found " + dirty.size()
// + " dirty children, but " + dirtyChildren.size()
// + " in node's dirty list");
//
// ok = false;
//
// }
}
return ok;
}
/**
* Human readable representation of the {@link Node}.
*/
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
// sb.append(getClass().getName());
sb.append(super.toString());
sb.append("{ isDirty=" + isDirty());
sb.append(", isDeleted=" + isDeleted());
sb.append(", addr=" + identity);
final Node p = (parent == null ? null : parent.get());
sb.append(", parent=" + (p == null ? "N/A" : p.toShortString()));
if (data == null) {
// No data record? (Generally, this means it was stolen by copy on
// write).
sb.append(", data=NA}");
return sb.toString();
}
sb.append(", nkeys=" + getKeyCount());
sb.append(", minKeys=" + minKeys());
sb.append(", maxKeys=" + maxKeys());
DefaultNodeCoder.toString(this, sb);
// indicate if each child is loaded or unloaded.
{
final int nchildren = getChildCount();
sb.append(", children=[");
for (int i = 0; i < nchildren; i++) {
if (i > 0)
sb.append(", ");
final AbstractNode<?> child = childRefs[i] == null ? null
: childRefs[i].get();
sb.append(child == null ? "U" : "L");
}
sb.append("]");
}
sb.append("}");
return sb.toString();
}
}