org.apache.jena.tdb.index.bplustree.BPTreeNode Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jena-tdb Show documentation
Show all versions of jena-tdb Show documentation
TDB is a storage subsystem for Jena and ARQ, it is a native triple store providing persistent storage of triples/quads.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.tdb.index.bplustree;
import static java.lang.String.format ;
import static org.apache.jena.atlas.lib.Alg.decodeIndex ;
import static org.apache.jena.tdb.base.record.Record.keyGT ;
import static org.apache.jena.tdb.base.record.Record.keyLT ;
import static org.apache.jena.tdb.base.record.Record.keyNE ;
import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.CheckingNode ;
import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.CheckingTree ;
import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.DumpTree ;
import org.apache.jena.atlas.io.IndentedLineBuffer ;
import org.apache.jena.atlas.io.IndentedWriter ;
import org.apache.jena.tdb.base.block.Block ;
import org.apache.jena.tdb.base.buffer.PtrBuffer ;
import org.apache.jena.tdb.base.buffer.RecordBuffer ;
import org.apache.jena.tdb.base.record.Record ;
import org.apache.jena.tdb.sys.SystemTDB ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
public final class BPTreeNode extends BPTreePage
{
// Access modes passed to get(idx, state) to choose a read or a write fetch of a child page.
private static final short READ = 1 ;
private static final short WRITE = 2 ;
// Only "public" for external very low level tools in development to access this class.
// Assume package access.
private static Logger log = LoggerFactory.getLogger(BPTreeNode.class) ;
// The block backing this node (set in the constructor and in reset()).
private Block block ;
// Node id: the int value of the backing block's id at construction time.
private int id ;
// NOTE(review): not read anywhere in the visible part of this file - confirm whether still needed.
private short blockState = READ ;
// Id of the parent node; compared against BPlusTreeParams.RootParent for the root.
int parent ;
int count ; // Number of records. Number of pointers is +1
// "Leaf" of the BPTree is the lowest level of ptr/key splits, not the data blocks.
// We need to know this to know which block manager the block pointers refer to.
boolean isLeaf ;
// Keys held in this node.
private RecordBuffer records ;
// Replace the record buffer.
void setRecordBuffer(RecordBuffer r) { records = r ; }
// Child pointers (block ids); slot i sits below/left of key i, slot count is the top pointer.
PtrBuffer ptrs ;
/* B+Tree
*
* Two block managers :
* one for Nodes (BPlusTreePages => BPlusTreeNode)
* one for Leaves (RecordBufferPages)
* The split key is the held in the highest in the block
*
* A "leaf" node is a leaf of the B+Tree part, and points to
* highest record in a RecordBuffer
*
* The Gap is usually zero.
* N = 2, Gap = 1 =>
* 2*N+Gap: MaxRec = 4, MaxPtr = 5,
* Max-1: HighRec = 3, HighPtr = 4
* N-1: MinRec = 1, MinPtr = 2
*
* BPTreeNode:
*
* +------------------------+
* |-| K0 | K1 | K2 | K3 |--|
* +------------------------+
* | P0 | P1 | P2 | P3 | P4 |
* +------------------------+
*
* +------------------------+
* | | K0 | K1 | ** | ** |--|
* +------------------------+
* | P0 | P1 | P2 | ** | ** |
* +------------------------+
*
* BPTreeRecords -> RecordBuffer:
*
* +------------------------+
* | K0 | K1 | K2 | ** | ** |
* +------------------------+
*
* The size of records blocks and size of tree nodes don't have to be the same.
* They use different page managers, and are in different files.
*
* The minimal tree is one, leaf, root BPTreeNode and one BPTreeRecords page.
*
* Pictures:
* /--\ \--\
* means a block with free space introduced between records[i] and records[i+1], ptrs[i+1]/ptrs[i+2]
* Lower half is a valid structure (except for overall size)
*
* /--/ /--\
* means a block with free space introduced between records[i] and records[i+1], ptrs[i]/ptrs[i+1]
* Upper half is a valid structure (except for overall size)
*/
// Branch nodes only need create branch nodes (splitting sideways)
// Leaf nodes only create leaf nodes.
// The root is an exception.
/** Create a new node belonging to the same B+Tree as this node. */
private BPTreeNode create(int parent, boolean isLeaf)
{
    final BPlusTree owner = this.bpTree ;
    return BPTreeNode.create(owner, parent, isLeaf) ;
}
/**
 * Ask the tree's node manager for a new node with the given parent id,
 * then flag it as leaf or branch.
 */
private static BPTreeNode create(BPlusTree bpTree, int parent, boolean isLeaf)
{
    final BPTreeNode fresh = bpTree.getNodeManager().createNode(parent) ;
    fresh.setIsLeaf(isLeaf) ;
    return fresh ;
}
/*package*/ BPTreeNode(BPlusTree bpTree, Block block)
{
    super(bpTree) ;
    // The node id is the (int) id of the block that backs it.
    this.id = block.getId().intValue() ;
    this.block = block ;
}
/**
 * Rebind this node to {@code block} and reformat the node over it,
 * keeping the current leaf flag, parent id and count.
 */
@Override
public void reset(Block block)
{
    this.block = block ;
    // Formatting sets up the record and pointer buffers.
    BPTreeNodeMgr.formatBPTreeNode(this, this.bpTree, block, this.isLeaf, this.parent, this.count) ;
}
// [TxTDB:PATCH-UP] REMOVE
//private BPTreePage get(int idx) { return get(idx, WRITE) ; }
/**
 * Fetch the child page referenced by pointer slot {@code idx}.
 * The page comes from the records manager or the node manager depending on
 * whether this node is a leaf; {@code state} selects read or write access.
 *
 * @param idx   pointer slot in this node (not a page id)
 * @param state {@code READ} or {@code WRITE}
 * @return the child page, or null (after logging an error) for an unknown state
 */
private BPTreePage get(int idx, short state)
{
    final int childId = ptrs.get(idx) ;
    switch ( state )
    {
        case READ :
            return getMgrRead(childId) ;
        case WRITE :
            return getMgrWrite(childId) ;
        default :
            log.error("Unknown state: "+state) ;
            return null ;
    }
}
/** Read-fetch page {@code subId}: a records page below a leaf node, else a tree node. */
private BPTreePage getMgrRead(int subId)
{
    if ( ! isLeaf )
        return bpTree.getNodeManager().getRead(subId, this.id) ;
    return bpTree.getRecordsMgr().getRead(subId) ;
}
/** Write-fetch page {@code subId}: a records page below a leaf node, else a tree node. */
private BPTreePage getMgrWrite(int subId)
{
    // [TxTDB:PATCH-UP]
    if ( ! isLeaf )
        return bpTree.getNodeManager().getWrite(subId, this.id) ;
    return bpTree.getRecordsMgr().getWrite(subId) ;
}
// ---------- Public calls.
// None of these are called recursively.
/**
 * Find a record, starting at the root.
 *
 * @throws BPTreeException if {@code root} does not have id 0
 */
public static Record search(BPTreeNode root, Record rec)
{
    root.internalCheckNodeDeep() ;
    if ( root.id == 0 )
        return root.internalSearch(rec) ;
    throw new BPTreeException("Search not starting from the root: "+root) ;
}
/**
 * Insert a record into the tree rooted at {@code root}.
 * A full root is split first so the descent always has room.
 *
 * @return the existing value if any, else null
 * @throws BPTreeException if {@code root} is not the root node
 */
public static Record insert(BPTreeNode root, Record record)
{
    // [TxTDB:PATCH-UP] - put in BPlusTree.
    if ( logging() )
    {
        log.debug(format("** insert(%s) / start", record)) ;
        if ( DumpTree )
            root.dump() ;
    }

    if ( ! root.isRoot() )
        throw new BPTreeException("Insert begins but this is not the root") ;

    // Splitting the root is a special case: the root keeps its block.
    if ( root.isFull() )
    {
        splitRoot(root) ;
        if ( DumpTree )
            root.dump() ;
    }

    // Root has space - do the insert proper.
    final Record previous = root.internalInsert(record) ;
    root.internalCheckNodeDeep() ;

    if ( logging() )
    {
        log.debug(format("** insert(%s) / finish", record)) ;
        if ( DumpTree )
            root.dump() ;
    }
    return previous ;
}
/**
 * Delete a record from the tree rooted at {@code root}.
 *
 * @return the old value if there was one, else null
 * @throws BPTreeException if {@code root} is not the root node
 */
public static Record delete(BPTreeNode root, Record rec)
{
    if ( logging() )
    {
        log.debug(format("** delete(%s) / start", rec)) ;
        if ( DumpTree )
            root.dump() ;
    }

    if ( ! root.isRoot() )
        throw new BPTreeException("Delete begins but this is not the root") ;

    final boolean emptyLeafRoot = root.isLeaf && root.count == 0 ;
    if ( emptyLeafRoot )
    {
        // Special case: the root has no keys and one pointer, to a records block.
        // That block is allowed to go below minimum size.
        BPTreePage only = root.get(0, WRITE) ;
        if ( CheckingNode && ! ( only instanceof BPTreeRecords ) )
            root.error("Zero size leaf root but not pointing a records block") ;
        Record removed = only.internalDelete(rec) ;
        only.release() ;
        return removed ;
    }

    // Entry: checkNodeDeep() ;
    final Record old = root.internalDelete(rec) ;

    // A branch root left with no keys has a single child: pull it up.
    if ( root.count == 0 && ! root.isLeaf )
    {
        root.reduceRoot() ;
        root.internalCheckNodeDeep() ;
    }

    if ( logging() )
    {
        log.debug(format("** delete(%s) / finish", rec)) ;
        if ( DumpTree )
            root.dump() ;
    }
    return old ;
}
/**
 * Returns the id of the records buffer page for this record.
 * The records buffer page itself is NOT read; the record may not exist.
 * With a null {@code fromRec}, the lowest pointer is followed at every level.
 */
static int recordsPageId(BPTreeNode node, Record fromRec)
{
    BPTreeNode current = node ;

    // Walk down the B+tree part of the structure ...
    while ( ! current.isLeaf() )
    {
        BPTreePage child ;
        if ( fromRec == null )
            child = current.get(0, READ) ;
        else
            child = current.findHere(fromRec) ;
        // Below a non-leaf node the pages are nodes, so the cast is safe.
        BPTreeNode next = (BPTreeNode)child ;
        // The root is never released here.
        if ( ! current.isRoot() )
            current.release() ;
        current = next ;
    }

    // ... then pick the pointer for the next step down without touching the records page.
    final int pageId ;
    if ( fromRec == null )
        pageId = current.getPtrBuffer().getLow() ;
    else
    {
        int slot = convert(current.findSlot(fromRec)) ;
        pageId = current.getPtrBuffer().get(slot) ;
    }

    if ( ! current.isRoot() )
        current.release() ;
    return pageId ;
}
/** Maximum record at or below this node: follow the top pointer (slot {@code count}). */
@Override
protected Record maxRecord()
{
    final BPTreePage top = get(count, READ) ;
    final Record highest = top.maxRecord() ;
    top.release() ;
    return highest ;
}
/** Minimum record at or below this node: follow the bottom pointer (slot 0). */
@Override
protected Record minRecord()
{
    final BPTreePage bottom = get(0, READ) ;
    final Record lowest = bottom.minRecord() ;
    bottom.release() ;
    return lowest ;
}
// @Override
// protected BPTreeRecords findPage(Record rec)
// {
// if ( CheckingNode ) internalCheckNode() ;
//
// BPTreePage page = findHere(rec) ;
// if ( page == null )
// return null ;
// BPTreeRecords bpr = page.findPage(rec) ;
// page.release() ;
// return bpr ;
// }
//
// // Find first page.
// @Override
// BPTreeRecords findFirstPage()
// {
// BPTreePage page = get(0, READ) ;
// BPTreeRecords records = page.findFirstPage() ;
// page.release() ;
// // Err - records is released!
// return records ;
// }
/** The low record of this node's own record buffer (children are not consulted). */
@Override
final Record getLowRecord()
{
    return this.records.getLow() ;
}
/** The high record of this node's own record buffer (children are not consulted). */
@Override
final Record getHighRecord()
{
    return this.records.getHigh() ;
}
/**
 * Returns the order from the tree parameters.
 * NOTE(review): the declaration of {@code count} says it is the number of records
 * (pointers = count+1) - confirm how this size relates to {@code count}.
 */
@Override
final int getMaxSize()
{
    return this.params.getOrder() ;
}
/** Current value of {@code count} for this node. */
@Override
final int getCount()
{
    return this.count ;
}
/** Set {@code count}; the buffers are not touched. */
@Override
final void setCount(int count)
{
    this.count = count ;
}
/** The block backing this node. */
@Override
public Block getBackingBlock()
{
    return this.block ;
}
/** Direct access to the internal record buffer. Do not use without great care. */
RecordBuffer getRecordBuffer()
{
    return this.records ;
}
/** Direct access to the internal pointer buffer. Do not use without great care. */
PtrBuffer getPtrBuffer()
{
    return this.ptrs ;
}
/** Mark this node as a leaf (pointers refer to records pages) or a branch node. */
void setIsLeaf(boolean isLeaf)
{
    this.isLeaf = isLeaf ;
}
/** True if this node is at the lowest level of the tree part (its pointers refer to records pages). */
boolean isLeaf()
{
    return isLeaf ;
}
/** The id of this node (taken from its block at construction). */
@Override
public final int getId()
{
    return this.id ;
}
/** Pass this node to the node manager's {@code write}. */
@Override
final void write()
{
    bpTree.getNodeManager().write(this) ;
}
/** Pass this node to the node manager's {@code promote}; called before modifying the node. */
@Override
final void promote()
{
    bpTree.getNodeManager().promote(this) ;
}
/** Pass this node to the node manager's {@code release}. */
@Override
final void release()
{
    bpTree.getNodeManager().release(this) ;
}
/** Pass this node to the node manager's {@code free}. */
@Override
final void free()
{
    bpTree.getNodeManager().free(this) ;
}
// ============ SEARCH
/*
* Do a (binary) search of the node to find the record.
* Returns:
* +ve or 0 => the index of the record
* -ve => The insertion point : the immediate higher record or length as (-i-1)
* Convert to +ve and descend to find the RecordBuffer with the record in it.
*/
/** Search one level down: pick the child page for {@code rec}, search it, release it. */
@Override
final Record internalSearch(Record rec)
{
    if ( CheckingNode )
        internalCheckNode() ;
    final BPTreePage child = findHere(rec) ;
    final Record found = child.internalSearch(rec) ;
    child.release() ;
    return found ;
}
/**
 * Find the next page to look at as we walk down the tree.
 * The slot search yields an index or an encoded insertion point (-i-1);
 * either way the converted value is the pointer slot to descend through,
 * because a key is the highest element of the records beneath the pointer
 * at the same slot. The page is fetched for READ.
 */
private final BPTreePage findHere(Record rec)
{
    final int slot = convert(findSlot(rec)) ;
    return get(slot, READ) ;
}
// ============ INSERT
/* Traverse this page, ensuring the node below is not full before
* descending. Therefore there is always space to do the actual insert.
*/
/**
 * Insert below this node. A full child is split before descending into it,
 * so the insert lower down always has space.
 *
 * @return whatever the child's {@code internalInsert} returns
 */
@Override
final Record internalInsert(Record record)
{
    if ( logging() )
        log.debug(format("internalInsert: %s [%s]", record, this)) ;

    internalCheckNode() ;

    final int found = findSlot(record) ;
    if ( logging() )
        log.debug(format("internalInsert: idx=%d=>%d", found, convert(found))) ;
    int slot = convert(found) ;

    BPTreePage child = get(slot, READ) ;
    if ( logging() )
        log.debug(format("internalInsert: next: %s",child));

    if ( child.isFull() )
    {
        // Split first; the split pulls a key up into this node at 'slot'.
        split(slot, child) ;

        // If the record sorts above the pulled-up key it belongs in the new upper page.
        if ( Record.keyGT(record, records.get(slot)) )
        {
            child.release() ;
            slot = slot+1 ;
            child = get(slot, READ) ;
        }
        internalCheckNode() ;
    }

    final Record result = child.internalInsert(record) ;
    child.release() ;
    return result ;
}
/** Turn a find result into a slot: non-negative values pass through, negative ones are decoded. */
private static int convert(int idx)
{
    return ( idx < 0 ) ? decodeIndex(idx) : idx ;
}
// **** Old documentation
/* Split a non-root node y, held at slot idx.
* Do this by splitting the node in two (call to BPTree.split)
* and inserting the new key/pointer pair.
* WRITE(y)
* WRITE(z)
* WRITE(this)
*/
/**
 * Split the full child page {@code y}, held at pointer slot {@code idx} of this node.
 * The child is split into y (lower) and a new page z (upper); the split key
 * (key only) is inserted into this node at {@code idx} and the pointer to z at
 * {@code idx+1}. y, z and this node are written; z is released here, while
 * releasing y is left to the caller.
 *
 * @param idx pointer slot of {@code y} in this node
 * @param y   the full child page to split
 */
private void split(int idx, BPTreePage y)
{
    boolean logging = logging() ;
    if ( logging )
    {
        log.debug(format("split >> y.id=%d this.id=%d idx=%d", y.getId(), this.id, idx)) ;
        log.debug("split -- "+y) ;
    }
    internalCheckNode() ;
    if ( CheckingNode )
    {
        if ( ! y.isFull() ) error("Node is not full") ;
        if ( this.ptrs.get(idx) != y.getId() )
        {
            int a = this.ptrs.get(idx) ;
            int b = y.getId();
            error("Node to be split isn't in right place [%d/%d]", a, b) ;
        }
    }
    internalCheckNodeDeep() ;

    // Both this node and the child are about to be modified.
    promote() ;
    y.promote() ;

    // Take the split key before splitting: the split clears that slot in y.
    Record splitKey = y.getSplitKey() ;
    splitKey = keyRecord(splitKey) ;
    if ( logging )
        log.debug(format("Split key: %s", splitKey)) ;

    BPTreePage z = y.split();
    if ( logging )
    {
        log.debug(format("Split: %s", y)) ;
        log.debug(format("Split: %s", z)) ;
    }

    // Key only.
    if ( splitKey.hasSeparateValue() )
    {
        // [Issue: FREC]
        // This creates a empty (null-byte-initialized) value array.
        splitKey = params.getKeyFactory().create(splitKey.getKey()) ;
        // Better: but an on-disk change. This is key only.
        // splitKey = params.getKeyFactory().createKeyOnly(splitKey) ;
    }

    // Insert new key and pointer. "add" shuffles the higher slots up as well.
    records.add(idx, splitKey) ;
    ptrs.add(idx+1, z.getId()) ;
    count++ ;

    if ( logging )
    {
        log.debug("split << "+this) ;
        log.debug("split << "+y) ;
        log.debug("split << "+z) ;
    }

    y.write();
    z.write();
    z.release() ;
    // y.release() ; y release management done by caller.
    this.write();

    if ( CheckingTree )
    {
        // Fetch once: maxRecord() may read pages.
        Record maxBelow = y.maxRecord() ;
        if ( Record.keyNE(splitKey, maxBelow) )
            // Both arguments are Records, so %s (not %d) is the valid conversion.
            error("Split key %s but max subtree %s", splitKey, maxBelow) ;
        internalCheckNodeDeep() ;
    }
}
/** The record at {@code params.SplitIndex}: the key that moves to the parent on a split. */
@Override
final Record getSplitKey()
{
    return records.get(params.SplitIndex) ;
}
/**
 * Split this block in two around the record at params.SplitIndex.
 * This node keeps the low end; the records and pointers above the split index
 * are copied into a newly created node, which is returned.
 * The median record itself is cleared from this node and is not copied:
 * the caller must have taken it beforehand (see getSplitKey()).
 * Nothing is written and the parent is not updated here.
 *
 * @return the new upper node
 */
@Override final
BPTreePage split()
{
// Median record : will go in parent.
int ix = params.SplitIndex ;
// New block.
BPTreeNode z = create(this.parent, isLeaf) ;
// Leave the low end untouched and copy, and clear the high end.
// z becomes the new upper node, not the lower node.
// 'this' is the lower block.
int maxRec = maxRecords() ;
// Copy from top of y into z.
records.copy(ix+1, z.records, 0, maxRec-(ix+1)) ;
records.clear(ix, maxRec-ix) ; // Clear copied and median slot
records.setSize(ix) ; // Reset size
// Pointers: slots 0..ix stay here, the rest move to z.
ptrs.copy(ix+1, z.ptrs, 0, params.MaxPtr-(ix+1)) ;
ptrs.clear(ix+1, params.MaxPtr-(ix+1)) ;
ptrs.setSize(ix+1) ;
// Set sizes of subnodes
setCount(ix) ; // Median is ix
internalCheckNode() ; // y finished
// create(...) above already set the leaf flag; this repeats it.
z.isLeaf = isLeaf ;
z.setCount(maxRec - (ix+1)) ; // Number copied into z
// Caller puts the blocks in split(int, BTreePage)
z.internalCheckNode() ;
return z ;
}
/* Split the root and leave the root block as the root.
* This is the only point the height of the tree increases.
*
* Allocate new blocks.
* Copy root low into left
* Copy root high into right
* Set counts.
* Create new root settings (two pointers, one key record)
* WRITE(left)
* WRITE(right)
* WRITE(root)
*/
// Splits a full root in place: the contents move into two new child nodes
// and the root block is reformatted as a non-leaf holding one key and two pointers.
private static void splitRoot(BPTreeNode root)
{
BPlusTree bpTree = root.bpTree ;
if ( CheckingNode )
if ( root.id != 0 ) root.error("Not root: %d (root is id zero)", root.id) ;
root.internalCheckNode() ;
root.promote() ;
// Median record
int splitIdx = root.params.SplitIndex ;
Record rec = root.records.get(splitIdx) ;
if ( logging() )
{
log.debug(format("** Split root %d (%s)", splitIdx, rec)) ;
log.debug("splitRoot >> "+root) ;
}
// New blocks.
// Both children take the root's id as parent and inherit its leaf flag.
BPTreeNode left = create(bpTree, root.id, root.isLeaf) ;
BPTreeNode right = create(bpTree, root.id, root.isLeaf) ;
//int maxRecords = maxRecords() ;
// New left
// Records [0, splitIdx) and pointers [0, splitIdx] go left.
root.records.copy(0, left.records, 0, splitIdx) ;
root.ptrs.copy(0, left.ptrs, 0, splitIdx+1) ;
left.count = splitIdx ;
// New right
// Everything above the median goes right; the median itself goes to neither child.
root.records.copy(splitIdx+1, right.records, 0, root.maxRecords()-(splitIdx+1)) ;
root.ptrs.copy(splitIdx+1, right.ptrs, 0, root.params.MaxPtr-(splitIdx+1)) ;
right.count = root.maxRecords()-(splitIdx+1) ;
if ( logging() )
{
log.debug("splitRoot -- left: "+left) ;
log.debug("splitRoot -- right: "+right) ;
}
// So left.count+right.count = bTree.NumRec-1
// Clear root by reformatting. New root not a leaf.
// NOTE(review): the original comment said the count is 1 after formatting, yet count is set explicitly below - confirm what formatForRoot leaves in count.
BPTreeNodeMgr.formatForRoot(root, false) ;
// Make a non-leaf.
// Insert two subnodes, divided by the median record
root.count = 1 ;
root.records.add(0, rec) ;
root.ptrs.setSize(2) ;
root.ptrs.set(0, left.id) ; // slot 0
root.ptrs.set(1, right.id) ; // slot 1
if ( logging())
{
log.debug("splitRoot << "+root) ;
log.debug("splitRoot << "+left) ;
log.debug("splitRoot << "+right) ;
}
left.write() ;
right.write() ;
left.release() ;
right.release() ;
root.write() ;
// NOTE(review): the node-level checks below touch left/right after they were released above - confirm that is safe with the node manager.
if ( CheckingTree )
root.checkNodeDeep() ;
else
if ( CheckingNode )
{
root.internalCheckNode() ;
left.internalCheckNode() ;
right.internalCheckNode() ;
}
}
// ============ DELETE
/* Delete
* Descend, making sure that the node is not minimum size at each descent.
* If it is, rebalance.
*/
/**
 * Delete below this node. A child of minimum size is rebalanced before descending,
 * so the delete lower down can proceed in one pass. If the record's key is also
 * a key in this node, that key is replaced afterwards by the key of the child's new
 * maximum record.
 *
 * @return whatever the child's internalDelete returns
 */
@Override final
Record internalDelete(Record rec)
{
internalCheckNode() ;
if ( logging() )
log.debug(format("internalDelete(%s) : %s", rec, this)) ;
// x >= 0 : rec's key is a key of this node (at index x); x < 0 : encoded insertion point.
int x = findSlot(rec) ;
// If x is >= 0, may need to adjust this
int y = convert(x) ;
BPTreePage page = get(y, READ) ;
// NOTE(review): assigned below but never read in this method.
boolean thisWriteNeeded = false ;
if ( page.isMinSize() ) // Can't be root - we decended in the get().
{
promote() ;
// rebalance returns the page to descend into (after a merge this may be the left sibling).
page = rebalance(page, y) ;
thisWriteNeeded = true ;
// May have moved/removed at x. Find again. YUK.
x = findSlot(rec) ;
if ( CheckingNode )
{
internalCheckNode() ;
page.checkNode() ;
}
this.write() ;
}
// Go to bottom
// Need to return the new key.
Record r2 = page.internalDelete(rec) ;
if ( x >= 0 )
{
// The deleted record's key was the key held here: replace it with the child's new maximum.
promote() ;
// YUK
records.set(x, keyRecord(page.maxRecord())) ;
this.write() ;
}
page.release() ;
return r2 ;
}
/* Reduce the root when it has only one pointer and no records.
* Keep the root as id 0 so this is just a copy-up of the one child node.
* WRITE(root)
* RELEASE(old child)
* This is the only point the height of the tree decreases.
*/
// Pulls the single remaining child of an empty (count == 0) non-leaf root up into the root block,
// then frees the child. A leaf root is left as it is.
private void reduceRoot()
{
if ( logging() )
log.debug(format("reduceRoot >> %s", this)) ;
if ( CheckingNode && ( ! isRoot() || count != 0 ) ) error("Not an empty root") ;
if ( isLeaf )
{
if ( logging() )
log.debug(format("reduceRoot << leaf root")) ;
// Now empty leaf root.
return ;
}
// The only child is at pointer slot 0; not a leaf here, so it is a node.
BPTreePage sub = get(0, WRITE) ;
BPTreeNode n = cast(sub) ;
// Can pull up into the root.
// Leave root node in same block (rather than swap to new root).
BPTreeNodeMgr.formatForRoot(this, n.isLeaf) ;
// Copy the child's keys and pointers into the freshly formatted root.
n.records.copy(0, this.records, 0, n.count) ;
n.ptrs.copy(0, ptrs, 0, n.count+1) ;
isLeaf = n.isLeaf ;
count = n.count ;
this.write();
// Free up.
n.free() ;
internalCheckNodeDeep() ;
if ( logging() )
log.debug(format("reduceRoot << %s", this)) ;
}
/* Rebalance node n at slot idx in parent (this)
* The node will then be greater than the minimum size
* and one-pass delete is then possible.
*
* try to shift right, from the left sibling (if exists)
* WRITE(left)
* WRITE(n)
* WRITE(this)
* try to shift left, from the right sibling (if exists)
* WRITE(right)
* WRITE(n)
* WRITE(this)
* else
* merge with left or right sibling
* Suboperations do all the write-back of nodes.
*/
// Returns the page the caller should continue with:
// 'node' itself after a shift, or the surviving (left-hand) page after a merge.
// Siblings fetched here are released here, except a page that merge(...) frees or returns.
private BPTreePage rebalance(final BPTreePage node, int idx)
{
if ( logging() )
{
log.debug(format("rebalance(id=%d, idx=%d)", node.getId(), idx)) ;
log.debug(format(">> this: %s", this)) ;
log.debug(format(">> node: %s", node)) ;
}
internalCheckNode() ;
promote() ;
node.promote() ;
// Left sibling exists unless node is at the lowest pointer slot.
BPTreePage left = null ;
if ( idx > 0 )
// [TxTDB:PATCH-UP]
// release on left
left = get(idx-1, WRITE) ;
// *** SHIFTING : need to change the marker record in the parent.
// *** getHighRecord of lower block.
if ( left != null && ! left.isMinSize() )
{
if ( logging() )
log.debug("rebalance/shiftRight") ;
// Move elements around.
// Has not done "this.put()" yet.
// The dividing key between left and node is at record slot idx-1.
shiftRight(left, node, idx-1) ;
if ( logging() )
log.debug("<< rebalance: "+this) ;
if ( CheckingNode )
{
left.checkNode() ;
node.checkNode();
this.internalCheckNode() ;
}
left.release() ;
return node ;
}
// Right sibling exists unless node is at the top pointer slot (slot count).
BPTreePage right = null ;
if ( idx < count )
right = get(idx+1, WRITE) ;
if ( right != null && ! right.isMinSize() )
{
if ( logging() )
log.debug("rebalance/shiftLeft") ;
// The dividing key between node and right is at record slot idx.
shiftLeft(node, right, idx) ;
if ( logging() )
log.debug("<< rebalance: "+this) ;
if ( CheckingNode )
{
right.checkNode();
node.checkNode();
this.internalCheckNode() ;
}
if ( left != null ) left.release() ;
right.release() ;
return node ;
}
// Couldn't shift. Collapse two pages.
if ( CheckingNode && left == null && right == null) error("No siblings") ;
if ( left != null )
{
if ( logging() )
log.debug(format("rebalance/merge/left: left=%d n=%d [%d]", left.getId(), node.getId(), idx-1)) ;
if ( CheckingNode && left.getId() == node.getId() )
error("Left and n the same: %s", left) ;
// node is folded into left; merge(...) frees node and returns left.
BPTreePage page = merge(left, node, idx-1) ;
if ( right != null )
// HACK : We didn't use it.
right.release() ;
return page ;
}
else
{
// left == null
// right != null
if ( logging() )
log.debug(format("rebalance/merge/right: n=%d right=%d [%d]", node.getId(), right.getId(), idx)) ;
if ( CheckingNode && right.getId() == node.getId() )
error("N and right the same: %s",right ) ;
// right is folded into node; merge(...) frees right and returns node.
BPTreePage page = merge(node, right, idx) ;
return page ;
}
}
/**
 * Merge two adjacent children of this node: the contents of right are folded into left
 * using the dividing key at record slot dividingSlot of this node; left is written,
 * right is freed, and the dividing key and the pointer just above it are removed
 * from this node, which is then written.
 *
 * @return left (a page-level merge that returns right is reported as an error)
 */
private BPTreePage merge(BPTreePage left, BPTreePage right, int dividingSlot)
{
if ( logging() )
{
log.debug(format(">> merge(@%d): %s", dividingSlot, this)) ;
log.debug(">> left: "+left) ;
log.debug(">> right: "+right) ;
}
// /==\ + key + /==\ ==> /====\
Record splitKey = records.get(dividingSlot) ;
BPTreePage page = left.merge(right, splitKey) ;
// Must release right (not done in merge)
if ( logging() )
log.debug("-- merge: "+page) ;
left.write();
right.free() ;
if ( page == right )
error("Returned page is not the left") ;
// Depending on whether there is a gap or not.
if ( CheckingNode )
{
if ( isLeaf )
{
// If two data blocks, then the split key is not included (it's already there, with its value)
// Size is N+N and max could be odd so N+N and N+N+1 are possible.
if ( left.getCount()+1 != left.getMaxSize() && left.getCount() != left.getMaxSize() )
error("Inconsistent data node size: %d/%d", left.getCount(), left.getMaxSize()) ;
}
else if ( ! left.isFull() )
{
// If not two data blocks, the left side should now be full (N+N+split)
error("Inconsistent node size: %d/%d", left.getCount(), left.getMaxSize()) ;
}
}
// Remove from parent (which is "this")
shuffleDown(dividingSlot) ;
this.write();
internalCheckNodeDeep() ;
if ( logging() )
{
log.debug("<< merge: "+this) ;
log.debug("<< left: "+left) ;
}
return left ;
// A commented-out alternative branch for "page == right" used to follow here;
// this method always returns left and reports page == right as an error above.
}
/** Merge the node {@code right} into this node, with {@code splitKey} placed between the two. */
@Override
BPTreePage merge(BPTreePage right, Record splitKey)
{
    final BPTreeNode upper = cast(right) ;
    return merge(this, splitKey, upper) ;
}
/**
 * Merge two nodes: append {@code splitKey} and then all of {@code right}'s records
 * and pointers to {@code left}, and clear {@code right}'s buffers.
 * The parent is not adjusted and {@code right} is not released here - the caller
 * does that (needed for testing code).
 *
 * @return {@code left}
 */
private static BPTreeNode merge(BPTreeNode left, Record splitKey, BPTreeNode right)
{
    // The dividing key goes on top of left's records ...
    left.records.add(splitKey) ;

    // ... followed by everything in right.
    right.records.copyToTop(left.records) ;
    right.ptrs.copyToTop(left.ptrs) ;

    // left's records + right's records + the dividing key.
    left.count = left.count + 1 + right.count ;
    left.internalCheckNode() ;

    right.records.clear();
    right.ptrs.clear();
    return left ;
}
/**
 * Move one slot from child {@code left} to child {@code right}, replacing the
 * dividing key at record slot {@code i} of this node. Both children are written;
 * writing this node is left to the caller.
 */
private void shiftRight(BPTreePage left, BPTreePage right, int i)
{
    if ( logging() )
    {
        log.debug(">> shiftRight: this: "+this) ;
        log.debug(">> shiftRight: left: "+left) ;
        log.debug(">> shiftRight: right: "+right) ;
    }

    final Record oldDivider = records.get(i) ;
    // The page-level shift hands back the record that becomes the new dividing key.
    final Record newDivider = keyRecord(left.shiftRight(right, oldDivider)) ;
    this.records.set(i, newDivider) ;

    left.write() ;
    right.write() ;
    // Do later -- this.put();

    if ( logging() )
    {
        log.debug("<< shiftRight: this: "+this) ;
        log.debug("<< shiftRight: left: "+left) ;
        log.debug("<< shiftRight: right: "+right) ;
    }
}
/**
 * Move one slot from child {@code right} to child {@code left}, replacing the
 * dividing key at record slot {@code i} of this node. Both children are written;
 * writing this node is left to the caller.
 */
private void shiftLeft(BPTreePage left, BPTreePage right, int i)
{
    if ( logging() )
    {
        log.debug(">> shiftLeft: this: "+this) ;
        log.debug(">> shiftLeft: left: "+left) ;
        log.debug(">> shiftLeft: right: "+right) ;
    }

    final Record oldDivider = records.get(i) ;
    // The page-level shift hands back the record that becomes the new dividing key.
    final Record newDivider = keyRecord(left.shiftLeft(right, oldDivider)) ;
    this.records.set(i, newDivider) ;

    left.write() ;
    right.write() ;
    // Do this later - this.put();

    if ( logging() )
    {
        log.debug("<< shiftLeft: this: "+this) ;
        log.debug("<< shiftLeft: left: "+left) ;
        log.debug("<< shiftLeft: right: "+right) ;
    }
}
/**
 * Shift the top slot of this node into the bottom of {@code other} (the right sibling).
 * {@code splitKey}, the current dividing key in the parent, becomes the lowest record
 * of {@code other}.
 *
 * @return this node's former high record, which is to replace the dividing key in the parent
 */
@Override
Record shiftRight(BPTreePage other, Record splitKey)
{
    final BPTreeNode dest = cast(other) ;
    if ( CheckingNode )
    {
        if (count == 0 )
            error("Node is empty - can't shift a slot out") ;
        if ( dest.isFull() )
            error("Destination node is full") ;
    }

    // Records: the top record here is promoted; the old divider drops into dest.
    final Record promoted = this.records.getHigh() ;
    this.records.removeTop() ;
    dest.records.add(0, splitKey) ;

    // Pointers just shift
    this.ptrs.shiftRight(dest.ptrs) ;

    this.count-- ;
    dest.count++ ;
    this.internalCheckNode() ;
    dest.internalCheckNode() ;
    return promoted ;
}
/**
 * Shift the bottom slot of {@code other} (the right sibling) onto the top of this node.
 * {@code splitKey}, the current dividing key in the parent, becomes the highest record
 * of this node.
 *
 * @return {@code other}'s former low record, which is to replace the dividing key in the parent
 */
@Override
Record shiftLeft(BPTreePage other, Record splitKey)
{
    final BPTreeNode source = cast(other) ;
    if ( CheckingNode )
    {
        // NOTE(review): the emptiness check tests this node (the destination), while the
        // slot is taken from 'other' - confirm whether other's count was intended.
        if ( count == 0 )
            error("Node is empty - can't shift a slot out") ;
        if ( isFull() )
            error("Destination node is full") ;
    }

    final Record promoted = source.records.getLow() ;

    // Records: the old divider lands on top here; source loses its lowest record.
    this.records.add(splitKey) ;
    source.records.shiftDown(0) ;

    // Pointers just shift
    this.ptrs.shiftLeft(source.ptrs) ;

    this.count++ ;
    source.count-- ;
    return promoted ;
}
/** Remove the key at record slot {@code x} together with the pointer just above it (slot x+1). */
private void shuffleDown(int x)
{
    // x is the index in the parent and may be one over the end.
    if ( logging() )
    {
        log.debug(format("ShuffleDown: i=%d count=%d MaxRec=%d", x, count, maxRecords())) ;
        log.debug("shuffleDown >> "+this) ;
    }

    if ( CheckingNode && x >= count )
        error("shuffleDown out of bounds") ;

    final boolean isTopSlot = ( x == count-1 ) ;
    if ( isTopSlot )
    {
        // Only the top key and top pointer go - nothing to move down.
        records.removeTop() ;
        ptrs.removeTop() ;
        count-- ;
        if ( logging() )
        {
            log.debug("shuffleDown << Clear top") ;
            log.debug("shuffleDown << "+this) ;
        }
    }
    else
    {
        // Close the gap: everything above moves down one slot.
        records.shiftDown(x) ;
        ptrs.shiftDown(x+1) ;
        count-- ;
        if ( logging() )
            log.debug("shuffleDown << "+this) ;
    }
    internalCheckNode() ;
}
// ---- Utilities
/**
 * View {@code other} as a BPTreeNode. Null passes through as null; any other
 * page type is reported through error(...), with null returned should that return.
 */
private final BPTreeNode cast(BPTreePage other)
{
    if ( other == null || other instanceof BPTreeNode )
        return (BPTreeNode)other ;
    error("Wrong type: "+other) ;
    return null ;
}
/** Look {@code rec} up in this node's records; returns the record buffer's find result unchanged. */
final int findSlot(Record rec)
{
    return records.find(rec) ;
}
/** True if this node has the root id; the tree does not currently remember a root node object. */
final boolean isRoot()
{
    final int rootId = BPlusTreeParams.RootId ;
    return rootId == this.id ;
}
/** Make a key-only record from {@code record} using the tree's record factory. */
private Record keyRecord(Record record)
{
    final Record keyOnly = bpTree.getRecordFactory().createKeyOnly(record) ;
    return keyOnly ;
}
// Fixup/remove?
/** Maximum number of records in a node, from the tree parameters. */
private final int maxRecords()
{
    return this.params.MaxRec ;
}
/** True when the record count has reached the maximum; an over-full node is reported when checking. */
@Override
final boolean isFull()
{
    final int max = maxRecords() ;
    if ( CheckingNode && count > max )
        error("isFull: Moby block: %s", this) ;
    // Count is of records.
    return ! ( count < max ) ;
}
/**
 * Return true if there are any keys here or below this node.
 * A node with records has keys. A non-root node without records has none.
 * The root can be of size zero and point to a single data block; in that case
 * the answer comes from that block.
 */
@Override
final boolean hasAnyKeys()
{
    if ( this.count > 0 )
        return true ;
    if ( ! isRoot() )
        return false ;
    // Zero-size root: its one pointer is in slot 0.
    // get(idx, state) takes a pointer slot index, not a page id, so ask for slot 0
    // (the same way delete() reaches this page).
    BPTreePage page = get(0, READ) ;
    boolean b = page.hasAnyKeys() ;
    page.release() ;
    return b ;
}
/** True when the record count is at (or below) the minimum; an under-size node is reported when checking. */
@Override
final boolean isMinSize()
{
    final int floor = params.getMinRec() ;
    if ( CheckingNode && count < floor )
        error("isMinSize: Dwarf block: %s", this) ;
    return ! ( count > floor ) ;
}
// ========== Other
/**
 * One-line rendering: kind, id, parent label, size, then every pointer slot
 * interleaved with every record slot (unused slots included).
 */
@Override
public String toString()
{
    // Parent label: the parent id, "root" for the root marker, "??" otherwise.
    String label ;
    if ( parent >= 0 )
        label = Integer.toString(parent) ;
    else if ( parent == BPlusTreeParams.RootParent )
        label = "root" ;
    else
        label = "??" ;
    if ( isLeaf )
        label = label+"/leaf" ;

    StringBuilder out = new StringBuilder() ;
    out.append(isLeaf ? "LEAF: " : "NODE: ") ;
    out.append(String.format("%d [%s] (size %d) -- ", id, label, count)) ;

    final int slots = maxRecords() ;
    for ( int slot = 0 ; slot < slots ; slot++ )
        out.append(childStr(slot)).append(" (").append(recstr(records, slot)).append(") ") ;

    // The final pointer has no record after it.
    out.append(childStr(params.HighPtr)) ;
    return out.toString() ;
}
/** Printable form of record slot {@code idx}: "----" for a clear slot, else the record's string. */
private final String recstr(RecordBuffer records, int idx)
{
    return records.isClear(idx) ? "----" : records._get(idx).toString() ;
}
/** Dump this node and everything below it to {@code IndentedWriter.stdout}. */
public void dump()
{
    final IndentedWriter stdout = IndentedWriter.stdout ;
    dump(stdout) ;
}
/** Write this node and everything below it to {@code out}, finish the line, and flush. */
public void dump(IndentedWriter out)
{
    this.output(out) ;
    out.ensureStartOfLine() ;
    out.flush() ;
}
/** Same output as {@link #output(IndentedWriter)}, captured in a string. */
public String dumpToString()
{
    final IndentedLineBuffer buffer = new IndentedLineBuffer() ;
    this.output(buffer) ;
    return buffer.asString() ;
}
/** Print this node, then each of its count+1 child pages one indent level deeper. */
@Override
public void output(IndentedWriter out)
{
    out.print(toString()) ;
    out.incIndent() ;
    int slot = 0 ;
    while ( slot < count+1 )
    {
        out.println();
        BPTreePage child = get(slot, READ) ;
        child.output(out) ;
        child.release() ;
        slot++ ;
    }
    out.decIndent() ;
}
// private void indent(PrintStream out, int x)
// {
// for ( int i = 0 ; i < x ; i++ )
// out.print(" ") ;
// }
/** Printable form of pointer slot {@code i}: "*" beyond the used pointers, else the pointer value. */
private String childStr(int i)
{
    return ( i < ptrs.size() ) ? Integer.toString(ptrs.get(i)) : "*" ;
}
// =========== Checking
// internal checks - only if checking
// Check node does not assume a valid tree - may be in mid-operation.
/** Single-node check with no key bounds; a no-op unless CheckingNode is set. */
private final void internalCheckNode()
{
    if ( ! CheckingNode )
        return ;
    checkNode(null, null) ;
}
/** Deep check from this node; a no-op unless CheckingTree is set. */
private final void internalCheckNodeDeep()
{
    if ( CheckingTree )
        checkNodeDeep() ;
}
/** Check this node alone, with no lower or upper key bound. Always runs (no flag test). */
@Override
final void checkNode()
{
    final Record unbounded = null ;
    checkNode(unbounded, unbounded) ;
}
/**
 * Check this node and the nodes below it, with no key bounds.
 * For the root, the parent field must be the root-parent marker.
 */
@Override
final void checkNodeDeep()
{
    if ( isRoot() && parent != BPlusTreeParams.RootParent )
        error("Root parent is wrong") ;
    checkNodeDeep(null, null) ;
}
// Checks of a single node - no looking at children
// (except that, with CheckingTree on a leaf node, the records pages it points to are read).
// min - inclusive; max - inclusive (allows for duplicates)
// Either bound may be null, meaning unbounded on that side.
// Problems are reported through error(...).
final private void checkNode(Record min, Record max)
{
// Size consistency between count and the two buffers.
if ( count != records.size() )
error("Inconsistent: id=%d, count=%d, records.size()=%d : %s", id, count, records.size(), this) ;
if ( ! isLeaf && count+1 != ptrs.size() )
error("Inconsistent: id=%d, count+1=%d, ptrs.size()=%d ; %s", id, count+1, ptrs.size(), this) ;
// No BPT remembered root node currently
//if ( bpTree.root != null && !isRoot() && count < params.MinRec)
if ( !isRoot() && count < params.MinRec)
{
//warning("Runt node: %s", this) ;
error("Runt node: %s", this) ;
}
if ( !isRoot() && count > maxRecords() ) error("Over full node: %s", this) ;
if ( ! isLeaf && parent == id ) error("Parent same as id: %s", this) ;
// k tracks the previous key seen, starting from the lower bound.
Record k = min ;
// Test records in the allocated area
for ( int i = 0 ; i < count ; i++ )
{
if ( records.get(i) == null ) error("Node: %d : Invalid record @%d :: %s",id, i, this) ;
if ( k != null && keyGT(k, records.get(i)) )
{
Record r = records.get(i) ;
//keyGT(k, r) ;
error("Node: %d: Not sorted (%d) (%s, %s) :: %s ", id, i, k, r, this) ;
}
k = records.get(i) ;
}
// k is now the highest key in the node (or min if the node has no records).
if ( k != null && max != null && keyGT(k,max) )
error("Node: %d - Record is too high (max=%s):: %s", id, max, this) ;
if ( SystemTDB.NullOut )
{
// Test records in the free area
for ( int i = count ; i < maxRecords() ; i++ )
{
if ( ! records.isClear(i) )
error("Node: %d - not clear (idx=%d) :: %s", id, i, this) ;
}
}
// Pointer checks.
int i = 0 ;
// Check not empty at bottom.
for ( ; i < count+1 ; i++ )
{
if ( ptrs.get(i) < 0 )
error("Node: %d: Invalid child pointer @%d :: %s", id, i , this) ;
// This does BlockIO so disturbs tracking.
if ( CheckingTree && isLeaf )
{
// Note: the locals 'records' and 'id' below shadow the fields of the same names
// until the end of this if-block.
int ptr = ptrs.get(i) ;
BPTreeRecords records = bpTree.getRecordsMgr().getRead(ptr) ;
int id = records.getId() ;
if ( id != ptrs.get(i) )
error("Records: Block @%d has a different id: %d :: %s", id, i, this) ;
int link = records.getLink() ;
// Don't check if +1 does not exist.
if ( i != count )
{
// NOTE(review): 'page' is read via ptrs.get(i), the same pointer as 'records' above,
// so link and id2 come from the same block and this comparison looks always-equal.
// Possibly the intent was to compare the link with the next pointer, ptrs.get(i+1) - confirm.
BPTreeRecords page = bpTree.getRecordsMgr().getRead(ptrs.get(i)) ;
int id2 = page.getLink() ;
if ( link != id2 )
error("Records: Link not to next block @%d/@%d has a different id: %d :: %s", id, id2, i, records) ;
bpTree.getRecordsMgr().release(page) ;
}
records.release() ;
}
}
// Check empty is empty
if ( SystemTDB.NullOut )
{
// i continues from count+1: the remaining pointer slots must be clear.
int x = params.MaxPtr ;
for ( ; i < x ; i ++ )
{
if ( ! ptrs.isClear(i) )
error("Node: %d: Unexpected pointer @%d :: %s", id, i, this) ;
}
}
}
/**
 * Checks this node against the given bounds and then walks its children.
 * <p>
 * For every child except the last, the record at the same index in this
 * node is compared with the child's high, minimum and maximum records and
 * with {@code min}/{@code max}. A child that is not a {@code BPTreeNode}
 * is checked through its own no-argument {@code checkNodeDeep()}; a child
 * that is a {@code BPTreeNode} is checked recursively with bounds taken
 * from the neighbouring records of this node. Each child page is released
 * after it has been checked. Failures are reported via {@code error}.
 *
 * @param min lower bound handed to the single-node check, or {@code null}
 * @param max upper bound handed to the single-node check, or {@code null}
 */
private void checkNodeDeep(Record min, Record max)
{
    checkNode(min, max) ;

    // A node holding no records has none of its pointers followed.
    final int numChildren = (count == 0) ? 0 : count+1 ;

    for ( int idx = 0 ; idx < numChildren ; idx++ )
    {
        final boolean lastChild = ( idx == count ) ;
        BPTreePage child = get(idx, READ) ;

        // The last child has no record at its index in this node to compare with.
        if ( ! lastChild )
        {
            Record keySubTree = child.getHighRecord() ;     // high key in immediate child
            Record keyHere = records.get(idx) ;             // key in this

            if ( keySubTree == null )
                error("Node: %d: Can't get high record from %d", id, child.getId()) ;
            if ( keySubTree.getKey() == null )
                error("Node: %d: Can't get high record is missing it's key from %d", id, child.getId()) ;
            if ( keyHere == null )
                error("Node: %d: record is null", id) ;
            if ( keyHere.getKey() == null )
                error("Node: %d: Record key is null", id) ;

            if ( keyGT(keySubTree, keyHere) )
                error("Node: %d: Child key %s is greater than this key %s", id, keySubTree, keyHere) ;

            Record keyMax = child.maxRecord() ;             // max key in subTree
            Record keyMin = child.minRecord() ;

            if ( keyNE(keyHere, keyMax) )
                error("Node: %d: Key %s is not the max [%s] of the sub-tree idx=%d", id, keyHere, keyMax, idx) ;

            if ( min != null && keyGT(min, keyMin) )
                error("Node: %d: Minimun for this node should be %s but it's %s", id, min, keyMin) ;
            if ( max != null && keyLT(max, keyMax) )
                error("Node: %d: Maximum for this node should be %s but it's %s", id, max, keyMax) ;
            if ( min != null && keyGT(min, keyHere) )
                error("Node: %d: Key too small: %s - min should be %s", id, keyHere, min) ;
            // keyHere == keyMax ??
            if ( max != null && keyLT(max, keyHere) )
                error("Node: %d: Key too large: %s - max should be %s", id, keyHere, max) ;
        }

        // Look deeper.
        if ( child instanceof BPTreeNode )
        {
            // Valid pointer?
            if ( isLeaf )
            {
                if ( ! bpTree.getRecordsMgr().getBlockMgr().valid(ptrs.get(idx)) )
                    error("Node: %d: Dangling ptr (records) in block @%d :: %s", id, idx, this) ;
            }
            else
            {
                if ( ! bpTree.getNodeManager().valid(ptrs.get(idx)) )
                    error("Node: %d: Dangling ptr in block @%d :: %s", id, idx, this) ;
            }

            // Bounds for the child: the first child inherits this node's min,
            // the last child gets no upper bound, and the rest sit between
            // the two neighbouring records of this node.
            Record childMin = ( idx == 0 ) ? min : records.get(idx-1) ;
            Record childMax = lastChild ? null : records.get(idx) ;

            ((BPTreeNode)child).checkNodeDeep(childMin, childMax) ;
        }
        else
        {
            // Records.
            child.checkNodeDeep() ;
        }
        child.release() ;
    }
}
/**
 * Asks {@code BPlusTreeParams.logging} about the {@code log} in scope here.
 *
 * @return whatever {@code BPlusTreeParams.logging(log)} returns
 */
private static boolean logging()
{
    final boolean enabled = BPlusTreeParams.logging(log) ;
    return enabled ;
}
/**
 * Formats a message and prints it on {@code System.out}, prefixed with
 * {@code "Warning: "}. Unlike {@code error}, nothing is thrown.
 *
 * @param msg  format string, as for {@code String.format}
 * @param args arguments for the format string
 */
private void warning(String msg, Object... args)
{
    final String text = format(msg, args) ;
    System.out.println("Warning: "+text) ;
    System.out.flush() ;
}
/**
 * Reports a failed check: formats the message, prints it on
 * {@code System.out} after a blank line, attempts a dump of the node and
 * records blocks, and then throws.
 *
 * @param msg  format string, as for {@code String.format}
 * @param args arguments for the format string
 * @throws BPTreeException always, carrying the formatted message
 */
private void error(String msg, Object... args)
{
    msg = format(msg, args) ;
    System.out.println() ;
    System.out.println(msg) ;
    System.out.flush();
    // The dump is best-effort diagnostics only: a failure while dumping must
    // not replace the error being reported, so it is noted and otherwise ignored.
    try { dumpBlocks() ; }
    catch (Exception ex)
    {
        System.out.println("Warning: block dump failed: "+ex) ;
        System.out.flush() ;
    }
    throw new BPTreeException(msg) ;
}
/**
 * Prints section headers on {@code System.out} and, between them, invokes
 * {@code dump()} on the tree's node manager and then on its records manager.
 */
private void dumpBlocks()
{
    final java.io.PrintStream out = System.out ;

    out.println("---Nodes") ;
    bpTree.getNodeManager().dump() ;

    out.println("---Records") ;
    bpTree.getRecordsMgr().dump() ;

    out.println("---") ;
    out.flush() ;
}
}