All downloads are free. The search and download functions use the official Maven repository.

org.apache.jena.tdb.index.bplustree.BPTreeNode Maven / Gradle / Ivy

Go to download

TDB is a storage subsystem for Jena and ARQ; it is a native triple store providing persistent storage of triples/quads.

There is a newer version: 4.10.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdb.index.bplustree;

import static java.lang.String.format ;
import static org.apache.jena.atlas.lib.Alg.decodeIndex ;
import static org.apache.jena.tdb.base.record.Record.keyGT ;
import static org.apache.jena.tdb.base.record.Record.keyLT ;
import static org.apache.jena.tdb.base.record.Record.keyNE ;
import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.CheckingNode ;
import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.CheckingTree ;
import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.DumpTree ;
import org.apache.jena.atlas.io.IndentedLineBuffer ;
import org.apache.jena.atlas.io.IndentedWriter ;
import org.apache.jena.tdb.base.block.Block ;
import org.apache.jena.tdb.base.buffer.PtrBuffer ;
import org.apache.jena.tdb.base.buffer.RecordBuffer ;
import org.apache.jena.tdb.base.record.Record ;
import org.apache.jena.tdb.sys.SystemTDB ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

public final class BPTreeNode extends BPTreePage
{
    // Access modes passed to get(int, short): READ fetches the sub-page via
    // getMgrRead, WRITE fetches it via getMgrWrite.
    private static final short READ = 1 ;
    private static final short WRITE = 2 ;

    // Only "public" for external very low level tools in development to access this class.
    // Assume package access.

    private static Logger log = LoggerFactory.getLogger(BPTreeNode.class) ;
    
    // The block backing this node; replaced by reset(Block).
    private Block block ;
    // Node id, taken from the block id in the constructor.
    private int id ;
    // NOTE(review): initialised to READ; not read anywhere in the visible part of this file.
    private short blockState = READ ;  
    
    // Parent node id, as passed to the node manager when the node is created.
    int parent ;
    int count ;             // Number of records.  Number of pointers is +1
    
    // "Leaf" of the BPTree is the lowest level of ptr/key splits, not the data blocks.
    // We need to know this to know which block manager the block pointers refer to.
    boolean isLeaf ;        
    // Keys held in this node.
    private RecordBuffer records ;
    // Package-visible setter for the record buffer.
    void setRecordBuffer(RecordBuffer r) { records = r ; }
    // Ids of the sub-pages: records pages when isLeaf, otherwise tree nodes.
    PtrBuffer ptrs ;

    /* B+Tree
     * 
     * Two block managers : 
     *   one for Nodes (BPlusTreePages => BPlusTreeNode)
     *   one for Leaves (RecordBufferPages)
     * The split key is the highest key held in the block.
     * 
     * A "leaf" node is a leaf of the B+Tree part, and points to 
     * highest record in a RecordBuffer 
     *
     * The Gap is usually zero.
     * N = 2, Gap = 1 =>
     *  2*N+Gap:  MaxRec = 4, MaxPtr = 5,
     *  Max-1:    HighRec = 3, HighPtr = 4
     *  N-1:      MinRec = 1, MinPtr = 2
     *
     * BPTreeNode:
     * 
     *      +------------------------+
     *      |-| K0 | K1 | K2 | K3 |--|
     *      +------------------------+
     *      | P0 | P1 | P2 | P3 | P4 |
     *      +------------------------+
     *
     *      +------------------------+
     *      | | K0 | K1 | ** | ** |--|
     *      +------------------------+
     *      | P0 | P1 | P2 | ** | ** |
     *      +------------------------+
     *      
     * BPTreeRecords -> RecordBuffer:
     *      
     *      +------------------------+
     *      | K0 | K1 | K2 | ** | ** |
     *      +------------------------+
     *      
     * The size of records blocks and size of tree nodes don't have to be the same.
     * They use different page managers, and are in different files.  
     *
     * The minimal tree is one, leaf, root BPTreeNode and one BPTreeRecords page.
     * 
     * Pictures:      
     *      /--\ \--\
     * means a block with free space introduced between records[i] and records[i+1], ptrs[i+1]/ptrs[i+2]
     * Lower half is a valid structure (except for overall size) 
     *       
     *      /--/ /--\
     * means a block with free space introduced between records[i] and records[i+1], ptrs[i]/ptrs[i+1]
     * Upper half is a valid structure (except for overall size) 
     */

    // Branch nodes only need create branch nodes (splitting sideways)
    // Leaf nodes only create leaf nodes.
    // The root is an exception.
    
    /** Create a new node in this node's tree with the given parent id and leaf flag. */
    private BPTreeNode create(int parent, boolean isLeaf)
    {
        final BPTreeNode node = create(bpTree, parent, isLeaf) ;
        return node ;
    }
    
    /**
     * Obtain a new node from the tree's node manager and set its leaf flag.
     *
     * @param bpTree the tree that owns the new node
     * @param parent parent id handed to the node manager
     * @param isLeaf whether the new node is a leaf of the B+Tree part
     * @return the new node
     */
    private static BPTreeNode create(BPlusTree bpTree, int parent, boolean isLeaf)
    {
        final BPTreeNode created = bpTree.getNodeManager().createNode(parent) ;
        created.isLeaf = isLeaf ;
        return created ;
    }
    
    /** Wrap a block as a node of the given tree; the node id is the block id as an int. */
    /*package*/ BPTreeNode(BPlusTree bpTree, Block block)
    {
        super(bpTree) ;
        this.id = block.getId().intValue() ;
        this.block = block ;
    }

    /**
     * Rebind this node to a different block and reformat it using the node's
     * current leaf flag, parent and count.
     */
    @Override
    public void reset(Block block)
    {
        this.block = block ;
        // Formatting sets the record and pointer buffers for the new block.
        BPTreeNodeMgr.formatBPTreeNode(this, this.bpTree, block, this.isLeaf, this.parent, this.count) ;
    }
    
    // [TxTDB:PATCH-UP] REMOVE
    //private BPTreePage get(int idx) { return get(idx, WRITE) ; }
    
    /**
     * Get the page at slot idx - switch between B+Tree and records files.
     *
     * @param idx   slot in the pointer buffer
     * @param state access mode: READ or WRITE
     * @return the page the slot points to
     * @throws BPTreeException if state is neither READ nor WRITE
     */
    private BPTreePage get(int idx, short state)
    {
        int subId = ptrs.get(idx) ;
        if ( state == READ )
            return getMgrRead(subId) ;
        if ( state == WRITE )
            return getMgrWrite(subId) ;
        // Fail here rather than return null: every caller dereferences the
        // result immediately, so null only deferred the failure to an NPE.
        String msg = "Unknown state: "+state ;
        log.error(msg) ;
        throw new BPTreeException(msg) ;
    }
    
    /**
     * Fetch a sub-page for reading: from the node manager for a branch node,
     * from the records manager for a leaf node.
     */
    private BPTreePage getMgrRead(int subId)
    {
        if ( ! isLeaf )
            return bpTree.getNodeManager().getRead(subId, this.id) ;
        return bpTree.getRecordsMgr().getRead(subId) ;
    }
    
    /**
     * Fetch a sub-page for writing: from the node manager for a branch node,
     * from the records manager for a leaf node.
     */
    private BPTreePage getMgrWrite(int subId)
    {
        // [TxTDB:PATCH-UP]
        if ( ! isLeaf )
            return bpTree.getNodeManager().getWrite(subId, this.id) ;
        return bpTree.getRecordsMgr().getWrite(subId) ;
    }

    // ---------- Public calls.
    // None of these are called recursively.
    
    /**
     * Find a record, using the active comparator.
     *
     * @param root the root node (id zero)
     * @param rec  the record to look for
     * @return the result of the search from the root
     * @throws BPTreeException if the node passed in does not have id zero
     */
    public static Record search(BPTreeNode root, Record rec)
    {
        root.internalCheckNodeDeep() ;
        if ( root.id != 0 )
            throw new BPTreeException("Search not starting from the root: "+root) ;
        return root.internalSearch(rec) ;
    }

    /**
     * Insert a record, starting from the root.
     * A full root is split first, then the insert descends from the root.
     *
     * @return the existing value if any, else null
     * @throws BPTreeException if the node passed in is not the root
     */
    public static Record insert(BPTreeNode root, Record record)
    {
        // [TxTDB:PATCH-UP] - put in BPlusTree.
        if ( logging() )
        {
            log.debug(format("** insert(%s) / start", record)) ;
            if ( DumpTree )
                root.dump() ;
        }

        if ( ! root.isRoot() )
            throw new BPTreeException("Insert begins but this is not the root") ;

        // Splitting a full root is a special case, done before the descent.
        if ( root.isFull() )
        {
            splitRoot(root) ;
            if ( DumpTree )
                root.dump() ;
        }

        // Root ready - call insert proper.
        final Record previous = root.internalInsert(record) ;
        root.internalCheckNodeDeep() ;

        if ( logging() )
        {
            log.debug(format("** insert(%s) / finish", record)) ;
            if ( DumpTree )
                root.dump() ;
        }
        return previous ;
    }

    /**
     * Delete a record, starting from the root.
     *
     * @return the old value if there was one, else null
     * @throws BPTreeException if the node passed in is not the root
     */
    public static Record delete(BPTreeNode root, Record rec)
    {
        if ( logging() )
        {
            log.debug(format("** delete(%s) / start", rec)) ;
            if ( DumpTree )
                root.dump() ;
        }
        if ( ! root.isRoot() )
            throw new BPTreeException("Delete begins but this is not the root") ;

        final boolean rootIsBareLeaf = root.isLeaf && root.count == 0 ;
        if ( rootIsBareLeaf )
        {
            // Special case.  Just a records block.  Allow that to go too small.
            final BPTreePage recordsPage = root.get(0, WRITE) ;
            if ( CheckingNode && ! ( recordsPage instanceof BPTreeRecords ) )
                root.error("Zero size leaf root but not pointing a records block") ;
            final Record removed = recordsPage.internalDelete(rec) ;
            recordsPage.release() ;
            return removed ;
        }

        // Entry: checkNodeDeep() ;
        final Record oldValue = root.internalDelete(rec) ;

        // A branch root left with no records is collapsed onto its single child.
        if ( root.count == 0 && ! root.isLeaf )
        {
            root.reduceRoot() ;
            root.internalCheckNodeDeep() ;
        }

        if ( logging() )
        {
            log.debug(format("** delete(%s) / finish", rec)) ;
            if ( DumpTree )
                root.dump() ;
        }
        return oldValue ;
    }
    
    /**
     * Returns the id of the records buffer page for this record.
     * The records buffer page is NOT read; the record may not exist.
     *
     * @param node    node to start the descent from
     * @param fromRec starting record, or null for the lowest page
     * @return id of the records page
     */
    static int recordsPageId(BPTreeNode node, Record fromRec)
    {
        final boolean fromStart = ( fromRec == null ) ;

        // Descend through the branch nodes until a leaf node is reached.
        while ( !node.isLeaf() )
        {
            BPTreePage next ;
            if ( fromStart )
                next = node.get(0, READ) ;
            else
                next = node.findHere(fromRec) ;
            // The current node is not a leaf, so the page below is a node.
            final BPTreeNode child = (BPTreeNode)next ;
            // Every node on the way down is released, except the root.
            if ( ! node.isRoot() )
                node.release() ;
            node = child ;
        }

        // Pick the pointer without touching the records buffer page itself.
        int pageId ;
        if ( fromStart )
            pageId = node.getPtrBuffer().getLow() ;
        else
        {
            final int slot = convert(node.findSlot(fromRec)) ;
            pageId = node.getPtrBuffer().get(slot) ;
        }
        if ( ! node.isRoot() )
            node.release() ;
        return pageId ;
    }



    /** Maximum record below this node: the maximum of the page in the last pointer slot. */
    @Override
    protected Record maxRecord()
    {
        final BPTreePage last = get(count, READ) ;
        final Record max = last.maxRecord() ;
        last.release() ;
        return max ;
    }

    /** Minimum record below this node: the minimum of the page in pointer slot 0. */
    @Override
    protected Record minRecord()
    {
        final BPTreePage first = get(0, READ) ;
        final Record min = first.minRecord() ;
        first.release() ;
        return min ;
    }

//    @Override
//    protected BPTreeRecords findPage(Record rec)
//    {
//        if ( CheckingNode ) internalCheckNode() ;
//        
//        BPTreePage page = findHere(rec) ;
//        if ( page == null )
//            return null ;
//        BPTreeRecords bpr = page.findPage(rec) ;
//        page.release() ;
//        return bpr ;
//    }
//    
//    // Find first page.
//    @Override
//    BPTreeRecords findFirstPage()
//    {
//        BPTreePage page = get(0, READ) ;
//        BPTreeRecords records = page.findFirstPage() ;
//        page.release() ;
//        // Err - records is released!
//        return records ;
//    }

    /** Lowest record held in this node's own record buffer. */
    @Override
    final Record getLowRecord()
    {
        final Record low = records.getLow() ;
        return low ;
    }

    /** Highest record held in this node's own record buffer. */
    @Override
    final Record getHighRecord()
    {
        final Record high = records.getHigh() ;
        return high ;
    }
    
    // count is the number of records; the number of pointers is count+1.
    
    /** Maximum size of this node: the order given by the tree parameters. */
    @Override
    final int getMaxSize()
    {
        return params.getOrder() ;
    }
    
    /** Current value of the count field. */
    @Override
    final int getCount()
    {
        return this.count ;
    }
 
    /** Set the count field; the buffers are not touched. */
    @Override
    final void setCount(int count)
    {
        this.count = count ;
    }
    
    /** The block currently backing this node. */
    @Override
    public Block getBackingBlock()
    {
        return this.block ;
    }
    
    /** Direct access to the record buffer. Do not use without great care. */
    RecordBuffer getRecordBuffer()
    {
        return this.records ;
    }
    /** Direct access to the pointer buffer. Do not use without great care. */
    PtrBuffer getPtrBuffer()
    {
        return this.ptrs ;
    }
    
    /** Set the leaf flag of this node. */
    void setIsLeaf(boolean isLeaf)
    {
        this.isLeaf = isLeaf ;
    }

    /** Whether this node is a leaf of the B+Tree part, i.e. its pointers refer to records pages. */
    boolean isLeaf()
    {
        return isLeaf ;
    }
    
    /** The id of this node, taken from its block when the node was constructed. */
    @Override
    public final int getId()
    {
        return this.id ;
    }

    /** Hand this node to the tree's node manager to be written. */
    @Override
    final void write()
    {
        bpTree.getNodeManager().write(this) ;
    }
    
    /** Ask the tree's node manager to promote this node. */
    @Override
    final void promote()
    {
        bpTree.getNodeManager().promote(this) ;
    }

    /** Release this node back to the tree's node manager. */
    @Override
    final void release()
    {
        bpTree.getNodeManager().release(this) ;
    }

    /** Ask the tree's node manager to free this node. */
    @Override
    final void free()
    {
        bpTree.getNodeManager().free(this) ;
    }
    
    
    // ============ SEARCH
    
    /* 
     * Do a (binary) search of the node to find the record.
     *   Returns: 
     *     +ve or 0 => the index of the record 
     *     -ve => The insertion point : the immediate higher record or length as (-i-1)
     *  Convert to +ve and descend to find the RecordBuffer with the record in it. 
     */
    
    /**
     * Search below this node: pick the sub-page for the record, search it,
     * then release the sub-page.
     */
    @Override
    final Record internalSearch(Record rec)
    {
        if ( CheckingNode )
            internalCheckNode() ;
        final BPTreePage child = findHere(rec) ;
        final Record found = child.internalSearch(rec) ;
        child.release() ;
        return found ;
    }

    /**
     * Find the next page to look at as we walk down the tree.
     * The slot search yields either an index or an encoded insertion point;
     * convert() turns either into the pointer slot to follow, and the page in
     * that slot is fetched for reading.
     */
    private final BPTreePage findHere(Record rec)
    {
        final int slot = convert(findSlot(rec)) ;
        return get(slot, READ) ;
    }
    
    // ============ INSERT
    
    /* Traverse this page, ensuring the node below is not full before
     * descending.  Therefore there is always space to do the actual insert.
     */
    
    /**
     * Insert below this node.  The sub-page for the record is split first if it
     * is full, so the recursive insert always has room.
     *
     * @return whatever the insert into the sub-page returns
     */
    @Override
    final Record internalInsert(Record record)
    {
        if ( logging() )
            log.debug(format("internalInsert: %s [%s]", record, this)) ;

        internalCheckNode() ;

        final int found = findSlot(record) ;

        if ( logging() )
            log.debug(format("internalInsert: idx=%d=>%d", found, convert(found))) ;

        int slot = convert(found) ;
        BPTreePage child = get(slot, READ) ;

        if ( logging() )
            log.debug(format("internalInsert: next: %s",child));

        if ( child.isFull() )
        {
            // Split before descending.
            split(slot, child) ;
            // The key pulled up by the split now sits at 'slot'.  If the new
            // record is above it, the insert belongs in the new upper page.
            final boolean useUpperPage = Record.keyGT(record, records.get(slot)) ;
            if ( useUpperPage )
            {
                child.release() ;
                slot = slot+1 ;
                child = get(slot, READ) ;
            }
            internalCheckNode() ;
        }

        final Record previous = child.internalInsert(record) ;
        child.release() ;
        return previous ;
    }

    /** Map a slot search result to a slot: non-negative values pass through, negative ones are decoded. */
    private static int convert(int idx)
    {
        return ( idx < 0 ) ? decodeIndex(idx) : idx ;
    }

    // **** Old documentation
    /* Split a non-root node y, held at slot idx.
     * Do this by splitting the node in two (call to BPTree.split)
     * and inserting the new key/pointer pair.
     * WRITE(y)
     * WRITE(z)
     * WRITE(this)
     */
    /**
     * Split the full sub-page {@code y}, held at pointer slot {@code idx} of this node.
     * The page is split via {@code y.split()}; the split key goes into this
     * node's records at {@code idx} and the new page's id into the pointers at
     * {@code idx+1}.  {@code y}, the new page and this node are all written.
     * The new page is released here; releasing {@code y} is left to the caller.
     *
     * @param idx pointer slot of this node that refers to {@code y}
     * @param y   the full page to split
     */
    private void split(int idx, BPTreePage y)
    {
        boolean logging = logging() ; 
        //logging = true ;
        if ( logging )
        {
            log.debug(format("split >> y.id=%d  this.id=%d idx=%d", y.getId(), this.id, idx)) ;
            log.debug("split --   "+y) ;
        }
            
        internalCheckNode() ;
        if ( CheckingNode )
        {
            if ( ! y.isFull() ) error("Node is not full") ;
            if ( this.ptrs.get(idx) != y.getId() )
            {
                int a = this.ptrs.get(idx) ;
                int b = y.getId();
                error("Node to be split isn't in right place [%d/%d]", a, b) ;
            }
        }
        internalCheckNodeDeep() ;
        
        // Both this node and the page being split are about to be modified.
        promote() ;
        y.promote() ;
        
        // Take the split key before y.split() changes the contents of y.
        Record splitKey = y.getSplitKey() ;
        splitKey = keyRecord(splitKey) ;
        
        if ( logging )
            log.debug(format("Split key: %s", splitKey)) ;

        BPTreePage z = y.split();
        if ( logging )
        {
            log.debug(format("Split: %s", y)) ;
            log.debug(format("Split: %s", z)) ;
        }
        
        // Key only.
        if ( splitKey.hasSeparateValue() )
        {
            // [Issue: FREC]
            // This creates a empty (null-byte-initialized) value array.
            splitKey = params.getKeyFactory().create(splitKey.getKey()) ;

            // Better: but an on-disk change. This is key only.
            // splitKey = params.getKeyFactory().createKeyOnly(splitKey) ;
        }        
        
        // Insert new node. "add" shuffle's up as well.
        records.add(idx, splitKey) ;
        ptrs.add(idx+1, z.getId()) ;
        count++ ;
        
        if ( logging )
        {
            log.debug("split <<   "+this) ;
            log.debug("split <<   "+y) ;
            log.debug("split <<   "+z) ;
        }
        
        y.write();
        z.write();
        z.release() ;
        // y.release() ; y release management done by caller.
        this.write();
        if ( CheckingTree )
        {
            if ( Record.keyNE(splitKey, y.maxRecord()) )
                // Both arguments are Records, so both use %s; %d cannot format a Record.
                error("Split key %s but max subtree %s", splitKey, y.maxRecord()) ;
            internalCheckNodeDeep() ;
        }
    }
    
    /** The record at the split index of this node: the key that moves to the parent on a split. */
    @Override
    final Record getSplitKey()
    {
        return records.get(params.SplitIndex) ;
    }
    
    /**
     * Split this node in two.  This node keeps the records below
     * params.SplitIndex and becomes the lower node; a newly created node
     * receives the records above the split index and is the upper node.
     * The record at the split index itself is cleared from this node and is
     * not copied: split(int, BPTreePage) reads it with getSplitKey() before
     * calling this method and inserts it into the parent.
     * Neither node is written here.
     *
     * @return the new upper node
     */
    @Override final
    BPTreePage split()
    {
        // Median record : will go in parent.
        int ix = params.SplitIndex ;

        // New block.
        BPTreeNode z = create(this.parent, isLeaf) ;
        
        // Leave the low end untouched and copy, and clear the high end.
        // z becomes the new upper node, not the lower node.
        // 'this' is the lower block.
        
        int maxRec = maxRecords() ;
        // Copy from top of y into z. 
        records.copy(ix+1, z.records, 0, maxRec-(ix+1)) ;
        records.clear(ix, maxRec-ix) ;                    // Clear copied and median slot 
        records.setSize(ix) ;                             // Reset size
        
        // Pointers: slots 0..ix stay here, slots ix+1 upwards move to z.
        ptrs.copy(ix+1, z.ptrs, 0, params.MaxPtr-(ix+1)) ;
        ptrs.clear(ix+1, params.MaxPtr-(ix+1)) ;
        ptrs.setSize(ix+1) ;

        // Set sizes of subnodes
        setCount(ix) ;                          // Median is ix
        internalCheckNode() ;                   // y finished
        
        z.isLeaf = isLeaf ; 
        z.setCount(maxRec - (ix+1)) ;           // Number copied into z

        // Caller puts the blocks in split(int, BTreePage)
        z.internalCheckNode() ;
        return z ;
    }
    
    /* Split the root and leave the root block as the root.
     * This is the only point the height of the tree increases.
     *
     *  Allocate new blocks.
     *  Copy root low into left
     *  Copy root high into right
     *  Set counts.
     *  Create new root settings (two pointers, one key record) 
     *  WRITE(left)
     *  WRITE(right)
     *  WRITE(root)
     */
    /**
     * Split a root node in place: its contents are moved into two new nodes
     * and the root block is reformatted as a branch node holding the median
     * record and the two pointers to the new nodes.
     *
     * @param root the root node (id zero)
     */
    private static void splitRoot(BPTreeNode root)
    {
        BPlusTree bpTree = root.bpTree ;
        
        if ( CheckingNode )
            if ( root.id != 0 ) root.error("Not root: %d (root is id zero)", root.id) ;
        root.internalCheckNode() ;
        root.promote() ;
        
        // Median record
        int splitIdx = root.params.SplitIndex ;
        Record rec = root.records.get(splitIdx) ;
        
        if ( logging() )
        {
            log.debug(format("** Split root %d (%s)", splitIdx, rec)) ;
            log.debug("splitRoot >>   "+root) ;
        }

        // New blocks.
        // Both take the root's id as parent and the root's current leaf flag.
        BPTreeNode left = create(bpTree, root.id, root.isLeaf) ;
        BPTreeNode right = create(bpTree, root.id, root.isLeaf) ;
        
        //int maxRecords = maxRecords() ;
        
        // New left
        // Records below the median, and the pointers that go with them.
        root.records.copy(0, left.records, 0, splitIdx) ;
        root.ptrs.copy(0, left.ptrs, 0, splitIdx+1) ;
        left.count = splitIdx ;

        // New right
        // Records above the median; the median itself is copied to neither side.
        root.records.copy(splitIdx+1, right.records, 0, root.maxRecords()-(splitIdx+1)) ;
        root.ptrs.copy(splitIdx+1, right.ptrs, 0, root.params.MaxPtr-(splitIdx+1)) ;
        right.count = root.maxRecords()-(splitIdx+1) ;
        
        if ( logging() )
        {
            log.debug("splitRoot -- left:   "+left) ;
            log.debug("splitRoot -- right:  "+right) ;
        }
        
        // So left.count+right.count = bTree.NumRec-1
        
        // Clear root by reformatting.  New root not a leaf.  Has count of 1 after formatting.
        BPTreeNodeMgr.formatForRoot(root, false) ;
        // Make a non-leaf.
        
        // Insert two subnodes, divided by the median record
        root.count = 1 ;
        
        root.records.add(0, rec) ;
        root.ptrs.setSize(2) ;
        root.ptrs.set(0, left.id) ;        // slot 0
        root.ptrs.set(1, right.id) ;       // slot 1
        
        if ( logging())
        {
            log.debug("splitRoot <<   "+root) ;
            log.debug("splitRoot <<   "+left) ;
            log.debug("splitRoot <<   "+right) ;
        }

        // The two new nodes are written and released; the root is written but not released.
        left.write() ;
        right.write() ;
        left.release() ;
        right.release() ;
        root.write() ;

        // NOTE(review): left and right were released above but are still checked
        // in the CheckingNode branch - confirm checking a released node is safe.
        if ( CheckingTree )
            root.checkNodeDeep() ;
        else
            if ( CheckingNode )
            {
                root.internalCheckNode() ;
                left.internalCheckNode() ;
                right.internalCheckNode() ;
            }
    }

    // ============ DELETE

    /* Delete
     * Descend, making sure that the node is not minimum size at each descend.
     * If it is, rebalance.
     */
    
    /**
     * Delete below this node.  If the sub-page for the record is at minimum
     * size it is rebalanced first, then the delete recurses into the sub-page.
     * If the slot search returned a non-negative index, the key at that slot is
     * replaced afterwards by the maximum record of the sub-page.
     *
     * @param rec the record to delete
     * @return whatever the delete in the sub-page returns
     */
    @Override final
    Record internalDelete(Record rec)
    {
        internalCheckNode() ;
        if ( logging() )
            log.debug(format("internalDelete(%s) : %s", rec, this)) ;
        
        // x is the raw search result; it may be negative (see convert()).
        int x = findSlot(rec) ;

        // If x is >= 0, may need to adjust this 
        // y is the pointer slot to descend into.
        int y = convert(x) ;
        BPTreePage page = get(y, READ) ;
        
        // NOTE(review): set below but never read in this method.
        boolean thisWriteNeeded = false ;
        if ( page.isMinSize() )             // Can't be root - we decended in the get(). 
        {
            promote() ;
            // rebalance returns the page to continue with.
            page = rebalance(page, y) ;
            thisWriteNeeded = true ;
            // May have moved/removed at x.  Find again. YUK.
            x = findSlot(rec) ;
            if ( CheckingNode )
            {
                internalCheckNode() ;
                page.checkNode() ;
            }
            this.write() ;
        }
        
        // Go to bottom
        // Need to return the new key.
        Record r2 =  page.internalDelete(rec) ;
        if ( x >= 0 )
        {
            // The search returned an index in this node: refresh the key at
            // that slot from the sub-page's maximum after the delete.
            promote() ;
            // YUK
            records.set(x, keyRecord(page.maxRecord())) ;
            this.write() ;
        }

        page.release() ;
        return r2 ;
    }

    /* Reduce the root when it has only one pointer and no records.
     * Keep the root as id 0 so this is just a copy-up of the one child node.
     * WRITE(root)
     * RELEASE(old child)
     * This is the only point the height of the tree decreases.
     */ 
    
    /**
     * Collapse an empty branch root onto its single child.  The child's
     * records, pointers, leaf flag and count are copied into the root block,
     * the root is written and the child is freed.  A leaf root is left as is.
     */
    private void reduceRoot()
    {
        if ( logging() )
            log.debug(format("reduceRoot >> %s", this)) ;

        if ( CheckingNode && ( count != 0 || ! isRoot() ) )
            error("Not an empty root") ;

        if ( isLeaf )
        {
            // Now empty leaf root.
            if ( logging() )
                log.debug(format("reduceRoot << leaf root")) ;
            return ;
        }

        final BPTreeNode child = cast(get(0, WRITE)) ;
        // Pull the child up into the root.
        // The root keeps its own block rather than swapping to a new root.
        BPTreeNodeMgr.formatForRoot(this, child.isLeaf) ;
        child.records.copy(0, this.records, 0, child.count) ;
        child.ptrs.copy(0, this.ptrs, 0, child.count+1) ;
        this.isLeaf = child.isLeaf ;
        this.count = child.count ;
        this.write();
        // The child's block is no longer needed.
        child.free() ;
        internalCheckNodeDeep() ;

        if ( logging() )
            log.debug(format("reduceRoot << %s", this)) ;
    }

    /* Rebalance node n at slot idx in parent (this)
     * The node will then be greater than the minimum size
     * and one-pass delete is then possible. 
     * 
     * try to shift right, from the left sibling (if exists)
     *   WRITE(left)
     *   WRITE(n)
     *   WRITE(this)
     * try to shift left, from the right sibling (if exists)
     *   WRITE(right)
     *   WRITE(n)
     *   WRITE(this)
     * else 
     *  merge with left or right sibling
     * Suboperations do all the write-back of nodes.
     */ 
    /**
     * Rebalance the sub-page {@code node}, held at pointer slot {@code idx} of this node.
     * Tried in order: shift from the left sibling if it is above minimum size;
     * shift from the right sibling if it is above minimum size; otherwise merge
     * with the left sibling if there is one, else with the right sibling.
     *
     * @param node the sub-page to rebalance
     * @param idx  pointer slot of this node that refers to {@code node}
     * @return the page to continue with: {@code node} after a shift, or the
     *         result of the merge
     */
    private BPTreePage rebalance(final BPTreePage node, int idx)
    {
        if ( logging() )
        {
            log.debug(format("rebalance(id=%d, idx=%d)", node.getId(), idx)) ;
            log.debug(format(">> this: %s", this)) ;
            log.debug(format(">> node: %s", node)) ;
        }
        internalCheckNode() ;
        promote() ;
        node.promote() ;
        
        // Left sibling, fetched for writing; stays null when node is in slot 0.
        BPTreePage left = null ;
        if ( idx > 0 )
            // [TxTDB:PATCH-UP] 
            // release on left
            left = get(idx-1, WRITE) ;
        
        // *** SHIFTING : need to change the marker record in the parent.
        // *** getHighRecord of lower block.
        
        if ( left != null && ! left.isMinSize() )
        {
            if ( logging() )
                log.debug("rebalance/shiftRight") ;
            
            // Move elements around.
            // Has not done "this.put()" yet.
            // The dividing key between left and node is at slot idx-1.
            shiftRight(left, node, idx-1) ;
            
            if ( logging() )
                log.debug("<< rebalance: "+this) ;
            if ( CheckingNode )
            {
                left.checkNode() ;
                node.checkNode();
                this.internalCheckNode() ;
            }
            left.release() ;
            return node ;
        }

        // Right sibling, fetched for writing; stays null when node is in the last pointer slot.
        BPTreePage right = null ;
        if ( idx < count )
            right = get(idx+1, WRITE) ;
        
        if ( right != null && ! right.isMinSize() )
        {
            if ( logging() )
                log.debug("rebalance/shiftLeft") ;

            // The dividing key between node and right is at slot idx.
            shiftLeft(node, right, idx) ;

            if ( logging() )
                log.debug("<< rebalance: "+this) ;
            if ( CheckingNode )
            {
                right.checkNode();
                node.checkNode();
                this.internalCheckNode() ;
            }
            // The left sibling may have been fetched but was not used.
            if ( left != null ) left.release() ;
            right.release() ;
            return node ;
        }

        // Couldn't shift.  Collapse two pages.  
        if ( CheckingNode && left == null && right == null) error("No siblings") ;

        if ( left != null )
        {
            if ( logging() )
                log.debug(format("rebalance/merge/left: left=%d n=%d [%d]", left.getId(), node.getId(), idx-1)) ;
            if ( CheckingNode && left.getId() == node.getId() ) 
                error("Left and n the same: %s", left) ;
            // node is merged into left.
            BPTreePage page = merge(left, node, idx-1) ;
            if ( right != null )
                // HACK : We didn't use it.
                right.release() ;
            return page ;
        }
        else
        {
            // left == null
            // right != null
            if ( logging() )
                log.debug(format("rebalance/merge/right: n=%d right=%d [%d]", node.getId(), right.getId(), idx)) ;
            if ( CheckingNode && right.getId() == node.getId() )
                error("N and right the same: %s",right ) ;
            // right is merged into node.
            BPTreePage page = merge(node, right, idx) ;
            return page ;
        }
    }
    
    /**
     * Merge two adjacent sub-pages of this node: fills left, frees right.
     * The key at {@code dividingSlot} is passed to the page-level merge and is
     * then removed from this node with shuffleDown.  The left page and this
     * node are written.
     *
     * @param left         the lower page, which receives the merged contents
     * @param right        the upper page, which is freed
     * @param dividingSlot record slot of this node holding the key between the two pages
     * @return the left page
     */
    private BPTreePage merge(BPTreePage left, BPTreePage right, int dividingSlot)
    {
        if ( logging() )
        {
            log.debug(format(">> merge(@%d): %s", dividingSlot, this)) ;
            log.debug(">> left:  "+left) ;
            log.debug(">> right: "+right) ;
        }
        
        // /==\ + key + /==\   ==>   /====\ 
        Record splitKey = records.get(dividingSlot) ;
        BPTreePage page = left.merge(right, splitKey) ;
        // The page-level merge does not dispose of right; it is freed below.
        if ( logging() )
            log.debug("-- merge: "+page) ;

        left.write();
        right.free() ;
        
        if ( page == right )
            error("Returned page is not the left") ;
            
        // Depending on whether there is a gap or not.
        if ( CheckingNode )
        {
            if ( isLeaf )
            {
                // If two data blocks, then the split key is not included (it's already there, with its value)
                // Size is N+N and max could be odd so N+N and N+N+1 are possible. 
                if ( left.getCount()+1 != left.getMaxSize() && left.getCount() != left.getMaxSize() )
                    error("Inconsistent data node size: %d/%d", left.getCount(), left.getMaxSize()) ;
            }
            else if ( ! left.isFull() )
            {
                // If not two data blocks, the left side should now be full (N+N+split) 
                error("Inconsistent node size: %d/%d", left.getCount(), left.getMaxSize()) ;
            }
        }

        // Remove from parent (which is "this")
        shuffleDown(dividingSlot) ;
        this.write();
        internalCheckNodeDeep() ;
        if ( logging() )
        {
            log.debug("<< merge: "+this) ;
            log.debug("<< left:  "+left) ;
        }
        return left ;
    }

    /**
     * Merge the sibling {@code right} into this node, placing {@code splitKey}
     * between the two sets of records. The parent is not adjusted here.
     *
     * @param right    the sibling page; it must be a {@code BPTreeNode}
     * @param splitKey the key that separated this node from {@code right}
     * @return this node, now holding the merged contents
     */
    @Override
    BPTreePage merge(BPTreePage right, Record splitKey)
    {
        // cast() reports an error if the sibling is not a BPTreeNode.
        final BPTreeNode rightNode = cast(right) ;
        return merge(this, splitKey, rightNode) ;
    }

    /**
     * Merge two sibling nodes into {@code left}: append the split key, then
     * copy the records and pointers of {@code right} on top of those of
     * {@code left}. The parent is not touched, and releasing {@code right}
     * is left to the caller (the testing code needs it that way).
     *
     * @param left     node that receives everything
     * @param splitKey key that separated the two nodes
     * @param right    node whose buffers are copied and then cleared
     * @return {@code left}
     */
    private static BPTreeNode merge(BPTreeNode left, Record splitKey, BPTreeNode right)
    {
        // The separator goes in first so that it sits between the two key runs.
        left.records.add(splitKey) ;

        // Pointers and records live in separate buffers; copy both over.
        right.ptrs.copyToTop(left.ptrs) ;
        right.records.copyToTop(left.records) ;

        // Everything from the right, plus the split key.
        left.count += right.count + 1 ;
        left.internalCheckNode() ;

        // Leave the right-hand buffers empty.
        right.ptrs.clear() ;
        right.records.clear() ;
        return left ;
    }

    /**
     * Rebalance two children of this node by asking {@code left} to shift
     * into {@code right}, then store the key-only form of the record it
     * returns as the new split key at slot {@code i}. Both children are
     * written; this node itself is not written here.
     *
     * @param left  left-hand child
     * @param right right-hand child
     * @param i     slot in this node holding the key between the two children
     */
    private void shiftRight(BPTreePage left, BPTreePage right, int i)
    {
        if ( logging() )
        {
            log.debug(">> shiftRight: this:  "+this) ;
            log.debug(">> shiftRight: left:  "+left) ;
            log.debug(">> shiftRight: right: "+right) ;
        }

        // The current split key travels down; the record handed back replaces it.
        final Record oldSplit = records.get(i) ;
        final Record promoted = left.shiftRight(right, oldSplit) ;
        this.records.set(i, keyRecord(promoted)) ;

        left.write() ;
        right.write() ;
        // Writing of this node is deferred.

        if ( logging() )
        {
            log.debug("<< shiftRight: this:  "+this) ;
            log.debug("<< shiftRight: left:  "+left) ;
            log.debug("<< shiftRight: right: "+right) ;
        }
    }

    /**
     * Rebalance two children of this node by asking {@code left} to pull a
     * slot from {@code right}, then store the key-only form of the record it
     * returns as the new split key at slot {@code i}. Both children are
     * written; this node itself is not written here.
     *
     * @param left  left-hand child
     * @param right right-hand child
     * @param i     slot in this node holding the key between the two children
     */
    private void shiftLeft(BPTreePage left, BPTreePage right, int i)
    {
        if ( logging() )
        {
            log.debug(">> shiftLeft: this:  "+this) ;
            log.debug(">> shiftLeft: left:  "+left) ;
            log.debug(">> shiftLeft: right: "+right) ;
        }

        // The current split key travels down; the record handed back replaces it.
        final Record oldSplit = records.get(i) ;
        final Record promoted = left.shiftLeft(right, oldSplit) ;
        this.records.set(i, keyRecord(promoted)) ;

        left.write() ;
        right.write() ;
        // Writing of this node is deferred.

        if ( logging() )
        {
            log.debug("<< shiftLeft: this:  "+this) ;
            log.debug("<< shiftLeft: left:  "+left) ;
            log.debug("<< shiftLeft: right: "+right) ;
        }
    }

    /**
     * Move one slot from the high end of this node to the low end of
     * {@code other}. The highest record of this node is removed and returned
     * for promotion, and {@code splitKey} becomes the first record of
     * {@code other}.
     *
     * @param other    the receiving sibling; must be a {@code BPTreeNode}
     * @param splitKey the key currently separating the two nodes in the parent
     * @return the record removed from the top of this node
     */
    @Override
    Record shiftRight(BPTreePage other, Record splitKey)
    {
        final BPTreeNode dest = cast(other) ;
        if ( CheckingNode )
        {
            if ( count == 0 )
                error("Node is empty - can't shift a slot out") ;
            if ( dest.isFull() )
                error("Destination node is full") ;
        }

        // Records: take the top one for promotion, push the split key into the sibling.
        final Record promoted = this.records.getHigh() ;
        this.records.removeTop() ;
        dest.records.add(0, splitKey) ;

        // Pointers are simply shifted across.
        this.ptrs.shiftRight(dest.ptrs) ;

        this.count-- ;
        dest.count++ ;

        this.internalCheckNode() ;
        dest.internalCheckNode() ;
        return promoted ;
    }

    /**
     * Move one slot from the low end of {@code other} (the right-hand sibling
     * in the calling code) onto the high end of this node. {@code splitKey}
     * is appended to this node's records and the lowest record of
     * {@code other} is removed and returned for promotion.
     *
     * @param other    the sibling giving up a slot; must be a {@code BPTreeNode}
     * @param splitKey the key currently separating the two nodes in the parent
     * @return the record that was lowest in {@code other}
     */
    @Override
    Record shiftLeft(BPTreePage other, Record splitKey)
    {
        BPTreeNode node = cast(other) ;
        if ( CheckingNode )
        {
            // The slot is taken out of 'node', so that is the node which must
            // not be empty; 'this' receives the slot and must not be full.
            if ( node.count == 0 ) error("Node is empty - can't shift a slot out") ;
            if ( isFull() ) error("Destination node is full") ;
        }
        Record r = node.records.getLow() ;
        // Records: promote moving element, replace with splitKey
        this.records.add(splitKey) ;
        node.records.shiftDown(0) ;
        
        // Pointers just shift
        this.ptrs.shiftLeft(node.ptrs) ;
        
        this.count ++ ;
        node.count -- ;
        return r ;
    }

    /**
     * Remove the key at slot {@code x} together with the pointer just above
     * it, and decrement the record count.
     *
     * @param x slot index in this node; with node checking on, a value of
     *          {@code count} or more is reported as an error
     */
    private void shuffleDown(int x)
    {
        if ( logging() )
        {
            log.debug(format("ShuffleDown: i=%d count=%d MaxRec=%d", x, count, maxRecords())) ;
            log.debug("shuffleDown >> "+this) ;
        }

        if ( CheckingNode && x >= count ) error("shuffleDown out of bounds") ;

        // When x is the last key, only the top entries need removing;
        // otherwise close the gap at key x and at pointer x+1.
        final boolean topOnly = ( x == count-1 ) ;
        if ( topOnly )
        {
            records.removeTop() ;
            ptrs.removeTop() ;
        }
        else
        {
            records.shiftDown(x) ;
            ptrs.shiftDown(x+1) ;
        }
        count-- ;

        if ( logging() )
        {
            if ( topOnly )
                log.debug("shuffleDown << Clear top") ;
            log.debug("shuffleDown << "+this) ;
        }
        internalCheckNode() ;
    }

    // ---- Utilities

    /**
     * View a page as a {@code BPTreeNode}.
     *
     * @param other the page to convert; {@code null} is passed through as {@code null}
     * @return {@code other} as a {@code BPTreeNode}
     */
    private final BPTreeNode cast(BPTreePage other)
    {
        if ( other == null || other instanceof BPTreeNode )
            return (BPTreeNode)other ;
        // error() throws; the return only satisfies the compiler.
        error("Wrong type: "+other) ;
        return null ;
    }

    /**
     * Look up {@code rec} in this node's record buffer.
     *
     * @param rec the record to search for
     * @return whatever {@code records.find(rec)} reports, unchanged
     */
    final int findSlot(Record rec)
    {
        return records.find(rec) ;
    }
    
    /**
     * Test whether this node is the root, decided purely by its block id.
     *
     * @return true if this node's id is {@code BPlusTreeParams.RootId}
     */
    final boolean isRoot()
    {
        // The tree does not currently remember its root node object,
        // so identity comparison (bpTree.root == this) is not available.
        return BPlusTreeParams.RootId == this.id ;
    }

    /**
     * Produce the key-only form of a record using the tree's record factory.
     *
     * @param record the source record
     * @return the result of {@code createKeyOnly(record)}
     */
    private Record keyRecord(Record record)
    {
        return this.bpTree.getRecordFactory().createKeyOnly(record) ;
    }

    /**
     * The record capacity of a node, read from the tree parameters.
     * (Candidate for fix-up or removal.)
     *
     * @return {@code params.MaxRec}
     */
    private final int maxRecords()
    {
        return params.MaxRec ;
    }
    
    /**
     * Test whether this node has reached its record capacity. With node
     * checking on, a count above the capacity is reported as an error.
     *
     * @return true if the record count is at (or beyond) {@code maxRecords()}
     */
    @Override
    final boolean isFull()
    {
        final int capacity = maxRecords() ;
        if ( CheckingNode && count > capacity )
            error("isFull: Moby block: %s", this) ;

        // 'count' is the number of records, not pointers.
        return count >= capacity ;
    }
    
    /**
     * Return true if there is at least one key in this node or, when this is
     * a root holding no keys of its own, in the single page it points to.
     * A non-root node with no keys yields false.
     *
     * @return true if any key exists here or below
     */
    @Override
    final boolean hasAnyKeys()
    {
        if ( this.count > 0 ) 
            return true ;
        if ( ! isRoot() )
            return false ;
        
        // The root can be zero size and point to a single data block.
        // NOTE(review): other callers in this class pass a slot index to get()
        // (see output() and checkNodeDeep), whereas this passes the value read
        // from the pointer buffer - confirm which of the two get() expects.
        int lowPtr = this.getPtrBuffer().getLow() ;
        BPTreePage page = get(lowPtr, READ) ;
        try
        {
            return page.hasAnyKeys() ;
        }
        finally
        {
            // Release the page even if the nested check throws.
            page.release() ;
        }
    }


    
    /**
     * Test whether this node is at its minimum permitted record count. With
     * node checking on, a count below the minimum is reported as an error.
     *
     * @return true if the record count is no greater than {@code params.getMinRec()}
     */
    @Override
    final boolean isMinSize()
    {
        final int floor = params.getMinRec() ;
        if ( CheckingNode && count < floor )
            error("isMinSize: Dwarf block: %s", this) ;

        return ! ( count > floor ) ;
    }
    
    // ========== Other
    
    /**
     * One-line rendering of this node: kind, id, parent label, size, then
     * every pointer slot interleaved with every record slot up to capacity,
     * finishing with the pointer at {@code params.HighPtr}.
     */
    @Override
    public String toString()
    {
        // Parent label: a non-negative parent id, "root", or "??" if neither applies.
        String label ;
        if ( parent >= 0 )
            label = Integer.toString(parent) ;
        else if ( parent == BPlusTreeParams.RootParent )
            label = "root" ;
        else
            label = "??" ;
        if ( isLeaf )
            label += "/leaf" ;

        final StringBuilder sb = new StringBuilder(isLeaf ? "LEAF: " : "NODE: ") ;
        sb.append(String.format("%d [%s] (size %d) -- ", id, label, count)) ;

        // Walk all slots, including unused ones.
        final int slots = maxRecords() ;
        for ( int slot = 0 ; slot < slots ; slot++ )
            sb.append(childStr(slot)).append(" (").append(recstr(records, slot)).append(") ") ;

        sb.append(childStr(params.HighPtr)) ;
        return sb.toString() ;
    }

    /**
     * Render one slot of a record buffer for {@link #toString()}.
     *
     * @param records the buffer to read from
     * @param idx     the slot to render
     * @return {@code "----"} for a clear slot, otherwise the record's string form
     */
    private final String recstr(RecordBuffer records, int idx)
    {
        return records.isClear(idx) ? "----" : records._get(idx).toString() ;
    }
    
    /** Dump this node and its children to {@code IndentedWriter.stdout}. */
    public void dump()
    {
        final IndentedWriter target = IndentedWriter.stdout ;
        dump(target) ;
    }

    /**
     * Dump this node and its children to the given writer, then make sure
     * the writer is at the start of a line and flush it.
     *
     * @param out destination writer
     */
    public void dump(IndentedWriter out)
    {
        this.output(out) ;
        out.ensureStartOfLine() ;
        out.flush() ;
    }
    
    /**
     * Produce the same text as {@link #output(IndentedWriter)}, captured in a string.
     *
     * @return the buffered output
     */
    public String dumpToString()
    {
        final IndentedLineBuffer sink = new IndentedLineBuffer() ;
        this.output(sink) ;
        return sink.asString() ;
    }
    
    
    /**
     * Print this node, then each of its {@code count+1} children on
     * following lines at one extra level of indentation. Every child page is
     * fetched for reading and released after it has been printed.
     *
     * @param out destination writer
     */
    @Override
    public void output(IndentedWriter out)
    {
        out.print(toString()) ;
        out.incIndent() ;
        int slot = 0 ;
        while ( slot <= count )
        {
            out.println() ;
            final BPTreePage child = get(slot, READ) ;
            child.output(out) ;
            child.release() ;
            slot++ ;
        }
        out.decIndent() ;
    }

//    private void indent(PrintStream out, int x)
//    {
//        for ( int i = 0 ; i < x ; i++ )
//            out.print("  ") ;
//    }

    /**
     * Render one pointer slot for {@link #toString()}.
     *
     * @param i pointer slot index
     * @return {@code "*"} when {@code i} is at or past {@code ptrs.size()},
     *         otherwise the pointer value in decimal
     */
    private String childStr(int i)
    {
        return ( i < ptrs.size() ) ? Integer.toString(ptrs.get(i)) : "*" ;
    }
    
    // =========== Checking
    // internal checks - only if checking
    
    // Check node does not assume a valid tree - may be in mid-operation. 
    /** Run the single-node check with no key bounds, but only when {@code CheckingNode} is set. */
    private final void internalCheckNode()
    {
        if ( ! CheckingNode )
            return ;
        checkNode(null, null) ;
    }
    
    /** Run the recursive check from this node, but only when {@code CheckingTree} is set. */
    private final void internalCheckNodeDeep()
    {
        if ( CheckingTree )
            checkNodeDeep() ;
    }

    /** Check this node on its own, unconditionally and without any key bounds. */
    @Override
    final void checkNode()
    {
        final Record noBound = null ;
        checkNode(noBound, noBound) ;
    }

    /**
     * Check this node and everything below it, with no key bounds. If this
     * node is the root, its parent field must be
     * {@code BPlusTreeParams.RootParent}. A root with zero records is not
     * treated as an error here.
     */
    @Override
    final void checkNodeDeep()
    {
        if ( isRoot() && parent != BPlusTreeParams.RootParent )
            error("Root parent is wrong") ;
        checkNodeDeep(null, null) ;
    }

    /**
     * Checks of a single node - no recursion into child nodes.
     * Every failed check calls error(), which throws.
     *
     * Checks made, in order: count against records.size(); for non-leaf
     * nodes, count+1 against ptrs.size(); size limits for non-root nodes;
     * parent/id sanity; the keys are non-null, in order and within
     * [min, max]; unused record slots are clear (only if SystemTDB.NullOut);
     * the child pointers in use are non-negative; and unused pointer slots
     * are clear (only if SystemTDB.NullOut).
     *
     * @param min lower bound for the keys, inclusive; null means no lower bound
     * @param max upper bound for the keys, inclusive (allows for duplicates);
     *            null means no upper bound
     */
    final private void checkNode(Record min, Record max)
    {
        if ( count != records.size() )
            error("Inconsistent: id=%d, count=%d, records.size()=%d : %s", id, count, records.size(), this) ; 
        
        // The pointer-count check is only applied to non-leaf nodes.
        if ( ! isLeaf && count+1 != ptrs.size() )
            error("Inconsistent: id=%d, count+1=%d, ptrs.size()=%d ; %s", id, count+1, ptrs.size(), this) ; 
    
        // No BPT remembered root node currently 
        //if ( bpTree.root != null && !isRoot() && count < params.MinRec)
        if ( !isRoot() && count < params.MinRec)
        {
            //warning("Runt node: %s", this) ;
            error("Runt node: %s", this) ;
        }
        if ( !isRoot() && count > maxRecords() ) error("Over full node: %s", this) ;
        if ( ! isLeaf && parent == id ) error("Parent same as id: %s", this) ;  
        // k tracks the previous key seen; it starts at the lower bound (possibly null).
        Record k = min ;
    
        // Test records in the allocated area
        for ( int i = 0 ; i < count ; i++ )
        {
            if ( records.get(i) == null ) error("Node: %d : Invalid record @%d :: %s",id, i, this) ;
            // Equal keys are accepted: only "previous strictly greater" is an error.
            if ( k != null && keyGT(k, records.get(i)) ) 
            {
                Record r = records.get(i) ; 
                //keyGT(k, r) ;
                error("Node: %d: Not sorted (%d) (%s, %s) :: %s ", id, i, k, r, this) ;
            }
            k = records.get(i) ;
        }
        
        // k is now the last key (or still min if the node has no records).
        if ( k != null && max != null && keyGT(k,max) )
            error("Node: %d - Record is too high (max=%s):: %s", id, max, this) ;
        
        if ( SystemTDB.NullOut )
        {
            // Test records in the free area
            for ( int i = count ; i < maxRecords() ; i++ )
            {       
                if ( ! records.isClear(i) )
                    error("Node: %d - not clear (idx=%d) :: %s", id, i, this) ;
            }
        }
        
        // Pointer checks.
        // 'i' is declared outside the loop on purpose: the "check empty" loop
        // further down carries on from where this loop stops (i == count+1).
        int i = 0 ;
        // Check not empty at bottom. 
        for ( ; i < count+1 ; i++ )
        {
            if ( ptrs.get(i) < 0 ) 
                error("Node: %d: Invalid child pointer @%d :: %s", id, i , this) ;

            // This does BlockIO so disturbs tracking. 
            if ( CheckingTree && isLeaf )
            {
                // Within this block the locals 'records' and 'id' shadow the
                // fields of the same names.
                int ptr = ptrs.get(i) ;
                BPTreeRecords records = bpTree.getRecordsMgr().getRead(ptr) ;
                int id = records.getId() ;
                // NOTE(review): the arguments are (id, i) but the message reads
                // as "block @<slot> has a different id: <id>" - confirm the order.
                if ( id != ptrs.get(i) )
                    error("Records: Block @%d has a different id: %d :: %s", id, i, this) ;
                int link = records.getLink() ;
                // Don't check if +1 does not exist.
                if ( i != count )
                {
                    // NOTE(review): this fetches ptrs.get(i) again - the same block
                    // as 'records' - and compares its link with 'link', so the test
                    // looks vacuous. Presumably the intent was to fetch ptrs.get(i+1)
                    // and compare 'link' with that block's id - confirm before changing.
                    BPTreeRecords page = bpTree.getRecordsMgr().getRead(ptrs.get(i)) ;
                    int id2 = page.getLink() ;
                    if ( link != id2 )
                        error("Records: Link not to next block @%d/@%d has a different id: %d :: %s", id, id2, i, records) ;
                    bpTree.getRecordsMgr().release(page) ;
                }
                records.release() ;
            }
            
        }
            
        // Check empty is empty
        if ( SystemTDB.NullOut )
        {
            // Continue from i == count+1 up to the pointer capacity.
            int x = params.MaxPtr ;
            for ( ; i < x ; i ++ )
            {
                if ( ! ptrs.isClear(i) )
                    error("Node: %d: Unexpected pointer @%d :: %s", id, i, this) ;
            }
        }
    }

    /**
     * Check this node against the given bounds, then check every child.
     * For each child other than the last, the key held here at the same slot
     * is compared with the child's high, min and max records. Children that
     * are not BPTreeNodes are checked with their own no-argument
     * checkNodeDeep(); BPTreeNode children are checked recursively with
     * bounds narrowed to the neighbouring keys of this node. Every child
     * page fetched is released before moving on. Every failed check calls
     * error(), which throws.
     *
     * @param min lower bound for keys in this subtree; null means unbounded
     * @param max upper bound for keys in this subtree; null means unbounded
     */
    private void checkNodeDeep(Record min, Record max)
    {
        checkNode(min, max) ;
    
        // Check pointers.
        // A node with no records has no children visited here at all;
        // otherwise there are count+1 children.
        int limit = (count == 0) ? 0 : count+1 ; 
        
        for ( int i = 0 ; i < limit ; i++ )
        {
            // Bounds to hand down to child i; narrowed further below.
            Record min1 = min ;
            Record max1 = max ;
            BPTreePage n = get(i, READ) ;
            
            // The last child (i == count) has no key to its right in this node,
            // so the key comparisons are skipped for it.
            if ( i != count )
            {
                Record keySubTree = n.getHighRecord() ;     // high key in immediate child 
                Record keyHere = records.get(i) ;           // key in this
                
                // error() throws, so the dereferences that follow each null
                // test are only reached when the value is non-null.
                if ( keySubTree == null )
                    error("Node: %d: Can't get high record from %d", id, n.getId()) ;
                
                if ( keySubTree.getKey() == null )
                    error("Node: %d: Can't get high record is missing it's key from %d", id, n.getId()) ;
                    
                if ( keyHere == null )
                    error("Node: %d: record is null", id) ;
                
                if ( keyHere.getKey() == null )
                    error("Node: %d: Record key is null", id) ;
                
                if ( keyGT(keySubTree, keyHere) )
                    error("Node: %d: Child key %s is greater than this key %s", id, keySubTree, keyHere) ;
                
                Record keyMax = n.maxRecord() ;             // max key in subTree
                Record keyMin = n.minRecord() ;
                
                // The key held here must equal the largest key of the subtree to its left.
                if ( keyNE(keyHere, keyMax) )
                    error("Node: %d: Key %s is not the max [%s] of the sub-tree idx=%d", id, keyHere, keyMax, i) ;
                
                if ( min != null && keyGT(min, keyMin) )
                    error("Node: %d: Minimun for this node should be %s but it's %s", id, min, keyMin) ;
                if ( max != null && keyLT(max, keyMax) )
                    error("Node: %d: Maximum for this node should be %s but it's %s", id, max, keyMax) ;
                if ( min != null && keyGT(min, keyHere) )
                    error("Node: %d: Key too small: %s - min should be %s", id, keyHere, min) ;
                // keyHere == keyMax ??
                if ( max != null && keyLT(max, keyHere) )
                    error("Node: %d: Key too large: %s - max should be %s", id, keyHere, max) ;
            }

            // Look deeper.
            if ( ! ( n instanceof BPTreeNode ) )
            {
                // Records.
                n.checkNodeDeep() ;
                n.release() ;
                continue ;
            }
            
            // Valid pointer?
            // Only reached when the child is a BPTreeNode.
            // NOTE(review): if the children of a leaf node are never BPTreeNodes,
            // the isLeaf branch below cannot be reached - confirm.
            if ( isLeaf )
            {
                if ( ! bpTree.getRecordsMgr().getBlockMgr().valid(ptrs.get(i)) )
                    error("Node: %d: Dangling ptr (records) in block @%d :: %s", id, i, this) ;
            }
            else
            {
                if ( ! bpTree.getNodeManager().valid(ptrs.get(i)) )
                    error("Node: %d: Dangling ptr in block @%d :: %s", id, i, this) ;
            }

            // Calc new min/max.
            // First child: keeps the inherited min, bounded above by key 0.
            // Last child: bounded below by the last key, with no upper bound.
            // Any other child: bounded by the keys on either side of it.
            if ( i == 0 ) 
                max1 = records.get(0) ;
            else if ( i == count )
            {
                min1 = records.get(count-1) ;
                max1 = null ;
            }
            else
            { 
                min1 = records.get(i-1) ;
                max1 = records.get(i) ;
            }
//            if ( n.parent != id )
//                error("Node: %d [%d]: Parent/child mismatch :: %s", id, n.parent, this) ;
            
            ((BPTreeNode)n).checkNodeDeep(min1, max1) ;
            n.release() ;
        }
    }

    /**
     * Ask {@code BPlusTreeParams} whether logging applies for this class's logger.
     *
     * @return the result of {@code BPlusTreeParams.logging(log)}
     */
    private static boolean logging()
    {
        final boolean enabled = BPlusTreeParams.logging(log) ;
        return enabled ;
    }
    
    /**
     * Print a formatted warning on standard output and flush.
     *
     * @param msg  format string
     * @param args format arguments
     */
    private void warning(String msg, Object... args)
    {
        final String text = format(msg, args) ;
        System.out.println("Warning: "+text) ;
        System.out.flush() ;
    }
    
    /**
     * Report a fatal inconsistency: print the formatted message on standard
     * output, attempt a dump of all blocks, then throw.
     *
     * @param msg  format string
     * @param args format arguments
     * @throws BPTreeException always, carrying the formatted message
     */
    private void error(String msg, Object... args)
    {
        final String text = format(msg, args) ;
        System.out.println() ;
        System.out.println(text) ;
        System.out.flush() ;
        try
        {
            dumpBlocks() ;
        }
        catch (Exception ignored)
        {
            // The dump is only diagnostic; a failure there must not hide the real error.
        }
        throw new BPTreeException(text) ;
    }
    
    /**
     * Diagnostic dump: ask the node manager and then the records manager to
     * dump themselves, each under a heading printed on standard output.
     */
    private void dumpBlocks()
    {
        final java.io.PrintStream out = System.out ;

        out.println("---Nodes") ;
        bpTree.getNodeManager().dump() ;

        out.println("---Records") ;
        bpTree.getRecordsMgr().dump() ;

        out.println("---") ;
        out.flush() ;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy