All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.jena.tdb.index.bplustree.BPlusTree Maven / Gradle / Ivy

Go to download

TDB is a storage subsystem for Jena and ARQ; it is a native triple store providing persistent storage of triples/quads.

There is a newer version: 4.10.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdb.index.bplustree;

import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.CheckingNode ;
import static org.apache.jena.tdb.index.bplustree.BPlusTreeParams.CheckingTree ;

import java.util.Iterator ;

import org.apache.jena.atlas.io.IndentedWriter ;
import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.tdb.base.block.BlockMgr ;
import org.apache.jena.tdb.base.block.BlockMgrFactory ;
import org.apache.jena.tdb.base.block.BlockMgrTracker ;
import org.apache.jena.tdb.base.record.Record ;
import org.apache.jena.tdb.base.record.RecordFactory ;
import org.apache.jena.tdb.base.recordbuffer.RecordBufferPage ;
import org.apache.jena.tdb.base.recordbuffer.RecordBufferPageMgr ;
import org.apache.jena.tdb.base.recordbuffer.RecordRangeIterator ;
import org.apache.jena.tdb.index.RangeIndex ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

/** B-Tree converted to B+Tree
 *
 * B-Tree taken from:
 * Introduction to Algorithms, Second Edition
 * Chapter 18: B-Trees
 * by Thomas H. Cormen, Charles E. Leiserson,
 *    Ronald L. Rivest and Clifford Stein
 *
 * Includes implementation of removal.
 *
 * Notes:
 * Stores "records", which are a key and value (the value may be null).
 *
 * In this B+Tree implementation, the (key,value) pairs are held in
 * RecordBuffer, which wrap a ByteBuffer that only has records in it.
 * BPTreeRecords provides the B+Tree view of a RecordBuffer. All records
 * are in RecordBuffer - the "tree" part is an index for finding the right
 * page. The tree only holds keys, copies from the (key, value) pairs in
 * the RecordBuffers.
 *
 * Notes:
 *
 * The version above splits nodes on the way down when full,
 * not when needed where a split can bubble up from below.
 * It means it only ever walks down the tree on insert.
 * Similarly, the delete code ensures a node is suitable
 * before descending.
 *
 * Variations:
 * In this impl, splitRoot leaves the root node in place.
 * The root is always the same block.
 */

public class BPlusTree implements Iterable<Record>, RangeIndex
{
    /*
     * Insertion:
     * There are two styles for handling node splitting.
     *
     * Classically, when a leaf is split, the separating key is inserted into
     * the parent, which may itself be full and so that is split, etc., propagating
     * up to the root (splitting the root is the only time the depth of the
     * BTree increases). This involves walking back up the tree.
     *
     * It is more convenient to have a spare slot in a tree node, so that the
     * new key can be inserted, then the keys and child pointers split.
     *
     * Modification: during insertion, splitting is applied to any full node
     * traversed on the way down, resulting in any node passed through having
     * some space for a new key. When splitting starts at a leaf, only the
     * immediate parent is changed because it must have space for the new key.
     * There is no cascade back to the top of the tree (it would have happened on
     * the way down); in other words, splitting is done early. This is insertion
     * in a single downward pass.
     *
     * When compared to the classic approach including the extra slot for
     * convenient inserting, the space usage is approximately the same.
     *
     * Deletion:
     * Deletion always occurs at a leaf; if it's an internal node, swap the key
     * with the right-most left key (predecessor) or left-most right key (successor),
     * and delete in the leaf.
     *
     * The classic way is to propagate node merging back up from the leaf.  The
     * book outlines a way that checks that a node is delete-suitable (min+1 in size)
     * on the way down.  This is implemented here; this is one-pass(ish).
     *
     * Variants:
     * http://en.wikipedia.org/wiki/Btree
     *
     * B+Tree: Tree contains keys, and only the leaves have the values. Used for
     * secondary indexes (external pointers) but also for general on-disk usage
     * because more keys are packed into a level. Can chain the leaves for a
     * sorted-order traversal.
     *
     * B*Tree: Nodes are always 2/3 full. When a node is full, keys are shared with
     * adjacent nodes, and only if they are all full do 2 nodes get split into 3 nodes.
     * Implementation wise, it is more complicated; can cause more I/O.
     *
     * B#Tree: A B+Tree where the operations try to swap nodes between immediate
     * sibling nodes instead of immediately splitting (like delete, only on insert).
     */

    private static final Logger log = LoggerFactory.getLogger(BPlusTree.class) ;

    /** Number of records copied out of the tree per pass in {@link #clear()}. */
    private static final int SLICE = 10000 ;

    /** Block id of the root node; reassigned only by {@link #createIfAbsent()}. */
    private int rootIdx = BPlusTreeParams.RootId ;
    // The root node is deliberately not cached: it is fetched and released per operation.
    private final BPTreeNodeMgr nodeManager ;
    private final BPTreeRecordsMgr recordsMgr ;
    private final BPlusTreeParams bpTreeParams ;

    /** Create the in-memory structures to correspond to
     * the supplied block managers for the persistent storage.
     * Initialize the persistent storage to the empty B+Tree if it does not exist.
     * This is the normal way to create a B+Tree.
     *
     * @param params       parameters describing the tree
     * @param blkMgrNodes  block manager backing the tree nodes
     * @param blkMgrLeaves block manager backing the record pages
     * @return the attached (and, if necessary, freshly initialized) tree
     */
    public static BPlusTree create(BPlusTreeParams params, BlockMgr blkMgrNodes, BlockMgr blkMgrLeaves)
    {
        BPlusTree bpt = attach(params, blkMgrNodes, blkMgrLeaves) ;
        bpt.createIfAbsent() ;
        return bpt ;
    }

    /** Create the in-memory structures to correspond to
     *  the supplied block managers for the persistent storage.
     *  Does not initialize the B+Tree - it assumes the block managers
     *  correspond to an existing B+Tree.
     *
     * @param params        parameters describing the tree
     * @param blkMgrNodes   block manager backing the tree nodes
     * @param blkMgrRecords block manager backing the record pages
     * @return a tree object over the given storage
     */
    public static BPlusTree attach(BPlusTreeParams params, BlockMgr blkMgrNodes, BlockMgr blkMgrRecords)
    {
        return new BPlusTree(params, blkMgrNodes, blkMgrRecords) ;
    }

    /** (Testing mainly) Make an in-memory B+Tree, with copy-in, copy-out block managers.
     *  Delegates to the named variant with a {@code null} name.
     */
    public static BPlusTree makeMem(int order, int minRecords, int keyLength, int valueLength)
    { return makeMem(null, order, minRecords, keyLength, valueLength) ; }

    /** (Testing mainly) Make an in-memory B+Tree, with copy-in, copy-out block managers.
     *
     * @param name        label prefix for the two block managers; a {@code null} name
     *                    produces labels such as {@code "null(nodes)"} (string concatenation)
     * @param order       passed to {@link BPlusTreeParams}
     * @param minRecords  if positive, the records block is sized to hold {@code 2*minRecords}
     *                    records; otherwise the calculated block size of the parameters is used
     * @param keyLength   passed to {@link BPlusTreeParams}
     * @param valueLength passed to {@link BPlusTreeParams}
     * @return a new tree created via {@link #create}
     */
    public static BPlusTree makeMem(String name, int order, int minRecords, int keyLength, int valueLength)
    {
        BPlusTreeParams params = new BPlusTreeParams(order, keyLength, valueLength) ;

        int blkSize ;
        if ( minRecords > 0 )
        {
            int maxRecords = 2*minRecords ;
            blkSize = RecordBufferPage.calcBlockSize(params.getRecordFactory(), maxRecords) ;
        }
        else
            blkSize = params.getCalcBlockSize() ;

        // The node block manager always uses the calculated block size;
        // only the records block manager honours minRecords.
        BlockMgr mgr1 = BlockMgrFactory.createMem(name+"(nodes)", params.getCalcBlockSize()) ;
        BlockMgr mgr2 = BlockMgrFactory.createMem(name+"(records)", blkSize) ;

        return BPlusTree.create(params, mgr1, mgr2) ;
    }

    /** Debugging: build a second tree object over the same storage as {@code bpTree},
     *  with both block managers wrapped by {@link BlockMgrTracker#track}.
     *  The argument itself is not modified.
     */
    public static BPlusTree addTracking(BPlusTree bpTree)
    {
        BlockMgr mgr1 = BlockMgrTracker.track(bpTree.getNodeManager().getBlockMgr()) ;
        BlockMgr mgr2 = BlockMgrTracker.track(bpTree.getRecordsMgr().getBlockMgr()) ;
        return BPlusTree.attach(bpTree.getParams(), mgr1, mgr2) ;
    }

    /** Wire up the node and records managers; no consistency checks are made on the arguments. */
    private BPlusTree(BPlusTreeParams params, BlockMgr blkMgrNodes, BlockMgr blkMgrRecords)
    {
        this.bpTreeParams = params ;
        this.nodeManager = new BPTreeNodeMgr(this, blkMgrNodes) ;
        RecordBufferPageMgr recordPageMgr = new RecordBufferPageMgr(params.getRecordFactory(), blkMgrRecords) ;
        this.recordsMgr = new BPTreeRecordsMgr(this, recordPageMgr) ;
    }

    /** Create if does not exist.
     *  Nothing is done when the node manager reports the root block id as valid.
     *
     * @throws InternalError if the newly created root is not block 0
     */
    private void createIfAbsent()
    {
        // This fixes the root to being block 0
        if ( nodeManager.valid(BPlusTreeParams.RootId) )
            return ;

        // Create as does not exist.
        // [TxTDB:PATCH-UP]
        // ** Better: separate "does it exist? - create statics used in factory"
        startUpdateBlkMgr() ;
        try
        {
            // Fresh BPlusTree
            rootIdx = nodeManager.createEmptyBPT() ;
            if ( rootIdx != 0 )
                throw new InternalError("New B+Tree root allocated at block "+rootIdx+" (expected 0)") ;

            if ( CheckingNode )
            {
                BPTreeNode root = nodeManager.getRead(rootIdx, BPlusTreeParams.RootParent) ;
                root.checkNodeDeep() ;
                root.release() ;
            }

            // Sync created blocks to disk - any caches are now clean.
            nodeManager.getBlockMgr().sync() ;
            recordsMgr.getBlockMgr().sync() ;
        }
        finally
        {
            // Always close the update bracket, even if creation failed part-way.
            finishUpdateBlkMgr() ;
        }
    }

    /** Fetch the root node from the node manager. No caching here. */
    private BPTreeNode getRoot()
    {
        return nodeManager.getRoot(rootIdx) ;
    }

    /** Release a root node obtained from {@link #getRoot()}. */
    private void releaseRoot(BPTreeNode rootNode)
    {
        // [TxTDB:PATCH-UP] Root caching is disabled, so simply release the node handed in.
        rootNode.release() ;
    }

    /** Placeholder for root caching, which is currently disabled; does nothing. */
    private void setRoot(BPTreeNode node)
    {
        //root = node ;
    }

    /** Get the parameters describing this B+Tree */
    public BPlusTreeParams getParams()     { return bpTreeParams ; }

    /** Only use for careful manipulation of structures */
    public BPTreeNodeMgr getNodeManager()          { return nodeManager ; }
    /** Only use for careful manipulation of structures */
    public BPTreeRecordsMgr getRecordsMgr()     { return recordsMgr ; }

    /** The record factory of this tree's parameters. */
    @Override
    public RecordFactory getRecordFactory()
    {
        return bpTreeParams.getRecordFactory() ;
    }

    /** Search the tree for {@code record}.
     *
     * @return whatever {@code BPTreeNode.search} yields from the root;
     *         {@link #contains(Record)} treats {@code null} as "not present"
     */
    @Override
    public Record find(Record record)
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { return BPTreeNode.search(root, record) ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }

    /** True when {@link #find(Record)} returns a non-null record. */
    @Override
    public boolean contains(Record record)
    {
        return find(record) != null ;
    }

    /** The root node's {@code minRecord()}; the result for an empty tree is decided by BPTreeNode. */
    @Override
    public Record minKey()
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { return root.minRecord() ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }

    /** The root node's {@code maxRecord()}; the result for an empty tree is decided by BPTreeNode. */
    @Override
    public Record maxKey()
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { return root.maxRecord() ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }

    /** Add a record; true when {@link #addAndReturnOld(Record)} returned {@code null}. */
    @Override
    public boolean add(Record record)
    {
        return addAndReturnOld(record) == null ;
    }

    /** Add a record into the B+Tree.
     *
     * @return the value returned by {@code BPTreeNode.insert}
     *         (presumably the record previously stored, or null - confirm against BPTreeNode)
     */
    public Record addAndReturnOld(Record record)
    {
        startUpdateBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try
            {
                Record r = BPTreeNode.insert(root, record) ;
                if ( CheckingTree ) root.checkNodeDeep() ;
                return r ;
            }
            finally { releaseRoot(root) ; }
        }
        finally { finishUpdateBlkMgr() ; }
    }

    /** Delete a record; true when {@link #deleteAndReturnOld(Record)} returned non-null. */
    @Override
    public boolean delete(Record record)
    { return deleteAndReturnOld(record) != null ; }

    /** Delete a record from the B+Tree.
     *
     * @return the value returned by {@code BPTreeNode.delete}
     *         (presumably the removed record, or null if absent - confirm against BPTreeNode)
     */
    public Record deleteAndReturnOld(Record record)
    {
        startUpdateBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try
            {
                Record r = BPTreeNode.delete(root, record) ;
                if ( CheckingTree ) root.checkNodeDeep() ;
                return r ;
            }
            finally { releaseRoot(root) ; }
        }
        finally { finishUpdateBlkMgr() ; }
    }

    /** Iterate over all records (an unbounded range). */
    @Override
    public Iterator<Record> iterator()
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { return iterator(root) ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }

    /** Iterate over a range of fromRec (inclusive) to toRec (exclusive). */
    @Override
    public Iterator<Record> iterator(Record fromRec, Record toRec)
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { return iterator(root, fromRec, toRec) ; }
            finally { releaseRoot(root) ; }
        }
        // Note that this ends the read-part (find the start), not the iteration.
        // Iterator read blocks still get handled.
        finally { finishReadBlkMgr() ; }
    }

    /** Iterate over a range of fromRec (inclusive) to toRec (exclusive) */
    private static Iterator<Record> iterator(BPTreeNode node, Record fromRec, Record toRec)
    {
        // Look for starting RecordsBufferPage id.
        int id = BPTreeNode.recordsPageId(node, fromRec) ;
        if ( id < 0 )
            return Iter.nullIter() ;
        RecordBufferPageMgr pageMgr = node.getBPlusTree().getRecordsMgr().getRecordBufferPageMgr() ;
        // No pages are active at this point.
        return RecordRangeIterator.iterator(id, fromRec, toRec, pageMgr) ;
    }

    /** Iterate over everything reachable from {@code node}: both range bounds are null. */
    private static Iterator<Record> iterator(BPTreeNode node)
    {
        return iterator(node, null, null) ;
    }

    // Internal calls: bracket an operation on both managers, nodes first.
    private void startReadBlkMgr()
    {
        nodeManager.startRead() ;
        recordsMgr.startRead() ;
    }

    private void finishReadBlkMgr()
    {
        nodeManager.finishRead() ;
        recordsMgr.finishRead() ;
    }

    private void startUpdateBlkMgr()
    {
        nodeManager.startUpdate() ;
        recordsMgr.startUpdate() ;
    }

    private void finishUpdateBlkMgr()
    {
        nodeManager.finishUpdate() ;
        recordsMgr.finishUpdate() ;
    }

    /** True when the root node reports that it has no keys. */
    @Override
    public boolean isEmpty()
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { return ! root.hasAnyKeys() ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }

    /** Remove every record, working in batches of at most {@link #SLICE} records. */
    @Override
    public void clear()
    {
        Record[] records = new Record[SLICE] ;
        while ( true )
        {
            // Copy a batch out before deleting anything; presumably this avoids
            // modifying the tree underneath a live iterator.
            Iterator<Record> iter = iterator() ;
            int n = 0 ;
            while ( n < SLICE && iter.hasNext() )
                records[n++] = iter.next() ;
            if ( n == 0 )
                break ;
            for ( int j = 0 ; j < n ; j++ )
            {
                delete(records[j]) ;
                records[j] = null ;     // Drop the reference once deleted.
            }
        }
    }

    /** Sync both block managers (each only if non-null). */
    @Override
    public void sync()
    {
        if ( nodeManager.getBlockMgr() != null )
            nodeManager.getBlockMgr().sync() ;
        if ( recordsMgr.getBlockMgr() != null )
            recordsMgr.getBlockMgr().sync() ;
    }

    /** Close both block managers (each only if non-null). */
    @Override
    public void close()
    {
        if ( nodeManager.getBlockMgr() != null )
            nodeManager.getBlockMgr().close() ;
        if ( recordsMgr.getBlockMgr() != null )
            recordsMgr.getBlockMgr().close() ;
    }

    /** Count the records by passing a full iterator to {@code Iter.count}. */
    @Override
    public long size()
    {
        return Iter.count(iterator()) ;
    }

    /** Run {@code checkNodeDeep()} on the root, inside a read bracket, releasing the root afterwards. */
    @Override
    public void check()
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { root.checkNodeDeep() ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }

    /** Dump the tree via the root node's {@code dump()}, releasing the root afterwards. */
    public void dump()
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { root.dump() ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }

    /** Dump the tree to {@code out} via the root node's {@code dump(out)}, releasing the root afterwards. */
    public void dump(IndentedWriter out)
    {
        startReadBlkMgr() ;
        try
        {
            BPTreeNode root = getRoot() ;
            try { root.dump(out) ; }
            finally { releaseRoot(root) ; }
        }
        finally { finishReadBlkMgr() ; }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy