All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.btree.IndexSegmentPlan Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.btree;

import org.apache.log4j.Logger;

import com.bigdata.btree.IndexMetadata.Options;

/**
 * A plan for building a B+-Tree based on an input branching factor and #of
 * entries.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class IndexSegmentPlan {

    protected static final transient Logger log = Logger
            .getLogger(IndexSegmentPlan.class);

    /**
     * A summary representation of the index build plan. The branching factor
     * and the #of entries are the inputs.  The outputs include the height of
     * the B+Tree that should be generated and the #of nodes and leaves that
     * will exist in that B+Tree. 
     */
    public String toString() {

        return getClass() + "{branchingFactor=" + m + ", nentries=" + nentries
                + ", height=" + height + ", nnodes=" + nnodes + ", nleaves="
                + nleaves + "}";
        
    }
    
    /**
     * The branching factor of the output tree (input).
     */
    final public int m;
    
    /**
     * The minimum #of values that may be placed into non-root leaf (and
     * also the minimum #of children that may be placed into a non-root
     * node). (the minimum capacity).
     */
    final public int m2; 
    
    /**
     * The #of entries in the btree (input).
     */
    final public long nentries;
    
    /**
     * The #of leaves that will exist in the output tree. When nleaves == 1
     * the output tree will consist of a root leaf. In this case we do not
     * open a temporary file for the nodes since there will not be any.
     */
    final public long nleaves; 

    /**
     * The #of non-leaf nodes in the output tree.
     */
    final public long nnodes;
    
    /**
     * The height of the output tree (#of levels in the output tree).
     */
    final public int height;

    /**
     * The #of entries to place into each leaf. The array is dimensioned to
     * {@link #nleaves}. This is a convenience reference to the last array in
     * {@link #numInNode}.
     */
    final public int[] numInLeaf;
    
    /**
     * The #of nodes at each level of the tree, including the level containing
     * the leaves.
     * 
     * @see #nleaves, which is the #of leaves in the output tree.
     */
    final public long[] numInLevel;

    /**
     * The #of children / values to place into each node in each level of the
     * output tree. The first index is the level in the tree, starting from
     * level zero which is the root and increasing through level [height+1],
     * which is the level containing the leaves of the output tree.
     * 
     * @see #numInLeaf numInLeaf, which is a reference to the last element of
     *      this array.
     */
    final public int[][] numInNode;

    /**
     * Create a plan for building a B+-Tree. The plan has only these two inputs.
     * Everything else about the plan is deterministic based on those values.
     * 
     * @param m
     *            The branching factor of the output tree (#of keys/values for a
     *            leaf or the #of children for a node).
     * @param nentries
     *            The #of entries in the tree.
     * 
     * @throws IllegalArgumentException
     *             if the branching factor is less than
     *             {@value Options#MIN_BRANCHING_FACTOR}.
     * @throws IllegalArgumentException
     *             if the #of index entries is negative (zero is allowed as a
     *             special case).
     */
    public IndexSegmentPlan(final int m, final long nentries) {

        if (m < Options.MIN_BRANCHING_FACTOR)
            throw new IllegalArgumentException();
        
        if (nentries < 0)
            throw new IllegalArgumentException();

        // The branching factor of the output tree.
        this.m = m;
        
        // The #of entries in the btree.
        this.nentries = nentries;
        
        // The minimum capacity of a leaf (or a node).
        m2 = (m+1)/2; 
        
        if(nentries == 0) {
        
            /*
             * Special case for an empty tree.
             */

            if (log.isInfoEnabled())
                log.info("Empty tree.");
            
            nleaves = 1;
            height = 0;
            numInLeaf = new int[]{0};
            numInNode = new int[][]{new int[]{0}};
            numInLevel = new long[]{1};
            nnodes = 0;
            
            return;
        
        }
        
        // The #of leaves in the output tree.
        nleaves = (int)Math.ceil((double)nentries / (double)m); 
        
        // The height of the output tree.
        height = getMinimumHeight(m,nleaves);

        if (log.isInfoEnabled())
            log.info("branchingFactor=" + m + ", nentries=" + nentries
                    + ", nleaves=" + nleaves + ", height=" + height);
        
        // #of entries in each leaf.
        numInLeaf = distributeKeys(m, m2, nleaves, nentries);

        /*
         * Figure out how many nodes are in each level of the output tree. We
         * start from the leaves and compute the #of nodes required to hold that
         * many child references.
         */

        numInNode = new int[height+1][];

        numInLevel = new long[height+1];

        /*
         * The first time through this loop the #of children is initialized to
         * the #of leaves. Thereafter is is [numThisLevel] for the previous
         * level.
         */
        long nchildren = nleaves;

        int nnodes = 0;
        
        for (int h = height - 1; h >= 0; h--) {

            /*
             * Compute the minimum #of nodes required to hold the references for
             * the children of the level in the tree beneath this one.
             */
			final long numThisLevel = (long) Math.ceil((double) nchildren
					/ (double) m);

            numInLevel[h] = numThisLevel;

            /*
             * Distribute the children among the nodes allocated for this level.
             */
            numInNode[h] = distributeChildren(m, m2, numThisLevel, nchildren);

            nchildren = numThisLevel;

            nnodes += numThisLevel;

        }
        
        numInNode[height] = numInLeaf;
        
        numInLevel[height] = nleaves;

        this.nnodes = nnodes;

    }

    /**
     * Chooses the minimum height for a tree having a specified branching factor
     * and a specified #of leaves.
     * 
     * @param m
     *            The branching factor.
     * @param nleaves
     *            The #of leaves that must be addressable by the tree.
     * 
     * @throws UnsupportedOperationException
     *             if it is not possible to build a B+Tree with that branching
     *             factor and that many leaves without exceeding maxHeight
     *             (statically configured to 10).
     */
    public static int getMinimumHeight(final int m, final long nleaves) {
        
        final int maxHeight = 10;
        
        for (int h = 0; h <= maxHeight; h++) {
        
            /*
             * The maximum #of leaves addressable by a tree of height h and the
             * given branching factor.
             * 
             * Note: Java guarantees that Math.pow(int,int) produces the exact
             * result iff that result can be represented as an integer. This
             * useful feature lets us avoid having to deal with precision issues
             * or write our own integer version of pow (computing m*m h times).
             */
			final double d = (double) Math.pow(m, h);
            
			if (d >= nleaves) {
            
                /*
                 * h is the smallest height tree of the specified branching
                 * factor m capable of addressing the specified #of leaves.
                 */
                return h;
                
            }
            
        }
        
        throw new UnsupportedOperationException(
                "Can not build tree with height less than " + maxHeight
                        + " given branchingFactor=" + m + ",  nleaves="
                        + nleaves);
    }

	/**
	 * Distributes the keys among the leaves.
	 * 

* We want to fill up every leaf, but we have to make sure that the last * leaf is not under capacity. To that end, we calculate the #of entries * that would remain if we filled up n-1 leaves completely. If the #of * remaining entries is less than or equal to the minimum capacity of a * leaf, then we have to adjust the allocation of entries such that the last * leaf is at its minimum capacity. This is done by computing the shortage * and then distributing that shortage among the leaves. Once we have * deferred enough entries we are guaranteed that the final leaf will not be * under capacity. * * @param m * The branching factor in the output tree. * @param m2 * The minimum capacity for a leaf in the output tree, which is * computed as (m+1)/2. * @param nleaves * The #of leaves in the output tree. * @param nentries * The #of entries to be inserted into the output tree. * * @return An array indicating how many entries should be inserted into each * leaf of the output tree. The array index is the leaf order * (origin zero). The value is the capacity to which that leaf * should be filled. * @throws IllegalArgumentException * if there is a problem with the arguments. * * @see TestIndexSegmentPlan * @see TestIndexSegmentBuilderWithSmallTree#test_problem3_buildOrder3() */ public static int[] distributeKeys(final int m, final int m2, final long nleaves, final long nentries) { if (m < Options.MIN_BRANCHING_FACTOR) throw new IllegalArgumentException(); if (m > Options.MAX_INDEX_SEGMENT_BRANCHING_FACTOR) throw new IllegalArgumentException(); if (m2 < (m + 1) / 2) throw new IllegalArgumentException(); if (m2 > m) throw new IllegalArgumentException(); if (nleaves <= 0) throw new IllegalArgumentException(); if (nleaves > Integer.MAX_VALUE) { /* * Note: We can not build a plan with more than MAX_INT leaves since * that would require an array with an int64 index. 
*/ throw new IllegalArgumentException(); } if (nentries <= 0) throw new IllegalArgumentException(); if (nleaves == 1) { /* * If there is just a root leaf then any number (up to the leafs * capacity) will fit into that root leaf. */ if (nentries > m) throw new RuntimeException(); return new int[] { (int) nentries }; // Note: nentries<=m<=MAX_INT. } final int[] n = new int[(int) nleaves]; // Note: nleaves<=MAX_INT. /* * Default each leaf to m entries. */ for (int i = 0; i < nleaves; i++) { n[i] = m; } /* * The #of entries that would be allocated to the last leaf if we filled * each proceeding leaf to its capacity of [m] tuples. */ final long remaining = nentries - ((nleaves - 1) * m); /* * If the #of entries remaining would put the leaf under capacity then * we compute the shortage. We need to defer this many entries from the * previous leaves in order to have the last leaf reach its minimum * capacity. * * Note: This will be a small integer in [0:m2). */ int shortage = (int) (remaining < m2 ? m2 - remaining : 0); if( remaining < m2 ) { // The last leaf will be at minimum capacity. n[(int) (nleaves - 1)] = m2; } else { /* * The remainder will go into the last leaf without underflow. * * Note: remaining will be a small integer (LT m). */ n[(int) (nleaves - 1)] = (int) remaining; } /* * If the shortage is greater than the #of previous leaves, then we need * to short some leaves by more than one entry. This scenario can be * observed when building a tree with m := 9 and 10 entries. In that * case there are only two leaves and we wind up shorting the previous * leaf by 4 bringing both leaves down to their minimum capacity of 5. */ if (shortage > 0) { while (shortage > 0) { for (int i = (int) (nleaves - 2); i >= 0 && shortage > 0; i--) { n[i]--; shortage--; } } } return n; } /** * Distributes the children among the nodes of a given level. *

* Note: This is just an alias for * {@link #distributeKeys(int, int, long, long)}. The only difference when * distributing children among nodes is that the result returned to the * caller must be interpreted as the #of children to assigned to each node * NOT the #of keys (for leaves the #of values and the #of keys is always * the same). * * @param m * The branching factor in the output tree. * @param m2 * The minimum capacity, which should be computed as (m+1)/2. * @param nnodes * The #of nodes in the output tree for some given level of the * output tree. * @param nchildren * The #of children to be distributed among those nodes. * * @return An array indicating how many children should be inserted into * each node of the output tree at the given level. The array index * is the node order (origin zero). The value is the #of children * which must be assigned to that leaf. * * @see TestIndexSegmentPlan * @see TestIndexSegmentBuilderWithSmallTree#test_problem3_buildOrder3() */ public static int[] distributeChildren(int m, int m2, long nnodes, long nchildren) { return distributeKeys(m, m2, nnodes, nchildren); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy