// Source listing: com.bigdata.btree.BTreePageStats (Maven / Gradle / Ivy)
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.btree;
import com.bigdata.util.Bytes;
public class BTreePageStats extends PageStats {
public BTreePageStats() {
}
public void visit(final AbstractBTree btree, final AbstractNode> node) {
if (m == 0) {
// Make a note of the configured branching factor.
m = btree.getBranchingFactor();
ntuples = btree.getEntryCount();
height = btree.getHeight();
}
super.visit(btree, node);
}
@Override
public int getRecommendedBranchingFactor() {
if (nnodes == 0) {
// Not enough data to make an estimate.
return m;
}
// Nominal (target) page size.
final int NOMINAL_PAGE_SIZE = 8 * Bytes.kilobyte32;
// The maximum #of allocations that can be blobs.
final float maxPercentBlobs = .05f;
// The percentage of the total allocations in each slot size.
final float[] percentages = new float[SLOT_SIZES.length];
// The percentage of allocations that are blobs.
final float percentBlobs;
{
final long nallocs = nnodes + nleaves;
for (int i = 0; i < SLOT_SIZES.length; i++) {
percentages[i] = histogram[i] / nallocs;
}
percentBlobs = blobs / nallocs;
}
if (percentBlobs > maxPercentBlobs) {
/*
* We need to reduce the branching factor for this index in order to
* bring the majority of the allocations under the blobs threshold
* (aka the NOMINAL_PAGE_SIZE).
*
* This heuristic simply reduces the branching factor by the
* percentage that we are over the target maximum percentage of blob
* allocations in the index.
*/
final int newM = (int) (m * (1.0 - (percentBlobs - maxPercentBlobs)));
return newM;
}
/*
* Estimate the best branching factor for this index.
*/
final double averageNodeBytes = (nodeBytes / (double) nnodes);
final double averageLeafBytes = (leafBytes / (double) nleaves);
/*
* The factor that we reduce the target branching factor below the
* perfect fit for the average node/leaf in order to decrease the risk
* that the histogram of allocations will include a significant fraction
* of blobs. On the WORM (and cluster) blobs are single contiguous
* allocations. On the RW mode backing stores, blobs are one allocation
* for the blob header plus at least two allocations for the blob (since
* a blob is always larger than a single allocation). Thus, when we move
* up to blobs, we do THREE (3) IOs rather than ONE (1). However, we
* still want to keep the maximum page size to a reasonable target (8k
* or 16k) since the RWStore can otherwise wind up with unusable and
* unrecoverable allocators.
*
* @see
* Optimize RWStore allocator sizes
*/
final double reductionFactor = .80;
// Estimate based on the average node size.
final int newM_nodes = (int) (reductionFactor * (m * NOMINAL_PAGE_SIZE) / averageNodeBytes);
// Estimate based on the average leaf size.
final int newM_leaves = (int) (reductionFactor * (m * NOMINAL_PAGE_SIZE) / averageLeafBytes);
// The average of those two estimates.
final int newM = (newM_nodes + newM_leaves) / 2;
return newM;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy