com.sleepycat.je.tree.CountEstimator Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of starrocks-bdb-je Show documentation
starrocks managed bdb je
The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.tree;

import java.util.ArrayList;
import java.util.List;

import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.OperationResult;
import com.sleepycat.je.dbi.CursorImpl;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.txn.LockType;

/**
 * Estimates the number of non-deleted BIN entries between two end points,
 * using information returned in TrackingInfo from Tree.getParentINForChildIN.
 * Used for estimating dup counts, e.g., for join query optimization.  Accuracy
 * is limited by the number of samples taken to compute the average number of
 * entries at each level.  Currently only two samples (at the end points) are
 * taken, and because the tree is not balanced making the number of entries
 * highly variable, the count can easily be off by a factor of two.
 */
public class CountEstimator {

    /* If exceeded, there must be a bug of some kind. */
    private static final int MAX_RETRIES_AFTER_SPLIT = 100;

    /**
     * Returns an estimate of the number of records between two end points
     * specified by begin/end cursor positions.
     */
    public static long count(DatabaseImpl dbImpl,
                             CursorImpl beginCursor,
                             boolean beginInclusive,
                             CursorImpl endCursor,
                             boolean endInclusive) {

        /* If the two cursors are at the same position, return 1. */
        if (beginCursor.isOnSamePosition(endCursor)) {
            return 1;
        }

        /* Compute estimate for cursors at different positions. */
        final CountEstimator estimator = new CountEstimator(dbImpl);

        return estimator.count(beginCursor, endCursor) +
               (beginInclusive ? 1 : 0) +
               (endInclusive ? 1 : 0);
    }

    private final DatabaseImpl dbImpl;

    private List beginStack;
    private List endStack;

    private final List> avgEntriesStacks =
        new ArrayList>();

    private int levelCount;
    private int rootLevel;
    private float[] avgEntries;

    private CountEstimator(DatabaseImpl dbImpl) {
        this.dbImpl = dbImpl;
    }
    
    private long count(CursorImpl beginCursor, CursorImpl endCursor) {

        for (int numRetries = 0;; numRetries += 1) {

            /*
             * If we have retried too many times, give up.  This is probably
             * due to a bug of some kind, and we shouldn't loop forever.
             */
            if (numRetries > MAX_RETRIES_AFTER_SPLIT) {
                throw EnvironmentFailureException.unexpectedState();
            }

            /*
             * Set up the initial info for the computation.  Retry if a split
             * occurs.
             */
            beginStack = beginCursor.getAncestorPath();
            if (beginStack == null) {
                continue;
            }
            endStack = endCursor.getAncestorPath();
            if (endStack == null) {
                continue;
            }

            if (!findCommonAncestor()) {
                continue;
            }

            /* Get the the average entries from the two end points.  */
            getAvgEntries(beginCursor, endCursor);

            /*
             * Return the count.  FUTURE: Taking more samples between the two
             * end points would increase accuracy.
             */
            return countTotal();
        }
    }

    /**
     * Find the common ancestor node for the two end points, which we'll call
     * the root level.  If no common ancestor can be found, return false to
     * restart the process, because a split must have occurred in between
     * getting the two stacks for the end points.
     */
    private boolean findCommonAncestor() {

        levelCount = beginStack.size();
        if (levelCount != endStack.size()) {
            /* Must have been a root split. */
            return false;
        }

        rootLevel = -1;

        for (int level = levelCount - 1; level >= 0; level -= 1) {

            if (beginStack.get(level).nodeId == endStack.get(level).nodeId) {
                rootLevel = level;
                break;
            }
        }
        if (rootLevel < 0) {
            /* Must have been a split. */
            return false;
        }
        return true;
    }

    /**
     * This method starts with a preliminary average using just the two end
     * points.
     */
    private void getAvgEntries(CursorImpl beginCursor, CursorImpl endCursor) {

        avgEntriesStacks.clear();

        if (!addAvgEntriesSample(beginStack)) {
            sampleNextBIN(beginCursor, true /*moveForward*/);
        }

        if (!addAvgEntriesSample(endStack)) {
            sampleNextBIN(endCursor, false /*moveForward*/);
        }

        computeAvgEntries();
    }

    /**
     * FUTURE: use internal skip method instead, saving a btree lookup.
     */
    private void sampleNextBIN(
        CursorImpl beginOrEndCursor,
        boolean moveForward) {

        final CursorImpl cursor =
            beginOrEndCursor.cloneCursor(true /*samePosition*/);
        
        try {
            cursor.latchBIN();
            if (moveForward) {
                cursor.setOnLastSlot();
            } else {
                cursor.setOnFirstSlot();
            }

            final OperationResult result = cursor.getNext(
                null /*foundKey*/, null /*foundData*/,
                LockType.NONE, false /*dirtyReadAll*/,
                moveForward, true /*alreadyLatched*/,
                null /*rangeConstraint*/);

            if (result != null) {
                final List stack = cursor.getAncestorPath();
                if (stack != null) {
                    addAvgEntriesSample(stack);
                }
            }
        } finally {
            cursor.close();
        }
    }

    /**
     * At each level we compute the average number of entries.  This will be
     * used as a multipler to estimate the number of nodes for any IN at that
     * level.
     */
    private void computeAvgEntries() {

        avgEntries = new float[levelCount];

        avgEntries[levelCount - 1] = 1.0F;

        if (avgEntriesStacks.size() == 0) {
            return;
        }

        for (int level = levelCount - 1; level > 0; level -= 1) {
            long totalEntries = 0;
            for (List stack : avgEntriesStacks) {
                totalEntries += stack.get(level).entries;
            }
            final float avg = totalEntries / ((float) avgEntriesStacks.size());
            avgEntries[level - 1] = avg * avgEntries[level];
        }
    }

    private boolean addAvgEntriesSample(List stack) {
        if (isFirstBIN(stack) || isLastBIN(stack)) {
            return false;
        }
        avgEntriesStacks.add(stack);
        return true;
    }

    private boolean isFirstBIN(List stack) {
        for (int i = 0; i < stack.size() - 1; i += 1) {
            final TrackingInfo info = stack.get(i);
            if (info.index != 0) {
                return false;
            }
        }
        return true;
    }

    private boolean isLastBIN(List stack) {
        for (int i = 0; i < stack.size() - 1; i += 1) {
            final TrackingInfo info = stack.get(i);
            if (info.index != info.entries - 1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Count the total for each node that is between the two end points.
     */
    private long countTotal() {
        long total = 0;

        /* Add nodes between the end points at the root level. */
        final int rootIndex1 = beginStack.get(rootLevel).index + 1;
        final int rootIndex2 = endStack.get(rootLevel).index;
        if (rootIndex2 > rootIndex1) {
            total += Math.round((rootIndex2 - rootIndex1) *
                                avgEntries[rootLevel]);
        }

        /* Add nodes under the end points at lower levels. */
        for (int level = rootLevel + 1; level < levelCount; level += 1) {

            /* Add nodes under left end point that are to its right. */
            final int leftIndex = beginStack.get(level).index;
            final int lastIndex = beginStack.get(level).entries - 1;
            if (lastIndex > leftIndex) {
                total += Math.round((lastIndex - leftIndex) *
                                    avgEntries[level]);
            }

            /* Add nodes under right end point that are to its left. */
            final int rightIndex = endStack.get(level).index;
            final int firstIndex = 0;
            if (rightIndex > firstIndex) {
                total += Math.round((rightIndex - firstIndex) *
                                    avgEntries[level]);
            }
        }

        return total;
    }

    /* For future use, if getKeyRatios is exposed in the API. */
    static class KeyRatios {
        final double less;
        final double equal;
        final double greater;

        KeyRatios(double less, double equal, double greater) {
            this.less = less;
            this.equal = equal;
            this.greater = greater;
        }

        @Override
        public String toString() {
            return "less: " + less +
                   " equal: " + equal +
                   " greater: " + greater;
        }
    }

    /*
     * For future use, if getKeyRatios is exposed in the API.  Be sure to test
     * boundary conditions when index is 0 or nEntries.
     *
     * Algorithm copied from __bam_key_range in BDB btree/bt_stat.c.
     */
    static KeyRatios getKeyRatios(List infoByLevel,
                                  boolean exact) {
        double factor = 1.0;
        double less = 0.0;
        double greater = 0.0;

        /*
         * At each level we know that INs greater than index contain keys
         * greater than what we are looking for and those less than index are
         * less than.  The one pointed to by index may have some less, some
         * greater or even equal.  If index is equal to the number of entries,
         * then the key is out of range and everything is less.
         */
        for (final TrackingInfo info : infoByLevel) {
            if (info.index == 0) {
                greater += (factor * (info.entries - 1)) / info.entries;
            } else if (info.index == info.entries) {
                less += factor;
            } else {
                less += (factor * info.index) / info.entries;
                greater += (factor * ((info.entries - info.index) - 1)) /
                           info.entries;
            }

            /* Factor at next level down is 1/n'th the amount at this level. */
            factor /= info.entries;

            /*
            System.out.println("factor: " + factor +
                               " less: " + less +
                               " greater: " + greater);
            */
        }

        /*
         * If there was an exact match then assign the 1/n'th factor to the key
         * itself.  Otherwise that factor belongs to those greater than the
         * key, unless the key was out of range.
         */
        final double equal;
        if (exact) {
            equal = factor;
        } else {
            if (less != 1.0) {
                greater += factor;
            }
            equal = 0.0;
        }

        return new KeyRatios(less, equal, greater);
    }
}