All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.cleaner.UtilizationCalculator Maven / Gradle / Ivy

The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.cleaner;

import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.dbi.DbConfigManager;
import com.sleepycat.je.dbi.EnvConfigObserver;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.TTL;
import com.sleepycat.je.EnvironmentMutableConfig;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.Pair;

/**
 * Contains methods for calculating utilization and for selecting files to
 * clean.
 *
 * Note that we do clean files that are protected from deletion by HA/DataSync.
 * If we did not clean them and a large number of files were to become
 * unprotected at once, a large amount of log cleaning may suddenly be
 * necessary.  Cleaning the files avoids this.  Better still would be to delete
 * the metadata, but that would require writing a log entry to indicate the
 * file is ready to be deleted, to avoid cleaning from scratch after a crash.
 * [#16643] [#19221]
 *
 * Historical note: Prior to JE 6.0, LN utilization adjustments were needed
 * because the LN last logged size was not stored in the BIN [#18633].
 * Originally in JE 5, the corrected average LN size was used to adjust
 * utilization. This was changed later in JE 5 to a correction factor since
 * different log files may have different average LN sizes [#21106]. Then in
 * JE 6.0 the last logged size was added to the BIN, the default for
 * {@link com.sleepycat.je.EnvironmentConfig#CLEANER_ADJUST_UTILIZATION} was
 * changed to false and a warning was added that the feature will be removed in
 * the future [#22275]. Finally in JE 6.3 the LN adjustment code and data in
 * CheckpointEnd were removed and the parameter was deprecated [#24090].
 *
 * Unlike with LNs, we do not store the last logged size of INs, so their
 * obsolete size is computed as an average and this has the potential to cause
 * over/under-cleaning. This problem is not known to occur, but if there are
 * over/under-cleaning problems we should examine the recalculated info that is
 * output as part of the CleanerRun INFO message.
 *
 * === Expired Data and Utilization ===
 *
 * Per-file histograms are calculated by the {@link ExpirationTracker} and
 * stored in an internal database and cache by the {@link ExpirationProfile}.
 * The histograms are used to calculate the expired bytes for a file at a
 * particular time. Since obsolete (not expired) data can overlap with expired
 * data, upper and lower bounds for overall utilization are determined. When
 * the lower bound is below minUtilization, cleaning occurs.
 *
 * The file that has the lowest average utilization (midway between its upper
 * and lower bounds) is selected for cleaning. If the file's upper and lower
 * bounds are not close together (or the same), and the upper bound is above a
 * threshold, then two-pass cleaning is performed. See
 * {@link EnvironmentParams#CLEANER_TWO_PASS_GAP} and
 * {@link EnvironmentParams#CLEANER_TWO_PASS_THRESHOLD}.
 *
 * The first pass of two-pass cleaning reads the file but doesn't do any real
 * cleaning (no side effects). If this pass finds that the true utilization of
 * the file is above the threshold (the same threshold as above), then cleaning
 * does not take place and instead the histogram is updated. In this case the
 * obsolete and expired data (in the old histogram) overlap, and it is the
 * overlap that caused utilization to be estimated incorrectly. The new/updated
 * histogram is calculated such that there is no overlap, improving utilization
 * accuracy. If the first pass finds that true utilization is below the
 * threshold, then normal cleaning (pass two) occurs. Two-pass cleaning
 * protects against "over cleaning".
 *
 * The use of the overall utilization lower bound to drive cleaning is
 * considered sufficient to protect against "under cleaning". Therefore, a disk
 * space threshold is unnecessary.
 *
 * Gradual expiration is used to prevent cleaning spikes on day or hour
 * boundaries. For purposes of driving cleaning, the utilization lower bound is
 * calculated by distributing the bytes that expired in the current day/hour
 * period evenly over that day/hour.
 */
public class UtilizationCalculator implements EnvConfigObserver {

    private final EnvironmentImpl env;
    private final Cleaner cleaner;
    private final Logger logger;
    private FilesToMigrate filesToMigrate;
    private volatile int currentMinUtilization = -1;
    private volatile int currentMaxUtilization = -1;
    private volatile int predictedMinUtilization = -1;
    private volatile int predictedMaxUtilization = -1;

    UtilizationCalculator(EnvironmentImpl env, Cleaner cleaner) {
        this.env = env;
        this.cleaner = cleaner;
        logger = LoggerUtils.getLogger(getClass());
        filesToMigrate = new FilesToMigrate(env);
        env.addConfigObserver(this);
    }

    int getCurrentMinUtilization() {
        return currentMinUtilization;
    }

    int getCurrentMaxUtilization() {
        return currentMaxUtilization;
    }

    int getPredictedMinUtilization() {
        return predictedMinUtilization;
    }

    int getPredictedMaxUtilization() {
        return predictedMaxUtilization;
    }

    /**
     * Returns the best file that qualifies for cleaning or probing, or null
     * if no file qualifies.
     *
     * This method is called by FileSelector and synchronization order is:
     * 1-FileSelector, 2-UtilizationCalculator, 3-ExpirationProfile.
     *
     * @param fileSummaryMap the map containing file summary info.
     *
     * @param forceCleaning is true to always select a file, even if its
     * utilization is above the minimum utilization threshold.
     *
     * @return {file number, required utilization for 2-pass cleaning},
     * or null if no file qualifies for cleaning.
     */
    synchronized Pair getBestFile(
        final SortedMap fileSummaryMap,
        final boolean forceCleaning) {

        /* Paranoia.  There should always be 1 file. */
        if (fileSummaryMap.size() == 0) {
            LoggerUtils.logMsg(logger, env, Level.SEVERE,
                               "Can't clean, map is empty.");
            return null;
        }

        final FileSelector fileSelector = cleaner.getFileSelector();
        final Set inProgressFiles = fileSelector.getInProgressFiles();

        /* Refresh expiration info so it reflects the current time. */
        final ExpirationProfile expProfile = cleaner.getExpirationProfile();
        final long currentTime = TTL.currentSystemTime();
        expProfile.refresh(currentTime);

        /*
         * Use local variables for mutable properties.  Using values that are
         * changing during a single file selection pass would not produce a
         * well defined result.
         *
         * Note that age is a distance between files not a number of files,
         * that is, deleted files are counted in the age.
         */
        final int totalThreshold = cleaner.minUtilization;
        final int fileThreshold = cleaner.minFileUtilization;
        final int twoPassThreshold = cleaner.twoPassThreshold;
        final int twoPassGap = cleaner.twoPassGap;
        final int minAge = cleaner.minAge;
        final boolean gradualExpiration = cleaner.gradualExpiration;
        final boolean expirationEnabled = env.isExpirationEnabled();

        /*
         * Cleaning must refrain from rearranging the portion of log processed
         * as recovery time. Do not clean a file greater or equal to the first
         * active file used in recovery, which is either the last log file or
         * the file of the first active LSN in an active transaction, whichever
         * is earlier.
         *
         * TxnManager.getFirstActiveLsn() (firstActiveTxnLsn below) is
         * guaranteed to be earlier or equal to the first active LSN of the
         * checkpoint that will be performed before deleting the selected log
         * file. By selecting a file prior to this point we ensure that will
         * not clean any entry that may be replayed by recovery.
         *
         * For example:
         * 200 ckptA start, determines that ckpt's firstActiveLsn = 100
         * 400 ckptA end
         * 600 ckptB start, determines that ckpt's firstActiveLsn = 300
         * 800 ckptB end
         *
         * Any cleaning that executes before ckpt A start will be constrained
         * to files <= lsn 100, because it will have checked the TxnManager.
         * If cleaning executes after ckptA start, it may indeed clean after
         * ckptA's firstActiveLsn, but the cleaning run will wait to ckptB end
         * to delete files.
         */
        long firstActiveFile = fileSummaryMap.lastKey();
        final long firstActiveTxnLsn = env.getTxnManager().getFirstActiveLsn();

        if (firstActiveTxnLsn != DbLsn.NULL_LSN) {

            long firstActiveTxnFile =
                DbLsn.getFileNumber(firstActiveTxnLsn);

            if (firstActiveFile > firstActiveTxnFile) {
                firstActiveFile = firstActiveTxnFile;
            }
        }

        /*
         * Note that minAge is at least one and may be configured to a higher
         * value to prevent cleaning recently active files.
         */
        final long lastFileToClean = firstActiveFile - minAge;

        /*
         * Given the total, obsolete and expired bytes:
         *
         * min utilization: 100 * ((obsolete + expired) / total)
         * max utilization: 100 * (max(obsolete, expired) / total)
         * avg utilization: (max - min) / 2
         *
         * Calculate min/max utilization in several categories:
         *
         * + Current total utilization, not counting the results of cleaning
         *   that has not yet occurred, and using the current expired sizes.
         *
         * + Predicted total utilization, estimating the results of cleaning
         *   yet to occur, and using a gradual expired size to prevent cleaning
         *   spikes after hour/day boundaries.
         *
         * + Utilization for the "best" file to use when cleaning normally. The
          *  file with the lowest avg utilization is selected.
         *
         * + Also determine the "best gradual" file with the lowest gradual
         *   max utilization. Note that when a file is selected due to the
         *   minFileUtilization threshold, gradual utilization must be used.
         *   Otherwise, cleaning due to this threshold would defeat gradual
         *   expiration in general.
         */
        Long bestFile = null;
        int bestFileAvgUtil = 101;
        int bestFileMinUtil = 0;
        int bestFileMaxUtil = 0;
        Long bestGradualFile = null;
        int bestGradualFileMaxUtil = 101;
        long currentTotalSize = 0;
        long currentMinObsoleteSize = 0;
        long currentMaxObsoleteSize = 0;
        long predictedTotalSize = 0;
        long predictedMinObsoleteSize = 0;
        long predictedMaxObsoleteSize = 0;

        for (final Map.Entry entry :
             fileSummaryMap.entrySet()) {

            final Long file = entry.getKey();
            final long fileNum = file;

            if (!env.getFileProtector().isActiveOrNewFile(file)) {
                continue;
            }

            final FileSummary summary = entry.getValue();

            final int expiredSize;
            final int expiredGradualSize;

            if (expirationEnabled) {

                final Pair expiredSizes =
                    expProfile.getExpiredBytes(fileNum, currentTime);

                expiredSize = Math.min(
                    expiredSizes.first(), summary.totalSize);

                expiredGradualSize = gradualExpiration ?
                    Math.min(expiredSizes.second(), summary.totalSize) :
                    expiredSize;

            } else {
                expiredSize = 0;
                expiredGradualSize = 0;
            }

            /*
             * If the file is safe-to-delete, it is entirely obsolete by
             * definition. This is more accurate than using getObsoleteSize.
             */
            final int obsoleteSize = summary.getObsoleteSize();

            final int minObsoleteSize = Math.max(
                obsoleteSize, expiredSize);

            final int maxObsoleteSize = Math.min(
                obsoleteSize + expiredSize,
                summary.totalSize);

            final int minGradualObsoleteSize = Math.max(
                obsoleteSize, expiredGradualSize);

            final int maxGradualObsoleteSize = Math.min(
                obsoleteSize + expiredGradualSize,
                summary.totalSize);

            currentTotalSize += summary.totalSize;
            currentMinObsoleteSize += minObsoleteSize;
            currentMaxObsoleteSize += maxObsoleteSize;

            /*
             * If the file has been clean or is being cleaned, assume the
             * file's data will occupy only its currently utilized bytes, after
             * cleaning and deletion, based on the min obsolete size. This is
             * an intentionally overly optimistic prediction of the results of
             * cleaning, and is used to prevent over-cleaning, especially one
             * due to a backlog created using inaccurate predictions.
             */
            if (inProgressFiles.contains(file)) {
                final int utilizedSize = summary.totalSize - minObsoleteSize;
                predictedTotalSize += utilizedSize;
                continue;
            }

            predictedTotalSize += summary.totalSize;
            predictedMinObsoleteSize += minGradualObsoleteSize;
            predictedMaxObsoleteSize += maxGradualObsoleteSize;

            /* Skip files that are too young to be cleaned. */
            if (fileNum > lastFileToClean) {
                continue;
            }

            /*
             * Pick the "best" file -- the one having the lowest avg
             * utilization so far.
             */
            final int thisMinUtil = FileSummary.utilization(
                maxObsoleteSize, summary.totalSize);

            final int thisMaxUtil = FileSummary.utilization(
                minObsoleteSize, summary.totalSize);

            final int thisAvgUtil = (thisMinUtil + thisMaxUtil) / 2;

            if (bestFile == null || thisAvgUtil < bestFileAvgUtil) {
                bestFile = file;
                bestFileAvgUtil = thisAvgUtil;
                bestFileMinUtil = thisMinUtil;
                bestFileMaxUtil = thisMaxUtil;
            }

            /*
             * Pick the "best gradual" file -- the one having the lowest max
             * gradual utilization so far.
             */
            final int thisGradualMaxUtil = FileSummary.utilization(
                minGradualObsoleteSize, summary.totalSize);

            if (bestGradualFile == null ||
                thisGradualMaxUtil < bestGradualFileMaxUtil) {

                bestGradualFile = file;
                bestGradualFileMaxUtil = thisGradualMaxUtil;
            }
        }

        final int currentMinUtil = FileSummary.utilization(
            currentMaxObsoleteSize, currentTotalSize);

        final int currentMaxUtil = FileSummary.utilization(
            currentMinObsoleteSize, currentTotalSize);

        final int predictedMinUtil = FileSummary.utilization(
            predictedMaxObsoleteSize, predictedTotalSize);

        final int predictedMaxUtil = FileSummary.utilization(
            predictedMinObsoleteSize, predictedTotalSize);

        currentMinUtilization = currentMinUtil;
        currentMaxUtilization = currentMaxUtil;
        predictedMinUtilization = predictedMinUtil;
        predictedMaxUtilization = predictedMaxUtil;

        /*
         * 1. If total min utilization is below the threshold, clean the
         *    "best" file, which is the one with the lowest avg utilization.
         *
         * 2. Else if the "best gradual" file has a max gradual utilization
         *    that is below the threshold for a single file, clean it.
         *
         * 3. Else if there are more files to migrate, clean the next file to
         *    be migrated.
         *
         * 4. Else if cleaning is forced for unit testing, clean the best file.
         */
        final Long fileChosen;
        final String reason;

        if (predictedMinUtil < totalThreshold) {

            fileChosen = bestFile;
            reason = "predicted min util is below minUtilization";

        } else if (bestGradualFileMaxUtil < fileThreshold) {

            fileChosen = bestGradualFile;
            reason = "file has avg util below minFileUtilization";

        } else if (filesToMigrate.hasNext(fileSummaryMap, inProgressFiles)) {

            fileChosen = filesToMigrate.next(fileSummaryMap, inProgressFiles);
            reason = "there are more forceCleanFiles";

        } else if (forceCleaning) {

            fileChosen = bestFile;
            reason = "forced for testing";

        } else {
            fileChosen = null;
            reason = "no file selected";
        }

        String bestFileMsg = "";
        String twoPassMsg = "";
        int pass1RequiredUtil = -1;

        if (fileChosen != null && fileChosen.equals(bestFile)) {

            bestFileMsg =
                ", chose file with util min: " + bestFileMinUtil +
                " max: " + bestFileMaxUtil +
                " avg: " + bestFileAvgUtil;

            /*
             * If the difference between the file's min and max utilization is
             * more than twoPassGap, and its max utilization is more then
             * twoPassThreshold, use two pass cleaning. When using two-pass
             * cleaning, skip the second pass if its recalculated utilization
             * is higher than twoPassThreshold. In other words, if the benefit
             * of cleaning is low, don't actually clean it.
             */
            if (bestFileMaxUtil > twoPassThreshold &&
                bestFileMaxUtil - bestFileMinUtil >= twoPassGap) {

                pass1RequiredUtil = twoPassThreshold;
                twoPassMsg = ", 2-pass cleaning";
            }
        }

        final Level logLevel = (fileChosen != null) ? Level.INFO : Level.FINE;

        if (logger.isLoggable(logLevel)) {

            LoggerUtils.logMsg(
                logger, env, logLevel,
                "Clean file " +
                ((fileChosen != null) ?
                    ("0x" + Long.toHexString(fileChosen)) : "none") +
                ": " + reason + twoPassMsg +
                ", current util min: " + currentMinUtil +
                " max: " + currentMaxUtil +
                ", predicted util min: " + predictedMinUtil +
                " max: " + predictedMaxUtil +
                bestFileMsg);
        }

        return (fileChosen != null) ?
            new Pair<>(fileChosen, pass1RequiredUtil) :
            null;
    }

    /**
     * Process notifications of mutable property changes.
     *
     * @throws IllegalArgumentException via FilesToMigrate ctor and
     * parseForceCleanFiles.
     */

    public synchronized void envConfigUpdate(DbConfigManager cm,
                                EnvironmentMutableConfig ignore) {

        filesToMigrate = new FilesToMigrate(env);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy