All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.cleaner.Cleaner Maven / Gradle / Ivy

The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.cleaner;

import static com.sleepycat.je.ExtinctionFilter.ExtinctionStatus.EXTINCT;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_ACTIVE_LOG_SIZE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_AVAILABLE_LOG_SIZE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_BIN_DELTAS_CLEANED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_BIN_DELTAS_DEAD;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_BIN_DELTAS_MIGRATED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_BIN_DELTAS_OBSOLETE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_DELETIONS;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_DISK_READS;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_ENTRIES_READ;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_INS_CLEANED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_INS_DEAD;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_INS_MIGRATED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_INS_OBSOLETE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNQUEUE_HITS;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_CLEANED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_DEAD;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_EXPIRED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_EXTINCT;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_LOCKED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_MARKED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_MIGRATED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_LNS_OBSOLETE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_MAX_UTILIZATION;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_MIN_UTILIZATION;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PENDING_DBS_INCOMPLETE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PENDING_DBS_PROCESSED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PENDING_DB_QUEUE_SIZE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PENDING_LNS_LOCKED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PENDING_LNS_PROCESSED;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PENDING_LN_QUEUE_SIZE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PREDICTED_MAX_UTILIZATION;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PREDICTED_MIN_UTILIZATION;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PROTECTED_LOG_SIZE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_PROTECTED_LOG_SIZE_MAP;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_RESERVED_LOG_SIZE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_REVISAL_RUNS;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_RUNS;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_TOTAL_LOG_SIZE;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.CLEANER_TWO_PASS_RUNS;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.GROUP_DESC;
import static com.sleepycat.je.cleaner.CleanerStatDefinition.GROUP_NAME;

import java.io.File;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.je.CacheMode;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.EnvironmentMutableConfig;
import com.sleepycat.je.ExtinctionFilter.ExtinctionStatus;
import com.sleepycat.je.StatsConfig;
import com.sleepycat.je.cleaner.FileSelector.CheckpointStartCleanerState;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.dbi.CursorImpl;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbConfigManager;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvConfigObserver;
import com.sleepycat.je.dbi.EnvironmentFailureReason;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.log.FileManager;
import com.sleepycat.je.log.LogItem;
import com.sleepycat.je.log.ReplicationContext;
import com.sleepycat.je.tree.BIN;
import com.sleepycat.je.tree.FileSummaryLN;
import com.sleepycat.je.tree.IN;
import com.sleepycat.je.tree.LN;
import com.sleepycat.je.tree.Node;
import com.sleepycat.je.tree.Tree;
import com.sleepycat.je.tree.TreeLocation;
import com.sleepycat.je.txn.BasicLocker;
import com.sleepycat.je.txn.LockGrantType;
import com.sleepycat.je.txn.LockManager;
import com.sleepycat.je.txn.LockResult;
import com.sleepycat.je.txn.LockType;
import com.sleepycat.je.utilint.AtomicLongMapStat;
import com.sleepycat.je.utilint.DaemonRunner;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.FileStoreInfo;
import com.sleepycat.je.utilint.IntStat;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.LongStat;
import com.sleepycat.je.utilint.Pair;
import com.sleepycat.je.utilint.RateLimitingLogger;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.VLSN;

/**
 * The Cleaner is responsible for effectively garbage collecting the JE log.
 * It selects the least utilized log file for cleaning (see FileSelector),
 * reads through the log file (FileProcessor) and determines whether each entry
 * is obsolete (no longer relevant) or active (referenced by the Btree).
 * Entries that are active are migrated (copied) to the end of the log, and
 * finally the cleaned file is deleted.
 *
 * The migration of active entries is a multi-step process that can be
 * configured to operate in different ways.  Eviction and checkpointing, as
 * well as the cleaner threads (FileProcessor instances) are participants in
 * this process.  Migration may be immediate or lazy.
 *
 * Active INs are always migrated lazily, which means that they are marked
 * dirty by the FileProcessor, and then logged later by an eviction or
 * checkpoint.  Active LNs are always migrated immediately by the FileProcessor
 * by logging them.
 *
 * When the FileProcessor is finished with a file, all lazy migration for that
 * file is normally completed by the end of the next checkpoint, if not sooner
 * via eviction.  The checkpoint/recovery mechanism will ensure that obsolete
 * entries will not be referenced by the Btree.  At the end of the checkpoint,
 * it is therefore safe to delete the log file.
 *
 * There is one exception to the above paragraph.  When attempting to migrate
 * an LN, if the LN cannot be locked then we must retry the migration at a
 * later time.  Also, if a database removal is in progress, we consider all
 * entries in the database obsolete but cannot delete the log file until
 * database removal is complete.  Such "pending" LNs and databases are queued
 * and processed periodically during file processing and at the start of a
 * checkpoint; see processPending().  In this case, we may have to wait for
 * more than one checkpoint to occur before the log file can be deleted.  See
 * FileSelector and the use of the pendingLNs and pendingDBs collections.
 */
public class Cleaner implements DaemonRunner, EnvConfigObserver {
    /* Trace-message prefixes used when logging cleaner actions. */
    static final String CLEAN_IN = "CleanIN:";
    static final String CLEAN_LN = "CleanLN:";
    static final String CLEAN_PENDING_LN = "CleanPendingLN:";

    private static final NumberFormat INT_FORMAT =
        NumberFormat.getIntegerInstance();

    /**
     * The CacheMode to use for Btree searches.  This is currently UNCHANGED
     * because we update the generation of the BIN when we migrate an LN.
     * In other cases, it is not desirable to keep INs in cache.
     */
    static final CacheMode UPDATE_GENERATION = CacheMode.UNCHANGED;

    /**
     * Whether the cleaner should participate in critical eviction.  Ideally
     * the cleaner would not participate in eviction, since that would reduce
     * the cost of cleaning.  However, the cleaner can add large numbers of
     * nodes to the cache.  By not participating in eviction, other threads
     * could be kept in a constant state of eviction and would effectively
     * starve.  Therefore, this setting is currently enabled.
     */
    static final boolean DO_CRITICAL_EVICTION = true;

    private static final String DELETED_SUBDIR = "deleted";

    /* Used to ensure that the cleaner is woken often enough: 100 MB. */
    private final static long MAX_CLEANER_BYTES_INTERVAL = 100L << 20;

    /* 10GB is the lower threshold for adjusting MAX_DISK. */
    private final static long MAX_DISK_ADJUSTMENT_THRESHOLD =
        10L * 1024L * 1024L * 1024L;

    /* Used to disable processing of safe-to-delete files during testing. */
    private volatile boolean fileDeletionEnabled = true;

    /* Used to limit manageDiskUsage calls to one thread at a time. */
    private final ReentrantLock manageDiskUsageLock = new ReentrantLock();

    /*
     * Cleaner stats. Updates to these counters occur in multiple threads,
     * including FileProcessor threads, and are not synchronized. This could
     * produce errors in counting, but avoids contention around stat updates.
     */
    private final StatGroup statGroup;
    final LongStat nCleanerRuns;
    final LongStat nTwoPassRuns;
    final LongStat nRevisalRuns;
    private final LongStat nCleanerDeletions;
    final LongStat nINsObsolete;
    final LongStat nINsCleaned;
    final LongStat nINsDead;
    final LongStat nINsMigrated;
    final LongStat nBINDeltasObsolete;
    final LongStat nBINDeltasCleaned;
    final LongStat nBINDeltasDead;
    final LongStat nBINDeltasMigrated;
    final LongStat nLNsObsolete;
    final LongStat nLNsExpired;
    final LongStat nLNsExtinct;
    final LongStat nLNsCleaned;
    final LongStat nLNsDead;
    final LongStat nLNsLocked;
    final LongStat nLNsMigrated;
    final LongStat nLNsMarked;
    final LongStat nLNQueueHits;
    private final LongStat nPendingLNsProcessed;
    private final LongStat nPendingLNsLocked;
    private final LongStat nPendingDBsProcessed;
    private final LongStat nPendingDBsIncomplete;
    final LongStat nEntriesRead;
    final LongStat nDiskReads;
    /*
     * Log size stats. These are CUMULATIVE and the stat objects are created
     * by loadStats. They are accessed as a group while synchronized on
     * statGroup to ensure the set of values is consistent/coherent.
     */
    private FileProtector.LogSizeStats logSizeStats;
    private long availableLogSize;
    private long totalLogSize;

    /*
     * Unlike availableLogSize, maxDiskOverage and freeDiskShortage are
     * calculated based on actual disk usage, without subtracting the size of
     * the reserved files. So these values may be GT zero even if
     * availableLogSize is GTE zero. If maxDiskOverage or freeDiskShortage
     * is GT zero, then manageDiskUsage will try to delete log files to
     * avoid a violation.
     */
    private long maxDiskOverage;
    private long freeDiskShortage;

    /* Message summarizing current log size stats, with limit violations. */
    private String diskUsageMessage;

    /*
     * If a disk usage limit is violated, this is diskUsageMessage; otherwise
     * it is null. It is volatile so it can be checked cheaply during CRUD ops.
     */
    private volatile String diskUsageViolationMessage;

    /*
     * Used to prevent repeated logging about a disk limit violation.
     * Protected by manageDiskUsageLock.
     */
    private boolean loggedDiskLimitViolation;

    /*
     * Configuration parameters. Cached from the config manager by
     * setMutableProperties and refreshed on envConfigUpdate.
     */
    long lockTimeout;
    int readBufferSize;
    int lookAheadCacheSize;
    long nDeadlockRetries;
    boolean expunge;
    private boolean useDeletedDir;
    int twoPassGap;
    int twoPassThreshold;
    boolean gradualExpiration;
    long cleanerBytesInterval;
    boolean trackDetail;
    private boolean fetchObsoleteSize;
    int dbCacheClearCount;
    private final boolean rmwFixEnabled;
    int minUtilization;
    int minFileUtilization;
    int minAge;
    private long maxDiskLimit;
    private long freeDiskLimit;
    private long adjustedMaxDiskLimit;
    private long reservedDiskLimit;

    private final String name;
    private final EnvironmentImpl env;
    private final FileStoreInfo fileStoreInfo;
    private final FileProtector fileProtector;
    private final UtilizationProfile profile;
    private final UtilizationTracker tracker;
    private final ExpirationProfile expirationProfile;
    private final UtilizationCalculator calculator;
    private final FileSelector fileSelector;
    private FileProcessor[] threads;

    private final Logger logger;
    final AtomicLong totalRuns;
    TestHook fileChosenHook;

    private final RateLimitingLogger
        reservedFileRepairMaybeExtinctLogger;

    /** @see #processPending */
    private final AtomicBoolean processPendingReentrancyGuard =
        new AtomicBoolean(false);

    /** @see #wakeupAfterWrite */
    private final AtomicLong bytesWrittenSinceActivation = new AtomicLong(0);

    /**
     * Creates the Cleaner for the given environment, initializing stats,
     * the utilization tracking components and the FileProcessor threads
     * (via setMutableProperties), and registering for config notifications.
     *
     * @param env the owning environment.
     * @param name base name used to label the FileProcessor threads.
     */
    public Cleaner(EnvironmentImpl env, String name) {
        this.env = env;
        this.name = name;

        /* Initialize the non-CUMULATIVE stats definitions. */
        statGroup = new StatGroup(GROUP_NAME, GROUP_DESC);
        nCleanerRuns = new LongStat(statGroup, CLEANER_RUNS);
        nTwoPassRuns = new LongStat(statGroup, CLEANER_TWO_PASS_RUNS);
        nRevisalRuns = new LongStat(statGroup, CLEANER_REVISAL_RUNS);
        nCleanerDeletions = new LongStat(statGroup, CLEANER_DELETIONS);
        nINsObsolete = new LongStat(statGroup, CLEANER_INS_OBSOLETE);
        nINsCleaned = new LongStat(statGroup, CLEANER_INS_CLEANED);
        nINsDead = new LongStat(statGroup, CLEANER_INS_DEAD);
        nINsMigrated = new LongStat(statGroup, CLEANER_INS_MIGRATED);
        nBINDeltasObsolete = new LongStat(statGroup, CLEANER_BIN_DELTAS_OBSOLETE);
        nBINDeltasCleaned = new LongStat(statGroup, CLEANER_BIN_DELTAS_CLEANED);
        nBINDeltasDead = new LongStat(statGroup, CLEANER_BIN_DELTAS_DEAD);
        nBINDeltasMigrated = new LongStat(statGroup, CLEANER_BIN_DELTAS_MIGRATED);
        nLNsObsolete = new LongStat(statGroup, CLEANER_LNS_OBSOLETE);
        nLNsExpired = new LongStat(statGroup, CLEANER_LNS_EXPIRED);
        nLNsExtinct = new LongStat(statGroup, CLEANER_LNS_EXTINCT);
        nLNsCleaned = new LongStat(statGroup, CLEANER_LNS_CLEANED);
        nLNsDead = new LongStat(statGroup, CLEANER_LNS_DEAD);
        nLNsLocked = new LongStat(statGroup, CLEANER_LNS_LOCKED);
        nLNsMigrated = new LongStat(statGroup, CLEANER_LNS_MIGRATED);
        nLNsMarked = new LongStat(statGroup, CLEANER_LNS_MARKED);
        nLNQueueHits = new LongStat(statGroup, CLEANER_LNQUEUE_HITS);
        nPendingLNsProcessed =
            new LongStat(statGroup, CLEANER_PENDING_LNS_PROCESSED);
        nPendingLNsLocked = new LongStat(statGroup, CLEANER_PENDING_LNS_LOCKED);
        nPendingDBsProcessed =
            new LongStat(statGroup, CLEANER_PENDING_DBS_PROCESSED);
        nPendingDBsIncomplete =
            new LongStat(statGroup, CLEANER_PENDING_DBS_INCOMPLETE);
        nEntriesRead = new LongStat(statGroup, CLEANER_ENTRIES_READ);
        nDiskReads = new LongStat(statGroup, CLEANER_DISK_READS);

        /* Start with empty log size stats; refreshed later by loadStats. */
        logSizeStats =
            new FileProtector.LogSizeStats(0, 0, 0, new HashMap<>());

        if (env.isMemOnly()) {
            /* Env home dir may not exist, so file system info is unusable. */
            fileStoreInfo = null;
        } else {
            try {
                fileStoreInfo = FileStoreInfo.getInfo(
                    env.getEnvironmentHome().getAbsolutePath());
            } catch (IOException e) {
                /* Invalidate the env: disk limits cannot be enforced. */
                throw EnvironmentFailureException.unexpectedException(env, e);
            }
        }
        fileProtector = new FileProtector(env);
        tracker = new UtilizationTracker(env, this);
        profile = new UtilizationProfile(env, tracker);
        expirationProfile = new ExpirationProfile(env);
        calculator = new UtilizationCalculator(env, this);
        fileSelector = new FileSelector();
        threads = new FileProcessor[0];
        logger = LoggerUtils.getLogger(getClass());
        totalRuns = new AtomicLong(0);

        /* Log MAYBE_EXTINCT message at most once per minute. */
        reservedFileRepairMaybeExtinctLogger = new RateLimitingLogger<>(
            (int) TimeUnit.MINUTES.toMillis(1), 1, logger);

        /*
         * The trackDetail property is immutable because of the complexity (if
         * it were mutable) in determining whether to update the memory budget
         * and perform eviction.
         */
        trackDetail = env.getConfigManager().getBoolean
            (EnvironmentParams.CLEANER_TRACK_DETAIL);

        rmwFixEnabled = env.getConfigManager().getBoolean
            (EnvironmentParams.CLEANER_RMW_FIX);

        /* Initialize mutable properties and register for notifications. */
        setMutableProperties(env.getConfigManager());
        env.addConfigObserver(this);
    }

    /**
     * Processes notifications of mutable property changes by re-reading the
     * cached configuration and then waking the cleaner threads.
     *
     * @throws IllegalArgumentException via Environment ctor and
     * setMutableConfig.
     */
    public void envConfigUpdate(DbConfigManager cm,
                                EnvironmentMutableConfig ignore) {
        setMutableProperties(cm);

        /* A parameter that impacts cleaning may have changed. */
        wakeupActivate();
    }

    /**
     * Caches the current values of all mutable cleaner properties and resizes
     * the FileProcessor thread array to match CLEANER_THREADS. Called from
     * the constructor and from envConfigUpdate.
     *
     * @param cm source of the current property values.
     */
    private void setMutableProperties(final DbConfigManager cm) {

        lockTimeout = cm.getDuration(EnvironmentParams.CLEANER_LOCK_TIMEOUT);

        /* A non-positive CLEANER_READ_SIZE means: use the log iterator size. */
        readBufferSize = cm.getInt(EnvironmentParams.CLEANER_READ_SIZE);
        if (readBufferSize <= 0) {
            readBufferSize =
                cm.getInt(EnvironmentParams.LOG_ITERATOR_READ_SIZE);
        }

        lookAheadCacheSize =
            cm.getInt(EnvironmentParams.CLEANER_LOOK_AHEAD_CACHE_SIZE);

        nDeadlockRetries = cm.getInt(EnvironmentParams.CLEANER_DEADLOCK_RETRY);

        expunge = cm.getBoolean(EnvironmentParams.CLEANER_REMOVE);

        useDeletedDir =
            cm.getBoolean(EnvironmentParams.CLEANER_USE_DELETED_DIR);

        twoPassGap =
            cm.getInt(EnvironmentParams.CLEANER_TWO_PASS_GAP);

        twoPassThreshold =
            cm.getInt(EnvironmentParams.CLEANER_TWO_PASS_THRESHOLD);

        /* Zero means: default to five points below the min utilization. */
        if (twoPassThreshold == 0) {
            twoPassThreshold =
                cm.getInt(EnvironmentParams.CLEANER_MIN_UTILIZATION) - 5;
        }

        gradualExpiration =
            cm.getBoolean(EnvironmentParams.CLEANER_GRADUAL_EXPIRATION);

        dbCacheClearCount =
            cm.getInt(EnvironmentParams.ENV_DB_CACHE_CLEAR_COUNT);

        int nThreads = cm.getInt(EnvironmentParams.CLEANER_THREADS);
        assert nThreads > 0;

        if (nThreads != threads.length) {

            /* Shutdown threads when reducing their number. */
            for (int i = nThreads; i < threads.length; i += 1) {
                if (threads[i] == null) {
                    continue;
                }
                threads[i].shutdown();
                threads[i] = null;
            }

            /* Copy existing threads that are still used. */
            FileProcessor[] newThreads = new FileProcessor[nThreads];
            for (int i = 0; i < nThreads && i < threads.length; i += 1) {
                newThreads[i] = threads[i];
            }

            /* Don't lose track of new threads if an exception occurs. */
            threads = newThreads;

            /* Start new threads when increasing their number. */
            for (int i = 0; i < nThreads; i += 1) {
                if (threads[i] != null) {
                    continue;
                }
                threads[i] = new FileProcessor(
                    name + '-' + (i + 1),
                    i == 0 /*firstThread*/,
                    env, this, profile, calculator, fileSelector);
            }
        }

        cleanerBytesInterval = cm.getLong(
            EnvironmentParams.CLEANER_BYTES_INTERVAL);

        /* Zero means: quarter of a log file, capped at a fixed maximum. */
        if (cleanerBytesInterval == 0) {
            cleanerBytesInterval =
                cm.getLong(EnvironmentParams.LOG_FILE_MAX) / 4;

            cleanerBytesInterval = Math.min(
                cleanerBytesInterval, MAX_CLEANER_BYTES_INTERVAL);
        }

        final int wakeupInterval =
            cm.getDuration(EnvironmentParams.CLEANER_WAKEUP_INTERVAL);

        for (FileProcessor thread : threads) {
            if (thread == null) {
                continue;
            }
            thread.setWaitTime(wakeupInterval);
        }

        fetchObsoleteSize =
            cm.getBoolean(EnvironmentParams.CLEANER_FETCH_OBSOLETE_SIZE);

        minAge = cm.getInt(EnvironmentParams.CLEANER_MIN_AGE);
        minUtilization = cm.getInt(EnvironmentParams.CLEANER_MIN_UTILIZATION);
        minFileUtilization =
            cm.getInt(EnvironmentParams.CLEANER_MIN_FILE_UTILIZATION);

        maxDiskLimit = cm.getLong(EnvironmentParams.MAX_DISK);
        adjustedMaxDiskLimit = maxDiskLimit;

        if (env.isMemOnly()) {
            /* Env home dir may not exist, can't query file system info. */
            freeDiskLimit = 0;
        } else {
            final int replayFreeDiskPct = env.getReplayFreeDiskPercent();
            if (replayFreeDiskPct == 0) {
                /* No backward compatibility is needed. */
                freeDiskLimit = cm.getLong(EnvironmentParams.FREE_DISK);
            } else {
                /* Use replayFreeDiskPercent for backward compatibility. */
                if (cm.isSpecified(EnvironmentParams.FREE_DISK)) {
                    throw new IllegalArgumentException(
                        "Cannot specify both " + EnvironmentConfig.FREE_DISK +
                            " and je.rep.replayFreeDiskPercent.");
                }
                freeDiskLimit =
                    (getDiskTotalSpace() * replayFreeDiskPct) / 100;
            }

            /*
             * Reserve the free-disk amount out of the max-disk budget when
             * the max limit is large or free-disk was set explicitly.
             */
            if (maxDiskLimit > MAX_DISK_ADJUSTMENT_THRESHOLD ||
                cm.isSpecified(EnvironmentParams.FREE_DISK) ||
                replayFreeDiskPct != 0) {
                adjustedMaxDiskLimit -= freeDiskLimit;
            }
        }

        reservedDiskLimit = cm.getLong(EnvironmentParams.RESERVED_DISK);
    }

    /** Returns the FileProtector used to guard files from deletion. */
    public FileProtector getFileProtector() {
        return fileProtector;
    }

    /** Returns this environment's UtilizationTracker. */
    public UtilizationTracker getUtilizationTracker() {
        return tracker;
    }

    /** Returns this environment's UtilizationProfile. */
    public UtilizationProfile getUtilizationProfile() {
        return profile;
    }

    /** Returns this environment's UtilizationCalculator. */
    UtilizationCalculator getUtilizationCalculator() {
        return calculator;
    }

    /** Returns this environment's ExpirationProfile. */
    public ExpirationProfile getExpirationProfile() {
        return expirationProfile;
    }

    /** Returns the FileSelector that chooses files for cleaning. */
    public FileSelector getFileSelector() {
        return fileSelector;
    }

    /**
     * Returns whether obsolete sizes should be fetched for the given
     * database. Fetching must be enabled by config, and it does not apply
     * to databases whose LNs are immediately obsolete.
     */
    public boolean getFetchObsoleteSize(DatabaseImpl db) {
        if (!fetchObsoleteSize) {
            return false;
        }
        return !db.isLNImmediatelyObsolete();
    }

    /**
     * Returns whether the RMW fix is enabled for this environment.
     *
     * @see EnvironmentParams#CLEANER_RMW_FIX
     * @see FileSummaryLN#postFetchInit
     */
    public boolean isRMWFixEnabled() {
        return rmwFixEnabled;
    }

    /** Installs a hook invoked when a file is chosen. For unit testing only. */
    void setFileChosenHook(TestHook hook) {
        fileChosenHook = hook;
    }

    /*
     * Delegate the run/pause/wakeup/shutdown DaemonRunner operations.  Null
     * array slots are always skipped to account for the possibility of
     * exceptions during thread creation.  The cleaner daemon can't ever be
     * run if No Locking mode is enabled.
     */
    public void runOrPause(boolean run) {

        if (env.isNoLocking()) {
            return;
        }

        for (int i = 0; i < threads.length; i += 1) {
            final FileProcessor processor = threads[i];
            if (processor == null) {
                continue;
            }
            if (run) {
                /* Ensure cleaning is performed on the next wakeup. */
                processor.activateOnWakeup();
            }
            processor.runOrPause(run);
        }
    }

    /**
     * If the number of bytes written since the last activation exceeds the
     * cleaner's byte interval, wakeup the file processor threads in activate
     * mode. Counter updates are not atomic with the reset, so concurrent
     * writers may occasionally trigger an extra (harmless) activation.
     */
    public void wakeupAfterWrite(int writeSize) {

        final long written =
            bytesWrittenSinceActivation.addAndGet(writeSize);

        if (written > cleanerBytesInterval) {
            bytesWrittenSinceActivation.set(0);
            wakeupActivate();
        }
    }

    /**
     * Wakeup the file processor threads in activate mode, meaning that
     * FileProcessor.doClean will be called.
     *
     * @see FileProcessor#onWakeup()
     */
    public void wakeupActivate() {

        for (int i = 0; i < threads.length; i += 1) {
            final FileProcessor thread = threads[i];
            if (thread != null) {
                thread.activateOnWakeup();
                thread.wakeup();
            }
        }
    }

    /** Asks every live FileProcessor thread to begin shutting down. */
    public void requestShutdown() {
        for (int i = 0; i < threads.length; i += 1) {
            final FileProcessor thread = threads[i];
            if (thread != null) {
                thread.requestShutdown();
            }
        }
    }

    /** Shuts down every FileProcessor thread and clears the thread array. */
    public void shutdown() {
        for (int i = 0; i < threads.length; i += 1) {
            final FileProcessor thread = threads[i];
            if (thread != null) {
                thread.shutdown();
                threads[i] = null;
            }
        }
    }

    /** Returns the total wakeup-request count over all live threads. */
    public int getNWakeupRequests() {
        int total = 0;
        for (int i = 0; i < threads.length; i += 1) {
            final FileProcessor thread = threads[i];
            if (thread != null) {
                total += thread.getNWakeupRequests();
            }
        }
        return total;
    }

    /**
     * Cleans selected files and returns the number of files cleaned.  This
     * method is invoked programmatically, not by a daemon thread.
     *
     * @param cleanMultipleFiles is true to clean until we're under budget,
     * or false to clean at most one file.
     *
     * @param forceCleaning is true to clean even if we're not under the
     * utilization threshold.
     *
     * @return the number of files cleaned, not including files cleaned
     * unsuccessfully.
     */
    public int doClean(boolean cleanMultipleFiles, boolean forceCleaning) {

        final FileProcessor processor = createProcessor();

        return processor.doClean(
            false /*invokedFromDaemon*/, cleanMultipleFiles, forceCleaning);
    }

    /** Creates an unnamed FileProcessor for programmatic (non-daemon) use. */
    public FileProcessor createProcessor() {
        return new FileProcessor(
            "", false /*firstThread*/, env, this, profile, calculator,
            fileSelector);
    }

    /**
     * Loads the cleaner stat group: clones the counter stats (optionally
     * clearing them), then explicitly adds the CUMULATIVE stats — current
     * utilization, pending queue sizes, and a coherent snapshot of the log
     * size stats taken while synchronized on statGroup.
     *
     * @param config controls whether the counter stats are cleared.
     * @return a new StatGroup containing all cleaner stats.
     */
    public StatGroup loadStats(StatsConfig config) {

        final StatGroup stats = statGroup.cloneGroup(config.getClear());

        /* Add all CUMULATIVE stats explicitly. */
        final Pair pendingQueueSizes =
            fileSelector.getPendingQueueSizes();

        /* The stat constructors register each stat with 'stats'. */
        new IntStat(
            stats, CLEANER_MIN_UTILIZATION,
            calculator.getCurrentMinUtilization());
        new IntStat(
            stats, CLEANER_MAX_UTILIZATION,
            calculator.getCurrentMaxUtilization());
        new IntStat(
            stats, CLEANER_PREDICTED_MIN_UTILIZATION,
            calculator.getPredictedMinUtilization());
        new IntStat(
            stats, CLEANER_PREDICTED_MAX_UTILIZATION,
            calculator.getPredictedMaxUtilization());
        new IntStat(
            stats, CLEANER_PENDING_LN_QUEUE_SIZE,
            pendingQueueSizes.first());
        new IntStat(
            stats, CLEANER_PENDING_DB_QUEUE_SIZE,
            pendingQueueSizes.second());

        /*
         * Synchronize on statGroup while adding log size stats, to return a
         * consistent set of values.
         */
        synchronized (statGroup) {
            new LongStat(
                stats, CLEANER_ACTIVE_LOG_SIZE,
                logSizeStats.activeSize);
            new LongStat(
                stats, CLEANER_RESERVED_LOG_SIZE,
                logSizeStats.reservedSize);
            new LongStat(
                stats, CLEANER_PROTECTED_LOG_SIZE,
                logSizeStats.protectedSize);
            new LongStat(
                stats, CLEANER_AVAILABLE_LOG_SIZE,
                availableLogSize);
            new LongStat(
                stats, CLEANER_TOTAL_LOG_SIZE,
                totalLogSize);

            final AtomicLongMapStat protectedSizeMap =
                new AtomicLongMapStat(stats, CLEANER_PROTECTED_LOG_SIZE_MAP);

            /* Copy the per-protector protected sizes into the map stat. */
            for (final Map.Entry entry :
                logSizeStats.protectedSizeMap.entrySet()) {

                protectedSizeMap.
                    createStat(entry.getKey()).
                    set(entry.getValue());
            }
        }

        return stats;
    }

    /**
     * Enables or disables processing of safe-to-delete files, including
     * the truncation of the VLSN index. Disabling this is needed for tests,
     * when VLSNIndex changes should be prevented.
     */
    public void enableFileDeletion(boolean enable) {
        fileDeletionEnabled = enable;
    }

    /** Returns whether processing of safe-to-delete files is enabled. */
    boolean isFileDeletionEnabled() {
        return fileDeletionEnabled;
    }

    /**
     * Updates log size stats and deletes unprotected reserved files in order
     * to stay within disk limits.
     *
     * This method must be called frequently enough to maintain disk usage
     * safely below the limits. For an HA env this is particularly important
     * since we retain all reserved files until we approach the disk limits.
     * For this, calling this method at least every CLEANER_BYTES_INTERVAL
     * should suffice.
     *
     * It is also important to call this method based on a time interval
     * when writing stops, to retry deletions when files are protected or the
     * env is locked by read-only processes. For this, calling this method at
     * least every CLEANER_WAKEUP_INTERVAL should suffice.
     */
    public void manageDiskUsage() {

        /* Fail loudly if the environment is invalid. */
        env.checkIfInvalid();

        /* Nothing to delete for mem-only/read-only envs or when disabled. */
        if (env.isMemOnly() || env.mayNotWrite() || !fileDeletionEnabled) {
            return;
        }

        /*
         * Only one thread at a time can truncate the VLSNIndex head, request
         * the environment lock, and update stats. This is a periodic action,
         * so probe the lock to avoid blocking other cleaner threads.
         */
        if (!manageDiskUsageLock.tryLock()) {
            return;
        }

        try {
            /* Periodically update the stats. */
            freshenLogSizeStats();

            if (fileProtector.getNReservedFiles() > 0) {

                boolean freshenStats = false;

                if (env.isReplicated()) {
                    /*
                     * Reserved files are retained until we approach a disk
                     * limit. Determine how many bytes we need to reclaim by
                     * deleting reserved files. Add a reasonable value to stay
                     * safely below the limits between cleaner wakeups. Note
                     * that max(overage,shortage) may be negative, since
                     * overage and shortage may be negative.
                     */
                    long origBytesNeeded =
                        ((maxDiskLimit > 0) ?
                            Math.max(maxDiskOverage, freeDiskShortage) :
                            freeDiskShortage) +
                        Math.max(
                            1L << 20,
                            3 * cleanerBytesInterval);

                    /* Try to free reserved space, if there is a limit. */
                    if (reservedDiskLimit > 0) {
                        origBytesNeeded = Math.max(origBytesNeeded,
                            logSizeStats.reservedSize - reservedDiskLimit);
                    }

                    /*
                     * First try deleting files without truncating the
                     * VLSNIndex head.
                     */
                    long bytesNeeded = origBytesNeeded;
                    if (bytesNeeded > 0) {
                        bytesNeeded = deleteUnprotectedFiles(bytesNeeded);
                    }

                    /*
                     * If we still need space, try truncating the VLSNIndex
                     * and then deleting files. See FileProtector for details.
                     */
                    if (bytesNeeded > 0 &&
                        env.tryVlsnHeadTruncate(bytesNeeded)) {

                        bytesNeeded = deleteUnprotectedFiles(bytesNeeded);
                        freshenStats = true;
                    }

                    /* bytesNeeded dropped, so at least one file was freed. */
                    if (bytesNeeded < origBytesNeeded) {
                        freshenStats = true;
                    }
                } else {
                    /*
                     * For a non-HA env, simply try to delete all the reserved
                     * files.
                     */
                    final long bytesNeeded =
                        deleteUnprotectedFiles(Long.MAX_VALUE);

                    if (bytesNeeded < Long.MAX_VALUE) {
                        freshenStats = true;
                    }
                }

                /*
                 * Freshen the stats if any files are deleted, so write
                 * operations can occur ASAP if they were previously
                 * prohibited. Also freshen the stats if we truncated the
                 * VLSNIndex, so that the stats reflect the current factors
                 * gating file deletion.
                 */
                if (freshenStats) {
                    freshenLogSizeStats();
                }
            }

            /*
             * If there is still a violation, and we have not logged it since
             * the violation status changed, then log it now. We do not expect
             * the violation status to change frequently.
             */
            /* Capture the field once so the check and the log message agree. */
            final String violation = diskUsageViolationMessage;
            if (violation != null) {
                if (!loggedDiskLimitViolation) {
                    LoggerUtils.logMsg(logger, env, Level.SEVERE, violation);
                    loggedDiskLimitViolation = true;
                }
            } else {
                loggedDiskLimitViolation = false;
            }

        } catch (EnvLockedException e) {

            /*
             * deleteUnprotectedFiles could not get the exclusive environment
             * lock; deletion will be retried on a later call.
             */
            LoggerUtils.logMsg(
                logger, env, Level.SEVERE,
                "Could not delete files due to read-only processes. " +
                    diskUsageMessage);

        } finally {
            manageDiskUsageLock.unlock();
        }
    }

    /**
     * Thrown when the environment is locked by read-only processes and
     * therefore no files can be deleted.
     *
     * @see #deleteUnprotectedFiles
     */
    private static class EnvLockedException extends Exception {
        /* Exception is Serializable; declare an explicit version ID. */
        private static final long serialVersionUID = 1L;
    }

    /**
     * Deletes unprotected reserved files in an attempt to free bytesNeeded.
     * In a non-HA env, attempts to delete all reserved files, irrespective of
     * bytesNeeded.
     *
     * 

An exclusive environment is held while deleting the files to lock * out read-only processes. The lock is held while deleting the reserved * file records as well, but this is inexpensive and should not cause long * delays for read-only processes.

* * @param bytesNeeded the amount of space we need to reclaim to stay within * disk limits. * * @return number of bytes we could not reclaim due to protected files, or * zero if we deleted files totaling bytesNeeded or more. * * @throws EnvLockedException if we can't get an exclusive environment * lock because the env is locked by read-only processes, and therefore no * files can be deleted. */ private long deleteUnprotectedFiles(long bytesNeeded) throws EnvLockedException { final FileManager fileManager = env.getFileManager(); final SortedSet deletedFiles = new TreeSet<>(); if (!fileManager.lockEnvironment(false, true)) { throw new EnvLockedException(); } try { long file = -1; while (bytesNeeded > 0 || !env.isReplicated()) { final Pair pair = fileProtector.takeNextCondemnedFile(file + 1); if (pair == null) { break; } file = pair.first(); final long size = pair.second(); if (!deleteFile(file)) { /* Sometimes files cannot be deleted on Windows. */ fileProtector.putBackCondemnedFile(file, size); continue; } bytesNeeded = Math.max(0, bytesNeeded - size); profile.deleteReservedFileRecord(file); nCleanerDeletions.increment(); deletedFiles.add(file); } } finally { fileManager.releaseExclusiveLock(); if (!deletedFiles.isEmpty()) { final StringBuilder sb = new StringBuilder( "Cleaner deleted files:"); for (final Long file : deletedFiles) { sb.append(" 0x"); sb.append(Long.toHexString(file)); } LoggerUtils.traceAndLog( logger, env, Level.INFO, sb.toString()); } } if (!deletedFiles.isEmpty()) { /* * The reserved DB deletions are not durable at this point. If we * crash, restore the file from backup (to workaround a bug), and * recover, the files remain reserved because populateCache cannot * detect this situation. The size of this window is reduced by * flushing here, but it cannot be completely closed. It is very * unlikely to cause a problem because it is a small window and * restoring files from backup is rare. 
*/ env.flushLog(false); } return bytesNeeded; } /** * Deletes the given file, if it reserved or previously condemned. */ boolean deleteReservedFile(final Long file, final String label) { if (env.isMemOnly() || env.mayNotWrite() || !fileDeletionEnabled) { return false; } final FileManager fileManager = env.getFileManager(); if (!fileManager.lockEnvironment(false, true)) { return false; } boolean deleted = false; try { final Long size = fileProtector.takeCondemnedFile(file); if (size == null) { return false; } if (deleteFile(file)) { profile.deleteReservedFileRecord(file); nCleanerDeletions.increment(); deleted = true; } else { /* Sometimes files cannot be deleted on Windows. */ fileProtector.putBackCondemnedFile(file, size); } return true; } finally { fileManager.releaseExclusiveLock(); if (deleted) { LoggerUtils.traceAndLog( logger, env, Level.INFO, label + " deleted files: 0x" + Long.toHexString(file)); } } } /** * Attempts to delete the file and returns whether it has been deleted. */ private boolean deleteFile(final Long file) { final FileManager fileManager = env.getFileManager(); final String expungeLabel = expunge ? "delete" : "rename"; final String expungedLabel = expungeLabel + "d"; try { if (expunge) { if (fileManager.deleteFile(file)) { return true; } } else { /* See EnvironmentConfig.CLEANER_EXPUNGE. */ final File newFile = fileManager.renameFile( file, FileManager.DEL_SUFFIX, useDeletedDir ? DELETED_SUBDIR : null); if (newFile != null) { newFile.setLastModified(System.currentTimeMillis()); return true; } } } catch (IOException e) { throw new EnvironmentFailureException( env, EnvironmentFailureReason.LOG_WRITE, "Unable to " + expungeLabel + " " + file, e); } /* * If the file is not valid (missing) then the file was previously * deleted. This can occur on Windows when we retry deletion (see * below). */ if (!fileManager.isFileValid(file)) { return true; } /* * Log a message and return false to retry the deletion later. 
The * deletion is known to fail on Windows, and probably this occurs whe * the file was recently closed. */ LoggerUtils.traceAndLog( logger, env, Level.WARNING, "Log file 0x" + Long.toHexString(file) + " could not be " + expungedLabel + ". The deletion will be retried later."); return false; } /** * Updates the cached set of log size stats, including maxDiskOverage, * freeDiskShortage, diskUsageMessage and diskUsageViolationMessage. * * Normally this should only be called by manageDiskUsage while holding * the manageDiskUsageLock. However, it may be called directly during * (single threaded) recovery. */ public void freshenLogSizeStats() { recalcLogSizeStats( fileProtector.getLogSizeStats(), getDiskFreeSpace()); } /** * Implementation of freshenLogSizeStats. Exposed for testing. */ void recalcLogSizeStats(final FileProtector.LogSizeStats stats, final long diskFreeSpace) { /* * Use locals for limits, since they may be changed by other threads. */ final long maxLimit = maxDiskLimit; final long adjustedMax = adjustedMaxDiskLimit; final long freeLimit = freeDiskLimit; /* * Calculate overage/shortage and available size. 
Below are examples of * availableLogBytes values where: * * totalLS=75 activeLS=50 reservedLS=25 protectedLS=5 * * freeDL maxDL diskFS freeB1 freeB2 availableLS * 5 - 20 15 15 35 * 25 - 5 -20 -20 0 * 30 - 5 -25 -25 -5 * 5 100 20 15 15 35 * 25 100 20 -5 -5 15 * 5 80 20 15 0 20 * 25 80 20 -5 -20 0 * 25 200 5 -20 -20 0 * 25 75 20 -5 -25 -5 * 50 80 90 40 -45 -25 */ final long freeBytes1 = diskFreeSpace - freeLimit; final long freeShortage = 0 - freeBytes1; final long totalSize = stats.activeSize + stats.reservedSize; final long maxOverage; final long freeBytes2; if (adjustedMax > 0) { maxOverage = totalSize - adjustedMax; freeBytes2 = Math.min(freeBytes1, adjustedMax - totalSize); } else { maxOverage = 0; freeBytes2 = freeBytes1; } final long availBytes = freeBytes2 + stats.reservedSize - stats.protectedSize; final StringBuilder sb = new StringBuilder(); if (availBytes <= 0) { sb.append("Disk usage is not within je.maxDisk or je.freeDisk "); sb.append("limits and write operations are prohibited:"); } else { sb.append("Disk usage is currently within je.maxDisk and "); sb.append("je.freeDisk limits:"); } sb.append(" maxDiskLimit="); sb.append(INT_FORMAT.format(maxLimit)); sb.append(" freeDiskLimit="); sb.append(INT_FORMAT.format(freeLimit)); sb.append(" adjustedMaxDiskLimit="); sb.append(INT_FORMAT.format(adjustedMax)); sb.append(" maxDiskOverage="); sb.append(INT_FORMAT.format(maxOverage)); sb.append(" freeDiskShortage="); sb.append(INT_FORMAT.format(freeShortage)); sb.append(" diskFreeSpace="); sb.append(INT_FORMAT.format(diskFreeSpace)); sb.append(" availableLogSize="); sb.append(INT_FORMAT.format(availBytes)); sb.append(" totalLogSize="); sb.append(INT_FORMAT.format(totalSize)); sb.append(" activeLogSize="); sb.append(INT_FORMAT.format(stats.activeSize)); sb.append(" reservedLogSize="); sb.append(INT_FORMAT.format(stats.reservedSize)); sb.append(" protectedLogSize="); sb.append(INT_FORMAT.format(stats.protectedSize)); sb.append(" protectedLogSizeMap={"); for (final 
Map.Entry entry : stats.protectedSizeMap.entrySet()) { sb.append(entry.getKey()).append(":"); sb.append(INT_FORMAT.format(entry.getValue())); } sb.append("}"); final String msg = sb.toString(); /* Synchronize on statGroup to maintain consistent set of stats. */ synchronized (statGroup) { maxDiskOverage = maxOverage; freeDiskShortage = freeShortage; diskUsageMessage = msg; diskUsageViolationMessage = (availBytes <= 0) ? msg : null; availableLogSize = availBytes; totalLogSize = totalSize; logSizeStats = stats; } } private long getDiskFreeSpace() { try { return fileStoreInfo.getUsableSpace(); } catch (IOException e) { throw EnvironmentFailureException.unexpectedException(env, e); } } private long getDiskTotalSpace() { try { return fileStoreInfo.getTotalSpace(); } catch (IOException e) { throw EnvironmentFailureException.unexpectedException(env, e); } } /** * Returns a message describing disk space limits and usage, regardless of * whether the limit is violated or not. If there is a limit violation, * returns the same value as {@link #getDiskLimitViolation()}. Does not * return null. */ public String getDiskLimitMessage() { return diskUsageMessage; } /** * Uses cached disk usage info to determine whether disk space limits are * currently violated. This method simply returns a volatile field. The * cached information is updated frequently enough to prevent violating the * limits by a large amount. * * @return a non-null message (appropriate for an exception) if a disk * limit is currently violated, else null. */ public String getDiskLimitViolation() { return diskUsageViolationMessage; } long getMaxDiskOverage() { return maxDiskOverage; } long getFreeDiskShortage() { return freeDiskShortage; } /** * Returns a copy of the cleaned files at the time a checkpoint starts. * *

If {@link CheckpointStartCleanerState#isEmpty} returns false, the * checkpoint should flush an extra level, and {@link * #updateFilesAtCheckpointEnd} should be called when the checkpoint is * complete.

*/ public CheckpointStartCleanerState getFilesAtCheckpointStart() { /* Pending LNs can prevent file deletion. */ processPending(); return fileSelector.getFilesAtCheckpointStart(env, logger); } /** * When a checkpoint is complete, update the files that were returned at * the beginning of the checkpoint. */ public void updateFilesAtCheckpointEnd(CheckpointStartCleanerState info) { /* Update cleaned file status and get newly reserved files. */ final Map reservedFiles = fileSelector.updateFilesAtCheckpointEnd(env, info); /* * Insert reserved file db record and delete other (unnecessary) * metadata for reserved files. */ profile.reserveFiles(reservedFiles); /* Try deleting files since file status may have changed. */ manageDiskUsage(); /* * Periodically process completed expiration trackers. This is done * here in case cleaner threads are disabled. */ expirationProfile.processCompletedTrackers(); } /** * If any LNs or databases are pending, process them. This method should * be called often enough to prevent the pending LN set from growing too * large. * * If we do this too seldom, the pending LN queue may grow large, and it * isn't budgeted memory. If we process it too often, we will * repeatedly attempt to migrate pending LNs, which involves Btree * lookups and lock attempts. */ void processPending() { /* * This method is not synchronized because that would block cleaner * and checkpointer threads unnecessarily. However, we do prevent * reentrancy, for two reasons: * 1. It is wasteful for two threads to process the same pending * entries. * 2. Many threads calling getDb may increase the liklihood of * livelock. 
[#20816] */ if (!processPendingReentrancyGuard.compareAndSet(false, true)) { return; } try { final DbTree dbMapTree = env.getDbTree(); final LockManager lockManager = env.getTxnManager().getLockManager(); final Map pendingLNs = fileSelector.getPendingLNs(); if (pendingLNs != null) { final TreeLocation location = new TreeLocation(); for (final Map.Entry entry : pendingLNs.entrySet()) { if (!env.isValid()) { return; } if (diskUsageViolationMessage != null) { break; /* We can't write. */ } nPendingLNsProcessed.increment(); final long logLsn = entry.getKey(); final LNInfo info = entry.getValue(); final byte[] key = info.getKey(); final DatabaseId dbId = info.getDbId(); final DatabaseImpl db = dbMapTree.getDb(dbId, lockTimeout); try { if (env.expiresWithin( info.getExpirationTime(), 0 - env.getTtlLnPurgeDelay())) { if (lockManager.isLockUncontended(logLsn)) { fileSelector.removePendingLN(logLsn); nLNsExpired.increment(); nLNsObsolete.increment(); } else { nPendingLNsLocked.increment(); } continue; } if (env.getExtinctionState(db, key) == EXTINCT) { fileSelector.removePendingLN(logLsn); nLNsExtinct.increment(); nLNsObsolete.increment(); continue; } /* Evict before processing each entry. */ if (DO_CRITICAL_EVICTION) { env.daemonEviction(true /*backgroundIO*/); } processPendingLN(logLsn, db, key, location); } finally { dbMapTree.releaseDb(db); } } } final List pendingDBs = fileSelector.getPendingDBs(); if (pendingDBs != null) { for (final DatabaseId dbId : pendingDBs) { if (!env.isValid()) { return; } nPendingDBsProcessed.increment(); final DatabaseImpl db = dbMapTree.getDb(dbId, lockTimeout); try { if (db == null) { fileSelector.removePendingDB(dbId); } else { nPendingDBsIncomplete.increment(); } } finally { dbMapTree.releaseDb(db); } } } } finally { processPendingReentrancyGuard.set(false); } } /** * Processes a pending LN, getting the lock first to ensure that the * overhead of retries is minimal. 
*/ private void processPendingLN( final long logLsn, final DatabaseImpl db, final byte[] keyFromLog, final TreeLocation location) { boolean parentFound; // We found the parent BIN. boolean processedHere = true; // The LN was cleaned here. boolean lockDenied = false; // The LN lock was denied. boolean obsolete = false; // The LN is no longer in use. boolean completed = false; // This method completed. BasicLocker locker = null; BIN bin = null; try { /* * If the DB is gone, this LN is obsolete. If delete cleanup is in * progress, put the DB into the DB pending set; this LN will be * declared deleted after the delete cleanup is finished. */ if (db == null || db.isDeleting()) { if (db != null) { addPendingDB(DbLsn.getFileNumber(logLsn), db.getId()); } nLNsDead.increment(); obsolete = true; completed = true; return; } final Tree tree = db.getTree(); assert tree != null; /* * Get a non-blocking read lock on the original log LSN. If this * fails, then the original LSN is still write-locked. We may have * to lock again, if the LSN has changed in the BIN, but this * initial check prevents a Btree lookup in some cases. */ locker = BasicLocker.createBasicLocker(env, false /*noWait*/); /* Don't allow this short-lived lock to be preempted/stolen. */ locker.setPreemptable(false); final LockResult lockRet = locker.nonBlockingLock( logLsn, LockType.READ, false /*jumpAheadOfWaiters*/, db); if (lockRet.getLockGrant() == LockGrantType.DENIED) { /* Try again later. */ nPendingLNsLocked.increment(); lockDenied = true; completed = true; return; } /* * Search down to the bottom most level for the parent of this LN. */ parentFound = tree.getParentBINForChildLN( location, keyFromLog, false /*splitsAllowed*/, false /*blindDeltaOps*/, UPDATE_GENERATION); bin = location.bin; final int index = location.index; if (!parentFound) { nLNsDead.increment(); obsolete = true; completed = true; return; } /* Migrate an LN. 
*/ processedHere = false; lockDenied = migratePendingLN(db, logLsn, bin.getLsn(index), bin, index); completed = true; } catch (RuntimeException e) { e.printStackTrace(); LoggerUtils.traceAndLogException( env, "com.sleepycat.je.cleaner.Cleaner", "processLN", "Exception thrown: ", e); throw e; } finally { if (bin != null) { bin.releaseLatch(); } if (locker != null) { locker.operationEnd(); } /* BIN must not be latched when synchronizing on FileSelector. */ if (completed && !lockDenied) { fileSelector.removePendingLN(logLsn); } /* * If migratePendingLN was not called above, perform tracing in * this method. */ if (processedHere) { logFine(CLEAN_PENDING_LN, null /*node*/, DbLsn.NULL_LSN, completed, obsolete, false /*migrated*/); } } } /** * Migrate a pending LN in the given BIN entry, if it is not obsolete. The * BIN must be latched on entry and is left latched by this method. * * @return whether migration could not be completed because the LN lock was * denied. */ private boolean migratePendingLN( final DatabaseImpl db, final long logLsn, final long treeLsn, final BIN bin, final int index) { /* Status variables are used to generate debug tracing info. */ boolean obsolete = false; // The LN is no longer in use. boolean migrated = false; // The LN was in use and is migrated. boolean completed = false; // This method completed. boolean clearTarget = false; // Node was non-resident when called. /* * If wasCleaned is false we don't count statistics unless we migrate * the LN. This avoids double counting. */ BasicLocker locker = null; LN ln = null; try { if (treeLsn == DbLsn.NULL_LSN) { /* This node was never written, no need to migrate. */ completed = true; return false; } /* If the record has been deleted, the logrec is obsolete */ if (bin.isEntryKnownDeleted(index)) { nLNsDead.increment(); obsolete = true; completed = true; return false; } /* * Get a non-blocking read lock on the LN. A pending node is * already locked, but the original pending LSN may have changed. 
* We must lock the current LSN to guard against aborts. */ if (logLsn != treeLsn) { locker = BasicLocker.createBasicLocker(env, false /*noWait*/); /* Don't allow this short-lived lock to be preempted/stolen. */ locker.setPreemptable(false); final LockResult lockRet = locker.nonBlockingLock( treeLsn, LockType.READ, false /*jumpAheadOfWaiters*/, db); if (lockRet.getLockGrant() == LockGrantType.DENIED) { /* * LN is currently locked by another Locker, so we can't * assume anything about the value of the LSN in the bin. */ nLNsLocked.increment(); completed = true; return true; } else { nLNsDead.increment(); obsolete = true; completed = true; return false; } } else if (bin.isEmbeddedLN(index)) { throw EnvironmentFailureException.unexpectedState( env, "LN is embedded although its associated logrec (at " + treeLsn + " does not have the embedded flag on"); } /* * Get the ln so that we can log it to its new position. * Notice that the fetchLN() call below will return null if the * slot is defunct and the LN has been purged by the cleaner. */ ln = (LN) bin.getTarget(index); if (ln == null) { ln = bin.fetchLN(index, CacheMode.EVICT_LN); clearTarget = !db.getId().equals(DbTree.ID_DB_ID); } /* Don't migrate defunct LNs. */ if (ln == null || ln.isDeleted()) { bin.setKnownDeletedAndEvictLN(index); nLNsDead.increment(); obsolete = true; completed = true; return false; } /* * Migrate the LN. * * Do not pass a locker, because there is no need to lock the new * LSN, as done for user operations. Another locker cannot attempt * to lock the new LSN until we're done, because we release the * lock before we release the BIN latch. 
*/ final LogItem logItem = ln.log( env, db, null /*locker*/, null /*writeLockInfo*/, false /*newEmbeddedLN*/, bin.getKey(index), bin.getExpiration(index), bin.isExpirationInHours(), false /*currEmbeddedLN*/, treeLsn, bin.getLastLoggedSize(index), false /*isInsertion*/, true /*backgroundIO*/, getMigrationRepContext(ln)); bin.updateEntry( index, logItem.lsn, ln.getVLSNSequence(), logItem.size); nLNsMigrated.increment(); /* Lock new LSN on behalf of existing lockers. */ CursorImpl.lockAfterLsnChange( db, treeLsn, logItem.lsn, null /*excludeLocker*/); migrated = true; completed = true; return false; } finally { /* * If the node was originally non-resident, evict it now so that we * don't create more work for the evictor and reduce the cache * memory available to the application. */ if (clearTarget) { bin.evictLN(index); } if (locker != null) { locker.operationEnd(); } logFine( CLEAN_PENDING_LN, ln, treeLsn, completed, obsolete, migrated); } } /** * Returns the ReplicationContext to use for migrating the given LN. If * VLSNs are preserved in this Environment then the VLSN is logically part * of the data record, and LN.getVLSNSequence will return the VLSN, which * should be included in the migrated LN. */ static ReplicationContext getMigrationRepContext(LN ln) { long vlsnSeq = ln.getVLSNSequence(); if (vlsnSeq <= 0) { return ReplicationContext.NO_REPLICATE; } return new ReplicationContext(new VLSN(vlsnSeq), false /*inReplicationStream*/); } /** * Adds the DB ID to the pending DB set if it is being deleted but deletion * is not yet complete. */ void addPendingDB(Long file, DatabaseId id) { if (fileSelector.addPendingDB(file, id)) { LoggerUtils.logMsg(logger, env, Level.FINE, "CleanAddPendingDB " + id); } } /** * If the LSN at the cursor position refers to an LN in a reserved file, * and the LN may be fetched (is not embedded, etc) then repair the * problem by reactivating the file. * *

To guarantee that an active LN at the cursor position is detected, a * read lock on the record at the cursor position should be held when * this method is called. If a read lock is not held, a write lock may be * held by another thread and a different LSN may be restored to the BIN * slot after an abort.

* *

This method may release the record lock for the cursor's current * LSN. The lock is always held while determining the active LSN, but * released to performing more costly operations. If no costly operations * are necessary, the lock is not released.

* * @return true if the LSN could be checked, or false if it could not be * checked because the extinction filter returned MAYBE_EXTINCT. */ public boolean repairReservedFile(final DatabaseImpl dbImpl, final CursorImpl cursorImpl, final byte[] key) { /* Return early if LN is never fetched. */ if (dbImpl.isLNImmediatelyObsolete()) { return true; } /* * Latch the BIN to get the LSN and check other conditions for early * return. * * We call isDefunct (which calls isExpired) rather than calling * isProbablyExpired to avoid reactivating files unnecessarily. */ final long lsn; cursorImpl.latchBIN(); try { final BIN bin = cursorImpl.getBIN(); if (bin == null) { return true; } final int index = cursorImpl.getIndex(); lsn = bin.getLsn(index); if (DbLsn.isTransientOrNull(lsn) || bin.isEmbeddedLN(index) || bin.isDefunct(index)) { /* LN is never fetched. */ return true; } } finally { cursorImpl.releaseBIN(); } /* * Now that we have a known active LSN, release the record lock before * doing more expensive checks. */ env.getTxnManager().getLockManager().release( lsn, cursorImpl.getLocker()); /* * Check whether the LSN is in a reserved file. This gets a global * mutex that could be held by the cleaner, so do it after releasing * the BIN latch. */ final Long fileNum = DbLsn.getFileNumber(lsn); if (!env.getFileProtector().isReservedFile(fileNum)) { return true; } /* * Check extinction status after releasing BIN latch and checking for * a reserved file. It should be uncommon that a record is extinct * since the extinction scanner normally removes extinct slots * promptly. We can avoid the cost of getExtinctionState when the * file is not reserved. */ final ExtinctionStatus extinctionStatus = env.getExtinctionState(dbImpl, key); if (extinctionStatus == ExtinctionStatus.EXTINCT) { /* LN is never fetched. */ return true; } /* * If we cannot get the extinction status then the app's metadata is * probably unavailable, which is an error condition. 
We should not * reactivate the file because in an abnormal situation we might * reactivate and clean large numbers of files unnecessarily. Just * log a warning using a rate-limiting logger. */ if (extinctionStatus == ExtinctionStatus.MAYBE_EXTINCT) { LoggerUtils.logMsg( reservedFileRepairMaybeExtinctLogger, env, this, Level.WARNING, "Extinction status is MAYBE_EXTINCT. " + "Did not repair one or more reserved files. lsn=" + DbLsn.getNoFormatString(lsn)); return false; } assert extinctionStatus == ExtinctionStatus.NOT_EXTINCT; profile.reactivateReservedFile(fileNum); return true; } /** * Send trace messages to the java.util.logger. Don't rely on the logger * alone to conditionalize whether we send this message, we don't even want * to construct the message if the level is not enabled. */ void logFine(String action, Node node, long logLsn, boolean completed, boolean obsolete, boolean dirtiedMigrated) { if (logger.isLoggable(Level.FINE)) { StringBuilder sb = new StringBuilder(); sb.append(action); if (node instanceof IN) { sb.append(" node="); sb.append(((IN) node).getNodeId()); } sb.append(" logLsn="); sb.append(DbLsn.getNoFormatString(logLsn)); sb.append(" complete=").append(completed); sb.append(" obsolete=").append(obsolete); sb.append(" dirtiedOrMigrated=").append(dirtiedMigrated); LoggerUtils.logMsg(logger, env, Level.FINE, sb.toString()); } } /** * Release resources and update memory budget. Should only be called * when this environment is closed and will never be accessed again. */ public void close() { profile.close(); tracker.close(); fileSelector.close(env.getMemoryBudget()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy