All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.tree.IN Maven / Gradle / Ivy

The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.tree;

import static com.sleepycat.je.EnvironmentFailureException.unexpectedState;
import static com.sleepycat.je.ExtinctionFilter.ExtinctionStatus.EXTINCT;
import static com.sleepycat.je.ExtinctionFilter.ExtinctionStatus.NOT_EXTINCT;

import java.io.FileNotFoundException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.cleaner.PackedObsoleteInfo;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentFailureReason;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.dbi.MemoryBudget;
import com.sleepycat.je.dbi.TTL;
import com.sleepycat.je.evictor.Evictor;
import com.sleepycat.je.evictor.OffHeapCache;
import com.sleepycat.je.latch.LatchContext;
import com.sleepycat.je.latch.LatchFactory;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.latch.LatchTable;
import com.sleepycat.je.latch.SharedLatch;
import com.sleepycat.je.log.ErasedException;
import com.sleepycat.je.log.LogEntryType;
import com.sleepycat.je.log.LogItem;
import com.sleepycat.je.log.LogParams;
import com.sleepycat.je.log.LogUtils;
import com.sleepycat.je.log.Loggable;
import com.sleepycat.je.log.Provisional;
import com.sleepycat.je.log.ReplicationContext;
import com.sleepycat.je.log.WholeEntry;
import com.sleepycat.je.log.entry.BINDeltaLogEntry;
import com.sleepycat.je.log.entry.INLogEntry;
import com.sleepycat.je.log.entry.LNLogEntry;
import com.sleepycat.je.log.entry.LogEntry;
import com.sleepycat.je.tree.dupConvert.DBIN;
import com.sleepycat.je.tree.dupConvert.DIN;
import com.sleepycat.je.tree.dupConvert.DupConvert;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.SizeofMarker;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.VLSN;

/**
 * An IN represents an Internal Node in the JE tree.
 *
 * Explanation of KD (KnownDeleted) and PD (PendingDelete) entry flags
 * ===================================================================
 *
 * PD: set for all LN entries that are deleted, even before the LN is
 * committed.  Is used as an authoritative (transactionally correct) indication
 * that an LN is deleted. PD will be cleared if the txn for the deleted LN is
 * aborted.
 *
 * KD: set under special conditions for entries containing LNs which are known
 * to be obsolete.  Not used for entries in an active/uncommitted transaction.
 *
 * First notice that IN.fetchLN will allow a FileNotFoundException when the
 * PD or KD flag is set on the entry.  And it will allow a NULL_LSN when the KD
 * flag is set.
 *
 * KD was implemented first, and was originally used when the cleaner attempts
 * to migrate an LN and discovers it is deleted (see Cleaner.migrateLN). We
 * need KD because the INCompressor may not have run, and may not have
 * compressed the BIN. There's the danger that we'll try to fetch that entry,
 * and that the file was deleted by the cleaner.
 *
 * KD was used more recently when an unexpected exception occurs while logging
 * an LN, after inserting the entry.  Rather than delete the entry to clean up,
 * we mark the entry KD so it won't cause a fetch error later.  In this case
 * the entry LSN is NULL_LSN. See Tree.insertNewSlot.
 *
 * PD is closely related to the first use of KD above (for cleaned deleted LNs)
 * and came about because of a cleaner optimization we make. The cleaner
 * considers all deleted LN log entries to be obsolete, without doing a tree
 * lookup, and without any record of an obsolete offset.  This makes the cost
 * of cleaning of deleted LNs very low.  For example, if the log looks like
 * this:
 *
 * 100  LNA
 * 200  delete of LNA
 *
 * then LSN 200 will be considered obsolete when this file is processed by the
 * cleaner. After all, only two things can happen: (1) the txn commits, and we
 * don't need LSN 200, because we can wipe this LN out of the tree, or (2) the
 * txn aborts, and we don't need LSN 200, because we are going to revert to LSN
 * 100/LNA.
 *
 * We set PD for the entry of a deleted LN at the time of the operation, and we
 * clear PD if the transaction aborts.  There is no real danger that this log
 * entry will be processed by the cleaner before it's committed, because
 * cleaning can only happen after the first active LSN.
 *
 * Just as in the first use of KD above, setting PD is necessary to avoid a
 * fetch error, when the file is deleted by the cleaner but the entry
 * containing the deleted LN has not been deleted by the INCompressor.
 *
 * PD is also set in replication rollback, when LNs are marked as
 * invisible.
 *
 * When LSN locking was implemented (see CursorImpl.lockLN), the PD flag took
 * on additional meaning.  PD is used to determine whether an LN is deleted
 * without fetching it, and therefore is relied on to be transactionally
 * correct.
 *
 * In addition to the setting and use of the KD/PD flags described above, the
 * situation is complicated by the fact that we must restore the state of these
 * flags during abort, recovery, and set them properly during slot reuse.
 *
 * We have been meaning to consider whether PD and KD can be consolidated into
 * one flag: simply the Deleted flag.  The Deleted flag would be set in the
 * same way as PD is currently set, as well as the second use of KD described
 * above (when the LSN is NULL_LSN after an insertion error).  The use of KD
 * and PD for invisible entries and recovery rollback should also be
 * considered.
 *
 * If we consolidate the two flags and set the Deleted flag during a delete
 * operation (like PD), we'll have to remove optimizations (in CursorImpl for
 * example) that consider a slot deleted when KD is set.  Since KD is rarely
 * set currently, this shouldn't have a noticeable performance impact.
 */
public class IN extends Node implements Comparable, LatchContext {

    private static final String BEGIN_TAG = "";
    private static final String END_TAG = "";
    private static final String TRACE_SPLIT = "Split:";
    private static final String TRACE_DELETE = "Delete:";

    private static final int BYTES_PER_LSN_ENTRY = 4;
    public static final int MAX_FILE_OFFSET = 0xfffffe;
    private static final int THREE_BYTE_NEGATIVE_ONE = 0xffffff;

    /**
     * Used as the "empty rep" for the INLongRep offHeapBINIds field.
     *
     * minLength is 3 because BIN IDs are LRU list indexes. Initially 100k
     * indexes are allocated and the largest values are used first.
     *
     * allowSparseRep is true because some workloads will only load BIN IDs for
     * a subset of the BINs in the IN.
     */
    private static final INLongRep.EmptyRep EMPTY_OFFHEAP_BIN_IDS =
        new INLongRep.EmptyRep(3, true);

    /*
     * Levels:
     * The mapping tree has levels in the 0x20000 -> 0x2ffff number space.
     * The main tree has levels in the 0x10000 -> 0x1ffff number space.
     * The duplicate tree levels are in 0-> 0xffff number space.
     */
    public static final int DBMAP_LEVEL = 0x20000;
    public static final int MAIN_LEVEL = 0x10000;
    public static final int LEVEL_MASK = 0x0ffff;
    public static final int MIN_LEVEL = -1;
    public static final int BIN_LEVEL = MAIN_LEVEL | 1;

    /* Used to indicate that an exact match was found in findEntry. */
    public static final int EXACT_MATCH = (1 << 16);

    /* Used to indicate that an insert was successful. */
    public static final int INSERT_SUCCESS = (1 << 17);

    /*
     * A bit flag set in the return value of partialEviction() to indicate
     * whether the IN is evictable or not.
     */
    public static final long NON_EVICTABLE_IN = (1L << 62);

    /*
     * Boolean properties of an IN, encoded as bits inside the flags
     * data member.
     */
    private static final int IN_DIRTY_BIT = 0x1;
    private static final int IN_RECALC_TOGGLE_BIT = 0x2;
    private static final int IN_IS_ROOT_BIT = 0x4;
    private static final int IN_HAS_CACHED_CHILDREN_BIT = 0x8;
    private static final int IN_PRI2_LRU_BIT = 0x10;
    private static final int IN_DELTA_BIT = 0x20;
    private static final int IN_FETCHED_COLD_BIT = 0x40;
    private static final int IN_FETCHED_COLD_OFFHEAP_BIT = 0x80;
    private static final int IN_RESIDENT_BIT = 0x100;
    private static final int IN_PROHIBIT_NEXT_DELTA_BIT = 0x200;
    private static final int IN_EXPIRATION_IN_HOURS = 0x400;

    /* Tracing for LRU-related ops */
    private static final boolean traceLRU = false;
    private static final boolean traceDeltas = false;
    private static final Level traceLevel = Level.INFO;

    DatabaseImpl databaseImpl;

    private int level;

    /* The unique id of this node. */
    long nodeId;

    /* Some bits are persistent and some are not, see serialize. */
    int flags;

    /*
     * The identifier key is a key that can be used used to search for this IN.
     * Initially it is the key of the zeroth slot, but insertions prior to slot
     * zero make this no longer true.  It is always equal to some key in the
     * IN, and therefore it is changed by BIN.compress when removing slots.
     */
    private byte[] identifierKey;

    int nEntries;

    byte[] entryStates;

    /*
     * entryKeys contains the keys in their entirety if key prefixing is not
     * being used. If prefixing is enabled, then keyPrefix contains the prefix
     * and entryKeys contains the suffixes. Records with small enough data
     * (smaller than the value je.tree.maxEmbeddedLN param) are stored in
     * their entirity (both key (or key suffix) and data) inside BINs. This is
     * done by combining the record key and data as a two-part key (see the
     * dbi/DupKeyData class) and storing the resulting array in entryKeys.
     * A special case is when the record to be embedded has no data. Then,
     * the two-part key format is not used, but instead the NO_DATA_LN_BIT
     * is turned on in the slot's state. This saves the space overhead of
     * using the two-part key format.
     */
    INKeyRep entryKeys;
    byte[] keyPrefix;

    /*
     * The following entryLsnXXX fields are used for storing LSNs.  There are
     * two possible representations: a byte array based rep, and a long array
     * based one.  For compactness, the byte array rep is used initially.  A
     * single byte[] that uses four bytes per LSN is used. The baseFileNumber
     * field contains the lowest file number of any LSN in the array.  Then for
     * each entry (four bytes each), the first byte contains the offset from
     * the baseFileNumber of that LSN's file number.  The remaining three bytes
     * contain the file offset portion of the LSN.  Three bytes will hold a
     * maximum offset of 16,777,214 (0xfffffe), so with the default JE log file
     * size of 10,000,000 bytes this works well.
     *
     * If either (1) the difference in file numbers exceeds 127
     * (Byte.MAX_VALUE) or (2) the file offset is greater than 16,777,214, then
     * the byte[] based rep mutates to a long[] based rep.
     *
     * In the byte[] rep, DbLsn.NULL_LSN is represented by setting the file
     * offset bytes for a given entry to -1 (0xffffff).
     *
     * Note: A compact representation will be changed to the non-compact one,
     * if needed, but in the current implementation, the reverse mutation
     * (from long to compact) never takes place.
     */
    long baseFileNumber;
    byte[] entryLsnByteArray;
    long[] entryLsnLongArray;
    public static boolean disableCompactLsns; // DbCacheSize only

    /*
     * The children of this IN. Only the ones that are actually in the cache
     * have non-null entries. Specialized sparse array represents are used to
     * represent the entries. The representation can mutate as modifications
     * are made to it.
     */
    INTargetRep entryTargets;

    /*
     * In a level 2 IN, the LRU IDs of the child BINs.
     */
    private INLongRep offHeapBINIds = EMPTY_OFFHEAP_BIN_IDS;

    long inMemorySize;

    /*
     * accumluted memory budget delta.  Once this exceeds
     * MemoryBudget.ACCUMULATED_LIMIT we inform the MemoryBudget that a change
     * has occurred.  See SR 12273.
     */
    private int accumulatedDelta = 0;

    /*
     * Max allowable accumulation of memory budget changes before MemoryBudget
     * should be updated. This allows for consolidating multiple calls to
     * updateXXXMemoryBudget() into one call.  Not declared final so that the
     * unit tests can modify it.  See SR 12273.
     */
    public static final int ACCUMULATED_LIMIT_DEFAULT = 1000;
    public static int ACCUMULATED_LIMIT = ACCUMULATED_LIMIT_DEFAULT;

    /**
     * References to the next and previous nodes in an LRU list. If the node
     * is not in any LRUList, both of these will be null. If the node is at
     * the front/back of an LRUList, prevLRUNode/nextLRUNode will point to
     * the node itself.
     */
    private IN nextLRUNode = null;
    private IN prevLRUNode = null;

    /*
     * Let L be the most recently written logrec for this IN instance.
     * (a) If this is a UIN, lastFullVersion is the lsn of L.
     * (b) If this is a BIN instance and L is a full-version logrec,
     *     lastFullVersion is the lsn of L.
     * (c) If this is a BIN instance and L is a delta logrec, lastFullVersion
     *     is the lsn of the most recently written full-version logrec for the
     *     same BIN.
     *
     * It is set in 2 cases:
     *
     * (a) after "this" is created via reading a logrec L, lastFullVersion is
     * set to L's lsn, if L is a UIN or a full BIN. (this is done in
     * IN.postFetch/RecoveryInit(), via IN.setLastLoggedLsn()). If L is a BIN
     * delta, lastFullVersion is set by BINDeltaLogEntry.readEntry() to
     * L.prevFullLsn.
     *
     * (b) After logging a UIN or a full-BIN logrec, it is set to the LSN of
     * the logrec written. This is done in IN.afterLog().
     *
     * Notice that this is a persistent field, but except from case (c), when
     * reading a logrec L, it is set not to the value found in L, but to the
     * lsn of L. This is why its read/write is managed by the INLogEntry class
     * rather than the IN readFromLog/writeFromLog methods.
     */
    long lastFullVersion = DbLsn.NULL_LSN;

    /*
     * BINs have a lastDeltaVersion data field as well, which is defined as
     * follows:
     *
     * Let L be the most recently written logrec for this BIN instance. If
     * L is a full-version logrec, lastDeltaVersion is NULL; otherwise it
     * is the lsn of L.
     *
     * It is used for obsolete tracking.
     *
     * It is set in 2 cases:
     *
     * (a) after "this" is created via reading a logrec L, lastDeltaVersion
     * is set to L's lsn, if L is a BIN-delta logrec, or to NULL if L is a
     * full-BIN logrec (this is done in IN.postFetch/RecoveryInit(), via
     * BIN.setLastLoggedLsn()).
     *
     * (b) After we write a logrec L for this BIN instance, lastDeltaVersion
     * is set to NULL if L is a full-BIN logrec, or to L's lsn, if L is a
     * BIN-delta logrec (this is done in BIN.afterLog()).
     *
     * Notice that this is a persistent field, but when reading a logrec L,
     * it is set not to the value found in L, but to the lsn of L. This is why
     * its read/write is managed by the INLogEntry class rather than the IN
     * readFromLog/writeFromLog methods.
     *
     * private long lastDeltaVersion = DbLsn.NULL_LSN;
     */


    /*
     * A sequence of obsolete info that cannot be counted as obsolete until an
     * ancestor IN is logged non-provisionally.
     */
    private PackedObsoleteInfo provisionalObsolete;

    /* See convertDupKeys. */
    private boolean needDupKeyConversion;

    private int pinCount = 0;

    private SharedLatch latch;

    private IN parent;

    private TestHook fetchINHook;

    /**
     * Create an empty IN, with no node ID, to be filled in from the log.
     */
    public IN() {
        init(null, Key.EMPTY_KEY, 0, 0);
    }

    /**
     * Create a new IN.
     */
    public IN(
        DatabaseImpl dbImpl,
        byte[] identifierKey,
        int capacity,
        int level) {

        nodeId = dbImpl.getEnv().getNodeSequence().getNextLocalNodeId();

        init(dbImpl, identifierKey, capacity,
            generateLevel(dbImpl.getId(), level));

        initMemorySize();
    }

    /**
     * For Sizeof.
     */
    public IN(@SuppressWarnings("unused") SizeofMarker marker) {

        /*
         * Set all variable fields to null, since they are not part of the
         * fixed overhead.
         */
        entryTargets = null;
        entryKeys = null;
        keyPrefix = null;
        entryLsnByteArray = null;
        entryLsnLongArray = null;
        entryStates = null;

        latch = LatchFactory.createSharedLatch(
            LatchSupport.DUMMY_LATCH_CONTEXT, isAlwaysLatchedExclusively());

        /*
         * Use the latch to force it to grow to "runtime size".
         */
        latch.acquireExclusive();
        latch.release();
        latch.acquireExclusive();
        latch.release();
    }

    /**
     * Create a new IN.  Need this because we can't call newInstance() without
     * getting a 0 for nodeId.
     */
    IN createNewInstance(
        byte[] identifierKey,
        int maxEntries,
        int level) {
        return new IN(databaseImpl, identifierKey, maxEntries, level);
    }

    /**
     * Initialize IN object.
     */
    protected void init(
        DatabaseImpl db,
        @SuppressWarnings("hiding")
        byte[] identifierKey,
        int initialCapacity,
        @SuppressWarnings("hiding")
        int level) {

        setDatabase(db);
        latch = LatchFactory.createSharedLatch(
            this, isAlwaysLatchedExclusively());
        flags = 0;
        nEntries = 0;
        this.identifierKey = identifierKey;
        entryTargets = INTargetRep.NONE;
        entryKeys = new INKeyRep.Default(initialCapacity);
        keyPrefix = null;
        baseFileNumber = -1;

        /*
         * Normally we start out with the compact LSN rep and then mutate to
         * the long rep when needed.  But for some purposes (DbCacheSize) we
         * start out with the long rep and never use the compact rep.
         */
        if (disableCompactLsns) {
            entryLsnByteArray = null;
            entryLsnLongArray = new long[initialCapacity];
        } else {
            entryLsnByteArray = new byte[initialCapacity << 2];
            entryLsnLongArray = null;
        }

        entryStates = new byte[initialCapacity];
        this.level = level;
    }

    @Override
    public final boolean isIN() {
        return true;
    }

    @Override
    public final boolean isUpperIN() {
        return !isBIN();
    }

    @Override
    public final String getLatchName() {
        return shortClassName() + getNodeId();
    }

    @Override
    public final int getLatchTimeoutMs() {
        return databaseImpl.getEnv().getLatchTimeoutMs();
    }

    @Override
    public final LatchTable getLatchTable() {
        return LatchSupport.btreeLatchTable;
    }

    /*
     * Return whether the shared latch for this kind of node should be of the
     * "always exclusive" variety.  Presently, only IN's are actually latched
     * shared.  BINs are latched exclusive only.
     */
    boolean isAlwaysLatchedExclusively() {
        return false;
    }

    /**
     * Latch this node if it is not latched by another thread. Update the LRU
     * using the given cacheMode if the latch succeeds.
     */
    public final boolean latchNoWait(CacheMode cacheMode) {
        if (!latch.acquireExclusiveNoWait()) {
            return false;
        }
        updateLRU(cacheMode);
        return true;
    }

    /**
     * Latch this node exclusive and update the LRU using the given cacheMode.
     */
    public void latch(CacheMode cacheMode) {
        latch.acquireExclusive();
        updateLRU(cacheMode);
    }

    /**
     * Latch this node exclusive and update the LRU using the default cacheMode.
     */
    public final void latch() {
        latch(CacheMode.DEFAULT);
    }

    /**
     * Latch this node shared and update the LRU using the given cacheMode.
     */
    @Override
    public void latchShared(CacheMode cacheMode) {
        latch.acquireShared();
        updateLRU(cacheMode);
    }

    /**
     * Latch this node shared and update the LRU using the default cacheMode.
     */
    @Override
    public final void latchShared() {
        latchShared(CacheMode.DEFAULT);
    }

    /**
     * Latch this node exclusive and do not update the LRU or cause other
     * related side effects.
     *
     * @param db is passed in order to initialize the database for an
     * uninitialized node, which is necessary in order to latch it.
     */
    public final void latchNoUpdateLRU(DatabaseImpl db) {
        if (databaseImpl == null) {
            databaseImpl = db;
        }
        latch.acquireExclusive();
    }

    /**
     * Latch this node exclusive and do not update the LRU or cause other
     * related side effects.
     */
    public final void latchNoUpdateLRU() {
        assert databaseImpl != null;
        latch.acquireExclusive();
    }

    /**
     * Release the latch on this node.
     */
    @Override
    public final void releaseLatch() {
        latch.release();
    }

    /**
     * Release the latch on this node if it is owned.
     */
    public final void releaseLatchIfOwner() {
        latch.releaseIfOwner();
    }

    /**
     * @return true if this thread holds the IN's latch
     */
    public final boolean isLatchOwner() {
        return latch.isOwner();
    }

    public final boolean isLatchExclusiveOwner() {
        return latch.isExclusiveOwner();
    }

    /* For unit testing. */
    public final int getLatchNWaiters() {
        return latch.getNWaiters();
    }

    public final void updateLRU(CacheMode cacheMode) {

        if (!getInListResident()) {
            return;
        }

        switch (cacheMode) {
        case UNCHANGED:
        case MAKE_COLD:
            break;
        case DEFAULT:
        case EVICT_LN:
        case EVICT_BIN:
        case KEEP_HOT:
            setFetchedCold(false);
            setFetchedColdOffHeap(false);

            if (isBIN() || !hasCachedChildrenFlag()) {
                assert(isBIN() || !hasCachedChildren());
                getEvictor().moveBack(this);
            }
            break;
        default:
            assert false;
        }
    }

    /**
     * This method should be used carefully. Unless this node and the parent
     * are already known to be latched, call latchParent instead to access the
     * parent safely.
     */
    public IN getParent() {
        return parent;
    }

    public void setParent(IN in) {
        assert in != null;

        /*
         * Must hold EX-latch when changing a non-null parent. But when setting
         * the parent initially (it is currently null), we assume it is being
         * attached and no other threads have access to it.
         */
        if (parent != null && !isLatchExclusiveOwner()) {
            throw unexpectedState();
        }

        parent = in;
    }

    /**
     * Latches the parent exclusively, leaving this node latched. The parent
     * must not already be latched.
     *
     * This node must be latched on entry and will be latched on exit. This
     * node's latch may be released temporarily, in which case it will be
     * ex-latched (since the parent is ex-latched, this isn't a drawback).
     *
     * Does not perform cache mode processing, since this node is already
     * latched.
     *
     * @return the ex-latched parent, for which calling getKnownChildIndex with
     * this node is guaranteed to succeed.
     *
     * @throws EnvironmentFailureException (fatal) if the parent latch is
     * already held.
     */
    public final IN latchParent() {

        assert latch.isOwner();
        assert !isRoot();
        assert getParent() != null;

        while (true) {
            final IN p = getParent();

            if (p.latch.acquireExclusiveNoWait()) {
                return p;
            }

            pin();
            try {
                latch.release();
                p.latch.acquireExclusive();
                latch.acquireExclusive();
            } finally {
                unpin();
            }

            if (getParent() == p) {
                return p;
            }

            p.latch.release();
        }
    }

    /**
     * Returns the index of the given child. Should only be called when the
     * caller knows that the given child is resident.
     */
    public int getKnownChildIndex(final Node child) {

        for (int i = 0; i < nEntries; i += 1) {
            if (getTarget(i) == child) {
                return i;
            }
        }

        throw unexpectedState();
    }

    public final synchronized void pin() {
        assert(isLatchOwner());
        assert(pinCount >= 0);
        ++pinCount;
    }

    public final synchronized void unpin() {
        assert(pinCount > 0);
        --pinCount;
    }

    public final synchronized boolean isPinned() {
        assert(isLatchExclusiveOwner());
        assert(pinCount >= 0);
        return pinCount > 0;
    }

    /**
     * Get the database for this IN.
     */
    public final DatabaseImpl getDatabase() {
        return databaseImpl;
    }

    /**
     * Set the database reference for this node.
     */
    public final void setDatabase(DatabaseImpl db) {
        databaseImpl = db;
    }

    /*
     * Get the database id for this node.
     */
    public final DatabaseId getDatabaseId() {
        return databaseImpl.getId();
    }

    @Override
    public final EnvironmentImpl getEnvImplForFatalException() {
        return databaseImpl.getEnv();
    }

    public final EnvironmentImpl getEnv() {
        return databaseImpl.getEnv();
    }

    final Evictor getEvictor() {
        return databaseImpl.getEnv().getEvictor();
    }

    final OffHeapCache getOffHeapCache() {
        return databaseImpl.getEnv().getOffHeapCache();
    }

    /**
     * Convenience method to return the database key comparator.
     */
    public final Comparator getKeyComparator() {
        return databaseImpl.getKeyComparator();
    }

    @Override
    public final int getLevel() {
        return level;
    }

    public final int getNormalizedLevel() {
        return level & LEVEL_MASK;
    }

    private static int generateLevel(DatabaseId dbId, int newLevel) {
        if (dbId.equals(DbTree.ID_DB_ID)) {
            return newLevel | DBMAP_LEVEL;
        } else {
            return newLevel | MAIN_LEVEL;
        }
    }

    public final long getNodeId() {
        return nodeId;
    }

    /* For unit tests only. */
    final void setNodeId(long nid) {
        nodeId = nid;
    }

    /**
     * We would like as even a hash distribution as possible so that the
     * Evictor's LRU is as accurate as possible.  ConcurrentHashMap takes the
     * value returned by this method and runs its own hash algorithm on it.
     * So a bit complement of the node ID is sufficient as the return value and
     * is a little better than returning just the node ID.  If we use a
     * different container in the future that does not re-hash the return
     * value, we should probably implement the Wang-Jenkins hash function here.
     */
    @Override
    public final int hashCode() {
        return (int) ~getNodeId();
    }

    @Override
    public final boolean equals(Object obj) {
        if (!(obj instanceof IN)) {
            return false;
        }
        IN in = (IN) obj;
        return (this.getNodeId() == in.getNodeId());
    }

    /**
     * Sort based on equality key.
     */
    public final int compareTo(IN argIN) {
        long argNodeId = argIN.getNodeId();
        long myNodeId = getNodeId();

        if (myNodeId < argNodeId) {
            return -1;
        } else if (myNodeId > argNodeId) {
            return 1;
        } else {
            return 0;
        }
    }

    public final boolean getDirty() {
        return (flags & IN_DIRTY_BIT) != 0;
    }

    public final void setDirty(boolean dirty) {
        if (dirty) {
            flags |= IN_DIRTY_BIT;
        } else {
            flags &= ~IN_DIRTY_BIT;
        }
    }

    @Override
    public final boolean isBINDelta() {
        assert(isUpperIN() || isLatchOwner());
        return (flags & IN_DELTA_BIT) != 0;
    }

    /*
     * This version of isBINDelta() takes a checkLatched param to allow
     * for cases where it is ok to call the method without holding the
     * BIN latch (e.g. in single-threaded tests, or when the BIN is not
     * attached to the tree (and thus inaccessible from other threads)).
     */
    @Override
    public final boolean isBINDelta(boolean checkLatched) {
        assert(!checkLatched || isUpperIN() || isLatchOwner());
        return (flags & IN_DELTA_BIT) != 0;
    }

    final void setBINDelta(boolean delta) {
        if (delta) {
            flags |= IN_DELTA_BIT;
        } else {
            flags &= ~IN_DELTA_BIT;
        }
    }

    /**
     * Indicates that the BIN was fetched from disk, or loaded from the
     * off-heap cache, using CacheMode.UNCHANGED, and has not been accessed
     * with another CacheMode. BINs in this state should be evicted from main
     * cache as soon as they are no longer referenced by a cursor. If they were
     * loaded from off-heap cache, they should be stored off-heap when they are
     * evicted from main. The FetchedColdOffHeap flag indicates whether the
     * BIN was loaded from off-heap cache.
     */
    public final boolean getFetchedCold() {
        return (flags & IN_FETCHED_COLD_BIT) != 0;
    }

    /** @see #getFetchedCold() */
    public final void setFetchedCold(boolean val) {
        if (val) {
            flags |= IN_FETCHED_COLD_BIT;
        } else {
            flags &= ~IN_FETCHED_COLD_BIT;
        }
    }

    /** @see #getFetchedCold() */
    public final boolean getFetchedColdOffHeap() {
        return (flags & IN_FETCHED_COLD_OFFHEAP_BIT) != 0;
    }

    /** @see #getFetchedCold() */
    public final void setFetchedColdOffHeap(boolean val) {
        if (val) {
            flags |= IN_FETCHED_COLD_OFFHEAP_BIT;
        } else {
            flags &= ~IN_FETCHED_COLD_OFFHEAP_BIT;
        }
    }

    public final boolean getRecalcToggle() {
        return (flags & IN_RECALC_TOGGLE_BIT) != 0;
    }

    public final void setRecalcToggle(boolean toggle) {
        if (toggle) {
            flags |= IN_RECALC_TOGGLE_BIT;
        } else {
            flags &= ~IN_RECALC_TOGGLE_BIT;
        }
    }

    public final boolean isRoot() {
        return (flags & IN_IS_ROOT_BIT) != 0;
    }

    final void setIsRoot(boolean isRoot) {
        setIsRootFlag(isRoot);
        setDirty(true);
    }

    private void setIsRootFlag(boolean isRoot) {
        if (isRoot) {
            flags |= IN_IS_ROOT_BIT;
        } else {
            flags &= ~IN_IS_ROOT_BIT;
        }
    }

    public final boolean hasCachedChildrenFlag() {
        return (flags & IN_HAS_CACHED_CHILDREN_BIT) != 0;
    }

    private void setHasCachedChildrenFlag(boolean value) {
        if (value) {
            flags |= IN_HAS_CACHED_CHILDREN_BIT;
        } else {
            flags &= ~IN_HAS_CACHED_CHILDREN_BIT;
        }
    }

    public final boolean isInPri2LRU() {
        return (flags & IN_PRI2_LRU_BIT) != 0;
    }

    /* public for unit tests */
    public final void setInPri2LRU(boolean value) {
        if (value) {
            flags |= IN_PRI2_LRU_BIT;
        } else {
            flags &= ~IN_PRI2_LRU_BIT;
        }
    }

    public boolean isExpirationInHours() {
        return (flags & IN_EXPIRATION_IN_HOURS) != 0;
    }

    void setExpirationInHours(boolean value) {
        if (value) {
            flags |= IN_EXPIRATION_IN_HOURS;
        } else {
            flags &= ~IN_EXPIRATION_IN_HOURS;
        }
    }

    /**
     * @return the identifier key for this node.
     */
    public final byte[] getIdentifierKey() {
        return identifierKey;
    }

    /**
     * Set the identifier key for this node.
     *
     * @param key - the new identifier key for this node.
     *
     * @param makeDirty should normally be true, but may be false when an
     * expired slot containing the identifier key has been deleted.
     */
    public final void setIdentifierKey(byte[] key, boolean makeDirty) {

        assert(!isBINDelta());

        /*
         * The identifierKey is "intentionally" not kept track of in the
         * memory budget.  If we did, then it would look like this:

         int oldIDKeySz = (identifierKey == null) ?
                           0 :
                           MemoryBudget.byteArraySize(identifierKey.length);

         int newIDKeySz = (key == null) ?
                           0 :
                           MemoryBudget.byteArraySize(key.length);
         updateMemorySize(newIDKeySz - oldIDKeySz);

         */
        identifierKey = key;

        if (makeDirty) {
            setDirty(true);
        }
    }

    /**
     * @return the number of entries in this node.
     */
    public final int getNEntries() {
        return nEntries;
    }

    /**
     * @return the maximum number of entries in this node.
     *
     * Overriden by TestIN in INEntryTestBase.java
     */
    public int getMaxEntries() {
        return entryStates.length;
    }

    public final byte getState(int idx) {
        return entryStates[idx];
    }

    /**
     * @return true if the object is dirty.
     */
    public final boolean isDirty(int idx) {
        return ((entryStates[idx] & EntryStates.DIRTY_BIT) != 0);
    }

    /**
     * @return true if the idx'th entry has been deleted, although the
     * transaction that performed the deletion may not be committed.
     */
    public final boolean isEntryPendingDeleted(int idx) {
        return ((entryStates[idx] & EntryStates.PENDING_DELETED_BIT) != 0);
    }

    /**
     * Set pendingDeleted to true.
     */
    public final void setPendingDeleted(int idx) {

        entryStates[idx] |= EntryStates.PENDING_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set pendingDeleted to false.
     */
    final void clearPendingDeleted(int idx) {

        entryStates[idx] &= EntryStates.CLEAR_PENDING_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * @return true if the idx'th entry is deleted for sure.  If a transaction
     * performed the deletion, it has been committed.
     */
    public final boolean isEntryKnownDeleted(int idx) {
        return ((entryStates[idx] & EntryStates.KNOWN_DELETED_BIT) != 0);
    }

    /**
     * Set KD flag to true and clear the PD flag (PD does not need to be on
     * if KD is on).
     */
    public final void setKnownDeleted(int idx) {

        assert(isBIN());

        entryStates[idx] |= EntryStates.KNOWN_DELETED_BIT;
        entryStates[idx] &= EntryStates.CLEAR_PENDING_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set knownDeleted flag to true and evict the child LN if cached. The
     * child LN is evicted to save memory, since it will never be fetched
     * again.
     */
    public final void setKnownDeletedAndEvictLN(int index) {

        assert(isBIN());

        setKnownDeleted(index);

        LN oldLN = (LN) getTarget(index);
        if (oldLN != null) {
            updateMemorySize(oldLN, null /* newNode */);
        }
        setTarget(index, null);
    }

    /**
     * Set knownDeleted to false.
     */
    final void clearKnownDeleted(int idx) {

        assert(isBIN());

        entryStates[idx] &= EntryStates.CLEAR_KNOWN_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /*
     * In the future we may want to move the following static methods to an
     * EntryState utility class and share all state bit twidling among IN,
     * ChildReference, and DeltaInfo.
     */

    /**
     * Returns true if the given state is known deleted.
     */
    static boolean isStateKnownDeleted(byte state) {
        return ((state & EntryStates.KNOWN_DELETED_BIT) != 0);
    }

    /**
     * Returns true if the given state is pending deleted.
     */
    static boolean isStatePendingDeleted(byte state) {
        return ((state & EntryStates.PENDING_DELETED_BIT) != 0);
    }

    /**
     * Return true if the LN at the given slot is embedded.
     */
    public final boolean isEmbeddedLN(int idx) {
        return ((entryStates[idx] & EntryStates.EMBEDDED_LN_BIT) != 0);
    }

    public static boolean isEmbeddedLN(byte state) {
        return ((state & EntryStates.EMBEDDED_LN_BIT) != 0);
    }

    /**
     * Set embeddedLN to true.
     */
    private void setEmbeddedLN(int idx) {

        entryStates[idx] |= EntryStates.EMBEDDED_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set embeddedLN to false.
     */
    private void clearEmbeddedLN(int idx) {

        entryStates[idx] &= EntryStates.CLEAR_EMBEDDED_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Return true if the LN at the given slot is an embedded LN with no data.
     */
    public final boolean isNoDataLN(int idx) {
        return ((entryStates[idx] & EntryStates.NO_DATA_LN_BIT) != 0);
    }

    public static boolean isNoDataLN(byte state) {
        return ((state & EntryStates.NO_DATA_LN_BIT) != 0);
    }

    /**
     * Set noDataLN to true.
     */
    void setNoDataLN(int idx) {

        entryStates[idx] |= EntryStates.NO_DATA_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set noDataLN to false.
     */
    private void clearNoDataLN(int idx) {

        entryStates[idx] &= EntryStates.CLEAR_NO_DATA_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /*
     *
     */
    public final boolean haveEmbeddedData(int idx) {
        return (isEmbeddedLN(idx) && !isNoDataLN(idx));
    }

    /* For unit testing */
    public final int getNumEmbeddedLNs() {
        int res = 0;
        for (int i = 0; i < getNEntries(); ++i) {
            if (isEmbeddedLN(i)) {
                ++res;
            }
        }

        return res;
    }

    /* For unit testing */
    public final INKeyRep getKeyVals() {
        return entryKeys;
    }

    public final byte[] getKeyPrefix() {
        return keyPrefix;
    }

    /*
     * For unit testing only
     */
    public final boolean hasKeyPrefix() {
        return keyPrefix != null;
    }

    /* This has default protection for access by the unit tests. */
    final void setKeyPrefix(byte[] keyPrefix) {

        assert databaseImpl != null;

        int prevLength = (this.keyPrefix == null) ? 0 : this.keyPrefix.length;
        this.keyPrefix = keyPrefix;
        /* Update the memory budgeting to reflect changes in the key prefix. */
        int currLength = (keyPrefix == null) ? 0 : keyPrefix.length;
        updateMemorySize(prevLength, currLength);
    }

    /**
     * Return the idx'th key. If prefixing is enabled, construct a new byte[]
     * containing the prefix and suffix. If prefixing is not enabled, just
     * return the current byte[] in entryKeys.
     */
    public final byte[] getKey(int idx) {

        assert idx < nEntries;

        byte[] key = entryKeys.getFullKey(
            keyPrefix, idx, haveEmbeddedData(idx));

        assert(key != null);

        return key;
    }

    public final byte[] getData(int idx) {

        if (haveEmbeddedData(idx)) {
            return entryKeys.getData(idx);
        }

        if (isNoDataLN(idx)) {
            return Key.EMPTY_KEY;
        }

        return null;
    }

    /**
     * Returns the size of the key that is stored persistently, which will be
     * the combined key-data for an embedded LN or duplicated DB record.
     */
    int getStoredKeySize(int idx) {
        return entryKeys.size(idx);
    }

    /**
     * Updates the key in the idx-th slot of this BIN, if the DB allows key
     * updates and the new key is not identical to the current key in the slot.
     * It also updates the data (if any) that is embedded with the key in the
     * idx-slot, or embeds new data in that slot, is the "data" param is
     * non-null, or removes embedded data, if "data" is null. Finally, it
     * sets the EMBEDDED_LN_BIT and NO_DATA_LN_BIT flags in the slot's state.
     *
     * @param key is the key to set in the slot and is the LN key.
     *
     * @param data If the data portion of a record must be embedded in this
     * BIN, "data" stores the record's data. Null otherwise. See also comment
     * for the keyEntries field. 
     * 
     * @return true if a multi-slot change was made and the complete IN memory
     * size must be updated.
     */
    private boolean updateLNSlotKey(int idx, byte[] key, byte[] data) {

        assert(isBIN());

        boolean haveEmbeddedData = haveEmbeddedData(idx);

        if (data == null) {
            if (isEmbeddedLN(idx)) {
                clearEmbeddedLN(idx); 
                clearNoDataLN(idx);
            }
        } else {
            if (!isEmbeddedLN(idx)) {
                setEmbeddedLN(idx);
            }
            if (data.length == 0) {
                setNoDataLN(idx);
            } else {
                clearNoDataLN(idx);
            }
        }

        /*
         * The new key may be null if a dup LN was deleted, in which case there
         * is no need to update it.  There is no need to compare keys if there
         * is no comparator configured, since a key cannot be changed when the
         * default comparator is used.
         */
        if (key != null &&
            (databaseImpl.allowsKeyUpdates() ||
             DupConvert.needsConversion(databaseImpl)) &&
            !Arrays.equals(key, getKey(idx))) {

            setDirty(true);
            return setKey(idx, key, data, false);

        } else if (haveEmbeddedData) {

            /*
             * The key does not change, but the slot contains embedded data,
             * which must now either be removed (if data == null or
             * data.length == 0) or updated.
             * TODO #21488: update the data only if it actually changes.
             */
            setDirty(true);
            entryStates[idx] |= EntryStates.DIRTY_BIT;

            INKeyRep.Type oldRepType = entryKeys.getType();
            entryKeys = entryKeys.setData(idx, data, this);
            return oldRepType != entryKeys.getType();

        } else if (data != null && data.length != 0) {

            /*
             * The key does not change, but we now have to embed data in a slot
             * that does not currently have embedded data.
             */
            setDirty(true);
            entryStates[idx] |= EntryStates.DIRTY_BIT;

            key = entryKeys.getKey(idx, false);
            INKeyRep.Type oldRepType = entryKeys.getType();
            entryKeys = entryKeys.set(idx, key, data, this);
            return oldRepType != entryKeys.getType();

        } else {
            return false;
        }
    }

    /*
     * Convenience wrapper for setKey() method below
     */
    private boolean insertKey(
        int idx,
        byte[] key,
        byte[] data) {

        /*
         * Set the id key when inserting the first entry. This is important
         * when compression removes all entries from a BIN, and then an entry
         * is inserted before the empty BIN is purged.
         */
        if (nEntries == 1 && !isBINDelta()) {
            setIdentifierKey(key, true /*makeDirty*/);
        }

        return setKey(idx, key, data, true);
    }

    // TODO re-enable this and figure out why it is firing

    private boolean idKeyIsSlotKey() {

        if (true) {
            return true;
        }

        if (!isBIN() || nEntries == 0) {
            return true;
        }

        for (int i = 0; i < nEntries; i += 1) {

            if (entryKeys.compareKeys(
                identifierKey,  keyPrefix, i, haveEmbeddedData(i),
                databaseImpl.getKeyComparator()) == 0) {

                return true;
            }
        }

        return false;
    }

    /*
     * Convenience wrapper for setKey() method below. It is used for
     * upper INs only, so no need to worry about the EMBEDDED_LN_BIT
     * and NO_DATA_LN_BIT flags.
     */
    private boolean updateKey(
        int idx,
        byte[] key,
        byte[] data) {
        return setKey(idx, key, data, false);
    }

    /**
     * This method inserts or updates a key at a given slot. In either case,
     * the associated embedded data (if any) is inserted or updated as well,
     * and the key prefix is adjusted, if necessary.
     *
     * In case of insertion (indicated by a true value for the isInsertion
     * param), it is assumed that the idx slot does not store any valid info,
     * so any change to the key prefix (if any) is due to the insertion of
     * this new new key and not to the removal of the current key at the idx
     * slot.
     *
     * In case of update, the method does not check if the current key is
     * indeed different from the new key; it just updates the key
     * unconditionally. If the slot has embedded data, that data will also
     * be updated (if the data param is not null), or be removed (if the data
     * param is null). If the slot does not have embedded data and the data
     * param is not null, the given data will be embedded.
     *
     * Note: For BINs, the maintenance of the EMBEDDED_LN_BIT andNO_DATA_LN_BIT
     * is done by the callers of this method.
     *
     * @param data If the data portion of a record must be embedded in this
     * BIN, "data" stores the record's data. Null otherwise. See also comment
     * for the keyEntries field. 
     *
     * @return true if a multi-slot change was made and the complete IN memory
     * size must be updated.
     */
    public boolean setKey(
        int idx,
        byte[] key,
        byte[] data,
        boolean isInsertion) {

        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);

        /*
         * Only compute key prefix if prefixing is enabled and there's an
         * existing prefix.
         */
        if (databaseImpl.getKeyPrefixing() && keyPrefix != null) {

            int newPrefixLen = Key.getKeyPrefixLength(
                keyPrefix, keyPrefix.length, key);

            if (newPrefixLen < keyPrefix.length) {

                /*
                 * The new key doesn't share the current prefix, so recompute
                 * the prefix and readjust all the existing suffixes.
                 */
                byte[] newPrefix = (isInsertion ?
                                    Key.createKeyPrefix(keyPrefix, key) :
                                    computeKeyPrefix(idx));

                if (newPrefix != null) {
                    /* Take the new key into consideration for new prefix. */
                    newPrefix = Key.createKeyPrefix(newPrefix, key);
                }

                recalcSuffixes(newPrefix, key, data, idx);
                return true;

            } else {

                INKeyRep.Type prevRepType = entryKeys.getType();

                byte[] suffix = computeKeySuffix(keyPrefix, key);
                entryKeys = entryKeys.set(idx, suffix, data, this);

                return prevRepType != entryKeys.getType();
            }

        } else if (keyPrefix != null) {

            /*
             * Key prefixing has been turned off on this database, but there
             * are existing prefixes. Remove prefixes for this IN.
             */
            recalcSuffixes(null, key, data, idx);
            return true;

        } else {
            INKeyRep.Type oldRepType = entryKeys.getType();
            entryKeys = entryKeys.set(idx, key, data, this);
            return oldRepType != entryKeys.getType();
        }
    }

    /*
     * Given 2 byte arrays, "prefix" and "key", where "prefix" is or stores
     * a prefix of "key", allocate and return another byte array that stores
     * the suffix of "key" w.r.t. "prefix".
     */
    private static byte[] computeKeySuffix(byte[] prefix, byte[] key) {

        int prefixLen = (prefix == null ? 0 : prefix.length);

        if (prefixLen == 0) {
            return key;
        }

        int suffixLen = key.length - prefixLen;
        byte[] ret = new byte[suffixLen];
        System.arraycopy(key, prefixLen, ret, 0, suffixLen);
        return ret;
    }

    /*
     * Iterate over all keys in this IN and recalculate their suffixes based on
     * newPrefix.  If keyVal and idx are supplied, it means that entry[idx] is
     * about to be changed to keyVal so use that instead of
     * entryKeys.get(idx) when computing the new suffixes. If idx is < 0,
     * and keyVal is null, then recalculate suffixes for all entries in this.
     */
    private void recalcSuffixes(
        byte[] newPrefix,
        byte[] key,
        byte[] data,
        int idx) {

        for (int i = 0; i < nEntries; i++) {

            byte[] curKey = (i == idx ? key : getKey(i));

            byte[] curData = null;

            if (i == idx) {
                curData = data;
            } else if (haveEmbeddedData(i)) {
                curData = entryKeys.getData(i);
            }

            byte[] suffix = computeKeySuffix(newPrefix, curKey);

            entryKeys = entryKeys.set(i, suffix, curData, this);
        }

        setKeyPrefix(newPrefix);
    }

    /**
     * Forces computation of the key prefix, without requiring a split.
     * Is public for use by DbCacheSize.
     */
    public final void recalcKeyPrefix() {

        assert(!isBINDelta());

        recalcSuffixes(computeKeyPrefix(-1), null, null, -1);
    }

    /*
     * Computes a key prefix based on all the keys in 'this'.  Return null if
     * the IN is empty or prefixing is not enabled or there is no common
     * prefix for the keys.
     */
    private byte[] computeKeyPrefix(int excludeIdx) {

        if (!databaseImpl.getKeyPrefixing() || nEntries <= 1) {
            return null;
        }

        int firstIdx = (excludeIdx == 0) ? 1 : 0;
        byte[] curPrefixKey = getKey(firstIdx);
        int prefixLen = curPrefixKey.length;

        /*
         * Only need to look at first and last entries when keys are ordered
         * byte-by-byte.  But when there is a comparator, keys are not
         * necessarily ordered byte-by-byte.  [#21328]
         */
        boolean byteOrdered;
        if (true) {
            /* Disable optimization for now.  Needs testing. */
            byteOrdered = false;
        } else {
            byteOrdered = (databaseImpl.getKeyComparator() == null);
        }

        if (byteOrdered) {
            int lastIdx = nEntries - 1;
            if (lastIdx == excludeIdx) {
                lastIdx -= 1;
            }
            if (lastIdx > firstIdx) {
                byte[] lastKey = getKey(lastIdx);
                int newPrefixLen = Key.getKeyPrefixLength(
                    curPrefixKey, prefixLen, lastKey);

                if (newPrefixLen < prefixLen) {
                    curPrefixKey = lastKey;
                    prefixLen = newPrefixLen;
                }
            }
        } else {
            for (int i = firstIdx + 1; i < nEntries; i++) {

                if (i == excludeIdx) {
                    continue;
                }

                byte[] curKey = getKey(i);

                int newPrefixLen = Key.getKeyPrefixLength(
                    curPrefixKey, prefixLen, curKey);

                if (newPrefixLen < prefixLen) {
                    curPrefixKey = curKey;
                    prefixLen = newPrefixLen;
                }
            }
        }

        byte[] ret = new byte[prefixLen];
        System.arraycopy(curPrefixKey, 0, ret, 0, prefixLen);

        return ret;
    }

    /*
     * For debugging.
     */
    final boolean verifyKeyPrefix() {

        byte[] computedKeyPrefix = computeKeyPrefix(-1);
        if (keyPrefix == null) {
            return computedKeyPrefix == null;
        }

        if (computedKeyPrefix == null ||
            computedKeyPrefix.length < keyPrefix.length) {
            System.out.println("VerifyKeyPrefix failed");
            System.out.println(dumpString(0, false));
            return false;
        }
        for (int i = 0; i < keyPrefix.length; i++) {
            if (keyPrefix[i] != computedKeyPrefix[i]) {
                System.out.println("VerifyKeyPrefix failed");
                System.out.println(dumpString(0, false));
                return false;
            }
        }
        return true;
    }

    /**
     * Returns whether the given key is greater than or equal to the first key
     * in the IN and less than or equal to the last key in the IN.  This method
     * is used to determine whether a key to be inserted belongs in this IN,
     * without doing a tree search.  If false is returned it is still possible
     * that the key belongs in this IN, but a tree search must be performed to
     * find out.
     */
    public final boolean isKeyInBounds(byte[] key) {

        assert(!isBINDelta());

        if (nEntries < 2) {
            return false;
        }

        Comparator comparator = getKeyComparator();
        int cmp;

        /* Compare key given to my first key. */
        cmp = entryKeys.compareKeys(
            key, keyPrefix, 0, haveEmbeddedData(0), comparator);

        if (cmp < 0) {
            return false;
        }

        /* Compare key given to my last key. */
        int idx =  nEntries - 1;
        cmp = entryKeys.compareKeys(
            key, keyPrefix, idx, haveEmbeddedData(idx), comparator);

        return cmp <= 0;
    }

    /**
     * Return the idx'th LSN for this entry.
     *
     * @return the idx'th LSN for this entry.
     */
    public final long getLsn(int idx) {

        if (entryLsnLongArray == null) {
            int offset = idx << 2;
            int fileOffset = getFileOffset(offset);
            if (fileOffset == -1) {
                return DbLsn.NULL_LSN;
            } else {
                return DbLsn.makeLsn((baseFileNumber +
                                      getFileNumberOffset(offset)),
                                     fileOffset);
            }
        } else {
            return entryLsnLongArray[idx];
        }
    }

    /**
     * Set the LSN of the idx'th slot, mark the slot dirty, and update
     * memory consuption. Throw exception if the update is not legitimate.
     */
    public void setLsn(int idx, long lsn) {
        setLsn(idx, lsn, true);
    }

    /**
     * Set the LSN of the idx'th slot, mark the slot dirty, and update
     * memory consuption. If "check" is true, throw exception if the
     * update is not legitimate.
     */
    private void setLsn(int idx, long lsn, boolean check) {

        if (!check || shouldUpdateLsn(getLsn(idx), lsn)) {

            int oldSize = computeLsnOverhead();

            /* setLsnInternal can mutate to an array of longs. */
            setLsnInternal(idx, lsn);

            updateMemorySize(computeLsnOverhead() - oldSize);
            entryStates[idx] |= EntryStates.DIRTY_BIT;
            setDirty(true);
        }
    }

    /*
     * Set the LSN of the idx'th slot. If the current storage for LSNs is the
     * compact one, mutate it to the the non-compact, if necessary.
     */
    final void setLsnInternal(int idx, long value) {

        /* Will implement this in the future. Note, don't adjust if mutating.*/
        //maybeAdjustCapacity(offset);
        if (entryLsnLongArray != null) {
            entryLsnLongArray[idx] = value;
            return;
        }

        int offset = idx << 2;

        if (value == DbLsn.NULL_LSN) {
            setFileNumberOffset(offset, (byte) 0);
            setFileOffset(offset, -1);
            return;
        }

        long thisFileNumber = DbLsn.getFileNumber(value);

        if (baseFileNumber == -1) {
            /* First entry. */
            baseFileNumber = thisFileNumber;
            setFileNumberOffset(offset, (byte) 0);

        } else {

            if (thisFileNumber < baseFileNumber) {
                if (!adjustFileNumbers(thisFileNumber)) {
                    mutateToLongArray(idx, value);
                    return;
                }
                baseFileNumber = thisFileNumber;
            }

            long fileNumberDifference = thisFileNumber - baseFileNumber;
            if (fileNumberDifference > Byte.MAX_VALUE) {
                mutateToLongArray(idx, value);
                return;
            }

            setFileNumberOffset(
                offset, (byte) (thisFileNumber - baseFileNumber));
            //assert getFileNumberOffset(offset) >= 0;
        }

        int fileOffset = (int) DbLsn.getFileOffset(value);
        if (fileOffset > MAX_FILE_OFFSET) {
            mutateToLongArray(idx, value);
            return;
        }

        setFileOffset(offset, fileOffset);
        //assert getLsn(offset) == value;
    }

    private boolean adjustFileNumbers(long newBaseFileNumber) {

        long oldBaseFileNumber = baseFileNumber;
        for (int i = 0;
             i < entryLsnByteArray.length;
             i += BYTES_PER_LSN_ENTRY) {
            if (getFileOffset(i) == -1) {
                continue;
            }

            long curEntryFileNumber =
                oldBaseFileNumber + getFileNumberOffset(i);
            long newCurEntryFileNumberOffset =
                (curEntryFileNumber - newBaseFileNumber);

            if (newCurEntryFileNumberOffset > Byte.MAX_VALUE) {
                long undoOffset = oldBaseFileNumber - newBaseFileNumber;
                for (int j = i - BYTES_PER_LSN_ENTRY;
                     j >= 0;
                     j -= BYTES_PER_LSN_ENTRY) {
                    if (getFileOffset(j) == -1) {
                        continue;
                    }
                    setFileNumberOffset
                        (j, (byte) (getFileNumberOffset(j) - undoOffset));
                    //assert getFileNumberOffset(j) >= 0;
                }
                return false;
            }
            setFileNumberOffset(i, (byte) newCurEntryFileNumberOffset);

            //assert getFileNumberOffset(i) >= 0;
        }
        return true;
    }

    private void setFileNumberOffset(int offset, byte fileNumberOffset) {
        entryLsnByteArray[offset] = fileNumberOffset;
    }

    private byte getFileNumberOffset(int offset) {
        return entryLsnByteArray[offset];
    }

    private void setFileOffset(int offset, int fileOffset) {
        put3ByteInt(offset + 1, fileOffset);
    }

    private int getFileOffset(int offset) {
        return get3ByteInt(offset + 1);
    }

    private void put3ByteInt(int offset, int value) {
        entryLsnByteArray[offset++] = (byte) value;
        entryLsnByteArray[offset++] = (byte) (value >>> 8);
        entryLsnByteArray[offset]   = (byte) (value >>> 16);
    }

    private int get3ByteInt(int offset) {
        int ret = (entryLsnByteArray[offset++] & 0xFF);
        ret += (entryLsnByteArray[offset++] & 0xFF) << 8;
        ret += (entryLsnByteArray[offset]   & 0xFF) << 16;
        if (ret == THREE_BYTE_NEGATIVE_ONE) {
            ret = -1;
        }

        return ret;
    }

    private void mutateToLongArray(int idx, long value) {
        int nElts = entryLsnByteArray.length >> 2;
        long[] newArr = new long[nElts];
        for (int i = 0; i < nElts; i++) {
            newArr[i] = getLsn(i);
        }
        newArr[idx] = value;
        entryLsnLongArray = newArr;
        entryLsnByteArray = null;
    }

    /**
     * For a deferred write database, ensure that information is not lost when
     * a new LSN is assigned.  Also ensures that a transient LSN is not
     * accidentally assigned to a persistent entry.
     *
     * Because this method uses strict checking, prepareForSlotReuse must
     * sometimes be called when a new logical entry is being placed in a slot,
     * e.g., during an IN split or an LN slot reuse.
     *
     * The following transition is a NOOP and the LSN is not set:
     *   Any LSN to same value.
     * The following transitions are allowed and cause the LSN to be set:
     *   Null LSN to transient LSN
     *   Null LSN to persistent LSN
     *   Transient LSN to persistent LSN
     *   Persistent LSN to new persistent LSN
     * The following transitions should not occur and throw an exception:
     *   Transient LSN to null LSN
     *   Transient LSN to new transient LSN
     *   Persistent LSN to null LSN
     *   Persistent LSN to transient LSN
     *
     * The above imply that a transient or null LSN can overwrite only a null
     * LSN.
     */
    private final boolean shouldUpdateLsn(long oldLsn, long newLsn) {

        /* Save a little computation in packing/updating an unchanged LSN. */
        if (oldLsn == newLsn) {
            return false;
        }
        /* The rules for a new null LSN can be broken in a read-only env. */
        if (newLsn == DbLsn.NULL_LSN && getEnv().isReadOnly()) {
            return true;
        }
        /* Enforce LSN update rules.  Assume lsn != oldLsn. */
        if (databaseImpl.isDeferredWriteMode()) {
            if (oldLsn != DbLsn.NULL_LSN && DbLsn.isTransientOrNull(newLsn)) {
                throw unexpectedState(
                    "DeferredWrite LSN update not allowed" +
                    " oldLsn = " + DbLsn.getNoFormatString(oldLsn) +
                    " newLsn = " + DbLsn.getNoFormatString(newLsn));
            }
        } else {
            if (DbLsn.isTransientOrNull(newLsn)) {
                throw unexpectedState(
                    "LSN update not allowed" +
                    " oldLsn = " + DbLsn.getNoFormatString(oldLsn) +
                    " newLsn = " + DbLsn.getNoFormatString(newLsn));
            }
        }
        return true;
    }

    /* For unit tests. */
    final long[] getEntryLsnLongArray() {
        return entryLsnLongArray;
    }

    /* For unit tests. */
    final byte[] getEntryLsnByteArray() {
        return entryLsnByteArray;
    }

    /* For unit tests. */
    final void initEntryLsn(int capacity) {
        entryLsnLongArray = null;
        entryLsnByteArray = new byte[capacity << 2];
        baseFileNumber = -1;
    }

    /* Will implement this in the future. Note, don't adjust if mutating.*/
    /***
      private void maybeAdjustCapacity(int offset) {
      if (entryLsnLongArray == null) {
      int bytesNeeded = offset + BYTES_PER_LSN_ENTRY;
      int currentBytes = entryLsnByteArray.length;
      if (currentBytes < bytesNeeded) {
      int newBytes = bytesNeeded +
      (GROWTH_INCREMENT * BYTES_PER_LSN_ENTRY);
      byte[] newArr = new byte[newBytes];
      System.arraycopy(entryLsnByteArray, 0, newArr, 0,
      currentBytes);
      entryLsnByteArray = newArr;
      for (int i = currentBytes;
      i < newBytes;
      i += BYTES_PER_LSN_ENTRY) {
      setFileNumberOffset(i, (byte) 0);
      setFileOffset(i, -1);
      }
      }
      } else {
      int currentEntries = entryLsnLongArray.length;
      int idx = offset >> 2;
      if (currentEntries < idx + 1) {
      int newEntries = idx + GROWTH_INCREMENT;
      long[] newArr = new long[newEntries];
      System.arraycopy(entryLsnLongArray, 0, newArr, 0,
      currentEntries);
      entryLsnLongArray = newArr;
      for (int i = currentEntries; i < newEntries; i++) {
      entryLsnLongArray[i] = DbLsn.NULL_LSN;
      }
      }
      }
      }
     ***/

    /**
     * The last logged size is not stored for UINs.
     */
    boolean isLastLoggedSizeStored(int idx) {
        return false;
    }

    boolean mayHaveLastLoggedSizeStored() {
        return false;
    }

    /**
     * The last logged size is not stored for UINs.
     */
    public void setLastLoggedSize(int idx, int lastLoggedSize) {
    }

    /**
     * The last logged size is not stored for UINs.
     */
    public void clearLastLoggedSize(int idx) {
    }

    /**
     * The last logged size is not stored for UINs.
     */
    void setLastLoggedSizeUnconditional(int idx, int lastLoggedSize) {
    }

    /**
     * The last logged size is not stored for UINs.
     */
    public int getLastLoggedSize(int idx) {
        return 0;
    }

    public void setOffHeapBINId(int idx,
                                int val,
                                boolean pri2,
                                boolean dirty) {

        assert getNormalizedLevel() == 2;
        assert val >= 0;

        setOffHeapBINPri2(idx, pri2);
        setOffHeapBINDirty(idx, dirty);

        final long newVal = val + 1;
        final long oldVal = offHeapBINIds.get(idx);

        if (oldVal == newVal) {
            return;
        }

        assert oldVal == 0;

        offHeapBINIds = offHeapBINIds.set(idx, newVal, this);
    }

    public void clearOffHeapBINId(int idx) {

        assert getNormalizedLevel() == 2;

        setOffHeapBINPri2(idx, false);
        setOffHeapBINDirty(idx, false);

        final long oldVal = offHeapBINIds.get(idx);

        if (oldVal == 0) {
            return;
        }

        offHeapBINIds = offHeapBINIds.set(idx, 0, this);

        if (getInListResident() &&
            getNormalizedLevel() == 2 &&
            offHeapBINIds.isEmpty()) {

            getEvictor().moveToPri1LRU(this);
        }
    }

    public int getOffHeapBINId(int idx) {
        return ((int) offHeapBINIds.get(idx)) - 1;
    }

    public boolean hasOffHeapBINIds() {
        return !offHeapBINIds.isEmpty();
    }

    public long getOffHeapBINIdsMemorySize() {
        return offHeapBINIds.getMemorySize();
    }

    private void setOffHeapBINDirty(int idx, boolean val) {
        if (val) {
            entryStates[idx] |= EntryStates.OFFHEAP_DIRTY_BIT;
        } else {
            entryStates[idx] &= EntryStates.CLEAR_OFFHEAP_DIRTY_BIT;
        }
    }

    public boolean isOffHeapBINDirty(int idx) {
        return (entryStates[idx] & EntryStates.OFFHEAP_DIRTY_BIT) != 0;
    }

    private void setOffHeapBINPri2(int idx, boolean val) {
        if (val) {
            entryStates[idx] |= EntryStates.OFFHEAP_PRI2_BIT;
        } else {
            entryStates[idx] &= EntryStates.CLEAR_OFFHEAP_PRI2_BIT;
        }
    }

    public boolean isOffHeapBINPri2(int idx) {
        return (entryStates[idx] & EntryStates.OFFHEAP_PRI2_BIT) != 0;
    }

    public final INTargetRep getTargets() {
        return entryTargets;
    }

    /**
     * Sets the idx'th target. No need to make dirty, that state only applies
     * to key and LSN.
     *
     * 

WARNING: This method does not update the memory budget. The caller * must update the budget.

*/ void setTarget(int idx, Node target) { assert isLatchExclusiveOwner() : "Not latched for write " + getClass().getName() + " id=" + getNodeId(); final Node curChild = entryTargets.get(idx); entryTargets = entryTargets.set(idx, target, this); if (target != null && target.isIN()) { ((IN) target).setParent(this); } if (isUpperIN()) { if (target == null) { /* * If this UIN has just lost its last cached child, set its * hasCachedChildren flag to false and put it back to the * LRU list. */ if (curChild != null && hasCachedChildrenFlag() && !hasCachedChildren()) { setHasCachedChildrenFlag(false); if (!isDIN()) { if (traceLRU) { LoggerUtils.envLogMsg( traceLevel, getEnv(), Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + getEnv().getName() + " setTarget(): " + " Adding UIN " + getNodeId() + " to LRU after detaching chld " + ((IN) curChild).getNodeId()); } getEvictor().addBack(this); } } } else { if (curChild == null && !hasCachedChildrenFlag()) { setHasCachedChildrenFlag(true); if (traceLRU) { LoggerUtils.envLogMsg( traceLevel, getEnv(), Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + getEnv().getName() + " setTarget(): " + " Removing UIN " + getNodeId() + " after attaching child " + ((IN) target).getNodeId()); } getEvictor().remove(this); } } } } /** * Return the idx'th target. * * This method does not load children from off-heap cache, so it always * returns null when then child is not in main cache. Note that when * children are INs (this is not a BIN), when this method returns null it * is does not imply that the child is non-dirty, because dirty BINs are * stored off-heap. To fetch the current version from off-heap cache in * that case, call loadIN instead. */ public final Node getTarget(int idx) { return entryTargets.get(idx); } /** * Returns the idx-th child of "this" upper IN, fetching the child from * the log and attaching it to its parent if it is not already attached. * This method is used during tree searches. * * On entry, the parent must be latched already. * * If the child must be fetched from the log, the parent is unlatched. * After the disk read is done, the parent is relatched. However, due to * splits, it may not be the correct parent anymore. If so, the method * will return null, and the caller is expected to restart the tree search. * * On return, the parent will be latched, unless null is returned or an * exception is thrown. * * The "searchKey" param is the key that the caller is looking for. It is * used by this method in determining if, after a disk read, "this" is * still the correct parent for the child. "searchKey" may be null if the * caller is doing a LEFT or RIGHT search. */ final IN fetchINWithNoLatch(int idx, byte [] searchKey) { return fetchINWithNoLatch(idx, searchKey, null); } /** * This variant of fetchIN() takes a SearchResult as a param, instead of * an idx (it is used by Tree.getParentINForChildIN()). The ordinal number * of the child to fetch is specified by result.index. result.index will * be adjusted by this method if, after a disk read, the ordinal number * of the child changes due to insertions, deletions or splits in the * parent. */ final IN fetchINWithNoLatch(SearchResult result, byte [] searchKey) { return fetchINWithNoLatch(result.index, searchKey, result); } /** * Provides the implementation of the above two methods. */ private IN fetchINWithNoLatch( int idx, byte [] searchKey, SearchResult result) { assert(isUpperIN()); assert(isLatchOwner()); final EnvironmentImpl envImpl = getEnv(); final OffHeapCache ohCache = envImpl.getOffHeapCache(); boolean isMiss = false; boolean success = false; IN child = (IN)entryTargets.get(idx); if (child == null) { final long lsn = getLsn(idx); if (lsn == DbLsn.NULL_LSN) { throw unexpectedState(makeFetchErrorMsg( "NULL_LSN in upper IN", lsn, idx)); } /* * For safety we must get a copy of the BIN off-heap bytes while * latched, but we can materialize the bytes while unlatched * (further below) to reduce the work done while latched. */ byte[] ohBytes = null; if (getNormalizedLevel() == 2) { ohBytes = ohCache.getBINBytes(this, idx); } pin(); try { releaseLatch(); TestHookExecute.doHookIfSet(fetchINHook); if (ohBytes != null) { child = ohCache.materializeBIN(envImpl, ohBytes); } else { final WholeEntry wholeEntry = envImpl.getLogManager(). getLogEntryAllowInvisibleAtRecovery( lsn, getLastLoggedSize(idx)); final LogEntry logEntry = wholeEntry.getEntry(); child = (IN) logEntry.getResolvedItem(databaseImpl); isMiss = true; } latch(CacheMode.UNCHANGED); /* * The following if statement relies on splits being logged * immediately, or more precisely, the split node and its * new sibling being logged immediately, while both siblings * and their parent are latched exclusively. The reason for * this is as follows: * * Let K be the search key. If we are doing a left-deep or * right-deep search, K is -INF or +INF respectively. * * Let P be the parent IN (i.e., "this") and S be the slot at * the idx position before P was unlatched above. Here, we * view slots as independent objects, not identified by their * position in an IN but by some unique (imaginary) and * immutable id assigned to the slot when it is first inserted * into an IN. * * Before unlatching P, S was the correct slot to follow down * the tree looking for K. After P is unlatched and then * relatched, let S' be the slot at the idx position, if P * still has an idx position. We consider the following 2 cases: * * 1. S' exists and S'.LSN == S.LSN. Then S and S' are the same * (logical) slot (because two different slots can never cover * overlapping ranges of keys, and as a result, can never point * to the same LSN). Then, S is still the correct slot to take * down the tree, unless the range of keys covered by S has * shrunk while P was unlatched. But the only way for S's key * range to shrink is for its child IN to split, which could * not have happened because if it did, the before and after * LSNs of S would be different, given that splits are logged * immediately. We conclude that the set of keys covered by * S after P is relatched is the same or a superset of the keys * covered by S before P was unlatched, and thus S (at the idx * position) is still the correct slot to follow. * * 2. There is no idx position in P or S'.LSN != S.LSN. In * this case we cannot be sure if S' (if it exists) is the * the correct slot to follow. So, we (re)search for K in P * to check if P is still the correct parent and find the * correct slot to follow. If this search lands on the 1st or * last slot in P, we may return null because using the key * info contained in P only, we do not know the full range of * keys covered by those two slots. If null is returned, the * caller is expected to restart the tree search from the root. * * Notice that the if conditions are necessary before calling * findEntry(). Without them, we could get into an infinite * loop of search re-tries in the scenario where nothing changes * in the tree and findEntry always lands on the 1st or last * slot in P. The conditions guarantee that we may restart the * tree search only if something changes with S while P is * unlatched (S moves to a different position or a different * IN or it points to a different LSN). * * Notice also that if P does not split while it is unlatched, * the range of keys covered by P does not change either. This * implies that the correct slot to follow *must* be inside P, * and as a result, the 1st and last slots in P can be trusted. * Unfortunately, however, we have no way to detecting reliably * whether P splits or not. * * Special care for DBs in DW mode: * * For DBs in DW mode, special care must be taken because * splits are not immediately logged. So, for DW DBs, to avoid * a call to findEntry() we require that not only S'.LSN == * S.LSN, but also the the child is not cached. These 2 * conditions together guarantee that the child did not split * while P was unlatched, because if the child did split, it * was fetched and cached first, so after P is relatched, * either the child would be still cached, or if it was evicted * after the split, S.LSN would have changed. */ if (idx >= nEntries || getLsn(idx) != lsn || (databaseImpl.isDeferredWriteMode() && entryTargets.get(idx) != null)) { if (searchKey == null) { return null; } idx = findEntry(searchKey, false, false); if ((idx == 0 || idx == nEntries - 1) && !isKeyInBounds(searchKey)) { return null; } } if (result != null) { result.index = idx; } /* * "this" is still the correct parent and "idx" points to the * correct slot to follow for the search down the tree. But * what we fetched from the log may be out-of-date by now * (because it was fetched and then updated by other threads) * or it may not be the correct child anymore ("idx" was * changed by the findEntry() call above). We check 5 cases: * * (a) There is already a cached child at the "idx" position. * In this case, we return whatever is there because it has to * be the most recent version of the appropriate child node. * This is true even when a split or reverse split occurred. * The check for isKeyInBounds above is critical in that case. * * (b) There is no cached child at the "idx" slot, but the slot * LSN is not the same as the LSN we read from the log. This is * the case if "idx" was changed by findEntry() or other * threads fetched the same child as this thread, updated it, * and then evicted it. The child we fetched is obsolete and * should not be attached. For simplicity, we just return null * in this (quite rare) case. * * (c) We loaded the BIN from off-heap cache and, similar to * case (b), another thread has loaded the same child, modified * it, and then evicted it, placing it off-heap again. It's LSN * did not change because it wasn't logged. We determine * whether the off-heap BIN has changed, and if so then * null is returned. This is also rare. * * (d) The child was loaded from disk (not off-heap cache) but * an off-heap cache entry for this BIN has appeared. Another * thread loaded the BIN from disk and then it was moved * off-heap, possibly after it was modified. We should use the * off-heap version and for simplicity we return null. This is * also rare. * * (e) Otherwise, we attach the fetched/loaded child to the * parent. */ if (entryTargets.get(idx) != null) { child = (IN) entryTargets.get(idx); } else if (getLsn(idx) != lsn) { return null; } else if (ohBytes != null && ohCache.haveBINBytesChanged(this, idx, ohBytes)) { return null; } else if (ohBytes == null && getOffHeapBINId(idx) >= 0) { return null; } else { child.latchNoUpdateLRU(databaseImpl); if (ohBytes != null) { child.postLoadInit(this, idx); } else { child.postFetchInit(databaseImpl, lsn); } attachNode(idx, child, null); child.releaseLatch(); } success = true; } catch (FileNotFoundException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, makeFetchErrorMsg(null, lsn, idx), e); } catch (ErasedException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_CHECKSUM, "IN is erased unexpectedly, implied corruption. " + makeFetchErrorMsg(null, lsn, idx), e); } catch (EnvironmentFailureException e) { e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx)); throw e; } catch (RuntimeException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_INTEGRITY, makeFetchErrorMsg(e.toString(), lsn, idx), e); } finally { /* * Release the parent latch if null is being returned. In this * case, the parent was unlatched earlier during the disk read, * and as a result, the caller cannot make any assumptions * about the state of the parent. * * If we are returning or throwing out of this try block, the * parent may or may not be latched. So, only release the latch * if it is currently held. */ if (!success) { if (child != null) { child.incFetchStats(envImpl, isMiss); } releaseLatchIfOwner(); } unpin(); } } assert(hasCachedChildren() == hasCachedChildrenFlag()); child.incFetchStats(envImpl, isMiss); return child; } /** * Returns the idx-th child of "this" upper IN, fetching the child from * the log and attaching it to its parent if it is not already attached. * * On entry, the parent must be EX-latched already and it stays EX-latched * for the duration of this method and on return (even in case of * exceptions). * * @param idx The slot of the child to fetch. */ public IN fetchIN(int idx, CacheMode cacheMode) { assert(isUpperIN()); if (!isLatchExclusiveOwner()) { throw unexpectedState("EX-latch not held before fetch"); } final EnvironmentImpl envImpl = getEnv(); final OffHeapCache ohCache = envImpl.getOffHeapCache(); boolean isMiss = false; IN child = (IN) entryTargets.get(idx); if (child == null) { final long lsn = getLsn(idx); if (lsn == DbLsn.NULL_LSN) { throw unexpectedState( makeFetchErrorMsg("NULL_LSN in upper IN", lsn, idx)); } try { byte[] ohBytes = null; if (getNormalizedLevel() == 2) { ohBytes = ohCache.getBINBytes(this, idx); if (ohBytes != null) { child = ohCache.materializeBIN(envImpl, ohBytes); } } if (child == null) { final WholeEntry wholeEntry = envImpl.getLogManager(). getLogEntryAllowInvisibleAtRecovery( lsn, getLastLoggedSize(idx)); final LogEntry logEntry = wholeEntry.getEntry(); child = (IN) logEntry.getResolvedItem(databaseImpl); isMiss = true; } child.latchNoUpdateLRU(databaseImpl); if (ohBytes != null) { child.postLoadInit(this, idx); } else { child.postFetchInit(databaseImpl, lsn); } attachNode(idx, child, null); child.releaseLatch(); } catch (FileNotFoundException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, makeFetchErrorMsg(null, lsn, idx), e); } catch (ErasedException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_CHECKSUM, "IN is erased unexpectedly, implied corruption. " + makeFetchErrorMsg(null, lsn, idx), e); } catch (EnvironmentFailureException e) { e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx)); throw e; } catch (RuntimeException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_INTEGRITY, makeFetchErrorMsg(e.toString(), lsn, idx), e); } } assert(hasCachedChildren() == hasCachedChildrenFlag()); child.incFetchStats(envImpl, isMiss); return child; } /** * Returns the idx-th child of "this" upper IN, loading the child from * off-heap and attaching it to its parent if it is not already attached * and is cached off-heap. This method does not fetch from disk, and will * return null if the child is not in the main or off-heap cache. * * On entry, the parent must be EX-latched already and it stays EX-latched * for the duration of this method and on return (even in case of * exceptions). * * @param idx The slot of the child to fetch. * * @return null if the LN is not in the main or off-heap cache. */ public IN loadIN(int idx, CacheMode cacheMode) { assert(isUpperIN()); if (!isLatchExclusiveOwner()) { throw unexpectedState("EX-latch not held before load"); } IN child = (IN) entryTargets.get(idx); if (child != null) { return child; } if (getNormalizedLevel() != 2) { return null; } final EnvironmentImpl envImpl = getEnv(); final OffHeapCache ohCache = envImpl.getOffHeapCache(); final long lsn = getLsn(idx); try { final byte[] ohBytes = ohCache.getBINBytes(this, idx); if (ohBytes == null) { return null; } child = ohCache.materializeBIN(envImpl, ohBytes); child.latchNoUpdateLRU(databaseImpl); child.postLoadInit(this, idx); attachNode(idx, child, null); child.releaseLatch(); return child; } catch (RuntimeException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_INTEGRITY, makeFetchErrorMsg(e.toString(), lsn, idx), e); } } /** * Returns the target of the idx'th entry, fetching from disk if necessary. * * Null is returned in the following cases: * * 1. if the LSN is null and the KnownDeleted flag is set; or * 2. if the LSN's file has been cleaned and: * a. the PendingDeleted or KnownDeleted flag is set, or * b. the entry is "probably expired". * * Note that checking for PD/KD before calling this method is not * sufficient to ensure that null is not returned, because null is also * returned for expired records. * * When null is returned, the caller must treat the record as deleted. * * Note that null can only be returned for a slot that could contain an LN, * not other node types and not a DupCountLN since DupCountLNs are never * deleted or expired. * * An exclusive latch must be held on this BIN. * * @return the LN or null. */ public final LN fetchLN(int idx, CacheMode cacheMode) { return (LN) fetchLN(idx, cacheMode, false); } /* * This method may return either an LN or a DIN child of a BIN. It is meant * to be used from DupConvert only. */ public final Node fetchLNOrDIN(int idx, CacheMode cacheMode) { return fetchLN(idx, cacheMode, true); } /* * Underlying implementation of the above fetchLNXXX methods. */ private Node fetchLN(int idx, CacheMode cacheMode, boolean dupConvert) { assert(isBIN()); if (!isLatchExclusiveOwner()) { throw unexpectedState("EX-latch not held before fetch"); } if (isEntryKnownDeleted(idx)) { return null; } final BIN bin = (BIN) this; final EnvironmentImpl envImpl = getEnv(); final OffHeapCache ohCache = envImpl.getOffHeapCache(); boolean isMiss = false; Node child = entryTargets.get(idx); /* Fetch it from disk. */ if (child == null) { final long lsn = getLsn(idx); if (lsn == DbLsn.NULL_LSN) { throw unexpectedState(makeFetchErrorMsg( "NULL_LSN without KnownDeleted", lsn, idx)); } /* * Fetch of immediately obsolete LN not allowed. The only exception * is during conversion of an old-style dups DB. */ if (isEmbeddedLN(idx) || (databaseImpl.isLNImmediatelyObsolete() && !dupConvert)) { throw unexpectedState("May not fetch immediately obsolete LN"); } try { byte[] lnSlotKey = null; child = ohCache.loadLN(bin, idx, cacheMode); if (child == null) { final WholeEntry wholeEntry = envImpl.getLogManager(). getLogEntryAllowInvisibleAtRecovery( lsn, getLastLoggedSize(idx)); /* Last logged size is not present before log version 9. */ setLastLoggedSize( idx, wholeEntry.getHeader().getEntrySize()); final LogEntry logEntry = wholeEntry.getEntry(); if (logEntry instanceof LNLogEntry) { final LNLogEntry lnEntry = (LNLogEntry) wholeEntry.getEntry(); lnEntry.postFetchInit(databaseImpl); lnSlotKey = lnEntry.getKey(); if (cacheMode != CacheMode.EVICT_LN && cacheMode != CacheMode.EVICT_BIN && cacheMode != CacheMode.UNCHANGED && cacheMode != CacheMode.MAKE_COLD) { getEvictor().moveToPri1LRU(this); } } child = (Node) logEntry.getResolvedItem(databaseImpl); isMiss = true; } child.postFetchInit(databaseImpl, lsn); attachNode(idx, child, lnSlotKey); } catch (FileNotFoundException e) { /* * Cleaner got to the log file. Do not consider this an * integrity error if deleted, likely expired, EXTINCT or * MAYBE_EXTINCT. It is safe to ignore a deleted file for a * KD or PD entry because files with active txns will not be * cleaned. */ if (!bin.isDeleted(idx) && !bin.isProbablyExpired(idx) && envImpl.getExtinctionState( databaseImpl, bin.getKey(idx)) == NOT_EXTINCT) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND, makeFetchErrorMsg(null, lsn, idx), e); } return null; } catch (ErasedException e) { /* * Eraser got to the log file. Do not consider this an * integrity error if EXTINCT or MAYBE_EXTINCT. */ if (envImpl.getExtinctionState( databaseImpl, bin.getKey(idx)) == NOT_EXTINCT) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_CHECKSUM, "LN is erased unexpectedly, implied corruption. " + makeFetchErrorMsg(null, lsn, idx), e); } return null; } catch (EnvironmentFailureException e) { e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx)); throw e; } catch (RuntimeException e) { throw new EnvironmentFailureException( envImpl, EnvironmentFailureReason.LOG_INTEGRITY, makeFetchErrorMsg(e.toString(), lsn, idx), e); } } if (child.isLN()) { final LN ln = (LN) child; if (cacheMode != CacheMode.UNCHANGED && cacheMode != CacheMode.MAKE_COLD) { ln.setFetchedCold(false); } ohCache.freeRedundantLN(bin, idx, ln, cacheMode); } child.incFetchStats(envImpl, isMiss); return child; } /** * Return the idx'th LN target, enforcing rules defined by the cache modes * for the LN. This method should be called instead of getTarget when a * cache mode applies to user operations such as reads and updates. */ public final LN getLN(int idx, CacheMode cacheMode) { assert isBIN(); final LN ln = (LN) entryTargets.get(idx); if (ln == null) { return null; } if (cacheMode != CacheMode.UNCHANGED && cacheMode != CacheMode.MAKE_COLD) { ln.setFetchedCold(false); } final OffHeapCache ohCache = getOffHeapCache(); if (ohCache.isEnabled()) { ohCache.freeRedundantLN((BIN) this, idx, ln, cacheMode); } return ln; } /** * Initialize a node that has been read in from the log. */ @Override public final void postFetchInit(DatabaseImpl db, long fetchedLsn) { assert isLatchExclusiveOwner(); commonInit(db); setLastLoggedLsn(fetchedLsn); convertDupKeys(); // must be after initMemorySize addToMainCache(); if (isBIN()) { setFetchedCold(true); } /* See Database.mutateDeferredWriteBINDeltas. */ if (db.isDeferredWriteMode()) { mutateToFullBIN(false); } } /** * Initialize a BIN loaded from off-heap cache. * * Does not call setLastLoggedLsn because materialization of the off-heap * BIN initializes all fields including the last logged/delta LSNs. */ private void postLoadInit(IN parent, int idx) { assert isLatchExclusiveOwner(); commonInit(parent.databaseImpl); addToMainCache(); if (isBIN()) { setFetchedCold(true); setFetchedColdOffHeap(true); } getEnv().getOffHeapCache().postBINLoad(parent, idx, (BIN) this); } /** * Initialize a node read in during recovery. */ public final void postRecoveryInit(DatabaseImpl db, long lastLoggedLsn) { commonInit(db); setLastLoggedLsn(lastLoggedLsn); } /** * Common actions of postFetchInit, postLoadInit and postRecoveryInit. */ private void commonInit(DatabaseImpl db) { setDatabase(db); initMemorySize(); // compute before adding to IN list } /** * Add to INList and perform eviction related initialization. */ private void addToMainCache() { getEnv().getInMemoryINs().add(this); if (!isDIN() && !isDBIN()) { if (isUpperIN() && traceLRU) { LoggerUtils.envLogMsg( traceLevel, getEnv(), Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + getEnv().getName() + " postFetchInit(): " + " Adding UIN to LRU: " + getNodeId()); } getEvictor().addBack(this); } /* Compress full BINs after fetching or loading. */ if (!(this instanceof DBIN || this instanceof DIN)) { getEnv().lazyCompress(this); } } /** * Needed only during duplicates conversion, not during normal operation. * The needDupKeyConversion field will only be true when first upgrading * from JE 4.1. After the first time an IN is converted, it will be * written out in a later file format, so the needDupKeyConversion field * will be false and this method will do nothing. See * DupConvert.convertInKeys. */ private void convertDupKeys() { /* Do not convert more than once. */ if (!needDupKeyConversion) { return; } needDupKeyConversion = false; DupConvert.convertInKeys(databaseImpl, this); } /** * @see Node#incFetchStats */ @Override final void incFetchStats(EnvironmentImpl envImpl, boolean isMiss) { Evictor e = envImpl.getEvictor(); if (isBIN()) { e.incBINFetchStats(isMiss, isBINDelta(false/*checLatched*/)); } else { e.incUINFetchStats(isMiss); } } public String makeFetchErrorMsg( final String msg, final long lsn, final int idx) { final byte state = idx >= 0 ? entryStates[idx] : 0; final long expirationTime; if (isBIN() && idx >= 0) { final BIN bin = (BIN) this; expirationTime = TTL.expirationToSystemTime( bin.getExpiration(idx), isExpirationInHours()); } else { expirationTime = 0; } return makeFetchErrorMsg(msg, this, lsn, state, expirationTime); } /** * @param in parent IN. Is null when root is fetched. */ static String makeFetchErrorMsg( String msg, IN in, long lsn, byte state, long expirationTime) { /* * Bolster the exception with the LSN, which is critical for * debugging. Otherwise, the exception propagates upward and loses the * problem LSN. */ StringBuilder sb = new StringBuilder(); if (in == null) { sb.append("fetchRoot of "); } else if (in.isBIN()) { sb.append("fetchLN of "); } else { sb.append("fetchIN of "); } if (lsn == DbLsn.NULL_LSN) { sb.append("null lsn"); } else { sb.append(DbLsn.getNoFormatString(lsn)); } if (in != null) { sb.append(" parent IN=").append(in.getNodeId()); sb.append(" IN class=").append(in.getClass().getName()); sb.append(" lastFullLsn="); sb.append(DbLsn.getNoFormatString(in.getLastFullLsn())); sb.append(" lastLoggedLsn="); sb.append(DbLsn.getNoFormatString(in.getLastLoggedLsn())); sb.append(" parent.getDirty()=").append(in.getDirty()); } sb.append(" state=").append(state); sb.append(" expires="); if (expirationTime != 0) { sb.append(TTL.formatExpirationTime(expirationTime)); } else { sb.append("never"); } if (msg != null) { sb.append(" ").append(msg); } return sb.toString(); } public final int findEntry( byte[] key, boolean indicateIfDuplicate, boolean exact) { return findEntry(key, indicateIfDuplicate, exact, null /*Comparator*/); } /** * Find the entry in this IN for which key is LTE the key arg. * * Currently uses a binary search, but eventually, this may use binary or * linear search depending on key size, number of entries, etc. * * This method guarantees that the key parameter, which is the user's key * parameter in user-initiated search operations, is always the left hand * parameter to the Comparator.compare method. This allows a comparator * to perform specialized searches, when passed down from upper layers. * * This is public so that DbCursorTest can access it. * * Note that the 0'th entry's key is treated specially in an IN. It always * compares lower than any other key. * * @param key - the key to search for. * @param indicateIfDuplicate - true if EXACT_MATCH should * be or'd onto the return value if key is already present in this node. * @param exact - true if an exact match must be found. * @return offset for the entry that has a key LTE the arg. 0 if key * is less than the 1st entry. -1 if exact is true and no exact match * is found. If indicateIfDuplicate is true and an exact match was found * then EXACT_MATCH is or'd onto the return value. */ public final int findEntry( byte[] key, boolean indicateIfDuplicate, boolean exact, Comparator comparator) { assert idKeyIsSlotKey(); int high = nEntries - 1; int low = 0; int middle = 0; if (comparator == null) { comparator = databaseImpl.getKeyComparator(); } /* * Special Treatment of 0th Entry * ------------------------------ * IN's are special in that they have a entry[0] where the key is a * virtual key in that it always compares lower than any other key. * BIN's don't treat key[0] specially. But if the caller asked for an * exact match or to indicate duplicates, then use the key[0] and * forget about the special entry zero comparison. * * We always use inexact searching to get down to the BIN, and then * call findEntry separately on the BIN if necessary. So the behavior * of findEntry is different for BINs and INs, because it's used in * different ways. * * Consider a tree where the lowest key is "b" and we want to insert * "a". If we did the comparison (with exact == false), we wouldn't * find the correct (i.e. the left) path down the tree. So the * virtual key ensures that "a" gets inserted down the left path. * * The insertion case is a good specific example. findBinForInsert * does inexact searching in the INs only, not the BIN. * * There's nothing special about the 0th key itself, only the use of * the 0th key in the comparison algorithm. */ boolean entryZeroSpecialCompare = isUpperIN() && !exact && !indicateIfDuplicate; assert nEntries >= 0; while (low <= high) { middle = (high + low) / 2; int s; if (middle == 0 && entryZeroSpecialCompare) { s = 1; } else { s = entryKeys.compareKeys( key, keyPrefix, middle, haveEmbeddedData(middle), comparator); } if (s < 0) { high = middle - 1; } else if (s > 0) { low = middle + 1; } else { int ret; if (indicateIfDuplicate) { ret = middle | EXACT_MATCH; } else { ret = middle; } if ((ret >= 0) && exact && isEntryKnownDeleted(ret & 0xffff)) { return -1; } else { return ret; } } } /* * No match found. Either return -1 if caller wanted exact matches * only, or return entry whose key is < search key. */ if (exact) { return -1; } else { return high; } } /** * Inserts a slot with the given key, lsn and child node into this IN, if * a slot with the same key does not exist already. The state of the new * slot is set to DIRTY. Assumes this node is already latched by the * caller. * * @return true if the entry was successfully inserted, false * if it was a duplicate. * * @throws EnvironmentFailureException if the node is full * (it should have been split earlier). */ public final boolean insertEntry( Node child, byte[] key, long childLsn) throws DatabaseException { assert(!isBINDelta()); int res = insertEntry1( child, key, null, childLsn, EntryStates.DIRTY_BIT, false); return (res & INSERT_SUCCESS) != 0; } /** * Inserts a slot with the given key, lsn and child node into this IN, if * a slot with the same key does not exist already. The state of the new * slot is set to DIRTY. Assumes this node is already latched by the * caller. * * @param data If the data portion of a record must be embedded in this * BIN, "data" stores the record's data. Null otherwise. See also comment * for the keyEntries field. * * @return either (1) the index of location in the IN where the entry was * inserted |'d with INSERT_SUCCESS, or (2) the index of the duplicate in * the IN if the entry was found to be a duplicate. * * @throws EnvironmentFailureException if the node is full (it should have * been split earlier). */ public final int insertEntry1( Node child, byte[] key, byte[] data, long childLsn, boolean blindInsertion) { return insertEntry1( child, key, data, childLsn, EntryStates.DIRTY_BIT, blindInsertion); } /** * Inserts a slot with the given key, lsn, state, and child node into this * IN, if a slot with the same key does not exist already. Assumes this * node is already latched by the caller. * * This returns a failure if there's a duplicate match. The caller must do * the processing to check if the entry is actually deleted and can be * overwritten. This is foisted upon the caller rather than handled in this * object because there may be some latch releasing/retaking in order to * check a child LN. * * @param data If the data portion of a record must be embedded in this * BIN, "data" stores the record's data. Null otherwise. See also comment * for the keyEntries field. * * @return either (1) the index of location in the IN where the entry was * inserted |'d with INSERT_SUCCESS, or (2) the index of the duplicate in * the IN if the entry was found to be a duplicate. * * @throws EnvironmentFailureException if the node is full (it should have * been split earlier). */ public final int insertEntry1( Node child, byte[] key, byte[] data, long childLsn, byte state, boolean blindInsertion) { /* * Search without requiring an exact match, but do let us know the * index of the match if there is one. */ int index = findEntry(key, true, false); if (index >= 0 && (index & EXACT_MATCH) != 0) { /* * There is an exact match. Don't insert; let the caller decide * what to do with this duplicate. */ return index & ~IN.EXACT_MATCH; } /* * There was no key match, but if this is a bin delta, there may be an * exact match in the full bin. Mutate to full bin and search again. * However, if we know for sure that the key does not exist in the full * BIN, then don't mutate, unless there is no space in the delta to do * the insertion. */ if (isBINDelta()) { BIN bin = (BIN)this; boolean doBlindInsertion = (nEntries < getMaxEntries()); if (doBlindInsertion && !blindInsertion && bin.mayHaveKeyInFullBin(key)) { doBlindInsertion = false; } if (!doBlindInsertion) { mutateToFullBIN(true /*leaveFreeSlot*/); index = findEntry(key, true, false); if (index >= 0 && (index & EXACT_MATCH) != 0) { return index & ~IN.EXACT_MATCH; } } else { getEvictor().incBinDeltaBlindOps(); if (traceDeltas) { LoggerUtils.envLogMsg( traceLevel, getEnv(), Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + getEnv().getName() + (blindInsertion ? " Blind insertion in BIN-delta " : " Blind put in BIN-delta ") + getNodeId() + " nEntries = " + nEntries + " max entries = " + getMaxEntries() + " full BIN entries = " + bin.getFullBinNEntries() + " full BIN max entries = " + bin.getFullBinMaxEntries()); } } } if (nEntries >= getMaxEntries()) { throw unexpectedState( getEnv(), "Node " + getNodeId() + " should have been split before calling insertEntry" + " is BIN-delta: " + isBINDelta() + " num entries: " + nEntries + " max entries: " + getMaxEntries()); } /* There was no key match, so insert to the right of this entry. */ index++; /* We found a spot for insert, shift entries as needed. */ if (index < nEntries) { int oldSize = computeLsnOverhead(); /* Adding elements to the LSN array can change the space used. */ shiftEntriesRight(index); updateMemorySize(computeLsnOverhead() - oldSize); } else { nEntries++; } if (isBINDelta()) { ((BIN)this).incFullBinNEntries(); } int oldSize = computeLsnOverhead(); if (data == null || databaseImpl.isDeferredWriteMode()) { setTarget(index, child); } setLsnInternal(index, childLsn); boolean multiSlotChange = insertKey(index, key, data); /* * Do this after calling insert key to overwrite whatever state changes * were done by the insertEntry() call. */ entryStates[index] = state; if (data != null) { setEmbeddedLN(index); if (data.length == 0) { setNoDataLN(index); } } adjustCursorsForInsert(index); updateMemorySize(oldSize, getEntryInMemorySize(index) + computeLsnOverhead()); if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } setDirty(true); assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag()); return (index | INSERT_SUCCESS); } /** * Removes the slot at index from this IN. Assumes this node is already * latched by the caller. * * @param index The index of the entry to delete from the IN. */ public void deleteEntry(int index) { deleteEntry(index, true /*makeDirty*/, true /*validate*/); } /** * Variant that allows specifying whether the IN is dirtied and whether * validation takes place. 'validate' should be false only in tests. * * See BIN.compress and INCompressor for a discussion about why slots can * be deleted without dirtying the BIN, and why the next delta is * prohibited when the slot is dirty. */ void deleteEntry(int index, boolean makeDirty, boolean validate) { assert !isBINDelta(); assert index >= 0 && index < nEntries; assert !validate || validateSubtreeBeforeDelete(index); if (makeDirty) { setDirty(true); } if (isDirty(index)) { setProhibitNextDelta(true); } Node child = getTarget(index); final OffHeapCache ohCache = getEnv().getOffHeapCache(); final int level = getNormalizedLevel(); if (level == 1) { ohCache.freeLN((BIN) this, index); } else if (level == 2) { ohCache.freeBIN((BIN) child, this, index); } if (child != null && child.isIN()) { IN childIN = (IN)child; getEnv().getInMemoryINs().remove(childIN); } updateMemorySize(getEntryInMemorySize(index), 0); int oldLSNArraySize = computeLsnOverhead(); /* * Do the actual deletion. Note: setTarget() must be called before * copyEntries() so that the hasCachedChildrenFlag will be properly * maintained. */ setTarget(index, null); copyEntries(index + 1, index, nEntries - index - 1); nEntries--; /* cleanup what used to be the last entry */ clearEntry(nEntries); /* setLsnInternal can mutate to an array of longs. */ updateMemorySize(oldLSNArraySize, computeLsnOverhead()); assert(isBIN() || hasCachedChildrenFlag() == hasCachedChildren()); /* * Note that we don't have to adjust cursors for delete, since * there should be nothing pointing at this record. */ traceDelete(Level.FINEST, index); } /** * WARNING: clearEntry() calls entryTargets.set() directly, instead of * setTarget(). As a result, the hasCachedChildren flag of the IN is not * updated here. The caller is responsible for updating this flag, if * needed. */ void clearEntry(int idx) { entryTargets = entryTargets.set(idx, null, this); entryKeys = entryKeys.set(idx, null, this); offHeapBINIds = offHeapBINIds.set(idx, 0, this); setLsnInternal(idx, DbLsn.NULL_LSN); entryStates[idx] = 0; } /** * This method is called after the idx'th child of this node gets logged, * and changes position as a result. * * @param newLSN The new on-disk position of the child. * * @param newVLSN The VLSN of the logrec at the new position. * For LN children only. * * @param newSize The size of the logrec at the new position. * For LN children only. */ public final void updateEntry( int idx, long newLSN, long newVLSN, int newSize) { setLsn(idx, newLSN); if (isBIN()) { if (isEmbeddedLN(idx)) { ((BIN)this).setCachedVLSN(idx, newVLSN); } else { setLastLoggedSize(idx, newSize); } } setDirty(true); } /** * This method is called only from BIN.applyDelta(). It applies the info * extracted from a delta slot to the corresponding slot in the full BIN. * * Unlike other update methods, the LSN may be NULL_LSN if the KD flag is * set. This allows applying a BIN-delta with a NULL_LSN and KD, for an * invisible log entry for example. * * No need to do memory counting in this method because the BIN is not * yet attached to the tree. */ final void applyDeltaSlot( int idx, Node node, long lsn, int lastLoggedSize, byte state, byte[] key, byte[] data) { assert(isBIN()); assert(!isBINDelta()); assert(lsn != DbLsn.NULL_LSN || (state & EntryStates.KNOWN_DELETED_BIT) != 0); assert(node == null || data == null); assert(!getInListResident()); ((BIN) this).freeOffHeapLN(idx); setLsn(idx, lsn, false/*check*/); setLastLoggedSize(idx, lastLoggedSize); setTarget(idx, node); updateLNSlotKey(idx, key, data); assert(isEmbeddedLN(idx) == isEmbeddedLN(state)); assert(isNoDataLN(idx) == isNoDataLN(state)); entryStates[idx] = state; setDirty(true); } /** * Update the idx slot of this BIN to reflect a record insertion in an * existing KD slot. It is called from CursorImpl.insertRecordInternal(), * after logging the insertion op. * * @param newLN The LN associated with the new record. * * @param newLSN The LSN of the insertion logrec. * * @param newSize The size of the insertion logrec. * * @param newKey The value for the record's key. It is equal to the current * key value in the slot, but may not be identical to that value if a * custom comparator is used. * * @param newData If the record's data must be embedded in this BIN, "data" * stores the record's data. Null otherwise. See also comment for the * keyEntries field. */ public final void insertRecord( int idx, LN newLN, long newLSN, int newSize, byte[] newKey, byte[] newData, int expiration, boolean expirationInHours) { assert(isBIN()); final BIN bin = (BIN) this; bin.freeOffHeapLN(idx); // old version of the LN is stale long oldSlotSize = getEntryInMemorySize(idx); setLsn(idx, newLSN); boolean multiSlotChange = updateLNSlotKey(idx, newKey, newData); if (isEmbeddedLN(idx)) { clearLastLoggedSize(idx); bin.setCachedVLSN(idx, newLN.getVLSNSequence()); if (databaseImpl.isDeferredWriteMode()) { setTarget(idx, newLN); } } else { setTarget(idx, newLN); setLastLoggedSize(idx, newSize); } bin.setExpiration(idx, expiration, expirationInHours); if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } else { long newSlotSize = getEntryInMemorySize(idx); updateMemorySize(oldSlotSize, newSlotSize); } clearKnownDeleted(idx); clearPendingDeleted(idx); setDirty(true); assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag()); } /** * Update the idx slot of this BIN to reflect an update of the associated * record. It is called from CursorImpl.updateRecordInternal(), after * logging the update op. * * @param oldMemSize If the child LN was cached before the update op, it has * already been updated in-place by the caller. In this case, oldMemSize * stores the size of the child LN before the update, and it is used to do * memory counting. Otherwise oldMemSize is 0 and the newly created LN has * not been attached to the tree; it will be attached later by the caller, * if needed. * * @param newLSN The LSN of the update logrec. * * @param newVLSN The VLSN of the update logrec. * * @param newSize The on-disk size of the update logrec. * * @param newKey The new value for the record's key. It is equal to the * current value, but may not be identical to the current value if a * custom comparator is used. It may be null, if the caller knows for * sure that the key does not change. * * @param newData If the record's data must be embedded in this BIN, "data" * stores the record's data. Null otherwise. See also comment for the * keyEntries field. */ public final void updateRecord( int idx, long oldMemSize, long newLSN, long newVLSN, int newSize, byte[] newKey, byte[] newData, int expiration, boolean expirationInHours) { assert(isBIN()); final BIN bin = (BIN) this; bin.freeOffHeapLN(idx); // old version of the LN is stale long oldSlotSize = getEntryInMemorySize(idx); setLsn(idx, newLSN); boolean multiSlotChange = updateLNSlotKey(idx, newKey, newData); if (isEmbeddedLN(idx)) { clearLastLoggedSize(idx); ((BIN)this).setCachedVLSN(idx, newVLSN); } else { setLastLoggedSize(idx, newSize); } bin.setExpiration(idx, expiration, expirationInHours); if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } else { /* Update mem size for key change. */ long newSlotSize = getEntryInMemorySize(idx); updateMemorySize(oldSlotSize, newSlotSize); /* Update mem size for node change. */ Node newLN = entryTargets.get(idx); long newMemSize = (newLN != null ? newLN.getMemorySizeIncludedByParent() : 0); updateMemorySize(oldMemSize, newMemSize); } setDirty(true); } /** * Update the idx slot slot of this BIN to reflect a deletion of the * associated record. It is called from CursorImpl.deleteCurrentRecord(), * after logging the deletion op. * * @param oldMemSize If the child LN was cached before the deletion, it * has already been updated in-place by the caller (the ln contents have * been deleted). In this case, oldMemSize stores the in-memory size of * the child LN before the update, and it is used to do memory counting. * Otherwise oldMemSize is 0 and the newly created LN has not been attached * to the tree; it will be attached later by the caller, if needed. * * @param newLSN The LSN of the deletion logrec. * * @param newVLSN The VLSN of the deletion logrec. * * @param newSize The on-disk size of the deletion logrec. */ public final void deleteRecord( int idx, long oldMemSize, long newLSN, long newVLSN, int newSize) { assert(isBIN()); final BIN bin = (BIN) this; bin.freeOffHeapLN(idx); // old version of the LN is stale setLsn(idx, newLSN); if (isEmbeddedLN(idx)) { clearLastLoggedSize(idx); bin.setCachedVLSN(idx, newVLSN); } else { setLastLoggedSize(idx, newSize); } if (entryTargets.get(idx) != null) { /* Update mem size for node change. */ assert(oldMemSize != 0); Node newLN = entryTargets.get(idx); long newMemSize = newLN.getMemorySizeIncludedByParent(); updateMemorySize(oldMemSize, newMemSize); } else { assert(oldMemSize == 0); } setPendingDeleted(idx); setDirty(true); } /** * This method is used by the RecoveryManager to change the current version * of a record, either to a later version (in case of redo), or to an * earlier version (in case of undo). The current version may or may not be * cached as a child LN of this BIN (it may be only in case of txn abort * during normal processing). If it is, it is evicted. The new version is * not attached to the in-memory tree, to save memory during crash * recovery. * * @param idx The BIN slot for the record. * * @param lsn The LSN of the new record version. It may be null in case of * undo, if the logrec that is being undone is an insertion and the record * did not exist at all in the DB before that insertion. * * @param knownDeleted True if the new version is a committed deletion. * * @param pendingDeleted True if the new version is a deletion, which * may or may not be committed. * * @param key The key of the new version. It is null only if we are undoing * and the revert-to version was not embedded (in this case the key of the * revert-to version is not stored in the logrec). If it is null and the * DB allows key updates, the new record version is fetched from disk to * retrieve its key, so that the key values stored in the BIN slots are * always transactionally correct. * * @param data The data of the new version. It is non-null if and only if * the new version must be embedded in the BIN. * * @param vlsn The VLSN of the new version. * * @param logrecSize The on-disk size of the logrec corresponding to the * new version. It may be 0 (i.e. unknown) in case of undo. */ public final void recoverRecord( int idx, long lsn, boolean knownDeleted, boolean pendingDeleted, byte[] key, byte[] data, long vlsn, int logrecSize, int expiration, boolean expirationInHours) { assert(isBIN()); BIN bin = (BIN) this; bin.freeOffHeapLN(idx); // old version of the LN is stale if (lsn == DbLsn.NULL_LSN) { /* * A NULL lsn means that we are undoing an insertion that was done * without slot reuse. To undo such an insertion we evict the * current version (it may cached only in case of normal txn abort) * and set the KD flag in the slot. We also set the LSN to null to * ensure that the slot does not point to a logrec that does not * reflect the slot's current state. The slot can then be put on * the compressor for complete removal. */ setKnownDeletedAndEvictLN(idx); setLsnInternal(idx, DbLsn.NULL_LSN); bin.queueSlotDeletion(idx); return; } if (key == null && databaseImpl.allowsKeyUpdates() && !knownDeleted) { try { WholeEntry wholeEntry = getEnv().getLogManager(). getLogEntryAllowInvisibleAtRecovery( lsn, getLastLoggedSize(idx)); LNLogEntry logrec = (LNLogEntry) wholeEntry.getEntry(); logrec.postFetchInit(getDatabase()); key = logrec.getKey(); logrecSize = wholeEntry.getHeader().getEntrySize(); } catch (FileNotFoundException e) { throw new EnvironmentFailureException( getEnv(), EnvironmentFailureReason.LOG_FILE_NOT_FOUND, makeFetchErrorMsg(null, lsn, idx), e); } catch (ErasedException e) { throw new EnvironmentFailureException( getEnv(), EnvironmentFailureReason.LOG_CHECKSUM, "LN is erased unexpectedly, implied corruption. " + makeFetchErrorMsg(null, lsn, idx), e); } } long oldSlotSize = getEntryInMemorySize(idx); setLsn(idx, lsn); setTarget(idx, null); boolean multiSlotChange = updateLNSlotKey(idx, key, data); if (isEmbeddedLN(idx)) { clearLastLoggedSize(idx); bin.setCachedVLSN(idx, vlsn); } else { setLastLoggedSize(idx, logrecSize); } if (knownDeleted) { assert(!pendingDeleted); setKnownDeleted(idx); bin.queueSlotDeletion(idx); } else { clearKnownDeleted(idx); if (pendingDeleted) { setPendingDeleted(idx); bin.queueSlotDeletion(idx); } else { clearPendingDeleted(idx); } } bin.setExpiration(idx, expiration, expirationInHours); if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } else { long newSlotSize = getEntryInMemorySize(idx); updateMemorySize(oldSlotSize, newSlotSize); } setDirty(true); } /** * Update the cached-child and LSN properties of the idx-th slot. This * method is used by the RecoveryManager.recoverChildIN() to change the * version of a child IN, a later version The child IN may or may not be * already attached to the tree. */ public final void recoverIN( int idx, Node node, long lsn, int lastLoggedSize) { long oldSlotSize = getEntryInMemorySize(idx); /* * If we are about to detach a cached child IN, make sure that it is * not in the INList. This is correct, because this method is called * during the recovery phase where the INList is disabled, */ Node child = getTarget(idx); assert(child == null || !((IN)child).getInListResident() || child == node/* this is needed by a unit test*/); setLsn(idx, lsn); setLastLoggedSize(idx, lastLoggedSize); setTarget(idx, node); long newSlotSize = getEntryInMemorySize(idx); updateMemorySize(oldSlotSize, newSlotSize); setDirty(true); assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag()); } /** * Attach the given node as the idx-th child of "this" node. If the child * node is an LN, update the key of the parent slot to the given key value, * if that value is non-null and an update is indeed necessary. * * This method is called after the child node has been either (a) fetched * in from disk and is not dirty, or (b) is a newly created instance that * will be written out later by something like a checkpoint. In either * case, the slot LSN does not need to be updated. * * Note: does not dirty the node unless the LN slot key is changed. */ public final void attachNode(int idx, Node node, byte[] newKey) { assert !(node instanceof IN) || ((IN) node).isLatchExclusiveOwner(); long oldSlotSize = getEntryInMemorySize(idx); /* Make sure we are not using this method to detach a cached child */ assert(getTarget(idx) == null); setTarget(idx, node); boolean multiSlotChange = false; if (isBIN() && newKey != null) { assert(!haveEmbeddedData(idx)); multiSlotChange = updateLNSlotKey(idx, newKey, null); } if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } else { long newSlotSize = getEntryInMemorySize(idx); updateMemorySize(oldSlotSize, newSlotSize); } assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag()); } /* * Detach from the tree the child node at the idx-th slot. * * The most common caller of this method is the evictor. If the child * being evicted was dirty, it has just been logged and the lsn of the * slot must be updated. */ public final void detachNode(int idx, boolean updateLsn, long newLsn) { long oldSlotSize = getEntryInMemorySize(idx); Node child = getTarget(idx); if (updateLsn) { setLsn(idx, newLsn); setDirty(true); } setTarget(idx, null); long newSlotSize = getEntryInMemorySize(idx); updateMemorySize(oldSlotSize, newSlotSize); if (child != null && child.isIN()) { getEnv().getInMemoryINs().remove((IN) child); } assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag()); } /** * This method is used in DupConvert, where it is called to convert the * keys of an upper IN that has just been fetched from the log and is not * attached to in-memory tree yet. */ public final void convertKey(int idx, byte[] newKey) { long oldSlotSize = getEntryInMemorySize(idx); boolean multiSlotChange = updateKey(idx, newKey, null); if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } else { long newSlotSize = getEntryInMemorySize(idx); updateMemorySize(oldSlotSize, newSlotSize); } setDirty(true); assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag()); } void copyEntries(final int from, final int to, final int n) { entryTargets = entryTargets.copy(from, to, n, this); entryKeys = entryKeys.copy(from, to, n, this); offHeapBINIds = offHeapBINIds.copy(from, to, n, this); System.arraycopy(entryStates, from, entryStates, to, n); if (entryLsnLongArray == null) { final int fromOff = from << 2; final int toOff = to << 2; final int nBytes = n << 2; System.arraycopy(entryLsnByteArray, fromOff, entryLsnByteArray, toOff, nBytes); } else { System.arraycopy(entryLsnLongArray, from, entryLsnLongArray, to, n); } } /** * Return true if this node needs splitting. For the moment, needing to be * split is defined by there being no free entries available. */ public final boolean needsSplitting() { if (isBINDelta()) { BIN bin = (BIN)this; int fullBinNEntries = bin.getFullBinNEntries(); int fullBinMaxEntries = bin.getFullBinMaxEntries(); if (fullBinNEntries < 0) { /* fullBinNEntries is unknown in logVersions < 10 */ mutateToFullBIN(false /*leaveFreeSlot*/); } else { assert(fullBinNEntries > 0); return ((fullBinMaxEntries - fullBinNEntries) < 1); } } return ((getMaxEntries() - nEntries) < 1); } /** * Split this into two nodes. Parent IN is passed in parent and should be * latched by the caller. * * childIndex is the index in parent of where "this" can be found. */ public final IN split( IN parent, int childIndex, IN grandParent, int maxEntries) { return splitInternal(parent, childIndex, grandParent, maxEntries, -1); } /** * Called when we know we are about to split on behalf of a key that is the * minimum (leftSide) or maximum (!leftSide) of this node. This is * achieved by just forcing the split to occur either one element in from * the left or the right (i.e. splitIndex is 1 or nEntries - 1). */ IN splitSpecial( IN parent, int parentIndex, IN grandParent, int maxEntriesPerNode, byte[] key, boolean leftSide) { int index = findEntry(key, false, false); if (leftSide && index == 0) { return splitInternal( parent, parentIndex, grandParent, maxEntriesPerNode, 1); } else if (!leftSide && index == (nEntries - 1)) { return splitInternal( parent, parentIndex, grandParent, maxEntriesPerNode, nEntries - 1); } else { return split( parent, parentIndex, grandParent, maxEntriesPerNode); } } final IN splitInternal( final IN parent, final int childIndex, final IN grandParent, final int maxEntries, int splitIndex) throws DatabaseException { assert(!isBINDelta()); /* * Find the index of the existing identifierKey so we know which IN * (new or old) to put it in. */ if (identifierKey == null) { throw unexpectedState(); } final int idKeyIndex = findEntry(identifierKey, false, false); if (splitIndex < 0) { splitIndex = nEntries / 2; } /* Range of entries to copy to new sibling. */ final int low, high; if (idKeyIndex < splitIndex) { /* * Current node (this) keeps left half entries. Right half entries * will go in the new node. */ low = splitIndex; high = nEntries; } else { /* * Current node (this) keeps right half entries. Left half entries * will go in the new node. */ low = 0; high = splitIndex; } final byte[] newIdKey = getKey(low); long parentLsn; /* * Ensure that max entries is large enough to hold the slots being * moved to the new sibling, with one spare slot for insertions. This * is important when the maxEntries param is less than nEntries in this * node, which can occur when the user reduces the fanout or when this * node has temporarily grown beyond its original fanout. */ final IN newSibling = createNewInstance( newIdKey, Math.max(maxEntries, high - low + 1), level); newSibling.latch(CacheMode.UNCHANGED); try { boolean addedNewSiblingToCompressorQueue = false; final int newSiblingNEntries = (high - low); final boolean haveCachedChildren = hasCachedChildrenFlag(); assert(isBIN() || haveCachedChildren == hasCachedChildren()); final BIN bin = isBIN() ? (BIN) this : null; /** * Distribute entries among the split node and the new sibling. */ for (int i = low; i < high; i++) { if (!addedNewSiblingToCompressorQueue && bin != null && bin.isDefunct(i)) { addedNewSiblingToCompressorQueue = true; getEnv().addToCompressorQueue((BIN) newSibling); } newSibling.appendEntryFromOtherNode(this, i); clearEntry(i); } if (low == 0) { shiftEntriesLeft(newSiblingNEntries); } nEntries -= newSiblingNEntries; setDirty(true); if (isUpperIN() && haveCachedChildren) { setHasCachedChildrenFlag(hasCachedChildren()); } assert(isBIN() || hasCachedChildrenFlag() == hasCachedChildren()); assert(isBIN() || newSibling.hasCachedChildrenFlag() == newSibling.hasCachedChildren()); adjustCursors(newSibling, low, high); /* * If this node has no key prefix, calculate it now that it has * been split. This must be done before logging, to ensure the * prefix information is made persistent [#20799]. */ byte[] newKeyPrefix = computeKeyPrefix(-1); recalcSuffixes(newKeyPrefix, null, null, -1); /* Apply compaction after prefixing [#20799]. */ entryKeys = entryKeys.compact(this); /* Only recalc if there are multiple entries in newSibling. */ if (newSibling.getNEntries() > 1) { byte[] newSiblingPrefix = newSibling.computeKeyPrefix(-1); newSibling.recalcSuffixes(newSiblingPrefix, null, null, -1); /* initMemorySize calls entryKeys.compact. */ newSibling.initMemorySize(); } assert idKeyIsSlotKey(); assert newSibling.idKeyIsSlotKey(); /* * Update size. newSibling and parent are correct, but this IN has * had its entries shifted and is not correct. * * Also, inMemorySize does not reflect changes that may have * resulted from key prefixing related changes, it needs to be * brought up to date, so update it appropriately for this and the * above reason. */ EnvironmentImpl env = getEnv(); INList inMemoryINs = env.getInMemoryINs(); long oldMemorySize = inMemorySize; long newSize = computeMemorySize(); updateMemorySize(oldMemorySize, newSize); /* * Parent refers to child through an element of the entries array. * Depending on which half of the BIN we copied keys from, we * either have to adjust one pointer and add a new one, or we have * to just add a new pointer to the new sibling. * * We must use the provisional logging for two reasons: * * 1) All three log entries must be read atomically. The parent * must get logged last, as all referred-to children must precede * it. Provisional entries guarantee that all three are processed * as a unit. Recovery skips provisional entries, so the changed * children are only used if the parent makes it out to the log. * * 2) We log all they way to the root to avoid the "great aunt" * problem (see RecoveryManager.buildINs), and provisional * logging is necessary during a checkpoint for levels less than * maxFlushLevel. * * We prohibit compression during logging because there should be * at least one entry in each IN. Note the use of getKey(0) below. */ long newSiblingLsn = newSibling.optionalLogProvisionalNoCompress(parent); long myNewLsn = optionalLogProvisionalNoCompress(parent); assert nEntries > 0; /* * When we update the parent entry, we make sure that we don't * replace the parent's key that points at 'this' with a key that * is > than the existing one. Replacing the parent's key with * something > would effectively render a piece of the subtree * inaccessible. So only replace the parent key with something * <= the existing one. See tree/SplitTest.java for more details * on the scenario. */ if (low == 0) { /* * Change the original entry to point to the new child and add * an entry to point to the newly logged version of this * existing child. */ parent.prepareForSlotReuse(childIndex); parent.updateSplitSlot( childIndex, newSibling, newSiblingLsn, newIdKey); boolean inserted = parent.insertEntry( this, getKey(0), myNewLsn); assert inserted; } else { /* * Update the existing child's LSN to reflect the newly logged * version and insert new child into parent. */ parent.updateSplitSlot(childIndex, this, myNewLsn, getKey(0)); boolean inserted = parent.insertEntry( newSibling, newIdKey, newSiblingLsn); assert inserted; } inMemoryINs.add(newSibling); /* * Log the parent. Note that the root slot or grandparent slot is * not updated with the parent's LSN here; this is done by * Tree.forceSplit. */ if (parent.isRoot()) { parentLsn = parent.optionalLog(); } else { parentLsn = parent.optionalLogProvisional(grandParent); } /* Coordinate the split with an in-progress checkpoint. */ env.getCheckpointer().coordinateSplitWithCheckpoint(newSibling); /* * Check whether either the old or the new sibling must be added * to the LRU (priority-1 LRUSet). */ assert(!isDIN() && !isDBIN()); if(isBIN() || !newSibling.hasCachedChildrenFlag()) { if (isUpperIN() && traceLRU) { LoggerUtils.envLogMsg( traceLevel, getEnv(), "split-newSibling " + Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + getEnv().getName() + " Adding UIN to LRU: " + newSibling.getNodeId()); } getEvictor().addBack(newSibling); } if (isUpperIN() && haveCachedChildren && !hasCachedChildrenFlag()) { if (traceLRU) { LoggerUtils.envLogMsg( traceLevel, getEnv(), "split-oldSibling " + Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + getEnv().getName() + " Adding UIN to LRU: " + getNodeId()); } getEvictor().addBack(this); } /* Debug log this information. */ traceSplit(Level.FINE, parent, newSibling, parentLsn, myNewLsn, newSiblingLsn, splitIndex, idKeyIndex, childIndex); } finally { newSibling.releaseLatch(); } return newSibling; } /** * Used for moving entries between BINs during splits. */ void appendEntryFromOtherNode(IN from, int fromIdx) { assert(!isBINDelta()); final Node target = from.entryTargets.get(fromIdx); final int ohBinId = from.getOffHeapBINId(fromIdx); final boolean ohBinPri2 = from.isOffHeapBINPri2(fromIdx); final boolean ohBinDirty = from.isOffHeapBINDirty(fromIdx); final long lsn = from.getLsn(fromIdx); final byte state = from.entryStates[fromIdx]; final byte[] key = from.getKey(fromIdx); final byte[] data = (from.haveEmbeddedData(fromIdx) ? from.getData(fromIdx) : null); long oldSize = computeLsnOverhead(); ++nEntries; int idx = nEntries - 1; /* * When calling setTarget for an IN child we must latch it, because * setTarget sets the parent. */ if (target != null && target.isIN()) { final IN in = (IN) target; in.latchNoUpdateLRU(databaseImpl); setTarget(idx, target); in.releaseLatch(); } else { setTarget(idx, target); } boolean multiSlotChange = insertKey(idx, key, data); /* setLsnInternal can mutate to an array of longs. */ setLsnInternal(idx, lsn); entryStates[idx] = state; if (ohBinId >= 0) { setOffHeapBINId(idx, ohBinId, ohBinPri2, ohBinDirty); getOffHeapCache().setOwner(ohBinId, this); } if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } else { long newSize = getEntryInMemorySize(idx) + computeLsnOverhead(); updateMemorySize(oldSize, newSize); } setDirty(true); } /** * Update a slot that is being split. The slot to be updated here is the * one that existed before the split. * * @param child The new child to be placed under the slot. May be the * newly created sibling or the pre-existing sibling. * @param lsn The new lsn of the child (the child was logged just before * calling this method, so its slot lsn must be updated) * @param key The new key for the slot. We should not actually update the * slot key, because its value is the lower bound of the key range covered * by the slot, and this lower bound does not change as a result of the * split (the new slot created as a result of the split is placed to the * right of the pre-existing slot). There is however one exception: the * key can be updated if "idx" is the 0-slot. The 0-slot key is not a true * lower bound; the actual lower bound for the 0-slot is the key in the * parent slot for this IN. So, in this case, if the given key is less * than the current one, it is better to update the key in order to better * approximate the real lower bound (and thus make the isKeyInBounds() * method more effective). */ private void updateSplitSlot( int idx, IN child, long lsn, byte[] key) { assert(isUpperIN()); long oldSize = getEntryInMemorySize(idx); setLsn(idx, lsn); setTarget(idx, child); if (idx == 0) { int s = entryKeys.compareKeys( key, keyPrefix, idx, haveEmbeddedData(idx), getKeyComparator()); boolean multiSlotChange = false; if (s < 0) { multiSlotChange = updateKey(idx, key, null/*data*/); } if (multiSlotChange) { updateMemorySize(inMemorySize, computeMemorySize()); } else { long newSize = getEntryInMemorySize(idx); updateMemorySize(oldSize, newSize); } } else { long newSize = getEntryInMemorySize(idx); updateMemorySize(oldSize, newSize); } setDirty(true); assert(hasCachedChildren() == hasCachedChildrenFlag()); } /** * Shift entries to the right by one position, starting with (and * including) the entry at index. Increment nEntries by 1. Called * in insertEntry1() * * @param index - The position to start shifting from. */ private void shiftEntriesRight(int index) { copyEntries(index, index + 1, nEntries - index); clearEntry(index); nEntries++; setDirty(true); } /** * Shift entries starting at the byHowMuch'th element to the left, thus * removing the first byHowMuch'th elements of the entries array. This * always starts at the 0th entry. Caller is responsible for decrementing * nEntries. * * @param byHowMuch - The number of entries to remove from the left side * of the entries array. */ private void shiftEntriesLeft(int byHowMuch) { copyEntries(byHowMuch, 0, nEntries - byHowMuch); for (int i = nEntries - byHowMuch; i < nEntries; i++) { clearEntry(i); } setDirty(true); } void adjustCursors( IN newSibling, int newSiblingLow, int newSiblingHigh) { /* Cursors never refer to IN's. */ } void adjustCursorsForInsert(int insertIndex) { /* Cursors never refer to IN's. */ } /** * Called prior to changing a slot to contain a different logical node. * * Necessary to support assertions for transient LSNs in shouldUpdateLsn. * Examples: LN slot reuse, and splits where a new node is placed in an * existing slot. * * Also needed to free the off-heap BIN associated with the old node. * * TODO: This method is no longer used for LN slot reuse, and freeing of * the off-heap BIN could be done by the only caller, splitInternal, and * then this method could be removed. */ public void prepareForSlotReuse(int idx) { if (databaseImpl.isDeferredWriteMode()) { setLsn(idx, DbLsn.NULL_LSN, false/*check*/); } final OffHeapCache ohCache = getOffHeapCache(); if (ohCache.isEnabled() && getNormalizedLevel() == 2) { ohCache.freeBIN((BIN) getTarget(idx), this, idx); } } /* * Get the current memory consumption of this node */ public long getInMemorySize() { return inMemorySize; } /** * Compute the current memory consumption of this node, after putting * its keys in their compact representation, if possible. */ private void initMemorySize() { entryKeys = entryKeys.compact(this); inMemorySize = computeMemorySize(); } /** * Count up the memory usage attributable to this node alone. LNs children * are counted by their BIN parents, but INs are not counted by their * parents because they are resident on the IN list. The identifierKey is * "intentionally" not kept track of in the memory budget. */ public long computeMemorySize() { long calcMemorySize = getFixedMemoryOverhead(); calcMemorySize += MemoryBudget.byteArraySize(entryStates.length); calcMemorySize += computeLsnOverhead(); for (int i = 0; i < nEntries; i++) { calcMemorySize += getEntryInMemorySize(i); } if (keyPrefix != null) { calcMemorySize += MemoryBudget.byteArraySize(keyPrefix.length); } if (provisionalObsolete != null) { calcMemorySize += provisionalObsolete.getMemorySize(); } calcMemorySize += entryTargets.calculateMemorySize(); calcMemorySize += entryKeys.calculateMemorySize(); if (offHeapBINIds != null) { calcMemorySize += offHeapBINIds.getMemorySize(); } return calcMemorySize; } /* * Overridden by subclasses. */ protected long getFixedMemoryOverhead() { return MemoryBudget.IN_FIXED_OVERHEAD; } /* * Compute the memory consumption for storing this node's LSNs */ private int computeLsnOverhead() { return (entryLsnLongArray == null) ? MemoryBudget.byteArraySize(entryLsnByteArray.length) : MemoryBudget.ARRAY_OVERHEAD + (entryLsnLongArray.length * MemoryBudget.PRIMITIVE_LONG_ARRAY_ITEM_OVERHEAD); } private long getEntryInMemorySize(int idx) { /* * Do not count state size here, since it is counted as overhead * during initialization. */ long ret = 0; /* * Don't count the key size if the representation has already * accounted for it. */ if (!entryKeys.accountsForKeyByteMemUsage()) { /* * Materialize the key object only if needed, thus avoiding the * object allocation cost when possible. */ final byte[] key = entryKeys.get(idx); if (key != null) { ret += MemoryBudget.byteArraySize(key.length); } } final Node target = entryTargets.get(idx); if (target != null) { ret += target.getMemorySizeIncludedByParent(); } return ret; } /** * Compacts the representation of the IN, if possible. * * Called by the evictor to reduce memory usage. Should not be called too * often (e.g., every CRUD operation), since this could cause lots of * memory allocations as the representations contract and expend, resulting * in expensive GC. * * @return number of bytes reclaimed. */ public long compactMemory() { final long oldSize = inMemorySize; final INKeyRep oldKeyRep = entryKeys; entryTargets = entryTargets.compact(this); entryKeys = entryKeys.compact(this); offHeapBINIds = offHeapBINIds.compact(this, EMPTY_OFFHEAP_BIN_IDS); /* * Note that we only need to account for mem usage changes in the key * rep here, not the target rep. The target rep, unlike the key rep, * updates its mem usage internally, and the responsibility for mem * usage of contained nodes is fixed -- it is always managed by the IN. * * When the key rep changes, the accountsForKeyByteMemUsage property * also changes. Recalc the size of the entire IN, because * responsibility for managing contained key byte mem usage has shifted * between the key rep and the IN parent. */ if (entryKeys != oldKeyRep) { updateMemorySize(inMemorySize, computeMemorySize()); } return oldSize - inMemorySize; } /** * Returns the amount of memory currently budgeted for this IN. */ public long getBudgetedMemorySize() { return inMemorySize - accumulatedDelta; } /** * Called as part of a memory budget reset (during a checkpoint) to clear * the accumulated delta and return the total memory size. */ public long resetAndGetMemorySize() { accumulatedDelta = 0; return inMemorySize; } protected void updateMemorySize(long oldSize, long newSize) { long delta = newSize - oldSize; updateMemorySize(delta); } /* * Called when a cached child is replaced by another cached child. */ void updateMemorySize(Node oldNode, Node newNode) { long delta = 0; if (newNode != null) { delta = newNode.getMemorySizeIncludedByParent(); } if (oldNode != null) { delta -= oldNode.getMemorySizeIncludedByParent(); } updateMemorySize(delta); } /* * Change this.onMemorySize by the given delta and update the memory * budget for the cache, but only if the accummulated delta for this * node exceeds the ACCUMULATED_LIMIT threshold and this IN is actually * on the IN list. (For example, when we create new INs, they are * manipulated off the IN list before being added; if we updated the * environment wide cache then, we'd end up double counting.) */ void updateMemorySize(long delta) { if (delta == 0) { return; } inMemorySize += delta; if (getInListResident()) { /* * This assertion is disabled if the environment is invalid to * avoid spurious assertions during testing of IO errors. If the * environment is invalid, memory budgeting errors are irrelevant. * [#21929] */ assert inMemorySize >= getFixedMemoryOverhead() || !getEnv().isValid(): "delta: " + delta + " inMemorySize: " + inMemorySize + " overhead: " + getFixedMemoryOverhead() + " computed: " + computeMemorySize() + " dump: " + toString() + assertPrintMemorySize(); accumulatedDelta += delta; if (accumulatedDelta > ACCUMULATED_LIMIT || accumulatedDelta < -ACCUMULATED_LIMIT) { updateMemoryBudget(); } } } /** * Move the accumulated delta to the memory budget. */ public void updateMemoryBudget() { final EnvironmentImpl env = getEnv(); env.getInMemoryINs().memRecalcUpdate(this, accumulatedDelta); env.getMemoryBudget().updateTreeMemoryUsage(accumulatedDelta); accumulatedDelta = 0; } /* * Utility method used during unit testing. */ protected long printMemorySize() { final long inOverhead = getFixedMemoryOverhead(); final long statesOverhead = MemoryBudget.byteArraySize(entryStates.length); final int lsnOverhead = computeLsnOverhead(); int entryOverhead = 0; for (int i = 0; i < nEntries; i++) { entryOverhead += getEntryInMemorySize(i); } final int keyPrefixOverhead = (keyPrefix != null) ? MemoryBudget.byteArraySize(keyPrefix.length) : 0; final int provisionalOverhead = (provisionalObsolete != null) ? provisionalObsolete.getMemorySize() : 0; final long targetRepOverhead = entryTargets.calculateMemorySize(); final long keyRepOverhead = entryKeys.calculateMemorySize(); final long total = inOverhead + statesOverhead + lsnOverhead + entryOverhead + keyPrefixOverhead + provisionalOverhead + targetRepOverhead + keyRepOverhead; final long offHeapBINIdOverhead = offHeapBINIds.getMemorySize(); System.out.println(" nEntries:" + nEntries + "/" + entryStates.length + " in: " + inOverhead + " states: " + statesOverhead + " entry: " + entryOverhead + " lsn: " + lsnOverhead + " keyPrefix: " + keyPrefixOverhead + " provisional: " + provisionalOverhead + " targetRep(" + entryTargets.getType() + "): " + targetRepOverhead + " keyRep(" + entryKeys.getType() +"): " + keyRepOverhead + " offHeapBINIds: " + offHeapBINIdOverhead + " Total: " + total + " inMemorySize: " + inMemorySize); return total; } /* Utility method used to print memory size in an assertion. */ private boolean assertPrintMemorySize() { printMemorySize(); return true; } public boolean verifyMemorySize() { long calcMemorySize = computeMemorySize(); if (calcMemorySize != inMemorySize) { String msg = "-Warning: Out of sync. Should be " + calcMemorySize + " / actual: " + inMemorySize + " node: " + getNodeId(); LoggerUtils.envLogMsg(Level.INFO, getEnv(), msg); System.out.println(msg); printMemorySize(); return false; } return true; } /** * Adds (increments) or removes (decrements) the cache stats for the key * and target representations. Used when rep objects are being replaced * with a new instance, rather than by calling their mutator methods. * Specifically, it is called when mutating from full bin to bin delta * or vice-versa. */ protected void updateRepCacheStats(boolean increment) { assert(isBIN()); entryKeys.updateCacheStats(increment, this); entryTargets.updateCacheStats(increment, this); } protected int getCompactMaxKeyLength() { return getEnv().getCompactMaxKeyLength(); } /** * Called when adding/removing this IN to/from the INList. */ public void setInListResident(boolean resident) { if (!resident) { /* Decrement the stats before clearing its residency */ entryTargets.updateCacheStats(false, this); entryKeys.updateCacheStats(false, this); } if (resident) { flags |= IN_RESIDENT_BIT; } else { flags &= ~IN_RESIDENT_BIT; } if (resident) { /* Increment the stats after setting its residency. */ entryTargets.updateCacheStats(true, this); entryKeys.updateCacheStats(true, this); } } /** * Returns whether this IN is on the INList. */ public boolean getInListResident() { return (flags & IN_RESIDENT_BIT) != 0; } public IN getPrevLRUNode() { return prevLRUNode; } public void setPrevLRUNode(IN node) { prevLRUNode = node; } public IN getNextLRUNode() { return nextLRUNode; } public void setNextLRUNode(IN node) { nextLRUNode = node; } /** * Try to compact or otherwise reclaim memory in this IN and return the * number of bytes reclaimed. For example, a BIN should evict LNs, if * possible. * * Used by the evictor to reclaim memory by some means short of evicting * the entire node. If a positive value is returned, the evictor will * postpone full eviction of this node. */ public long partialEviction() { return 0; } /** * Returns whether any child is non-null in the main or off-heap cache. */ public boolean hasCachedChildren() { assert isLatchOwner(); for (int i = 0; i < getNEntries(); i++) { if (entryTargets.get(i) != null) { return true; } } return false; } /** * Disallow delta on next log. Set to true (a) when we we delete a slot * from a BIN, (b) when the cleaner marks a BIN as dirty so that it will * be migrated during the next checkpoint. */ public void setProhibitNextDelta(boolean val) { if (!isBIN()) { return; } if (val) { flags |= IN_PROHIBIT_NEXT_DELTA_BIT; } else { flags &= ~IN_PROHIBIT_NEXT_DELTA_BIT; } } public boolean getProhibitNextDelta() { return (flags & IN_PROHIBIT_NEXT_DELTA_BIT) != 0; } /* * Validate the subtree that we're about to delete. Make sure there aren't * more than one valid entry on each IN and that the last level of the tree * is empty. Also check that there are no cursors on any bins in this * subtree. Assumes caller is holding the latch on this parent node. * * While we could latch couple down the tree, rather than hold latches as * we descend, we are presumably about to delete this subtree so * concurrency shouldn't be an issue. * * @return true if the subtree rooted at the entry specified by "index" is * ok to delete. * * Overriden by BIN class. */ boolean validateSubtreeBeforeDelete(int index) throws DatabaseException { if (index >= nEntries) { /* * There's no entry here, so of course this entry is deletable. */ return true; } else { IN child = fetchIN(index, CacheMode.UNCHANGED); boolean needToLatch = !child.isLatchExclusiveOwner(); try { if (needToLatch) { child.latch(CacheMode.UNCHANGED); } return child.isValidForDelete(); } finally { if (needToLatch && isLatchOwner()) { child.releaseLatch(); } } } } /** * Check if this node fits the qualifications for being part of a deletable * subtree. It can only have one IN child and no LN children. * * Note: the method is overwritten by BIN and LN. * BIN.isValidForDelete() will not fetch any child LNs. * LN.isValidForDelete() simply returns false. * * We assume that this is only called under an assert. */ @Override boolean isValidForDelete() throws DatabaseException { assert(!isBINDelta()); /* * Can only have one valid child, and that child should be * deletable. */ if (nEntries > 1) { // more than 1 entry. return false; } else if (nEntries == 1) { // 1 entry, check child IN child = fetchIN(0, CacheMode.UNCHANGED); boolean needToLatch = !child.isLatchExclusiveOwner(); if (needToLatch) { child.latch(CacheMode.UNCHANGED); } boolean ret = false; try { if (child.isBINDelta()) { return false; } ret = child.isValidForDelete(); } finally { if (needToLatch) { child.releaseLatch(); } } return ret; } else { // 0 entries. return true; } } /** * Add self and children to this in-memory IN list. Called by recovery, can * run with no latching. */ @Override final void rebuildINList(INList inList) throws DatabaseException { /* * Recompute your in memory size first and then add yourself to the * list. */ initMemorySize(); inList.add(this); boolean hasCachedChildren = false; /* * Add your children if they're resident. (LNs know how to stop the * flow). */ for (int i = 0; i < nEntries; i++) { Node n = getTarget(i); if (n != null) { n.rebuildINList(inList); hasCachedChildren = true; } if (getOffHeapBINId(i) >= 0) { hasCachedChildren = true; } } if (isUpperIN()) { if (hasCachedChildren) { setHasCachedChildrenFlag(true); } else { setHasCachedChildrenFlag(false); if (!isDIN()) { if (traceLRU) { LoggerUtils.envLogMsg( traceLevel, getEnv(), "rebuildINList " + Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + getEnv().getName() + " Adding UIN to LRU: " + getNodeId()); } getEvictor().addBack(this); } } } else if (isBIN() && !isDBIN()) { getEvictor().addBack(this); } } /* * DbStat support. */ void accumulateStats(TreeWalkerStatsAccumulator acc) { acc.processIN(this, getNodeId(), getLevel()); } /** * Sets the last logged LSN, which for a BIN may be a delta. * * It is called from IN.postFetch/RecoveryInit(). If the logrec we have * just read was a BINDelta, this.lastFullVersion has already been set (in * BINDeltaLogEntry.readMainItem() or in OldBinDelta.reconstituteBIN()). * So, this method will set this.lastDeltaVersion. Otherwise, if the * logrec was a full BIN, this.lastFullVersion has not been set yet, * and it will be set here. In this case, this.lastDeltaVersion will * remain NULL. */ public void setLastLoggedLsn(long lsn) { if (isBIN()) { if (getLastFullLsn() == DbLsn.NULL_LSN) { setLastFullLsn(lsn); } else { ((BIN)this).setLastDeltaLsn(lsn); } } else { setLastFullLsn(lsn); } } /** * Returns the LSN of the last last logged version of this IN, or NULL_LSN * if never logged. */ public final long getLastLoggedLsn() { if (isBIN()) { return (getLastDeltaLsn() != DbLsn.NULL_LSN ? getLastDeltaLsn() : getLastFullLsn()); } return getLastFullLsn(); } /** * Sets the last full version LSN. */ public final void setLastFullLsn(long lsn) { lastFullVersion = lsn; } /** * Returns the last full version LSN, or NULL_LSN if never logged. */ public final long getLastFullLsn() { return lastFullVersion; } /** * Returns the last delta version LSN, or NULL_LSN if a delta was not last * logged. For BINs, it just returns the value of the lastDeltaVersion * field. Public for unit testing. */ public long getLastDeltaLsn() { return DbLsn.NULL_LSN; } /* * Logging support */ /** * When splits and checkpoints intermingle in a deferred write databases, * a checkpoint target may appear which has a valid target but a null LSN. * Deferred write dbs are written out in checkpoint style by either * Database.sync() or a checkpoint which has cleaned a file containing * deferred write entries. For example, * INa * | * BINb * * A checkpoint or Database.sync starts * The INList is traversed, dirty nodes are selected * BINb is bypassed on the INList, since it's not dirty * BINb is split, creating a new sibling, BINc, and dirtying INa * INa is selected as a dirty node for the ckpt * * If this happens, INa is in the selected dirty set, but not its dirty * child BINb and new child BINc. * * In a durable db, the existence of BINb and BINc are logged * anyway. But in a deferred write db, there is an entry that points to * BINc, but no logged version. * * This will not cause problems with eviction, because INa can't be * evicted until BINb and BINc are logged, are non-dirty, and are detached. * But it can cause problems at recovery, because INa will have a null LSN * for a valid entry, and the LN children of BINc will not find a home. * * To prevent this, search for all dirty children that might have been * missed during the selection phase, and write them out. It's not * sufficient to write only null-LSN children, because the existing sibling * must be logged lest LN children recover twice (once in the new sibling, * once in the old existing sibling. * * TODO: * Would the problem above be solved by logging dirty nodes using a tree * traversal (post-order), rather than using the dirty map? */ public void logDirtyChildren() throws DatabaseException { assert(!isBINDelta()); EnvironmentImpl envImpl = getDatabase().getEnv(); /* Look for targets that are dirty. */ for (int i = 0; i < getNEntries(); i++) { IN child = (IN) getTarget(i); if (child != null) { child.latch(CacheMode.UNCHANGED); try { if (child.getDirty()) { /* Ask descendants to log their children. */ child.logDirtyChildren(); long childLsn = child.log(false, // allowDeltas true, // isProvisional true, // backgroundIO this); // parent updateEntry( i, childLsn, VLSN.NULL_VLSN_SEQUENCE, 0/*lastLoggedSize*/); } } finally { child.releaseLatch(); } } } } public final long log() { return logInternal( this, null, false /*allowDeltas*/, true /*allowCompress*/, Provisional.NO, false /*backgroundIO*/, null /*parent*/); } public final long log( boolean allowDeltas, boolean isProvisional, boolean backgroundIO, IN parent) { return logInternal( this, null, allowDeltas, true /*allowCompress*/, isProvisional ? Provisional.YES : Provisional.NO, backgroundIO, parent); } public final long log( boolean allowDeltas, Provisional provisional, boolean backgroundIO, IN parent) { return logInternal( this, null, allowDeltas, true /*allowCompress*/, provisional, backgroundIO, parent); } final long optionalLog() { if (databaseImpl.isDeferredWriteMode()) { return getLastLoggedLsn(); } else { return logInternal( this, null, false /*allowDeltas*/, true /*allowCompress*/, Provisional.NO, false /*backgroundIO*/, null /*parent*/); } } long optionalLogProvisional(IN parent) { return optionalLogProvisional(parent, true /*allowCompress*/); } long optionalLogProvisionalNoCompress(IN parent) { return optionalLogProvisional(parent, false /*allowCompress*/); } private long optionalLogProvisional(IN parent, boolean allowCompress) { if (databaseImpl.isDeferredWriteMode()) { return getLastLoggedLsn(); } else { return logInternal( this, null, false /*allowDeltas*/, allowCompress, Provisional.YES, false /*backgroundIO*/, parent); } } public static long logEntry( INLogEntry logEntry, Provisional provisional, boolean backgroundIO, IN parent) { return logInternal( null, logEntry, true /*allowDeltas*/, false /*allowCompress*/, provisional, backgroundIO, parent); } /** * Bottleneck method for all IN logging. * * If 'node' is non-null, 'logEntry' must be null. * If 'node' is null, 'logEntry' and 'parent' must be non-null. * * When 'logEntry' is non-null we are logging an off-heap BIN, and it is * not resident in the main cache. The lastFull/DeltaLsns are not updated * here, and this must be done instead by the caller. * * When 'node' is non-null, 'parent' may or may not be null. It must be * non-null when logging provisionally, since obsolete LSNs are added to * the parent's collection. */ private static long logInternal( final IN node, INLogEntry logEntry, final boolean allowDeltas, final boolean allowCompress, final Provisional provisional, final boolean backgroundIO, final IN parent) { assert node == null || node.isLatchExclusiveOwner(); assert parent == null || parent.isLatchExclusiveOwner(); assert node != null || parent != null; assert (node == null) != (logEntry == null); final DatabaseImpl dbImpl = (node != null) ? node.getDatabase() : parent.getDatabase(); final EnvironmentImpl envImpl = dbImpl.getEnv(); final boolean countObsoleteNow = provisional != Provisional.YES || dbImpl.isTemporary(); final boolean isBin = (node != null) ? node.isBIN() : (parent.getNormalizedLevel() == 2); final BIN bin = (node != null && isBin) ? ((BIN) node) : null; final boolean isDelta; if (isBin) { if (logEntry != null) { /* * When a logEntry is supplied (node/bin are null), the logic * below is implemented by OffHeapCache.createBINLogEntry. */ isDelta = logEntry.isBINDelta(); } else { /* Compress non-dirty slots before determining delta status. */ if (allowCompress) { envImpl.lazyCompress(bin, false /*compressDirtySlots*/); } isDelta = bin.isBINDelta() || (allowDeltas && bin.shouldLogDelta()); /* Be sure that we didn't illegally mutate to a delta. */ assert (!(isDelta && bin.isDeltaProhibited())); /* Also compress dirty slots, if we will not log a delta. */ if (allowCompress && !isDelta) { envImpl.lazyCompress(bin, true /*compressDirtySlots*/); } /* * Write dirty LNs in deferred-write databases after * compression to reduce total logging, at least for temp DBs. */ if (dbImpl.isDeferredWriteMode()) { bin.logDirtyChildren(); } logEntry = isDelta ? (new BINDeltaLogEntry(bin)) : (new INLogEntry<>(bin)); } } else { assert node != null; isDelta = false; logEntry = new INLogEntry<>(node); } final LogParams params = new LogParams(); params.entry = logEntry; params.provisional = provisional; params.repContext = ReplicationContext.NO_REPLICATE; params.nodeDb = dbImpl; params.backgroundIO = backgroundIO; /* * For delta logging: * + Count lastDeltaVersion obsolete, if non-null. * + Set lastDeltaVersion to newly logged LSN. * + Leave lastFullVersion unchanged. * * For full version logging: * + Count lastFullVersion and lastDeltaVersion obsolete, if non-null. * + Set lastFullVersion to newly logged LSN. * + Set lastDeltaVersion to null. */ final long oldLsn = isDelta ? DbLsn.NULL_LSN : logEntry.getPrevFullLsn(); final long auxOldLsn = logEntry.getPrevDeltaLsn(); /* * Determine whether to count the prior version of an IN (as well as * accumulated provisionally obsolete LSNs for child nodes) obsolete * when logging the new version. * * True is set if we are logging the IN non-provisionally, since the * non-provisional version durably replaces the prior version and * causes all provisional children to also become durable. * * True is also set if the database is temporary. Since we never use a * temporary DB past recovery, prior versions of an IN are never used. * [#16928] */ if (countObsoleteNow) { params.oldLsn = oldLsn; params.auxOldLsn = auxOldLsn; params.packedObsoleteInfo = (node != null) ? node.provisionalObsolete : null; } /* Log it. */ final LogItem item = envImpl.getLogManager().log(params); if (node != null) { node.setDirty(false); } if (countObsoleteNow) { if (node != null) { node.discardProvisionalObsolete(); } } else if (parent != null) { parent.trackProvisionalObsolete(node, oldLsn); parent.trackProvisionalObsolete(node, auxOldLsn); /* * TODO: * The parent is null and provisional is YES when evicting the root * of a DW DB. How does obsolete counting happen? */ } if (bin != null) { /* * When a logEntry is supplied (node/bin are null), the logic * below is implemented by OffHeapCache.postBINLog. */ if (isDelta) { bin.setLastDeltaLsn(item.lsn); } else { bin.setLastFullLsn(item.lsn); bin.setLastDeltaLsn(DbLsn.NULL_LSN); } bin.setProhibitNextDelta(false); } else if (node != null) { node.setLastFullLsn(item.lsn); } final Evictor evictor = envImpl.getEvictor(); if (node != null && evictor.getUseDirtyLRUSet()) { /* * To capture all cases where a node needs to be moved to the * priority-1 LRUSet after being cleaned, we invoke moveToPri1LRU() * from IN.afterLog(). This includes the case where the node is * being logged as part of being evicted, in which case we don't * really want it to go back to the LRU. However, this is ok * because moveToPri1LRU() checks whether the node is actually * in the priority-2 LRUSet before moving it to the priority-1 * LRUSet. */ if (traceLRU && node.isUpperIN()) { LoggerUtils.envLogMsg( traceLevel, envImpl, Thread.currentThread().getId() + "-" + Thread.currentThread().getName() + "-" + envImpl.getName() + " afterLogCommon(): " + " Moving UIN to mixed LRU: " + node.getNodeId()); } evictor.moveToPri1LRU(node); } return item.lsn; } /** * Adds the given obsolete LSN and any tracked obsolete LSNs for the given * child IN to this IN's tracking list. This method is called to track * obsolete LSNs when a child IN is logged provisionally. Such LSNs * cannot be considered obsolete until an ancestor IN is logged * non-provisionally. */ void trackProvisionalObsolete(final IN childIN, final long obsoleteLsn) { final boolean moveChildInfo = (childIN != null && childIN.provisionalObsolete != null); final boolean addChildLsn = (obsoleteLsn != DbLsn.NULL_LSN); if (!moveChildInfo && !addChildLsn) { return; } final int oldMemSize = (provisionalObsolete != null) ? provisionalObsolete.getMemorySize() : 0; if (moveChildInfo) { if (provisionalObsolete != null) { /* Append child info to parent info. */ provisionalObsolete.copyObsoleteInfo (childIN.provisionalObsolete); } else { /* Move reference from child to parent. */ provisionalObsolete = childIN.provisionalObsolete; } childIN.updateMemorySize( 0 - childIN.provisionalObsolete.getMemorySize()); childIN.provisionalObsolete = null; } if (addChildLsn) { if (provisionalObsolete == null) { provisionalObsolete = new PackedObsoleteInfo(); } provisionalObsolete.addObsoleteInfo(obsoleteLsn); } updateMemorySize(oldMemSize, (provisionalObsolete != null) ? provisionalObsolete.getMemorySize() : 0); } /** * Discards the provisional obsolete tracking information in this node * after it has been counted in the live tracker. This method is called * after this node is logged non-provisionally. */ private void discardProvisionalObsolete() throws DatabaseException { if (provisionalObsolete != null) { updateMemorySize(0 - provisionalObsolete.getMemorySize()); provisionalObsolete = null; } } /* * NOOP for upper INs. Overriden by BIN class. */ public void mutateToFullBIN(boolean leaveFreeSlot) { } /** * Only write dirty slots when logging a BIN-delta. * * Don't write slots that are extinct when logging a full BIN, to support * erasure of extinct records. Skipping the extinct slots during logging * is necessary because extinct slots cannot be deleted by BIN.compress * when a cursor is present. * * Otherwise, when logging an IN, write all slots. */ private int getNEntriesToWrite(boolean deltasOnly) { if (deltasOnly) { return getNDeltas(); } if (!isBIN()) { return nEntries; } int n = 0; for (int i = 0; i < nEntries; i++) { if (canDeleteExtinctSlot(i)) { continue; } n += 1; } return n; } /** * Returns whether the given slot is extinct and can be deleted during * logging. * * Check KnownDeleted before checking for an extinct slot, to * reduce calls to the more expensive getExtinctionState method. * KnownDeleted is set on extinct slots by the ExtinctionScanner. */ private boolean canDeleteExtinctSlot(int idx) { assert isBIN(); final EnvironmentImpl envImpl = getEnv(); /* * The check for isValid is necessary because we cannot call the * extinction filter until recovery is complete and we have the DB * name and type [#27022]. In the future we may wish to implement a * more comprehensive check in getExtinctionState. */ return envImpl.isValid() && isEntryKnownDeleted(idx) && envImpl.getExtinctionState(databaseImpl, getKey(idx)) == EXTINCT; } public final int getNDeltas() { int n = 0; for (int i = 0; i < nEntries; i++) { if (!isDirty(i)) { continue; } n += 1; } return n; } /** * @see Node#getGenericLogType */ @Override public final LogEntryType getGenericLogType() { return getLogType(); } /** * Get the log type of this node. */ public LogEntryType getLogType() { return LogEntryType.LOG_IN; } /** * @see Loggable#getLogSize * * Overrriden by DIN and DBIN classes. */ @Override public int getLogSize() { return getLogSize(false); } public final int getLogSize(boolean deltasOnly) { BIN bin = (isBIN() ? (BIN)this : null); boolean haveVLSNCache = (bin != null && bin.isVLSNCachingEnabled()); int size = 0; boolean haveExpiration = false; if (bin != null) { int base = bin.getExpirationBase(); haveExpiration = (base != -1); size += LogUtils.getPackedIntLogSize(base); } size += LogUtils.getPackedLongLogSize(nodeId); size += LogUtils.getByteArrayLogSize(identifierKey); // identifier key if (keyPrefix != null) { size += LogUtils.getByteArrayLogSize(keyPrefix); } size += 1; // one byte for boolean flags final int nEntriesToWrite = getNEntriesToWrite(deltasOnly); final int maxEntriesToWrite = (!deltasOnly ? getMaxEntries() : bin.getDeltaCapacity(nEntriesToWrite)); size += LogUtils.getPackedIntLogSize(nEntriesToWrite); size += LogUtils.getPackedIntLogSize(level); size += LogUtils.getPackedIntLogSize(maxEntriesToWrite); final boolean compactLsnsRep = (entryLsnLongArray == null); size += LogUtils.getBooleanLogSize(); // compactLsnsRep if (compactLsnsRep) { size += LogUtils.INT_BYTES; // baseFileNumber } for (int i = 0; i < nEntries; i++) { // entries /* See getNEntriesToWrite and canDeleteExtinctSlot. */ if (deltasOnly) { if (!isDirty(i)) { continue; } } else { if (bin != null && canDeleteExtinctSlot(i)) { continue; } } size += LogUtils.getByteArrayLogSize(entryKeys.get(i)) + // key (compactLsnsRep ? LogUtils.INT_BYTES : LogUtils.getLongLogSize()) + // LSN 1; // state if (isLastLoggedSizeStored(i)) { size += LogUtils.getPackedIntLogSize(getLastLoggedSize(i)); } if (haveVLSNCache && isEmbeddedLN(i)) { size += LogUtils.getPackedLongLogSize(bin.getCachedVLSN(i)); } if (haveExpiration) { size += LogUtils.getPackedIntLogSize(bin.getExpirationOffset(i)); } } if (deltasOnly) { size += LogUtils.getPackedIntLogSize(bin.getFullBinNEntries()); size += LogUtils.getPackedIntLogSize(bin.getFullBinMaxEntries()); size += bin.getBloomFilterLogSize(); } return size; } /* * Overridden by DIN and DBIN classes. */ @Override public void writeToLog(ByteBuffer logBuffer) { serialize(logBuffer, false /*deltasOnly*/, true /*clearDirtyBits*/); } public void writeToLog(ByteBuffer logBuffer, boolean deltasOnly) { serialize(logBuffer, deltasOnly, !deltasOnly /*clearDirtyBits*/); } /** * WARNING: In the case of BINs this method is not only used for logging * but also for off-heap caching. Therefore, this method should not have * side effects unless the clearDirtyBits param is true. */ public final void serialize(ByteBuffer logBuffer, boolean deltasOnly, boolean clearDirtyBits) { assert(!deltasOnly || isBIN()); BIN bin = (isBIN() ? (BIN)this : null); byte[] bloomFilter = (deltasOnly ? bin.createBloomFilter() : null); boolean haveExpiration = false; if (bin != null) { int base = bin.getExpirationBase(); haveExpiration = (base != -1); LogUtils.writePackedInt(logBuffer, base); } LogUtils.writePackedLong(logBuffer, nodeId); LogUtils.writeByteArray(logBuffer, identifierKey); boolean hasKeyPrefix = (keyPrefix != null); boolean mayHaveLastLoggedSize = mayHaveLastLoggedSizeStored(); boolean haveVLSNCache = (bin != null && bin.isVLSNCachingEnabled()); byte booleans = (byte) (isRoot() ? 1 : 0); booleans |= (hasKeyPrefix ? 2 : 0); booleans |= (mayHaveLastLoggedSize ? 4 : 0); booleans |= (bloomFilter != null ? 8 : 0); booleans |= (haveVLSNCache ? 16 : 0); booleans |= (isExpirationInHours() ? 32 : 0); logBuffer.put(booleans); if (hasKeyPrefix) { LogUtils.writeByteArray(logBuffer, keyPrefix); } final int nEntriesToWrite = getNEntriesToWrite(deltasOnly); final int maxEntriesToWrite = (!deltasOnly ? getMaxEntries() : bin.getDeltaCapacity(nEntriesToWrite)); /* if (deltasOnly) { BIN bin = (BIN)this; System.out.println( "Logging BIN-delta: " + getNodeId() + " is delta = " + isBINDelta() + " nEntries = " + nEntriesToWrite + " max entries = " + maxEntriesToWrite + " full BIN entries = " + bin.getFullBinNEntries() + " full BIN max entries = " + bin.getFullBinMaxEntries()); } */ LogUtils.writePackedInt(logBuffer, nEntriesToWrite); LogUtils.writePackedInt(logBuffer, level); LogUtils.writePackedInt(logBuffer, maxEntriesToWrite); /* true if compact representation. */ boolean compactLsnsRep = (entryLsnLongArray == null); LogUtils.writeBoolean(logBuffer, compactLsnsRep); if (compactLsnsRep) { LogUtils.writeInt(logBuffer, (int) baseFileNumber); } for (int i = 0; i < nEntries; i++) { /* See getNEntriesToWrite and canDeleteExtinctSlot. */ if (deltasOnly) { if (!isDirty(i)) { continue; } } else { if (bin != null && canDeleteExtinctSlot(i)) { continue; } } LogUtils.writeByteArray(logBuffer, entryKeys.get(i)); /* * A NULL_LSN may be stored when an incomplete insertion occurs, * but in that case the KnownDeleted flag must be set. See * Tree.insert. [#13126] */ assert checkForNullLSN(i) : "logging IN " + getNodeId() + " with null lsn child " + " db=" + databaseImpl.getName() + " isDeferredWriteMode=" + databaseImpl.isDeferredWriteMode() + " isTemporary=" + databaseImpl.isTemporary(); if (compactLsnsRep) { int offset = i << 2; int fileOffset = getFileOffset(offset); logBuffer.put(getFileNumberOffset(offset)); logBuffer.put((byte) (fileOffset & 0xff)); logBuffer.put((byte) ((fileOffset >>> 8) & 0xff)); logBuffer.put((byte) ((fileOffset >>> 16) & 0xff)); } else { LogUtils.writeLong(logBuffer, entryLsnLongArray[i]); } logBuffer.put( (byte) (entryStates[i] & EntryStates.CLEAR_TRANSIENT_BITS)); if (clearDirtyBits) { entryStates[i] &= EntryStates.CLEAR_DIRTY_BIT; } if (isLastLoggedSizeStored(i)) { LogUtils.writePackedInt(logBuffer, getLastLoggedSize(i)); } if (haveVLSNCache && isEmbeddedLN(i)) { LogUtils.writePackedLong(logBuffer, bin.getCachedVLSN(i)); } if (haveExpiration) { LogUtils.writePackedInt( logBuffer, bin.getExpirationOffset(i)); } } if (deltasOnly) { LogUtils.writePackedInt(logBuffer, bin.getFullBinNEntries()); LogUtils.writePackedInt(logBuffer, bin.getFullBinMaxEntries()); if (bloomFilter != null) { BINDeltaBloomFilter.writeToLog(bloomFilter, logBuffer); } } } /* * Used for assertion to prevent writing a null lsn to the log. */ private boolean checkForNullLSN(int index) { boolean ok; if (isBIN()) { ok = !(getLsn(index) == DbLsn.NULL_LSN && (entryStates[index] & EntryStates.KNOWN_DELETED_BIT) == 0); } else { ok = (getLsn(index) != DbLsn.NULL_LSN); } return ok; } /** * Returns whether the given serialized IN is a BIN that may have * expiration values. */ public boolean mayHaveExpirationValues( ByteBuffer itemBuffer, int entryVersion) { if (!isBIN() || entryVersion < 12) { return false; } itemBuffer.mark(); int expirationBase = LogUtils.readPackedInt(itemBuffer); itemBuffer.reset(); return (expirationBase != -1); } @Override public void readFromLog( ByteBuffer itemBuffer, int entryVersion) { materialize( itemBuffer, entryVersion, false /*deltasOnly*/, true /*clearDirtyBits*/); } public void readFromLog( ByteBuffer itemBuffer, int entryVersion, boolean deltasOnly) { materialize( itemBuffer, entryVersion, deltasOnly, !deltasOnly /*clearDirtyBits*/); } /** * WARNING: In the case of BINs this method is used not only for logging * but also for off-heap caching. Therefore, this method should not have * side effects unless the clearDirtyBits param is true or an older log * version is passed (off-heap caching uses the current version). */ public final void materialize( ByteBuffer itemBuffer, int entryVersion, boolean deltasOnly, boolean clearDirtyBits) { assert(!deltasOnly || isBIN()); BIN bin = (isBIN() ? (BIN)this : null); boolean unpacked = (entryVersion < 6); boolean haveExpiration = false; if (bin != null && entryVersion >= 12) { int base = LogUtils.readPackedInt(itemBuffer); haveExpiration = (base != -1); bin.setExpirationBase(base); } nodeId = LogUtils.readLong(itemBuffer, unpacked); identifierKey = LogUtils.readByteArray(itemBuffer, unpacked); byte booleans = itemBuffer.get(); setIsRootFlag((booleans & 1) != 0); if ((booleans & 2) != 0) { keyPrefix = LogUtils.readByteArray(itemBuffer, unpacked); } boolean mayHaveLastLoggedSize = ((booleans & 4) != 0); assert !(mayHaveLastLoggedSize && (entryVersion < 9)); boolean hasBloomFilter = ((booleans & 8) != 0); assert(!hasBloomFilter || (entryVersion >= 10 && deltasOnly)); boolean haveVLSNCache = ((booleans & 16) != 0); assert !(haveVLSNCache && (entryVersion < 11)); setExpirationInHours((booleans & 32) != 0); nEntries = LogUtils.readInt(itemBuffer, unpacked); level = LogUtils.readInt(itemBuffer, unpacked); int length = LogUtils.readInt(itemBuffer, unpacked); entryTargets = INTargetRep.NONE; entryKeys = new INKeyRep.Default(length); baseFileNumber = -1; long storedBaseFileNumber = -1; if (disableCompactLsns) { entryLsnByteArray = null; entryLsnLongArray = new long[length]; } else { entryLsnByteArray = new byte[length << 2]; entryLsnLongArray = null; } entryStates = new byte[length]; boolean compactLsnsRep = false; if (entryVersion > 1) { compactLsnsRep = LogUtils.readBoolean(itemBuffer); if (compactLsnsRep) { baseFileNumber = LogUtils.readInt(itemBuffer); storedBaseFileNumber = baseFileNumber; } } for (int i = 0; i < nEntries; i++) { entryKeys = entryKeys.set( i, LogUtils.readByteArray(itemBuffer, unpacked), this); long lsn; if (compactLsnsRep) { /* LSNs in compact form. */ byte fileNumberOffset = itemBuffer.get(); int fileOffset = (itemBuffer.get() & 0xff); fileOffset |= ((itemBuffer.get() & 0xff) << 8); fileOffset |= ((itemBuffer.get() & 0xff) << 16); if (fileOffset == THREE_BYTE_NEGATIVE_ONE) { lsn = DbLsn.NULL_LSN; } else { lsn = DbLsn.makeLsn (storedBaseFileNumber + fileNumberOffset, fileOffset); } } else { /* LSNs in long form. */ lsn = LogUtils.readLong(itemBuffer); // LSN } setLsnInternal(i, lsn); byte entryState = itemBuffer.get(); // state if (clearDirtyBits) { entryState &= EntryStates.CLEAR_DIRTY_BIT; } /* * The MIGRATE_BIT (now the transient OFFHEAP_DIRTY_BIT) was * accidentally written in a pre-JE 6 log version. */ if (entryVersion < 9) { entryState &= EntryStates.CLEAR_TRANSIENT_BITS; } /* * A NULL_LSN is the remnant of an incomplete insertion and the * KnownDeleted flag should be set. But because of bugs in prior * releases, the KnownDeleted flag may not be set. So set it here. * See Tree.insert. [#13126] */ if (entryVersion < 9 && lsn == DbLsn.NULL_LSN) { entryState |= EntryStates.KNOWN_DELETED_BIT; } entryStates[i] = entryState; if (mayHaveLastLoggedSize && !isEmbeddedLN(i)) { setLastLoggedSizeUnconditional( i, LogUtils.readPackedInt(itemBuffer)); } if (haveVLSNCache && isEmbeddedLN(i)) { bin.setCachedVLSNUnconditional( i, LogUtils.readPackedLong(itemBuffer)); } if (haveExpiration) { bin.setExpirationOffset(i, LogUtils.readPackedInt(itemBuffer)); } } if (deltasOnly) { setBINDelta(true); if (entryVersion >= 10) { bin.setFullBinNEntries(LogUtils.readPackedInt(itemBuffer)); bin.setFullBinMaxEntries(LogUtils.readPackedInt(itemBuffer)); if (hasBloomFilter) { bin.bloomFilter = BINDeltaBloomFilter.readFromLog( itemBuffer, entryVersion); } } } /* Dup conversion will be done by postFetchInit. */ needDupKeyConversion = (entryVersion < 8); } /** * @see Loggable#logicalEquals * Always return false, this item should never be compared. */ @Override public final boolean logicalEquals(Loggable other) { return false; } /** * @see Loggable#dumpLog */ @Override public final void dumpLog(StringBuilder sb, boolean verbose) { sb.append(beginTag()); sb.append(""); sb.append(Key.dumpString(identifierKey, "idKey", 0)); // isRoot sb.append(""); // level sb.append(""); if (keyPrefix != null) { sb.append(Key.dumpString(keyPrefix, "keyPrefix", 0)); } // nEntries, length of entries array sb.append(""); if (verbose) { for (int i = 0; i < nEntries; i++) { sb.append(""); sb.append(Key.dumpString(getKey(i), 0)); if (isEmbeddedLN(i)) { sb.append(Key.dumpString(getData(i), "data", 0)); } sb.append(DbLsn.toString(getLsn(i))); sb.append(""); } } sb.append(""); if (isBINDelta(false)) { if (bin.bloomFilter != null) { BINDeltaBloomFilter.dumpLog(bin.bloomFilter, sb, verbose); } } /* Add on any additional items from subclasses before the end tag. */ dumpLogAdditional(sb); sb.append(endTag()); } /** * Allows subclasses to add additional fields before the end tag. If they * just overload dumpLog, the xml isn't nested. */ protected void dumpLogAdditional(StringBuilder sb) { } public String beginTag() { return BEGIN_TAG; } public String endTag() { return END_TAG; } /** * For unit test support: * @return a string that dumps information about this IN, without */ @Override public String dumpString(int nSpaces, boolean dumpTags) { StringBuilder sb = new StringBuilder(); if (dumpTags) { sb.append(TreeUtils.indent(nSpaces)); sb.append(beginTag()); sb.append('\n'); } if (dumpTags) { sb.append(TreeUtils.indent(nSpaces)); sb.append(""); } else { sb.append(nodeId); } sb.append('\n'); BIN bin = null; if (isBIN()) { bin = (BIN) this; } sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append(identifierKey == null ? "" : Key.dumpString(identifierKey, 0)); sb.append(""); sb.append('\n'); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append(keyPrefix == null ? "" : Key.dumpString(keyPrefix, 0)); sb.append("\n"); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append('\n'); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append('\n'); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append('\n'); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append( ""); if (bin != null) { sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); } sb.append('\n'); sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append('\n'); for (int i = 0; i < nEntries; i++) { sb.append(TreeUtils.indent(nSpaces + 4)); sb.append("\n"); if (getLsn(i) == DbLsn.NULL_LSN) { sb.append(TreeUtils.indent(nSpaces + 6)); sb.append(""); } else { sb.append(DbLsn.dumpString(getLsn(i), nSpaces + 6)); } sb.append('\n'); if (entryKeys.get(i) == null) { sb.append(TreeUtils.indent(nSpaces + 6)); sb.append(""); } else { sb.append(Key.dumpString(entryKeys.get(i), (nSpaces + 6))); } sb.append('\n'); if (getOffHeapBINId(i) >= 0) { sb.append("\n"); } if (bin != null && bin.getOffHeapLNId(i) != 0) { sb.append("\n"); } if (entryTargets.get(i) == null) { sb.append(TreeUtils.indent(nSpaces + 6)); sb.append(""); } else { sb.append(entryTargets.get(i).dumpString(nSpaces + 6, true)); } sb.append('\n'); sb.append(TreeUtils.indent(nSpaces + 4)); sb.append(""); sb.append('\n'); } sb.append(TreeUtils.indent(nSpaces + 2)); sb.append(""); sb.append('\n'); if (dumpTags) { sb.append(TreeUtils.indent(nSpaces)); sb.append(endTag()); } return sb.toString(); } private void dumpSlotState(StringBuilder sb, int i, BIN bin) { sb.append(" kd=\"").append(isEntryKnownDeleted(i)); sb.append("\" pd=\"").append(isEntryPendingDeleted(i)); sb.append("\" dirty=\"").append(isDirty(i)); sb.append("\" embedded=\"").append(isEmbeddedLN(i)); sb.append("\" noData=\"").append(isNoDataLN(i)); if (bin != null) { sb.append("\" logSize=\""); sb.append(bin.getLastLoggedSizeUnconditional(i)); long vlsn = bin.getCachedVLSN(i); if (!VLSN.isNull(vlsn)) { sb.append("\" vlsn=\"").append(vlsn); } } if (bin != null && bin.getExpiration(i) != 0) { sb.append("\" expires=\""); sb.append(TTL.formatExpiration( bin.getExpiration(i), bin.isExpirationInHours())); } sb.append("\""); } /** * Converts to an identifying string that is safe to output in a log. * Keys are not included for security/privacy reasons. */ public String toSafeString(final int... slotIndexes) { final BIN bin = isBIN() ? (BIN) this : null; final StringBuilder sb = new StringBuilder(); sb.append("IN nodeId=").append(getNodeId()); sb.append(" lastLoggedLSN="); sb.append(DbLsn.getNoFormatString(getLastLoggedLsn())); sb.append(" lastFulLSN="); sb.append(DbLsn.getNoFormatString(getLastFullLsn())); sb.append(" level=").append(Integer.toHexString(getLevel())); sb.append(" flags=").append(Integer.toHexString(flags)); sb.append(" isBINDelta=").append(isBINDelta()); sb.append(" nSlots=").append(getNEntries()); if (slotIndexes != null) { for (final int i : slotIndexes) { sb.append(" slot-").append(i).append(":["); sb.append("lsn="); sb.append(DbLsn.getNoFormatString(getLsn(i))); sb.append(" offset="); sb.append(DbLsn.getFileOffset(getLsn(i))); if (bin != null) { sb.append(" offset+logSize="); sb.append(DbLsn.getFileOffset(getLsn(i)) + bin.getLastLoggedSizeUnconditional(i)); } dumpSlotState(sb, i, bin); sb.append("]"); } } return sb.toString(); } @Override public String toString() { return dumpString(0, true); } public String shortClassName() { return "IN"; } /** * Send trace messages to the java.util.logger. Don't rely on the logger * alone to conditionalize whether we send this message, we don't even want * to construct the message if the level is not enabled. */ private void traceSplit(Level level, IN parent, IN newSibling, long parentLsn, long myNewLsn, long newSiblingLsn, int splitIndex, int idKeyIndex, int childIndex) { Logger logger = getEnv().getLogger(); if (logger.isLoggable(level)) { StringBuilder sb = new StringBuilder(); sb.append(TRACE_SPLIT); sb.append(" parent="); sb.append(parent.getNodeId()); sb.append(" child="); sb.append(getNodeId()); sb.append(" newSibling="); sb.append(newSibling.getNodeId()); sb.append(" parentLsn = "); sb.append(DbLsn.getNoFormatString(parentLsn)); sb.append(" childLsn = "); sb.append(DbLsn.getNoFormatString(myNewLsn)); sb.append(" newSiblingLsn = "); sb.append(DbLsn.getNoFormatString(newSiblingLsn)); sb.append(" splitIdx="); sb.append(splitIndex); sb.append(" idKeyIdx="); sb.append(idKeyIndex); sb.append(" childIdx="); sb.append(childIndex); LoggerUtils.logMsg(logger, databaseImpl.getEnv(), level, sb.toString()); } } /** * Send trace messages to the java.util.logger. Don't rely on the logger * alone to conditionalize whether we send this message, we don't even want * to construct the message if the level is not enabled. */ private void traceDelete(Level level, int index) { Logger logger = databaseImpl.getEnv().getLogger(); if (logger.isLoggable(level)) { StringBuilder sb = new StringBuilder(); sb.append(TRACE_DELETE); sb.append(" in=").append(getNodeId()); sb.append(" index="); sb.append(index); LoggerUtils.logMsg(logger, databaseImpl.getEnv(), level, sb.toString()); } } public final void setFetchINHook(TestHook hook) { fetchINHook = hook; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy