com.sleepycat.je.tree.IN Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of starrocks-bdb-je Show documentation
starrocks managed bdb je
The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.tree;

import static com.sleepycat.je.EnvironmentFailureException.unexpectedState;
import static com.sleepycat.je.ExtinctionFilter.ExtinctionStatus.EXTINCT;
import static com.sleepycat.je.ExtinctionFilter.ExtinctionStatus.NOT_EXTINCT;

import java.io.FileNotFoundException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Comparator;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.cleaner.PackedObsoleteInfo;
import com.sleepycat.je.dbi.DatabaseId;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.DbTree;
import com.sleepycat.je.dbi.EnvironmentFailureReason;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.dbi.MemoryBudget;
import com.sleepycat.je.dbi.TTL;
import com.sleepycat.je.evictor.Evictor;
import com.sleepycat.je.evictor.OffHeapCache;
import com.sleepycat.je.latch.LatchContext;
import com.sleepycat.je.latch.LatchFactory;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.latch.LatchTable;
import com.sleepycat.je.latch.SharedLatch;
import com.sleepycat.je.log.ErasedException;
import com.sleepycat.je.log.LogEntryType;
import com.sleepycat.je.log.LogItem;
import com.sleepycat.je.log.LogParams;
import com.sleepycat.je.log.LogUtils;
import com.sleepycat.je.log.Loggable;
import com.sleepycat.je.log.Provisional;
import com.sleepycat.je.log.ReplicationContext;
import com.sleepycat.je.log.WholeEntry;
import com.sleepycat.je.log.entry.BINDeltaLogEntry;
import com.sleepycat.je.log.entry.INLogEntry;
import com.sleepycat.je.log.entry.LNLogEntry;
import com.sleepycat.je.log.entry.LogEntry;
import com.sleepycat.je.tree.dupConvert.DBIN;
import com.sleepycat.je.tree.dupConvert.DIN;
import com.sleepycat.je.tree.dupConvert.DupConvert;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.SizeofMarker;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.VLSN;

/**
 * An IN represents an Internal Node in the JE tree.
 *
 * Explanation of KD (KnownDeleted) and PD (PendingDelete) entry flags
 * ===================================================================
 *
 * PD: set for all LN entries that are deleted, even before the LN is
 * committed.  Is used as an authoritative (transactionally correct) indication
 * that an LN is deleted. PD will be cleared if the txn for the deleted LN is
 * aborted.
 *
 * KD: set under special conditions for entries containing LNs which are known
 * to be obsolete.  Not used for entries in an active/uncommitted transaction.
 *
 * First notice that IN.fetchLN will allow a FileNotFoundException when the
 * PD or KD flag is set on the entry.  And it will allow a NULL_LSN when the KD
 * flag is set.
 *
 * KD was implemented first, and was originally used when the cleaner attempts
 * to migrate an LN and discovers it is deleted (see Cleaner.migrateLN). We
 * need KD because the INCompressor may not have run, and may not have
 * compressed the BIN. There's the danger that we'll try to fetch that entry,
 * and that the file was deleted by the cleaner.
 *
 * KD was used more recently when an unexpected exception occurs while logging
 * an LN, after inserting the entry.  Rather than delete the entry to clean up,
 * we mark the entry KD so it won't cause a fetch error later.  In this case
 * the entry LSN is NULL_LSN. See Tree.insertNewSlot.
 *
 * PD is closely related to the first use of KD above (for cleaned deleted LNs)
 * and came about because of a cleaner optimization we make. The cleaner
 * considers all deleted LN log entries to be obsolete, without doing a tree
 * lookup, and without any record of an obsolete offset.  This makes the cost
 * of cleaning of deleted LNs very low.  For example, if the log looks like
 * this:
 *
 * 100  LNA
 * 200  delete of LNA
 *
 * then LSN 200 will be considered obsolete when this file is processed by the
 * cleaner. After all, only two things can happen: (1) the txn commits, and we
 * don't need LSN 200, because we can wipe this LN out of the tree, or (2) the
 * txn aborts, and we don't need LSN 200, because we are going to revert to LSN
 * 100/LNA.
 *
 * We set PD for the entry of a deleted LN at the time of the operation, and we
 * clear PD if the transaction aborts.  There is no real danger that this log
 * entry will be processed by the cleaner before it's committed, because
 * cleaning can only happen after the first active LSN.
 *
 * Just as in the first use of KD above, setting PD is necessary to avoid a
 * fetch error, when the file is deleted by the cleaner but the entry
 * containing the deleted LN has not been deleted by the INCompressor.
 *
 * PD is also set in replication rollback, when LNs are marked as
 * invisible.
 *
 * When LSN locking was implemented (see CursorImpl.lockLN), the PD flag took
 * on additional meaning.  PD is used to determine whether an LN is deleted
 * without fetching it, and therefore is relied on to be transactionally
 * correct.
 *
 * In addition to the setting and use of the KD/PD flags described above, the
 * situation is complicated by the fact that we must restore the state of these
 * flags during abort, recovery, and set them properly during slot reuse.
 *
 * We have been meaning to consider whether PD and KD can be consolidated into
 * one flag: simply the Deleted flag.  The Deleted flag would be set in the
 * same way as PD is currently set, as well as the second use of KD described
 * above (when the LSN is NULL_LSN after an insertion error).  The use of KD
 * and PD for invisible entries and recovery rollback should also be
 * considered.
 *
 * If we consolidate the two flags and set the Deleted flag during a delete
 * operation (like PD), we'll have to remove optimizations (in CursorImpl for
 * example) that consider a slot deleted when KD is set.  Since KD is rarely
 * set currently, this shouldn't have a noticeable performance impact.
 */
public class IN extends Node implements Comparable, LatchContext {

    private static final String BEGIN_TAG = "";
    private static final String END_TAG = "";
    private static final String TRACE_SPLIT = "Split:";
    private static final String TRACE_DELETE = "Delete:";

    private static final int BYTES_PER_LSN_ENTRY = 4;
    public static final int MAX_FILE_OFFSET = 0xfffffe;
    private static final int THREE_BYTE_NEGATIVE_ONE = 0xffffff;

    /**
     * Used as the "empty rep" for the INLongRep offHeapBINIds field.
     *
     * minLength is 3 because BIN IDs are LRU list indexes. Initially 100k
     * indexes are allocated and the largest values are used first.
     *
     * allowSparseRep is true because some workloads will only load BIN IDs for
     * a subset of the BINs in the IN.
     */
    private static final INLongRep.EmptyRep EMPTY_OFFHEAP_BIN_IDS =
        new INLongRep.EmptyRep(3, true);

    /*
     * Levels:
     * The mapping tree has levels in the 0x20000 -> 0x2ffff number space.
     * The main tree has levels in the 0x10000 -> 0x1ffff number space.
     * The duplicate tree levels are in 0-> 0xffff number space.
     */
    public static final int DBMAP_LEVEL = 0x20000;
    public static final int MAIN_LEVEL = 0x10000;
    public static final int LEVEL_MASK = 0x0ffff;
    public static final int MIN_LEVEL = -1;
    public static final int BIN_LEVEL = MAIN_LEVEL | 1;

    /* Used to indicate that an exact match was found in findEntry. */
    public static final int EXACT_MATCH = (1 << 16);

    /* Used to indicate that an insert was successful. */
    public static final int INSERT_SUCCESS = (1 << 17);

    /*
     * A bit flag set in the return value of partialEviction() to indicate
     * whether the IN is evictable or not.
     */
    public static final long NON_EVICTABLE_IN = (1L << 62);

    /*
     * Boolean properties of an IN, encoded as bits inside the flags
     * data member.
     */
    private static final int IN_DIRTY_BIT = 0x1;
    private static final int IN_RECALC_TOGGLE_BIT = 0x2;
    private static final int IN_IS_ROOT_BIT = 0x4;
    private static final int IN_HAS_CACHED_CHILDREN_BIT = 0x8;
    private static final int IN_PRI2_LRU_BIT = 0x10;
    private static final int IN_DELTA_BIT = 0x20;
    private static final int IN_FETCHED_COLD_BIT = 0x40;
    private static final int IN_FETCHED_COLD_OFFHEAP_BIT = 0x80;
    private static final int IN_RESIDENT_BIT = 0x100;
    private static final int IN_PROHIBIT_NEXT_DELTA_BIT = 0x200;
    private static final int IN_EXPIRATION_IN_HOURS = 0x400;

    /* Tracing for LRU-related ops */
    private static final boolean traceLRU = false;
    private static final boolean traceDeltas = false;
    private static final Level traceLevel = Level.INFO;

    DatabaseImpl databaseImpl;

    private int level;

    /* The unique id of this node. */
    long nodeId;

    /* Some bits are persistent and some are not, see serialize. */
    int flags;

    /*
     * The identifier key is a key that can be used used to search for this IN.
     * Initially it is the key of the zeroth slot, but insertions prior to slot
     * zero make this no longer true.  It is always equal to some key in the
     * IN, and therefore it is changed by BIN.compress when removing slots.
     */
    private byte[] identifierKey;

    int nEntries;

    byte[] entryStates;

    /*
     * entryKeys contains the keys in their entirety if key prefixing is not
     * being used. If prefixing is enabled, then keyPrefix contains the prefix
     * and entryKeys contains the suffixes. Records with small enough data
     * (smaller than the value je.tree.maxEmbeddedLN param) are stored in
     * their entirity (both key (or key suffix) and data) inside BINs. This is
     * done by combining the record key and data as a two-part key (see the
     * dbi/DupKeyData class) and storing the resulting array in entryKeys.
     * A special case is when the record to be embedded has no data. Then,
     * the two-part key format is not used, but instead the NO_DATA_LN_BIT
     * is turned on in the slot's state. This saves the space overhead of
     * using the two-part key format.
     */
    INKeyRep entryKeys;
    byte[] keyPrefix;

    /*
     * The following entryLsnXXX fields are used for storing LSNs.  There are
     * two possible representations: a byte array based rep, and a long array
     * based one.  For compactness, the byte array rep is used initially.  A
     * single byte[] that uses four bytes per LSN is used. The baseFileNumber
     * field contains the lowest file number of any LSN in the array.  Then for
     * each entry (four bytes each), the first byte contains the offset from
     * the baseFileNumber of that LSN's file number.  The remaining three bytes
     * contain the file offset portion of the LSN.  Three bytes will hold a
     * maximum offset of 16,777,214 (0xfffffe), so with the default JE log file
     * size of 10,000,000 bytes this works well.
     *
     * If either (1) the difference in file numbers exceeds 127
     * (Byte.MAX_VALUE) or (2) the file offset is greater than 16,777,214, then
     * the byte[] based rep mutates to a long[] based rep.
     *
     * In the byte[] rep, DbLsn.NULL_LSN is represented by setting the file
     * offset bytes for a given entry to -1 (0xffffff).
     *
     * Note: A compact representation will be changed to the non-compact one,
     * if needed, but in the current implementation, the reverse mutation
     * (from long to compact) never takes place.
     */
    long baseFileNumber;
    byte[] entryLsnByteArray;
    long[] entryLsnLongArray;
    public static boolean disableCompactLsns; // DbCacheSize only

    /*
     * The children of this IN. Only the ones that are actually in the cache
     * have non-null entries. Specialized sparse array represents are used to
     * represent the entries. The representation can mutate as modifications
     * are made to it.
     */
    INTargetRep entryTargets;

    /*
     * In a level 2 IN, the LRU IDs of the child BINs.
     */
    private INLongRep offHeapBINIds = EMPTY_OFFHEAP_BIN_IDS;

    long inMemorySize;

    /*
     * accumluted memory budget delta.  Once this exceeds
     * MemoryBudget.ACCUMULATED_LIMIT we inform the MemoryBudget that a change
     * has occurred.  See SR 12273.
     */
    private int accumulatedDelta = 0;

    /*
     * Max allowable accumulation of memory budget changes before MemoryBudget
     * should be updated. This allows for consolidating multiple calls to
     * updateXXXMemoryBudget() into one call.  Not declared final so that the
     * unit tests can modify it.  See SR 12273.
     */
    public static final int ACCUMULATED_LIMIT_DEFAULT = 1000;
    public static int ACCUMULATED_LIMIT = ACCUMULATED_LIMIT_DEFAULT;

    /**
     * References to the next and previous nodes in an LRU list. If the node
     * is not in any LRUList, both of these will be null. If the node is at
     * the front/back of an LRUList, prevLRUNode/nextLRUNode will point to
     * the node itself.
     */
    private IN nextLRUNode = null;
    private IN prevLRUNode = null;

    /*
     * Let L be the most recently written logrec for this IN instance.
     * (a) If this is a UIN, lastFullVersion is the lsn of L.
     * (b) If this is a BIN instance and L is a full-version logrec,
     *     lastFullVersion is the lsn of L.
     * (c) If this is a BIN instance and L is a delta logrec, lastFullVersion
     *     is the lsn of the most recently written full-version logrec for the
     *     same BIN.
     *
     * It is set in 2 cases:
     *
     * (a) after "this" is created via reading a logrec L, lastFullVersion is
     * set to L's lsn, if L is a UIN or a full BIN. (this is done in
     * IN.postFetch/RecoveryInit(), via IN.setLastLoggedLsn()). If L is a BIN
     * delta, lastFullVersion is set by BINDeltaLogEntry.readEntry() to
     * L.prevFullLsn.
     *
     * (b) After logging a UIN or a full-BIN logrec, it is set to the LSN of
     * the logrec written. This is done in IN.afterLog().
     *
     * Notice that this is a persistent field, but except from case (c), when
     * reading a logrec L, it is set not to the value found in L, but to the
     * lsn of L. This is why its read/write is managed by the INLogEntry class
     * rather than the IN readFromLog/writeFromLog methods.
     */
    long lastFullVersion = DbLsn.NULL_LSN;

    /*
     * BINs have a lastDeltaVersion data field as well, which is defined as
     * follows:
     *
     * Let L be the most recently written logrec for this BIN instance. If
     * L is a full-version logrec, lastDeltaVersion is NULL; otherwise it
     * is the lsn of L.
     *
     * It is used for obsolete tracking.
     *
     * It is set in 2 cases:
     *
     * (a) after "this" is created via reading a logrec L, lastDeltaVersion
     * is set to L's lsn, if L is a BIN-delta logrec, or to NULL if L is a
     * full-BIN logrec (this is done in IN.postFetch/RecoveryInit(), via
     * BIN.setLastLoggedLsn()).
     *
     * (b) After we write a logrec L for this BIN instance, lastDeltaVersion
     * is set to NULL if L is a full-BIN logrec, or to L's lsn, if L is a
     * BIN-delta logrec (this is done in BIN.afterLog()).
     *
     * Notice that this is a persistent field, but when reading a logrec L,
     * it is set not to the value found in L, but to the lsn of L. This is why
     * its read/write is managed by the INLogEntry class rather than the IN
     * readFromLog/writeFromLog methods.
     *
     * private long lastDeltaVersion = DbLsn.NULL_LSN;
     */


    /*
     * A sequence of obsolete info that cannot be counted as obsolete until an
     * ancestor IN is logged non-provisionally.
     */
    private PackedObsoleteInfo provisionalObsolete;

    /* See convertDupKeys. */
    private boolean needDupKeyConversion;

    private int pinCount = 0;

    private SharedLatch latch;

    private IN parent;

    private TestHook fetchINHook;

    /**
     * Create an empty IN, with no node ID, to be filled in from the log.
     */
    public IN() {
        init(null, Key.EMPTY_KEY, 0, 0);
    }

    /**
     * Create a new IN.
     */
    public IN(
        DatabaseImpl dbImpl,
        byte[] identifierKey,
        int capacity,
        int level) {

        nodeId = dbImpl.getEnv().getNodeSequence().getNextLocalNodeId();

        init(dbImpl, identifierKey, capacity,
            generateLevel(dbImpl.getId(), level));

        initMemorySize();
    }

    /**
     * For Sizeof.
     */
    public IN(@SuppressWarnings("unused") SizeofMarker marker) {

        /*
         * Set all variable fields to null, since they are not part of the
         * fixed overhead.
         */
        entryTargets = null;
        entryKeys = null;
        keyPrefix = null;
        entryLsnByteArray = null;
        entryLsnLongArray = null;
        entryStates = null;

        latch = LatchFactory.createSharedLatch(
            LatchSupport.DUMMY_LATCH_CONTEXT, isAlwaysLatchedExclusively());

        /*
         * Use the latch to force it to grow to "runtime size".
         */
        latch.acquireExclusive();
        latch.release();
        latch.acquireExclusive();
        latch.release();
    }

    /**
     * Create a new IN.  Need this because we can't call newInstance() without
     * getting a 0 for nodeId.
     */
    IN createNewInstance(
        byte[] identifierKey,
        int maxEntries,
        int level) {
        return new IN(databaseImpl, identifierKey, maxEntries, level);
    }

    /**
     * Initialize IN object.
     */
    protected void init(
        DatabaseImpl db,
        @SuppressWarnings("hiding")
        byte[] identifierKey,
        int initialCapacity,
        @SuppressWarnings("hiding")
        int level) {

        setDatabase(db);
        latch = LatchFactory.createSharedLatch(
            this, isAlwaysLatchedExclusively());
        flags = 0;
        nEntries = 0;
        this.identifierKey = identifierKey;
        entryTargets = INTargetRep.NONE;
        entryKeys = new INKeyRep.Default(initialCapacity);
        keyPrefix = null;
        baseFileNumber = -1;

        /*
         * Normally we start out with the compact LSN rep and then mutate to
         * the long rep when needed.  But for some purposes (DbCacheSize) we
         * start out with the long rep and never use the compact rep.
         */
        if (disableCompactLsns) {
            entryLsnByteArray = null;
            entryLsnLongArray = new long[initialCapacity];
        } else {
            entryLsnByteArray = new byte[initialCapacity << 2];
            entryLsnLongArray = null;
        }

        entryStates = new byte[initialCapacity];
        this.level = level;
    }

    @Override
    public final boolean isIN() {
        return true;
    }

    @Override
    public final boolean isUpperIN() {
        return !isBIN();
    }

    @Override
    public final String getLatchName() {
        return shortClassName() + getNodeId();
    }

    @Override
    public final int getLatchTimeoutMs() {
        return databaseImpl.getEnv().getLatchTimeoutMs();
    }

    @Override
    public final LatchTable getLatchTable() {
        return LatchSupport.btreeLatchTable;
    }

    /*
     * Return whether the shared latch for this kind of node should be of the
     * "always exclusive" variety.  Presently, only IN's are actually latched
     * shared.  BINs are latched exclusive only.
     */
    boolean isAlwaysLatchedExclusively() {
        return false;
    }

    /**
     * Latch this node if it is not latched by another thread. Update the LRU
     * using the given cacheMode if the latch succeeds.
     */
    public final boolean latchNoWait(CacheMode cacheMode) {
        if (!latch.acquireExclusiveNoWait()) {
            return false;
        }
        updateLRU(cacheMode);
        return true;
    }

    /**
     * Latch this node exclusive and update the LRU using the given cacheMode.
     */
    public void latch(CacheMode cacheMode) {
        latch.acquireExclusive();
        updateLRU(cacheMode);
    }

    /**
     * Latch this node exclusive and update the LRU using the default cacheMode.
     */
    public final void latch() {
        latch(CacheMode.DEFAULT);
    }

    /**
     * Latch this node shared and update the LRU using the given cacheMode.
     */
    @Override
    public void latchShared(CacheMode cacheMode) {
        latch.acquireShared();
        updateLRU(cacheMode);
    }

    /**
     * Latch this node shared and update the LRU using the default cacheMode.
     */
    @Override
    public final void latchShared() {
        latchShared(CacheMode.DEFAULT);
    }

    /**
     * Latch this node exclusive and do not update the LRU or cause other
     * related side effects.
     *
     * @param db is passed in order to initialize the database for an
     * uninitialized node, which is necessary in order to latch it.
     */
    public final void latchNoUpdateLRU(DatabaseImpl db) {
        if (databaseImpl == null) {
            databaseImpl = db;
        }
        latch.acquireExclusive();
    }

    /**
     * Latch this node exclusive and do not update the LRU or cause other
     * related side effects.
     */
    public final void latchNoUpdateLRU() {
        assert databaseImpl != null;
        latch.acquireExclusive();
    }

    /**
     * Release the latch on this node.
     */
    @Override
    public final void releaseLatch() {
        latch.release();
    }

    /**
     * Release the latch on this node if it is owned.
     */
    public final void releaseLatchIfOwner() {
        latch.releaseIfOwner();
    }

    /**
     * @return true if this thread holds the IN's latch
     */
    public final boolean isLatchOwner() {
        return latch.isOwner();
    }

    public final boolean isLatchExclusiveOwner() {
        return latch.isExclusiveOwner();
    }

    /* For unit testing. */
    public final int getLatchNWaiters() {
        return latch.getNWaiters();
    }

    public final void updateLRU(CacheMode cacheMode) {

        if (!getInListResident()) {
            return;
        }

        switch (cacheMode) {
        case UNCHANGED:
        case MAKE_COLD:
            break;
        case DEFAULT:
        case EVICT_LN:
        case EVICT_BIN:
        case KEEP_HOT:
            setFetchedCold(false);
            setFetchedColdOffHeap(false);

            if (isBIN() || !hasCachedChildrenFlag()) {
                assert(isBIN() || !hasCachedChildren());
                getEvictor().moveBack(this);
            }
            break;
        default:
            assert false;
        }
    }

    /**
     * This method should be used carefully. Unless this node and the parent
     * are already known to be latched, call latchParent instead to access the
     * parent safely.
     */
    public IN getParent() {
        return parent;
    }

    public void setParent(IN in) {
        assert in != null;

        /*
         * Must hold EX-latch when changing a non-null parent. But when setting
         * the parent initially (it is currently null), we assume it is being
         * attached and no other threads have access to it.
         */
        if (parent != null && !isLatchExclusiveOwner()) {
            throw unexpectedState();
        }

        parent = in;
    }

    /**
     * Latches the parent exclusively, leaving this node latched. The parent
     * must not already be latched.
     *
     * This node must be latched on entry and will be latched on exit. This
     * node's latch may be released temporarily, in which case it will be
     * ex-latched (since the parent is ex-latched, this isn't a drawback).
     *
     * Does not perform cache mode processing, since this node is already
     * latched.
     *
     * @return the ex-latched parent, for which calling getKnownChildIndex with
     * this node is guaranteed to succeed.
     *
     * @throws EnvironmentFailureException (fatal) if the parent latch is
     * already held.
     */
    public final IN latchParent() {

        assert latch.isOwner();
        assert !isRoot();
        assert getParent() != null;

        while (true) {
            final IN p = getParent();

            if (p.latch.acquireExclusiveNoWait()) {
                return p;
            }

            pin();
            try {
                latch.release();
                p.latch.acquireExclusive();
                latch.acquireExclusive();
            } finally {
                unpin();
            }

            if (getParent() == p) {
                return p;
            }

            p.latch.release();
        }
    }

    /**
     * Returns the index of the given child. Should only be called when the
     * caller knows that the given child is resident.
     */
    public int getKnownChildIndex(final Node child) {

        for (int i = 0; i < nEntries; i += 1) {
            if (getTarget(i) == child) {
                return i;
            }
        }

        throw unexpectedState();
    }

    public final synchronized void pin() {
        assert(isLatchOwner());
        assert(pinCount >= 0);
        ++pinCount;
    }

    public final synchronized void unpin() {
        assert(pinCount > 0);
        --pinCount;
    }

    public final synchronized boolean isPinned() {
        assert(isLatchExclusiveOwner());
        assert(pinCount >= 0);
        return pinCount > 0;
    }

    /**
     * Get the database for this IN.
     */
    public final DatabaseImpl getDatabase() {
        return databaseImpl;
    }

    /**
     * Set the database reference for this node.
     */
    public final void setDatabase(DatabaseImpl db) {
        databaseImpl = db;
    }

    /*
     * Get the database id for this node.
     */
    public final DatabaseId getDatabaseId() {
        return databaseImpl.getId();
    }

    @Override
    public final EnvironmentImpl getEnvImplForFatalException() {
        return databaseImpl.getEnv();
    }

    public final EnvironmentImpl getEnv() {
        return databaseImpl.getEnv();
    }

    final Evictor getEvictor() {
        return databaseImpl.getEnv().getEvictor();
    }

    final OffHeapCache getOffHeapCache() {
        return databaseImpl.getEnv().getOffHeapCache();
    }

    /**
     * Convenience method to return the database key comparator.
     */
    public final Comparator getKeyComparator() {
        return databaseImpl.getKeyComparator();
    }

    @Override
    public final int getLevel() {
        return level;
    }

    public final int getNormalizedLevel() {
        return level & LEVEL_MASK;
    }

    private static int generateLevel(DatabaseId dbId, int newLevel) {
        if (dbId.equals(DbTree.ID_DB_ID)) {
            return newLevel | DBMAP_LEVEL;
        } else {
            return newLevel | MAIN_LEVEL;
        }
    }

    public final long getNodeId() {
        return nodeId;
    }

    /* For unit tests only. */
    final void setNodeId(long nid) {
        nodeId = nid;
    }

    /**
     * We would like as even a hash distribution as possible so that the
     * Evictor's LRU is as accurate as possible.  ConcurrentHashMap takes the
     * value returned by this method and runs its own hash algorithm on it.
     * So a bit complement of the node ID is sufficient as the return value and
     * is a little better than returning just the node ID.  If we use a
     * different container in the future that does not re-hash the return
     * value, we should probably implement the Wang-Jenkins hash function here.
     */
    @Override
    public final int hashCode() {
        return (int) ~getNodeId();
    }

    @Override
    public final boolean equals(Object obj) {
        if (!(obj instanceof IN)) {
            return false;
        }
        IN in = (IN) obj;
        return (this.getNodeId() == in.getNodeId());
    }

    /**
     * Sort based on equality key.
     */
    public final int compareTo(IN argIN) {
        long argNodeId = argIN.getNodeId();
        long myNodeId = getNodeId();

        if (myNodeId < argNodeId) {
            return -1;
        } else if (myNodeId > argNodeId) {
            return 1;
        } else {
            return 0;
        }
    }

    public final boolean getDirty() {
        return (flags & IN_DIRTY_BIT) != 0;
    }

    public final void setDirty(boolean dirty) {
        if (dirty) {
            flags |= IN_DIRTY_BIT;
        } else {
            flags &= ~IN_DIRTY_BIT;
        }
    }

    @Override
    public final boolean isBINDelta() {
        assert(isUpperIN() || isLatchOwner());
        return (flags & IN_DELTA_BIT) != 0;
    }

    /*
     * This version of isBINDelta() takes a checkLatched param to allow
     * for cases where it is ok to call the method without holding the
     * BIN latch (e.g. in single-threaded tests, or when the BIN is not
     * attached to the tree (and thus inaccessible from other threads)).
     */
    @Override
    public final boolean isBINDelta(boolean checkLatched) {
        assert(!checkLatched || isUpperIN() || isLatchOwner());
        return (flags & IN_DELTA_BIT) != 0;
    }

    final void setBINDelta(boolean delta) {
        if (delta) {
            flags |= IN_DELTA_BIT;
        } else {
            flags &= ~IN_DELTA_BIT;
        }
    }

    /**
     * Indicates that the BIN was fetched from disk, or loaded from the
     * off-heap cache, using CacheMode.UNCHANGED, and has not been accessed
     * with another CacheMode. BINs in this state should be evicted from main
     * cache as soon as they are no longer referenced by a cursor. If they were
     * loaded from off-heap cache, they should be stored off-heap when they are
     * evicted from main. The FetchedColdOffHeap flag indicates whether the
     * BIN was loaded from off-heap cache.
     */
    public final boolean getFetchedCold() {
        return (flags & IN_FETCHED_COLD_BIT) != 0;
    }

    /** @see #getFetchedCold() */
    public final void setFetchedCold(boolean val) {
        if (val) {
            flags |= IN_FETCHED_COLD_BIT;
        } else {
            flags &= ~IN_FETCHED_COLD_BIT;
        }
    }

    /** @see #getFetchedCold() */
    public final boolean getFetchedColdOffHeap() {
        return (flags & IN_FETCHED_COLD_OFFHEAP_BIT) != 0;
    }

    /** @see #getFetchedCold() */
    public final void setFetchedColdOffHeap(boolean val) {
        if (val) {
            flags |= IN_FETCHED_COLD_OFFHEAP_BIT;
        } else {
            flags &= ~IN_FETCHED_COLD_OFFHEAP_BIT;
        }
    }

    public final boolean getRecalcToggle() {
        return (flags & IN_RECALC_TOGGLE_BIT) != 0;
    }

    public final void setRecalcToggle(boolean toggle) {
        if (toggle) {
            flags |= IN_RECALC_TOGGLE_BIT;
        } else {
            flags &= ~IN_RECALC_TOGGLE_BIT;
        }
    }

    public final boolean isRoot() {
        return (flags & IN_IS_ROOT_BIT) != 0;
    }

    final void setIsRoot(boolean isRoot) {
        setIsRootFlag(isRoot);
        setDirty(true);
    }

    private void setIsRootFlag(boolean isRoot) {
        if (isRoot) {
            flags |= IN_IS_ROOT_BIT;
        } else {
            flags &= ~IN_IS_ROOT_BIT;
        }
    }

    public final boolean hasCachedChildrenFlag() {
        return (flags & IN_HAS_CACHED_CHILDREN_BIT) != 0;
    }

    private void setHasCachedChildrenFlag(boolean value) {
        if (value) {
            flags |= IN_HAS_CACHED_CHILDREN_BIT;
        } else {
            flags &= ~IN_HAS_CACHED_CHILDREN_BIT;
        }
    }

    public final boolean isInPri2LRU() {
        return (flags & IN_PRI2_LRU_BIT) != 0;
    }

    /* public for unit tests */
    public final void setInPri2LRU(boolean value) {
        if (value) {
            flags |= IN_PRI2_LRU_BIT;
        } else {
            flags &= ~IN_PRI2_LRU_BIT;
        }
    }

    public boolean isExpirationInHours() {
        return (flags & IN_EXPIRATION_IN_HOURS) != 0;
    }

    void setExpirationInHours(boolean value) {
        if (value) {
            flags |= IN_EXPIRATION_IN_HOURS;
        } else {
            flags &= ~IN_EXPIRATION_IN_HOURS;
        }
    }

    /**
     * @return the identifier key for this node.
     */
    public final byte[] getIdentifierKey() {
        return identifierKey;
    }

    /**
     * Set the identifier key for this node.
     *
     * @param key - the new identifier key for this node.
     *
     * @param makeDirty should normally be true, but may be false when an
     * expired slot containing the identifier key has been deleted.
     */
    public final void setIdentifierKey(byte[] key, boolean makeDirty) {

        assert(!isBINDelta());

        /*
         * The identifierKey is "intentionally" not kept track of in the
         * memory budget.  If we did, then it would look like this:

         int oldIDKeySz = (identifierKey == null) ?
                           0 :
                           MemoryBudget.byteArraySize(identifierKey.length);

         int newIDKeySz = (key == null) ?
                           0 :
                           MemoryBudget.byteArraySize(key.length);
         updateMemorySize(newIDKeySz - oldIDKeySz);

         */
        identifierKey = key;

        if (makeDirty) {
            setDirty(true);
        }
    }

    /**
     * @return the number of entries in this node.
     */
    public final int getNEntries() {
        return nEntries;
    }

    /**
     * @return the maximum number of entries in this node.
     *
     * Overriden by TestIN in INEntryTestBase.java
     */
    public int getMaxEntries() {
        return entryStates.length;
    }

    public final byte getState(int idx) {
        return entryStates[idx];
    }

    /**
     * @return true if the object is dirty.
     */
    public final boolean isDirty(int idx) {
        return ((entryStates[idx] & EntryStates.DIRTY_BIT) != 0);
    }

    /**
     * @return true if the idx'th entry has been deleted, although the
     * transaction that performed the deletion may not be committed.
     */
    public final boolean isEntryPendingDeleted(int idx) {
        return ((entryStates[idx] & EntryStates.PENDING_DELETED_BIT) != 0);
    }

    /**
     * Set pendingDeleted to true.
     */
    public final void setPendingDeleted(int idx) {

        entryStates[idx] |= EntryStates.PENDING_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set pendingDeleted to false.
     */
    final void clearPendingDeleted(int idx) {

        entryStates[idx] &= EntryStates.CLEAR_PENDING_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * @return true if the idx'th entry is deleted for sure.  If a transaction
     * performed the deletion, it has been committed.
     */
    public final boolean isEntryKnownDeleted(int idx) {
        return ((entryStates[idx] & EntryStates.KNOWN_DELETED_BIT) != 0);
    }

    /**
     * Set KD flag to true and clear the PD flag (PD does not need to be on
     * if KD is on).
     */
    public final void setKnownDeleted(int idx) {

        assert(isBIN());

        entryStates[idx] |= EntryStates.KNOWN_DELETED_BIT;
        entryStates[idx] &= EntryStates.CLEAR_PENDING_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set knownDeleted flag to true and evict the child LN if cached. The
     * child LN is evicted to save memory, since it will never be fetched
     * again.
     */
    public final void setKnownDeletedAndEvictLN(int index) {

        assert(isBIN());

        setKnownDeleted(index);

        LN oldLN = (LN) getTarget(index);
        if (oldLN != null) {
            updateMemorySize(oldLN, null /* newNode */);
        }
        setTarget(index, null);
    }

    /**
     * Set knownDeleted to false.
     */
    final void clearKnownDeleted(int idx) {

        assert(isBIN());

        entryStates[idx] &= EntryStates.CLEAR_KNOWN_DELETED_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /*
     * In the future we may want to move the following static methods to an
     * EntryState utility class and share all state bit twidling among IN,
     * ChildReference, and DeltaInfo.
     */

    /**
     * Returns true if the given state is known deleted.
     */
    static boolean isStateKnownDeleted(byte state) {
        return ((state & EntryStates.KNOWN_DELETED_BIT) != 0);
    }

    /**
     * Returns true if the given state is pending deleted.
     */
    static boolean isStatePendingDeleted(byte state) {
        return ((state & EntryStates.PENDING_DELETED_BIT) != 0);
    }

    /**
     * Return true if the LN at the given slot is embedded.
     */
    public final boolean isEmbeddedLN(int idx) {
        return ((entryStates[idx] & EntryStates.EMBEDDED_LN_BIT) != 0);
    }

    public static boolean isEmbeddedLN(byte state) {
        return ((state & EntryStates.EMBEDDED_LN_BIT) != 0);
    }

    /**
     * Set embeddedLN to true.
     */
    private void setEmbeddedLN(int idx) {

        entryStates[idx] |= EntryStates.EMBEDDED_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set embeddedLN to false.
     */
    private void clearEmbeddedLN(int idx) {

        entryStates[idx] &= EntryStates.CLEAR_EMBEDDED_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Return true if the LN at the given slot is an embedded LN with no data.
     */
    public final boolean isNoDataLN(int idx) {
        return ((entryStates[idx] & EntryStates.NO_DATA_LN_BIT) != 0);
    }

    public static boolean isNoDataLN(byte state) {
        return ((state & EntryStates.NO_DATA_LN_BIT) != 0);
    }

    /**
     * Set noDataLN to true.
     */
    void setNoDataLN(int idx) {

        entryStates[idx] |= EntryStates.NO_DATA_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /**
     * Set noDataLN to false.
     */
    private void clearNoDataLN(int idx) {

        entryStates[idx] &= EntryStates.CLEAR_NO_DATA_LN_BIT;
        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);
    }

    /*
     *
     */
    public final boolean haveEmbeddedData(int idx) {
        return (isEmbeddedLN(idx) && !isNoDataLN(idx));
    }

    /* For unit testing */
    public final int getNumEmbeddedLNs() {
        int res = 0;
        for (int i = 0; i < getNEntries(); ++i) {
            if (isEmbeddedLN(i)) {
                ++res;
            }
        }

        return res;
    }

    /* For unit testing */
    public final INKeyRep getKeyVals() {
        return entryKeys;
    }

    public final byte[] getKeyPrefix() {
        return keyPrefix;
    }

    /*
     * For unit testing only
     */
    public final boolean hasKeyPrefix() {
        return keyPrefix != null;
    }

    /* This has default protection for access by the unit tests. */
    final void setKeyPrefix(byte[] keyPrefix) {

        assert databaseImpl != null;

        int prevLength = (this.keyPrefix == null) ? 0 : this.keyPrefix.length;
        this.keyPrefix = keyPrefix;
        /* Update the memory budgeting to reflect changes in the key prefix. */
        int currLength = (keyPrefix == null) ? 0 : keyPrefix.length;
        updateMemorySize(prevLength, currLength);
    }

    /**
     * Return the idx'th key. If prefixing is enabled, construct a new byte[]
     * containing the prefix and suffix. If prefixing is not enabled, just
     * return the current byte[] in entryKeys.
     */
    public final byte[] getKey(int idx) {

        assert idx < nEntries;

        byte[] key = entryKeys.getFullKey(
            keyPrefix, idx, haveEmbeddedData(idx));

        assert(key != null);

        return key;
    }

    public final byte[] getData(int idx) {

        if (haveEmbeddedData(idx)) {
            return entryKeys.getData(idx);
        }

        if (isNoDataLN(idx)) {
            return Key.EMPTY_KEY;
        }

        return null;
    }

    /**
     * Returns the size of the key that is stored persistently, which will be
     * the combined key-data for an embedded LN or duplicated DB record.
     */
    int getStoredKeySize(int idx) {
        return entryKeys.size(idx);
    }

    /**
     * Updates the key in the idx-th slot of this BIN, if the DB allows key
     * updates and the new key is not identical to the current key in the slot.
     * It also updates the data (if any) that is embedded with the key in the
     * idx-slot, or embeds new data in that slot, is the "data" param is
     * non-null, or removes embedded data, if "data" is null. Finally, it
     * sets the EMBEDDED_LN_BIT and NO_DATA_LN_BIT flags in the slot's state.
     *
     * @param key is the key to set in the slot and is the LN key.
     *
     * @param data If the data portion of a record must be embedded in this
     * BIN, "data" stores the record's data. Null otherwise. See also comment
     * for the keyEntries field. 
     * 
     * @return true if a multi-slot change was made and the complete IN memory
     * size must be updated.
     */
    private boolean updateLNSlotKey(int idx, byte[] key, byte[] data) {

        assert(isBIN());

        boolean haveEmbeddedData = haveEmbeddedData(idx);

        if (data == null) {
            if (isEmbeddedLN(idx)) {
                clearEmbeddedLN(idx); 
                clearNoDataLN(idx);
            }
        } else {
            if (!isEmbeddedLN(idx)) {
                setEmbeddedLN(idx);
            }
            if (data.length == 0) {
                setNoDataLN(idx);
            } else {
                clearNoDataLN(idx);
            }
        }

        /*
         * The new key may be null if a dup LN was deleted, in which case there
         * is no need to update it.  There is no need to compare keys if there
         * is no comparator configured, since a key cannot be changed when the
         * default comparator is used.
         */
        if (key != null &&
            (databaseImpl.allowsKeyUpdates() ||
             DupConvert.needsConversion(databaseImpl)) &&
            !Arrays.equals(key, getKey(idx))) {

            setDirty(true);
            return setKey(idx, key, data, false);

        } else if (haveEmbeddedData) {

            /*
             * The key does not change, but the slot contains embedded data,
             * which must now either be removed (if data == null or
             * data.length == 0) or updated.
             * TODO #21488: update the data only if it actually changes.
             */
            setDirty(true);
            entryStates[idx] |= EntryStates.DIRTY_BIT;

            INKeyRep.Type oldRepType = entryKeys.getType();
            entryKeys = entryKeys.setData(idx, data, this);
            return oldRepType != entryKeys.getType();

        } else if (data != null && data.length != 0) {

            /*
             * The key does not change, but we now have to embed data in a slot
             * that does not currently have embedded data.
             */
            setDirty(true);
            entryStates[idx] |= EntryStates.DIRTY_BIT;

            key = entryKeys.getKey(idx, false);
            INKeyRep.Type oldRepType = entryKeys.getType();
            entryKeys = entryKeys.set(idx, key, data, this);
            return oldRepType != entryKeys.getType();

        } else {
            return false;
        }
    }

    /*
     * Convenience wrapper for setKey() method below
     */
    private boolean insertKey(
        int idx,
        byte[] key,
        byte[] data) {

        /*
         * Set the id key when inserting the first entry. This is important
         * when compression removes all entries from a BIN, and then an entry
         * is inserted before the empty BIN is purged.
         */
        if (nEntries == 1 && !isBINDelta()) {
            setIdentifierKey(key, true /*makeDirty*/);
        }

        return setKey(idx, key, data, true);
    }

    // TODO re-enable this and figure out why it is firing

    private boolean idKeyIsSlotKey() {

        if (true) {
            return true;
        }

        if (!isBIN() || nEntries == 0) {
            return true;
        }

        for (int i = 0; i < nEntries; i += 1) {

            if (entryKeys.compareKeys(
                identifierKey,  keyPrefix, i, haveEmbeddedData(i),
                databaseImpl.getKeyComparator()) == 0) {

                return true;
            }
        }

        return false;
    }

    /*
     * Convenience wrapper for setKey() method below. It is used for
     * upper INs only, so no need to worry about the EMBEDDED_LN_BIT
     * and NO_DATA_LN_BIT flags.
     */
    private boolean updateKey(
        int idx,
        byte[] key,
        byte[] data) {
        return setKey(idx, key, data, false);
    }

    /**
     * This method inserts or updates a key at a given slot. In either case,
     * the associated embedded data (if any) is inserted or updated as well,
     * and the key prefix is adjusted, if necessary.
     *
     * In case of insertion (indicated by a true value for the isInsertion
     * param), it is assumed that the idx slot does not store any valid info,
     * so any change to the key prefix (if any) is due to the insertion of
     * this new new key and not to the removal of the current key at the idx
     * slot.
     *
     * In case of update, the method does not check if the current key is
     * indeed different from the new key; it just updates the key
     * unconditionally. If the slot has embedded data, that data will also
     * be updated (if the data param is not null), or be removed (if the data
     * param is null). If the slot does not have embedded data and the data
     * param is not null, the given data will be embedded.
     *
     * Note: For BINs, the maintenance of the EMBEDDED_LN_BIT andNO_DATA_LN_BIT
     * is done by the callers of this method.
     *
     * @param data If the data portion of a record must be embedded in this
     * BIN, "data" stores the record's data. Null otherwise. See also comment
     * for the keyEntries field. 
     *
     * @return true if a multi-slot change was made and the complete IN memory
     * size must be updated.
     */
    public boolean setKey(
        int idx,
        byte[] key,
        byte[] data,
        boolean isInsertion) {

        entryStates[idx] |= EntryStates.DIRTY_BIT;
        setDirty(true);

        /*
         * Only compute key prefix if prefixing is enabled and there's an
         * existing prefix.
         */
        if (databaseImpl.getKeyPrefixing() && keyPrefix != null) {

            int newPrefixLen = Key.getKeyPrefixLength(
                keyPrefix, keyPrefix.length, key);

            if (newPrefixLen < keyPrefix.length) {

                /*
                 * The new key doesn't share the current prefix, so recompute
                 * the prefix and readjust all the existing suffixes.
                 */
                byte[] newPrefix = (isInsertion ?
                                    Key.createKeyPrefix(keyPrefix, key) :
                                    computeKeyPrefix(idx));

                if (newPrefix != null) {
                    /* Take the new key into consideration for new prefix. */
                    newPrefix = Key.createKeyPrefix(newPrefix, key);
                }

                recalcSuffixes(newPrefix, key, data, idx);
                return true;

            } else {

                INKeyRep.Type prevRepType = entryKeys.getType();

                byte[] suffix = computeKeySuffix(keyPrefix, key);
                entryKeys = entryKeys.set(idx, suffix, data, this);

                return prevRepType != entryKeys.getType();
            }

        } else if (keyPrefix != null) {

            /*
             * Key prefixing has been turned off on this database, but there
             * are existing prefixes. Remove prefixes for this IN.
             */
            recalcSuffixes(null, key, data, idx);
            return true;

        } else {
            INKeyRep.Type oldRepType = entryKeys.getType();
            entryKeys = entryKeys.set(idx, key, data, this);
            return oldRepType != entryKeys.getType();
        }
    }

    /*
     * Given 2 byte arrays, "prefix" and "key", where "prefix" is or stores
     * a prefix of "key", allocate and return another byte array that stores
     * the suffix of "key" w.r.t. "prefix".
     */
    private static byte[] computeKeySuffix(byte[] prefix, byte[] key) {

        int prefixLen = (prefix == null ? 0 : prefix.length);

        if (prefixLen == 0) {
            return key;
        }

        int suffixLen = key.length - prefixLen;
        byte[] ret = new byte[suffixLen];
        System.arraycopy(key, prefixLen, ret, 0, suffixLen);
        return ret;
    }

    /*
     * Iterate over all keys in this IN and recalculate their suffixes based on
     * newPrefix.  If keyVal and idx are supplied, it means that entry[idx] is
     * about to be changed to keyVal so use that instead of
     * entryKeys.get(idx) when computing the new suffixes. If idx is < 0,
     * and keyVal is null, then recalculate suffixes for all entries in this.
     */
    private void recalcSuffixes(
        byte[] newPrefix,
        byte[] key,
        byte[] data,
        int idx) {

        for (int i = 0; i < nEntries; i++) {

            byte[] curKey = (i == idx ? key : getKey(i));

            byte[] curData = null;

            if (i == idx) {
                curData = data;
            } else if (haveEmbeddedData(i)) {
                curData = entryKeys.getData(i);
            }

            byte[] suffix = computeKeySuffix(newPrefix, curKey);

            entryKeys = entryKeys.set(i, suffix, curData, this);
        }

        setKeyPrefix(newPrefix);
    }

    /**
     * Forces computation of the key prefix, without requiring a split.
     * Is public for use by DbCacheSize.
     */
    public final void recalcKeyPrefix() {

        assert(!isBINDelta());

        recalcSuffixes(computeKeyPrefix(-1), null, null, -1);
    }

    /*
     * Computes a key prefix based on all the keys in 'this'.  Return null if
     * the IN is empty or prefixing is not enabled or there is no common
     * prefix for the keys.
     */
    private byte[] computeKeyPrefix(int excludeIdx) {

        if (!databaseImpl.getKeyPrefixing() || nEntries <= 1) {
            return null;
        }

        int firstIdx = (excludeIdx == 0) ? 1 : 0;
        byte[] curPrefixKey = getKey(firstIdx);
        int prefixLen = curPrefixKey.length;

        /*
         * Only need to look at first and last entries when keys are ordered
         * byte-by-byte.  But when there is a comparator, keys are not
         * necessarily ordered byte-by-byte.  [#21328]
         */
        boolean byteOrdered;
        if (true) {
            /* Disable optimization for now.  Needs testing. */
            byteOrdered = false;
        } else {
            byteOrdered = (databaseImpl.getKeyComparator() == null);
        }

        if (byteOrdered) {
            int lastIdx = nEntries - 1;
            if (lastIdx == excludeIdx) {
                lastIdx -= 1;
            }
            if (lastIdx > firstIdx) {
                byte[] lastKey = getKey(lastIdx);
                int newPrefixLen = Key.getKeyPrefixLength(
                    curPrefixKey, prefixLen, lastKey);

                if (newPrefixLen < prefixLen) {
                    curPrefixKey = lastKey;
                    prefixLen = newPrefixLen;
                }
            }
        } else {
            for (int i = firstIdx + 1; i < nEntries; i++) {

                if (i == excludeIdx) {
                    continue;
                }

                byte[] curKey = getKey(i);

                int newPrefixLen = Key.getKeyPrefixLength(
                    curPrefixKey, prefixLen, curKey);

                if (newPrefixLen < prefixLen) {
                    curPrefixKey = curKey;
                    prefixLen = newPrefixLen;
                }
            }
        }

        byte[] ret = new byte[prefixLen];
        System.arraycopy(curPrefixKey, 0, ret, 0, prefixLen);

        return ret;
    }

    /*
     * For debugging.
     */
    final boolean verifyKeyPrefix() {

        byte[] computedKeyPrefix = computeKeyPrefix(-1);
        if (keyPrefix == null) {
            return computedKeyPrefix == null;
        }

        if (computedKeyPrefix == null ||
            computedKeyPrefix.length < keyPrefix.length) {
            System.out.println("VerifyKeyPrefix failed");
            System.out.println(dumpString(0, false));
            return false;
        }
        for (int i = 0; i < keyPrefix.length; i++) {
            if (keyPrefix[i] != computedKeyPrefix[i]) {
                System.out.println("VerifyKeyPrefix failed");
                System.out.println(dumpString(0, false));
                return false;
            }
        }
        return true;
    }

    /**
     * Returns whether the given key is greater than or equal to the first key
     * in the IN and less than or equal to the last key in the IN.  This method
     * is used to determine whether a key to be inserted belongs in this IN,
     * without doing a tree search.  If false is returned it is still possible
     * that the key belongs in this IN, but a tree search must be performed to
     * find out.
     */
    public final boolean isKeyInBounds(byte[] key) {

        assert(!isBINDelta());

        if (nEntries < 2) {
            return false;
        }

        Comparator comparator = getKeyComparator();
        int cmp;

        /* Compare key given to my first key. */
        cmp = entryKeys.compareKeys(
            key, keyPrefix, 0, haveEmbeddedData(0), comparator);

        if (cmp < 0) {
            return false;
        }

        /* Compare key given to my last key. */
        int idx =  nEntries - 1;
        cmp = entryKeys.compareKeys(
            key, keyPrefix, idx, haveEmbeddedData(idx), comparator);

        return cmp <= 0;
    }

    /**
     * Return the idx'th LSN for this entry.
     *
     * @return the idx'th LSN for this entry.
     */
    public final long getLsn(int idx) {

        if (entryLsnLongArray == null) {
            int offset = idx << 2;
            int fileOffset = getFileOffset(offset);
            if (fileOffset == -1) {
                return DbLsn.NULL_LSN;
            } else {
                return DbLsn.makeLsn((baseFileNumber +
                                      getFileNumberOffset(offset)),
                                     fileOffset);
            }
        } else {
            return entryLsnLongArray[idx];
        }
    }

    /**
     * Set the LSN of the idx'th slot, mark the slot dirty, and update
     * memory consuption. Throw exception if the update is not legitimate.
     */
    public void setLsn(int idx, long lsn) {
        setLsn(idx, lsn, true);
    }

    /**
     * Set the LSN of the idx'th slot, mark the slot dirty, and update
     * memory consuption. If "check" is true, throw exception if the
     * update is not legitimate.
     */
    private void setLsn(int idx, long lsn, boolean check) {

        if (!check || shouldUpdateLsn(getLsn(idx), lsn)) {

            int oldSize = computeLsnOverhead();

            /* setLsnInternal can mutate to an array of longs. */
            setLsnInternal(idx, lsn);

            updateMemorySize(computeLsnOverhead() - oldSize);
            entryStates[idx] |= EntryStates.DIRTY_BIT;
            setDirty(true);
        }
    }

    /*
     * Set the LSN of the idx'th slot. If the current storage for LSNs is the
     * compact one, mutate it to the the non-compact, if necessary.
     */
    final void setLsnInternal(int idx, long value) {

        /* Will implement this in the future. Note, don't adjust if mutating.*/
        //maybeAdjustCapacity(offset);
        if (entryLsnLongArray != null) {
            entryLsnLongArray[idx] = value;
            return;
        }

        int offset = idx << 2;

        if (value == DbLsn.NULL_LSN) {
            setFileNumberOffset(offset, (byte) 0);
            setFileOffset(offset, -1);
            return;
        }

        long thisFileNumber = DbLsn.getFileNumber(value);

        if (baseFileNumber == -1) {
            /* First entry. */
            baseFileNumber = thisFileNumber;
            setFileNumberOffset(offset, (byte) 0);

        } else {

            if (thisFileNumber < baseFileNumber) {
                if (!adjustFileNumbers(thisFileNumber)) {
                    mutateToLongArray(idx, value);
                    return;
                }
                baseFileNumber = thisFileNumber;
            }

            long fileNumberDifference = thisFileNumber - baseFileNumber;
            if (fileNumberDifference > Byte.MAX_VALUE) {
                mutateToLongArray(idx, value);
                return;
            }

            setFileNumberOffset(
                offset, (byte) (thisFileNumber - baseFileNumber));
            //assert getFileNumberOffset(offset) >= 0;
        }

        int fileOffset = (int) DbLsn.getFileOffset(value);
        if (fileOffset > MAX_FILE_OFFSET) {
            mutateToLongArray(idx, value);
            return;
        }

        setFileOffset(offset, fileOffset);
        //assert getLsn(offset) == value;
    }

    private boolean adjustFileNumbers(long newBaseFileNumber) {

        long oldBaseFileNumber = baseFileNumber;
        for (int i = 0;
             i < entryLsnByteArray.length;
             i += BYTES_PER_LSN_ENTRY) {
            if (getFileOffset(i) == -1) {
                continue;
            }

            long curEntryFileNumber =
                oldBaseFileNumber + getFileNumberOffset(i);
            long newCurEntryFileNumberOffset =
                (curEntryFileNumber - newBaseFileNumber);

            if (newCurEntryFileNumberOffset > Byte.MAX_VALUE) {
                long undoOffset = oldBaseFileNumber - newBaseFileNumber;
                for (int j = i - BYTES_PER_LSN_ENTRY;
                     j >= 0;
                     j -= BYTES_PER_LSN_ENTRY) {
                    if (getFileOffset(j) == -1) {
                        continue;
                    }
                    setFileNumberOffset
                        (j, (byte) (getFileNumberOffset(j) - undoOffset));
                    //assert getFileNumberOffset(j) >= 0;
                }
                return false;
            }
            setFileNumberOffset(i, (byte) newCurEntryFileNumberOffset);

            //assert getFileNumberOffset(i) >= 0;
        }
        return true;
    }

    private void setFileNumberOffset(int offset, byte fileNumberOffset) {
        entryLsnByteArray[offset] = fileNumberOffset;
    }

    private byte getFileNumberOffset(int offset) {
        return entryLsnByteArray[offset];
    }

    private void setFileOffset(int offset, int fileOffset) {
        put3ByteInt(offset + 1, fileOffset);
    }

    private int getFileOffset(int offset) {
        return get3ByteInt(offset + 1);
    }

    private void put3ByteInt(int offset, int value) {
        entryLsnByteArray[offset++] = (byte) value;
        entryLsnByteArray[offset++] = (byte) (value >>> 8);
        entryLsnByteArray[offset]   = (byte) (value >>> 16);
    }

    private int get3ByteInt(int offset) {
        int ret = (entryLsnByteArray[offset++] & 0xFF);
        ret += (entryLsnByteArray[offset++] & 0xFF) << 8;
        ret += (entryLsnByteArray[offset]   & 0xFF) << 16;
        if (ret == THREE_BYTE_NEGATIVE_ONE) {
            ret = -1;
        }

        return ret;
    }

    private void mutateToLongArray(int idx, long value) {
        int nElts = entryLsnByteArray.length >> 2;
        long[] newArr = new long[nElts];
        for (int i = 0; i < nElts; i++) {
            newArr[i] = getLsn(i);
        }
        newArr[idx] = value;
        entryLsnLongArray = newArr;
        entryLsnByteArray = null;
    }

    /**
     * For a deferred write database, ensure that information is not lost when
     * a new LSN is assigned.  Also ensures that a transient LSN is not
     * accidentally assigned to a persistent entry.
     *
     * Because this method uses strict checking, prepareForSlotReuse must
     * sometimes be called when a new logical entry is being placed in a slot,
     * e.g., during an IN split or an LN slot reuse.
     *
     * The following transition is a NOOP and the LSN is not set:
     *   Any LSN to same value.
     * The following transitions are allowed and cause the LSN to be set:
     *   Null LSN to transient LSN
     *   Null LSN to persistent LSN
     *   Transient LSN to persistent LSN
     *   Persistent LSN to new persistent LSN
     * The following transitions should not occur and throw an exception:
     *   Transient LSN to null LSN
     *   Transient LSN to new transient LSN
     *   Persistent LSN to null LSN
     *   Persistent LSN to transient LSN
     *
     * The above imply that a transient or null LSN can overwrite only a null
     * LSN.
     */
    private final boolean shouldUpdateLsn(long oldLsn, long newLsn) {

        /* Save a little computation in packing/updating an unchanged LSN. */
        if (oldLsn == newLsn) {
            return false;
        }
        /* The rules for a new null LSN can be broken in a read-only env. */
        if (newLsn == DbLsn.NULL_LSN && getEnv().isReadOnly()) {
            return true;
        }
        /* Enforce LSN update rules.  Assume lsn != oldLsn. */
        if (databaseImpl.isDeferredWriteMode()) {
            if (oldLsn != DbLsn.NULL_LSN && DbLsn.isTransientOrNull(newLsn)) {
                throw unexpectedState(
                    "DeferredWrite LSN update not allowed" +
                    " oldLsn = " + DbLsn.getNoFormatString(oldLsn) +
                    " newLsn = " + DbLsn.getNoFormatString(newLsn));
            }
        } else {
            if (DbLsn.isTransientOrNull(newLsn)) {
                throw unexpectedState(
                    "LSN update not allowed" +
                    " oldLsn = " + DbLsn.getNoFormatString(oldLsn) +
                    " newLsn = " + DbLsn.getNoFormatString(newLsn));
            }
        }
        return true;
    }

    /* For unit tests. */
    final long[] getEntryLsnLongArray() {
        return entryLsnLongArray;
    }

    /* For unit tests. */
    final byte[] getEntryLsnByteArray() {
        return entryLsnByteArray;
    }

    /* For unit tests. */
    final void initEntryLsn(int capacity) {
        entryLsnLongArray = null;
        entryLsnByteArray = new byte[capacity << 2];
        baseFileNumber = -1;
    }

    /* Will implement this in the future. Note, don't adjust if mutating.*/
    /***
      private void maybeAdjustCapacity(int offset) {
      if (entryLsnLongArray == null) {
      int bytesNeeded = offset + BYTES_PER_LSN_ENTRY;
      int currentBytes = entryLsnByteArray.length;
      if (currentBytes < bytesNeeded) {
      int newBytes = bytesNeeded +
      (GROWTH_INCREMENT * BYTES_PER_LSN_ENTRY);
      byte[] newArr = new byte[newBytes];
      System.arraycopy(entryLsnByteArray, 0, newArr, 0,
      currentBytes);
      entryLsnByteArray = newArr;
      for (int i = currentBytes;
      i < newBytes;
      i += BYTES_PER_LSN_ENTRY) {
      setFileNumberOffset(i, (byte) 0);
      setFileOffset(i, -1);
      }
      }
      } else {
      int currentEntries = entryLsnLongArray.length;
      int idx = offset >> 2;
      if (currentEntries < idx + 1) {
      int newEntries = idx + GROWTH_INCREMENT;
      long[] newArr = new long[newEntries];
      System.arraycopy(entryLsnLongArray, 0, newArr, 0,
      currentEntries);
      entryLsnLongArray = newArr;
      for (int i = currentEntries; i < newEntries; i++) {
      entryLsnLongArray[i] = DbLsn.NULL_LSN;
      }
      }
      }
      }
     ***/

    /**
     * The last logged size is not stored for UINs.
     */
    boolean isLastLoggedSizeStored(int idx) {
        return false;
    }

    boolean mayHaveLastLoggedSizeStored() {
        return false;
    }

    /**
     * The last logged size is not stored for UINs.
     */
    public void setLastLoggedSize(int idx, int lastLoggedSize) {
    }

    /**
     * The last logged size is not stored for UINs.
     */
    public void clearLastLoggedSize(int idx) {
    }

    /**
     * The last logged size is not stored for UINs.
     */
    void setLastLoggedSizeUnconditional(int idx, int lastLoggedSize) {
    }

    /**
     * The last logged size is not stored for UINs.
     */
    public int getLastLoggedSize(int idx) {
        return 0;
    }

    public void setOffHeapBINId(int idx,
                                int val,
                                boolean pri2,
                                boolean dirty) {

        assert getNormalizedLevel() == 2;
        assert val >= 0;

        setOffHeapBINPri2(idx, pri2);
        setOffHeapBINDirty(idx, dirty);

        final long newVal = val + 1;
        final long oldVal = offHeapBINIds.get(idx);

        if (oldVal == newVal) {
            return;
        }

        assert oldVal == 0;

        offHeapBINIds = offHeapBINIds.set(idx, newVal, this);
    }

    public void clearOffHeapBINId(int idx) {

        assert getNormalizedLevel() == 2;

        setOffHeapBINPri2(idx, false);
        setOffHeapBINDirty(idx, false);

        final long oldVal = offHeapBINIds.get(idx);

        if (oldVal == 0) {
            return;
        }

        offHeapBINIds = offHeapBINIds.set(idx, 0, this);

        if (getInListResident() &&
            getNormalizedLevel() == 2 &&
            offHeapBINIds.isEmpty()) {

            getEvictor().moveToPri1LRU(this);
        }
    }

    public int getOffHeapBINId(int idx) {
        return ((int) offHeapBINIds.get(idx)) - 1;
    }

    public boolean hasOffHeapBINIds() {
        return !offHeapBINIds.isEmpty();
    }

    public long getOffHeapBINIdsMemorySize() {
        return offHeapBINIds.getMemorySize();
    }

    private void setOffHeapBINDirty(int idx, boolean val) {
        if (val) {
            entryStates[idx] |= EntryStates.OFFHEAP_DIRTY_BIT;
        } else {
            entryStates[idx] &= EntryStates.CLEAR_OFFHEAP_DIRTY_BIT;
        }
    }

    public boolean isOffHeapBINDirty(int idx) {
        return (entryStates[idx] & EntryStates.OFFHEAP_DIRTY_BIT) != 0;
    }

    private void setOffHeapBINPri2(int idx, boolean val) {
        if (val) {
            entryStates[idx] |= EntryStates.OFFHEAP_PRI2_BIT;
        } else {
            entryStates[idx] &= EntryStates.CLEAR_OFFHEAP_PRI2_BIT;
        }
    }

    public boolean isOffHeapBINPri2(int idx) {
        return (entryStates[idx] & EntryStates.OFFHEAP_PRI2_BIT) != 0;
    }

    public final INTargetRep getTargets() {
        return entryTargets;
    }

    /**
     * Sets the idx'th target. No need to make dirty, that state only applies
     * to key and LSN.
     *
     * WARNING: This method does not update the memory budget.  The caller
     * must update the budget.
     */
    void setTarget(int idx, Node target) {

        assert isLatchExclusiveOwner() :
            "Not latched for write " + getClass().getName() +
            " id=" + getNodeId();

        final Node curChild = entryTargets.get(idx);

        entryTargets = entryTargets.set(idx, target, this);

        if (target != null && target.isIN()) {
            ((IN) target).setParent(this);
        }

        if (isUpperIN()) {
            if (target == null) {

                /*
                 * If this UIN has just lost its last cached child, set its
                 * hasCachedChildren flag to false and put it back to the
                 * LRU list.
                 */
                if (curChild != null &&
                    hasCachedChildrenFlag() &&
                    !hasCachedChildren()) {

                    setHasCachedChildrenFlag(false);

                    if (!isDIN()) {
                        if (traceLRU) {
                            LoggerUtils.envLogMsg(
                                traceLevel, getEnv(),
                                Thread.currentThread().getId() + "-" +
                                    Thread.currentThread().getName() +
                                    "-" + getEnv().getName() +
                                    " setTarget(): " +
                                    " Adding UIN " + getNodeId() +
                                    " to LRU after detaching chld " +
                                    ((IN) curChild).getNodeId());
                        }
                        getEvictor().addBack(this);
                    }
                }
            } else {
                if (curChild == null &&
                    !hasCachedChildrenFlag()) {

                    setHasCachedChildrenFlag(true);

                    if (traceLRU) {
                        LoggerUtils.envLogMsg(
                            traceLevel, getEnv(),
                            Thread.currentThread().getId() + "-" +
                                Thread.currentThread().getName() +
                                "-" + getEnv().getName() +
                                " setTarget(): " +
                                " Removing UIN " + getNodeId() +
                                " after attaching child " +
                                ((IN) target).getNodeId());
                    }
                    getEvictor().remove(this);
                }
            }
        }
    }

    /**
     * Return the idx'th target.
     *
     * This method does not load children from off-heap cache, so it always
     * returns null when then child is not in main cache. Note that when
     * children are INs (this is not a BIN), when this method returns null it
     * is does not imply that the child is non-dirty, because dirty BINs are
     * stored off-heap. To fetch the current version from off-heap cache in
     * that case, call loadIN instead.
     */
    public final Node getTarget(int idx) {
        return entryTargets.get(idx);
    }

    /**
     * Returns the idx-th child of "this" upper IN, fetching the child from
     * the log and attaching it to its parent if it is not already attached.
     * This method is used during tree searches.
     *
     * On entry, the parent must be latched already.
     *
     * If the child must be fetched from the log, the parent is unlatched.
     * After the disk read is done, the parent is relatched. However, due to
     * splits, it may not be the correct parent anymore. If so, the method
     * will return null, and the caller is expected to restart the tree search.
     *
     * On return, the parent will be latched, unless null is returned or an
     * exception is thrown.
     *
     * The "searchKey" param is the key that the caller is looking for. It is
     * used by this method in determining if, after a disk read, "this" is
     * still the correct parent for the child. "searchKey" may be null if the
     * caller is doing a LEFT or RIGHT search.
     */
    final IN fetchINWithNoLatch(int idx,
                                byte [] searchKey) {
        return fetchINWithNoLatch(idx, searchKey, null);
    }

    /**
     * This variant of fetchIN() takes a SearchResult as a param, instead of
     * an idx (it is used by Tree.getParentINForChildIN()). The ordinal number
     * of the child to fetch is specified by result.index. result.index will
     * be adjusted by this method if, after a disk read, the ordinal number
     * of the child changes due to insertions, deletions or splits in the
     * parent.
     */
    final IN fetchINWithNoLatch(SearchResult result,
                                byte [] searchKey) {
        return fetchINWithNoLatch(result.index, searchKey, result);
    }

    /**
     * Provides the implementation of the above two methods.
     */
    private IN fetchINWithNoLatch(
        int idx,
        byte [] searchKey,
        SearchResult result) {

        assert(isUpperIN());
        assert(isLatchOwner());

        final EnvironmentImpl envImpl = getEnv();
        final OffHeapCache ohCache = envImpl.getOffHeapCache();

        boolean isMiss = false;
        boolean success = false;

        IN child = (IN)entryTargets.get(idx);

        if (child == null) {

            final long lsn = getLsn(idx);

            if (lsn == DbLsn.NULL_LSN) {
                throw unexpectedState(makeFetchErrorMsg(
                    "NULL_LSN in upper IN", lsn, idx));
            }

            /*
             * For safety we must get a copy of the BIN off-heap bytes while
             * latched, but we can materialize the bytes while unlatched
             * (further below) to reduce the work done while latched.
             */
            byte[] ohBytes = null;

            if (getNormalizedLevel() == 2) {
                ohBytes = ohCache.getBINBytes(this, idx);
            }

            pin();
            try {
                releaseLatch();

                TestHookExecute.doHookIfSet(fetchINHook);

                if (ohBytes != null) {
                    child = ohCache.materializeBIN(envImpl, ohBytes);
                } else {
                    final WholeEntry wholeEntry = envImpl.getLogManager().
                        getLogEntryAllowInvisibleAtRecovery(
                            lsn, getLastLoggedSize(idx));

                    final LogEntry logEntry = wholeEntry.getEntry();

                    child = (IN) logEntry.getResolvedItem(databaseImpl);

                    isMiss = true;
                }

                latch(CacheMode.UNCHANGED);

                /*
                 * The following if statement relies on splits being logged
                 * immediately, or more precisely, the split node and its
                 * new sibling being logged immediately, while both siblings
                 * and their parent are latched exclusively. The reason for
                 * this is as follows: 
                 * 
                 * Let K be the search key. If we are doing a left-deep or
                 * right-deep search, K is -INF or +INF respectively.
                 *
                 * Let P be the parent IN (i.e., "this") and S be the slot at
                 * the idx position before P was unlatched above. Here, we
                 * view slots as independent objects, not identified by their
                 * position in an IN but by some unique (imaginary) and
                 * immutable id assigned to the slot when it is first inserted
                 * into an IN. 
                 *
                 * Before unlatching P, S was the correct slot to follow down
                 * the tree looking for K. After P is unlatched and then
                 * relatched, let S' be the slot at the idx position, if P
                 * still has an idx position. We consider the following 2 cases:
                 *
                 * 1. S' exists and S'.LSN == S.LSN. Then S and S' are the same
                 * (logical) slot (because two different slots can never cover
                 * overlapping ranges of keys, and as a result, can never point
                 * to the same LSN). Then, S is still the correct slot to take
                 * down the tree, unless the range of keys covered by S has
                 * shrunk while P was unlatched. But the only way for S's key
                 * range to shrink is for its child IN to split, which could
                 * not have happened because if it did, the before and after
                 * LSNs of S would be different, given that splits are logged
                 * immediately. We conclude that the set of keys covered by
                 * S after P is relatched is the same or a superset of the keys
                 * covered by S before P was unlatched, and thus S (at the idx
                 * position) is still the correct slot to follow.
                 *
                 * 2. There is no idx position in P or S'.LSN != S.LSN. In
                 * this case we cannot be sure if S' (if it exists) is the
                 * the correct slot to follow. So, we (re)search for K in P
                 * to check if P is still the correct parent and find the
                 * correct slot to follow. If this search lands on the 1st or
                 * last slot in P, we may return null because using the key
                 * info contained in P only, we do not know the full range of
                 * keys covered by those two slots. If null is returned, the
                 * caller is expected to restart the tree search from the root.
                 *
                 * Notice that the if conditions are necessary before calling
                 * findEntry(). Without them, we could get into an infinite
                 * loop of search re-tries in the scenario where nothing changes
                 * in the tree and findEntry always lands on the 1st or last
                 * slot in P. The conditions guarantee that we may restart the
                 * tree search only if something changes with S while P is
                 * unlatched (S moves to a different position or a different
                 * IN or it points to a different LSN).
                 * 
                 * Notice also that if P does not split while it is unlatched,
                 * the range of keys covered by P does not change either. This
                 * implies that the correct slot to follow *must* be inside P,
                 * and as a result, the 1st and last slots in P can be trusted.
                 * Unfortunately, however, we have no way to detecting reliably
                 * whether P splits or not.
                 * 
                 * Special care for DBs in DW mode:
                 *
                 * For DBs in DW mode, special care must be taken because
                 * splits are not immediately logged. So, for DW DBs, to avoid
                 * a call to findEntry() we require that not only S'.LSN ==
                 * S.LSN, but also the the child is not cached. These 2
                 * conditions together guarantee that the child did not split
                 * while P was unlatched, because if the child did split, it
                 * was fetched and cached first, so after P is relatched,
                 * either the child would be still cached, or if it was evicted
                 * after the split, S.LSN would have changed.
                 */
                if (idx >= nEntries ||
                    getLsn(idx) != lsn ||
                    (databaseImpl.isDeferredWriteMode() &&
                     entryTargets.get(idx) != null)) {

                    if (searchKey == null) {
                        return null;
                    }

                    idx = findEntry(searchKey, false, false);

                    if ((idx == 0 || idx == nEntries - 1) &&
                        !isKeyInBounds(searchKey)) {
                        return null;
                    }
                }

                if (result != null) {
                    result.index = idx;
                }

                /*
                 * "this" is still the correct parent and "idx" points to the
                 * correct slot to follow for the search down the tree. But
                 * what we fetched from the log may be out-of-date by now
                 * (because it was fetched and then updated by other threads)
                 * or it may not be the correct child anymore ("idx" was
                 * changed by the findEntry() call above). We check 5 cases:
                 *
                 * (a) There is already a cached child at the "idx" position.
                 * In this case, we return whatever is there because it has to
                 * be the most recent version of the appropriate child node.
                 * This is true even when a split or reverse split occurred.
                 * The check for isKeyInBounds above is critical in that case.
                 *
                 * (b) There is no cached child at the "idx" slot, but the slot
                 * LSN is not the same as the LSN we read from the log. This is
                 * the case if "idx" was changed by findEntry() or other
                 * threads fetched the same child as this thread, updated it,
                 * and then evicted it. The child we fetched is obsolete and
                 * should not be attached. For simplicity, we just return null
                 * in this (quite rare) case.
                 *
                 * (c) We loaded the BIN from off-heap cache and, similar to
                 * case (b), another thread has loaded the same child, modified
                 * it, and then evicted it, placing it off-heap again. It's LSN
                 * did not change because it wasn't logged. We determine
                 * whether the off-heap BIN has changed, and if so then
                 * null is returned. This is also rare.
                 *
                 * (d) The child was loaded from disk (not off-heap cache) but
                 * an off-heap cache entry for this BIN has appeared. Another
                 * thread loaded the BIN from disk and then it was moved
                 * off-heap, possibly after it was modified. We should use the
                 * off-heap version and for simplicity we return null. This is
                 * also rare.
                 *
                 * (e) Otherwise, we attach the fetched/loaded child to the
                 * parent.
                 */
                if (entryTargets.get(idx) != null) {
                    child = (IN) entryTargets.get(idx);

                } else if (getLsn(idx) != lsn) {
                    return null;

                } else if (ohBytes != null &&
                           ohCache.haveBINBytesChanged(this, idx, ohBytes)) {
                    return null;

                } else if (ohBytes == null &&
                           getOffHeapBINId(idx) >= 0) {
                    return null;

                } else {
                    child.latchNoUpdateLRU(databaseImpl);

                    if (ohBytes != null) {
                        child.postLoadInit(this, idx);
                    } else {
                        child.postFetchInit(databaseImpl, lsn);
                    }

                    attachNode(idx, child, null);

                    child.releaseLatch();
                }

                success = true;

            } catch (FileNotFoundException e) {
                throw new EnvironmentFailureException(
                    envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
                    makeFetchErrorMsg(null, lsn, idx), e);

            } catch (ErasedException e) {
                throw new EnvironmentFailureException(
                    envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
                    "IN is erased unexpectedly, implied corruption. " +
                    makeFetchErrorMsg(null, lsn, idx), e);

            } catch (EnvironmentFailureException e) {
                e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx));
                throw e;

            } catch (RuntimeException e) {
                throw new EnvironmentFailureException(
                    envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
                    makeFetchErrorMsg(e.toString(), lsn, idx), e);
            } finally {
                /*
                 * Release the parent latch if null is being returned. In this
                 * case, the parent was unlatched earlier during the disk read,
                 * and as a result, the caller cannot make any assumptions
                 * about the state of the parent.
                 *
                 * If we are returning or throwing out of this try block, the
                 * parent may or may not be latched. So, only release the latch
                 * if it is currently held.
                 */
                if (!success) {
                    if (child != null) {
                        child.incFetchStats(envImpl, isMiss);
                    }
                    releaseLatchIfOwner();
                }

                unpin();
            }
        }

        assert(hasCachedChildren() == hasCachedChildrenFlag());

        child.incFetchStats(envImpl, isMiss);

        return child;
    }

    /**
     * Returns the idx-th child of "this" upper IN, fetching the child from
     * the log and attaching it to its parent if it is not already attached.
     *
     * On entry, the parent must be EX-latched already and it stays EX-latched
     * for the duration of this method and on return (even in case of
     * exceptions).
     *
     * @param idx The slot of the child to fetch.
     */
    public IN fetchIN(int idx, CacheMode cacheMode) {

        assert(isUpperIN());
        if (!isLatchExclusiveOwner()) {
            throw unexpectedState("EX-latch not held before fetch");
        }

        final EnvironmentImpl envImpl = getEnv();
        final OffHeapCache ohCache = envImpl.getOffHeapCache();
        boolean isMiss = false;

        IN child = (IN) entryTargets.get(idx);

        if (child == null) {

            final long lsn = getLsn(idx);

            if (lsn == DbLsn.NULL_LSN) {
                throw unexpectedState(
                    makeFetchErrorMsg("NULL_LSN in upper IN", lsn, idx));
            }

            try {
                byte[] ohBytes = null;

                if (getNormalizedLevel() == 2) {
                    ohBytes = ohCache.getBINBytes(this, idx);
                    if (ohBytes != null) {
                        child = ohCache.materializeBIN(envImpl, ohBytes);
                    }
                }

                if (child == null) {
                    final WholeEntry wholeEntry = envImpl.getLogManager().
                        getLogEntryAllowInvisibleAtRecovery(
                            lsn, getLastLoggedSize(idx));

                    final LogEntry logEntry = wholeEntry.getEntry();
                    child = (IN) logEntry.getResolvedItem(databaseImpl);

                    isMiss = true;
                }

                child.latchNoUpdateLRU(databaseImpl);

                if (ohBytes != null) {
                    child.postLoadInit(this, idx);
                } else {
                    child.postFetchInit(databaseImpl, lsn);
                }

                attachNode(idx, child, null);

                child.releaseLatch();

            } catch (FileNotFoundException e) {
                throw new EnvironmentFailureException(
                    envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
                    makeFetchErrorMsg(null, lsn, idx), e);

            } catch (ErasedException e) {
                throw new EnvironmentFailureException(
                    envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
                    "IN is erased unexpectedly, implied corruption. " +
                    makeFetchErrorMsg(null, lsn, idx), e);

            } catch (EnvironmentFailureException e) {
                e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx));
                throw e;

            } catch (RuntimeException e) {
                throw new EnvironmentFailureException(
                    envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
                    makeFetchErrorMsg(e.toString(), lsn, idx), e);
            }
        }

        assert(hasCachedChildren() == hasCachedChildrenFlag());

        child.incFetchStats(envImpl, isMiss);

        return child;
    }

    /**
     * Returns the idx-th child of "this" upper IN, loading the child from
     * off-heap and attaching it to its parent if it is not already attached
     * and is cached off-heap. This method does not fetch from disk, and will
     * return null if the child is not in the main or off-heap cache.
     *
     * On entry, the parent must be EX-latched already and it stays EX-latched
     * for the duration of this method and on return (even in case of
     * exceptions).
     *
     * @param idx The slot of the child to fetch.
     *
     * @return null if the LN is not in the main or off-heap cache.
     */
    public IN loadIN(int idx, CacheMode cacheMode) {

        assert(isUpperIN());
        if (!isLatchExclusiveOwner()) {
            throw unexpectedState("EX-latch not held before load");
        }

        IN child = (IN) entryTargets.get(idx);

        if (child != null) {
            return child;
        }

        if (getNormalizedLevel() != 2) {
            return null;
        }

        final EnvironmentImpl envImpl = getEnv();
        final OffHeapCache ohCache = envImpl.getOffHeapCache();

        final long lsn = getLsn(idx);

        try {
            final byte[] ohBytes = ohCache.getBINBytes(this, idx);
            if (ohBytes == null) {
                return null;
            }

            child = ohCache.materializeBIN(envImpl, ohBytes);
            child.latchNoUpdateLRU(databaseImpl);
            child.postLoadInit(this, idx);
            attachNode(idx, child, null);
            child.releaseLatch();

            return child;

        } catch (RuntimeException e) {
            throw new EnvironmentFailureException(
                envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
                makeFetchErrorMsg(e.toString(), lsn, idx), e);
        }
    }

    /**
     * Returns the target of the idx'th entry, fetching from disk if necessary.
     *
     * Null is returned in the following cases:
     *
     * 1. if the LSN is null and the KnownDeleted flag is set; or
     * 2. if the LSN's file has been cleaned and:
     *    a. the PendingDeleted or KnownDeleted flag is set, or
     *    b. the entry is "probably expired".
     *
     * Note that checking for PD/KD before calling this method is not
     * sufficient to ensure that null is not returned, because null is also
     * returned for expired records.
     *
     * When null is returned, the caller must treat the record as deleted.
     *
     * Note that null can only be returned for a slot that could contain an LN,
     * not other node types and not a DupCountLN since DupCountLNs are never
     * deleted or expired.
     *
     * An exclusive latch must be held on this BIN.
     *
     * @return the LN or null.
     */
    public final LN fetchLN(int idx, CacheMode cacheMode) {

        return (LN) fetchLN(idx, cacheMode, false);
    }

    /*
     * This method may return either an LN or a DIN child of a BIN. It is meant
     * to be used from DupConvert only.
     */
    public final Node fetchLNOrDIN(int idx, CacheMode cacheMode) {

        return fetchLN(idx, cacheMode, true);
    }

    /*
     * Underlying implementation of the above fetchLNXXX methods.
     */
    private Node fetchLN(int idx, CacheMode cacheMode, boolean dupConvert) {

        assert(isBIN());

        if (!isLatchExclusiveOwner()) {
            throw unexpectedState("EX-latch not held before fetch");
        }

        if (isEntryKnownDeleted(idx)) {
            return null;
        }

        final BIN bin = (BIN) this;
        final EnvironmentImpl envImpl = getEnv();
        final OffHeapCache ohCache = envImpl.getOffHeapCache();
        boolean isMiss = false;

        Node child = entryTargets.get(idx);

        /* Fetch it from disk. */
        if (child == null) {

            final long lsn = getLsn(idx);

            if (lsn == DbLsn.NULL_LSN) {
                throw unexpectedState(makeFetchErrorMsg(
                    "NULL_LSN without KnownDeleted", lsn, idx));
            }

            /*
             * Fetch of immediately obsolete LN not allowed. The only exception
             * is during conversion of an old-style dups DB.
             */
            if (isEmbeddedLN(idx) ||
                (databaseImpl.isLNImmediatelyObsolete() && !dupConvert)) {
                throw unexpectedState("May not fetch immediately obsolete LN");
            }

            try {
                byte[] lnSlotKey = null;

                child = ohCache.loadLN(bin, idx, cacheMode);

                if (child == null) {
                    final WholeEntry wholeEntry = envImpl.getLogManager().
                        getLogEntryAllowInvisibleAtRecovery(
                            lsn, getLastLoggedSize(idx));

                    /* Last logged size is not present before log version 9. */
                    setLastLoggedSize(
                        idx, wholeEntry.getHeader().getEntrySize());

                    final LogEntry logEntry = wholeEntry.getEntry();

                    if (logEntry instanceof LNLogEntry) {

                        final LNLogEntry lnEntry =
                            (LNLogEntry) wholeEntry.getEntry();

                        lnEntry.postFetchInit(databaseImpl);

                        lnSlotKey = lnEntry.getKey();

                        if (cacheMode != CacheMode.EVICT_LN &&
                            cacheMode != CacheMode.EVICT_BIN &&
                            cacheMode != CacheMode.UNCHANGED &&
                            cacheMode != CacheMode.MAKE_COLD) {
                            getEvictor().moveToPri1LRU(this);
                        }
                    }

                    child = (Node) logEntry.getResolvedItem(databaseImpl);

                    isMiss = true;
                }

                child.postFetchInit(databaseImpl, lsn);
                attachNode(idx, child, lnSlotKey);

            } catch (FileNotFoundException e) {
                /*
                 * Cleaner got to the log file. Do not consider this an
                 * integrity error if deleted, likely expired, EXTINCT or
                 * MAYBE_EXTINCT. It is safe to ignore a deleted file for a
                 * KD or PD entry because files with active txns will not be
                 * cleaned.
                 */
                if (!bin.isDeleted(idx) &&
                    !bin.isProbablyExpired(idx) &&
                    envImpl.getExtinctionState(
                        databaseImpl, bin.getKey(idx)) == NOT_EXTINCT) {

                    throw new EnvironmentFailureException(
                        envImpl, EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
                        makeFetchErrorMsg(null, lsn, idx), e);
                }
                return null;

            } catch (ErasedException e) {
                /*
                 * Eraser got to the log file. Do not consider this an
                 * integrity error if EXTINCT or MAYBE_EXTINCT.
                 */
                if (envImpl.getExtinctionState(
                        databaseImpl, bin.getKey(idx)) == NOT_EXTINCT) {

                    throw new EnvironmentFailureException(
                        envImpl, EnvironmentFailureReason.LOG_CHECKSUM,
                        "LN is erased unexpectedly, implied corruption. " +
                        makeFetchErrorMsg(null, lsn, idx), e);
                }
                return null;

            } catch (EnvironmentFailureException e) {
                e.addErrorMessage(makeFetchErrorMsg(null, lsn, idx));
                throw e;

            } catch (RuntimeException e) {
                throw new EnvironmentFailureException(
                    envImpl, EnvironmentFailureReason.LOG_INTEGRITY,
                    makeFetchErrorMsg(e.toString(), lsn, idx), e);
            }
        }

        if (child.isLN()) {
            final LN ln = (LN) child;

            if (cacheMode != CacheMode.UNCHANGED &&
                cacheMode != CacheMode.MAKE_COLD) {
                ln.setFetchedCold(false);
            }

            ohCache.freeRedundantLN(bin, idx, ln, cacheMode);
        }

        child.incFetchStats(envImpl, isMiss);

        return child;
    }

    /**
     * Return the idx'th LN target, enforcing rules defined by the cache modes
     * for the LN. This method should be called instead of getTarget when a
     * cache mode applies to user operations such as reads and updates.
     */
    public final LN getLN(int idx, CacheMode cacheMode) {
        assert isBIN();

        final LN ln = (LN) entryTargets.get(idx);

        if (ln == null) {
            return null;
        }

        if (cacheMode != CacheMode.UNCHANGED &&
            cacheMode != CacheMode.MAKE_COLD) {
            ln.setFetchedCold(false);
        }

        final OffHeapCache ohCache = getOffHeapCache();

        if (ohCache.isEnabled()) {
            ohCache.freeRedundantLN((BIN) this, idx, ln, cacheMode);
        }

        return ln;
    }

    /**
     * Initialize a node that has been read in from the log.
     */
    @Override
    public final void postFetchInit(DatabaseImpl db, long fetchedLsn) {
        assert isLatchExclusiveOwner();

        commonInit(db);
        setLastLoggedLsn(fetchedLsn);
        convertDupKeys(); // must be after initMemorySize
        addToMainCache();

        if (isBIN()) {
            setFetchedCold(true);
        }

        /* See Database.mutateDeferredWriteBINDeltas. */
        if (db.isDeferredWriteMode()) {
            mutateToFullBIN(false);
        }
    }

    /**
     * Initialize a BIN loaded from off-heap cache.
     *
     * Does not call setLastLoggedLsn because materialization of the off-heap
     * BIN initializes all fields including the last logged/delta LSNs.
     */
    private void postLoadInit(IN parent, int idx) {
        assert isLatchExclusiveOwner();

        commonInit(parent.databaseImpl);
        addToMainCache();

        if (isBIN()) {
            setFetchedCold(true);
            setFetchedColdOffHeap(true);
        }

        getEnv().getOffHeapCache().postBINLoad(parent, idx, (BIN) this);
    }

    /**
     * Initialize a node read in during recovery.
     */
    public final void postRecoveryInit(DatabaseImpl db, long lastLoggedLsn) {
        commonInit(db);
        setLastLoggedLsn(lastLoggedLsn);
    }

    /**
     * Common actions of postFetchInit, postLoadInit and postRecoveryInit.
     */
    private void commonInit(DatabaseImpl db) {
        setDatabase(db);
        initMemorySize(); // compute before adding to IN list
    }

    /**
     * Add to INList and perform eviction related initialization.
     */
    private void addToMainCache() {

        getEnv().getInMemoryINs().add(this);

        if (!isDIN() && !isDBIN()) {
            if (isUpperIN() && traceLRU) {
                LoggerUtils.envLogMsg(
                    traceLevel, getEnv(),
                    Thread.currentThread().getId() + "-" +
                    Thread.currentThread().getName() +
                    "-" + getEnv().getName() +
                    " postFetchInit(): " +
                    " Adding UIN to LRU: " + getNodeId());
            }
            getEvictor().addBack(this);
        }

        /* Compress full BINs after fetching or loading. */
        if (!(this instanceof DBIN || this instanceof DIN)) {
            getEnv().lazyCompress(this);
        }
    }

    /**
     * Needed only during duplicates conversion, not during normal operation.
     * The needDupKeyConversion field will only be true when first upgrading
     * from JE 4.1.  After the first time an IN is converted, it will be
     * written out in a later file format, so the needDupKeyConversion field
     * will be false and this method will do nothing.  See
     * DupConvert.convertInKeys.
     */
    private void convertDupKeys() {
        /* Do not convert more than once. */
        if (!needDupKeyConversion) {
            return;
        }
        needDupKeyConversion = false;
        DupConvert.convertInKeys(databaseImpl, this);
    }

    /**
     * @see Node#incFetchStats
     */
    @Override
    final void incFetchStats(EnvironmentImpl envImpl, boolean isMiss) {
        Evictor e = envImpl.getEvictor();
        if (isBIN()) {
            e.incBINFetchStats(isMiss, isBINDelta(false/*checLatched*/));
        } else {
            e.incUINFetchStats(isMiss);
        }
    }

    public String makeFetchErrorMsg(
        final String msg,
        final long lsn,
        final int idx) {

        final byte state = idx >= 0 ? entryStates[idx] : 0;

        final long expirationTime;

        if (isBIN() && idx >= 0) {

            final BIN bin = (BIN) this;

            expirationTime = TTL.expirationToSystemTime(
                bin.getExpiration(idx), isExpirationInHours());

        } else {
            expirationTime = 0;
        }

        return makeFetchErrorMsg(msg, this, lsn, state, expirationTime);
    }

    /**
     * @param in parent IN.  Is null when root is fetched.
     */
    static String makeFetchErrorMsg(
        String msg,
        IN in,
        long lsn,
        byte state,
        long expirationTime) {

        /*
         * Bolster the exception with the LSN, which is critical for
         * debugging. Otherwise, the exception propagates upward and loses the
         * problem LSN.
         */
        StringBuilder sb = new StringBuilder();

        if (in == null) {
            sb.append("fetchRoot of ");
        } else if (in.isBIN()) {
            sb.append("fetchLN of ");
        } else {
            sb.append("fetchIN of ");
        }

        if (lsn == DbLsn.NULL_LSN) {
            sb.append("null lsn");
        } else {
            sb.append(DbLsn.getNoFormatString(lsn));
        }

        if (in != null) {
            sb.append(" parent IN=").append(in.getNodeId());
            sb.append(" IN class=").append(in.getClass().getName());
            sb.append(" lastFullLsn=");
            sb.append(DbLsn.getNoFormatString(in.getLastFullLsn()));
            sb.append(" lastLoggedLsn=");
            sb.append(DbLsn.getNoFormatString(in.getLastLoggedLsn()));
            sb.append(" parent.getDirty()=").append(in.getDirty());
        }

        sb.append(" state=").append(state);

        sb.append(" expires=");

        if (expirationTime != 0) {
            sb.append(TTL.formatExpirationTime(expirationTime));
        } else {
            sb.append("never");
        }

        if (msg != null) {
            sb.append(" ").append(msg);
        }

        return sb.toString();
    }

    public final int findEntry(
        byte[] key,
        boolean indicateIfDuplicate,
        boolean exact) {

        return findEntry(key, indicateIfDuplicate, exact, null /*Comparator*/);
    }

    /**
     * Find the entry in this IN for which key is LTE the key arg.
     *
     * Currently uses a binary search, but eventually, this may use binary or
     * linear search depending on key size, number of entries, etc.
     *
     * This method guarantees that the key parameter, which is the user's key
     * parameter in user-initiated search operations, is always the left hand
     * parameter to the Comparator.compare method.  This allows a comparator
     * to perform specialized searches, when passed down from upper layers.
     *
     * This is public so that DbCursorTest can access it.
     *
     * Note that the 0'th entry's key is treated specially in an IN.  It always
     * compares lower than any other key.
     *
     * @param key - the key to search for.
     * @param indicateIfDuplicate - true if EXACT_MATCH should
     * be or'd onto the return value if key is already present in this node.
     * @param exact - true if an exact match must be found.
     * @return offset for the entry that has a key LTE the arg.  0 if key
     * is less than the 1st entry.  -1 if exact is true and no exact match
     * is found.  If indicateIfDuplicate is true and an exact match was found
     * then EXACT_MATCH is or'd onto the return value.
     */
    public final int findEntry(
        byte[] key,
        boolean indicateIfDuplicate,
        boolean exact,
        Comparator comparator) {

        assert idKeyIsSlotKey();

        int high = nEntries - 1;
        int low = 0;
        int middle = 0;

        if (comparator == null) {
            comparator = databaseImpl.getKeyComparator();
        }

        /*
         * Special Treatment of 0th Entry
         * ------------------------------
         * IN's are special in that they have a entry[0] where the key is a
         * virtual key in that it always compares lower than any other key.
         * BIN's don't treat key[0] specially.  But if the caller asked for an
         * exact match or to indicate duplicates, then use the key[0] and
         * forget about the special entry zero comparison.
         *
         * We always use inexact searching to get down to the BIN, and then
         * call findEntry separately on the BIN if necessary.  So the behavior
         * of findEntry is different for BINs and INs, because it's used in
         * different ways.
         *
         * Consider a tree where the lowest key is "b" and we want to insert
         * "a".  If we did the comparison (with exact == false), we wouldn't
         * find the correct (i.e.  the left) path down the tree.  So the
         * virtual key ensures that "a" gets inserted down the left path.
         *
         * The insertion case is a good specific example.  findBinForInsert
         * does inexact searching in the INs only, not the BIN.
         *
         * There's nothing special about the 0th key itself, only the use of
         * the 0th key in the comparison algorithm.
         */
        boolean entryZeroSpecialCompare =
            isUpperIN() && !exact && !indicateIfDuplicate;

        assert nEntries >= 0;

        while (low <= high) {

            middle = (high + low) / 2;
            int s;

            if (middle == 0 && entryZeroSpecialCompare) {
                s = 1;
            } else {
                s = entryKeys.compareKeys(
                    key,  keyPrefix, middle,
                    haveEmbeddedData(middle), comparator);
            }

            if (s < 0) {
                high = middle - 1;
            } else if (s > 0) {
                low = middle + 1;
            } else {
                int ret;
                if (indicateIfDuplicate) {
                    ret = middle | EXACT_MATCH;
                } else {
                    ret = middle;
                }

                if ((ret >= 0) && exact && isEntryKnownDeleted(ret & 0xffff)) {
                    return -1;
                } else {
                    return ret;
                }
            }
        }

        /*
         * No match found.  Either return -1 if caller wanted exact matches
         * only, or return entry whose key is < search key.
         */
        if (exact) {
            return -1;
        } else {
            return high;
        }
    }

    /**
     * Inserts a slot with the given key, lsn and child node into this IN, if
     * a slot with the same key does not exist already. The state of the new
     * slot is set to DIRTY. Assumes this node is already latched by the
     * caller.
     *
     * @return true if the entry was successfully inserted, false
     * if it was a duplicate.
     *
     * @throws EnvironmentFailureException if the node is full
     * (it should have been split earlier).
     */
    public final boolean insertEntry(
        Node child,
        byte[] key,
        long childLsn)
        throws DatabaseException {

        assert(!isBINDelta());

        int res = insertEntry1(
             child, key, null, childLsn, EntryStates.DIRTY_BIT, false);

        return (res & INSERT_SUCCESS) != 0;
    }

    /**
     * Inserts a slot with the given key, lsn and child node into this IN, if
     * a slot with the same key does not exist already. The state of the new
     * slot is set to DIRTY. Assumes this node is already latched by the
     * caller.
     *
     * @param data If the data portion of a record must be embedded in this
     * BIN, "data" stores the record's data. Null otherwise. See also comment
     * for the keyEntries field. 
     *
     * @return either (1) the index of location in the IN where the entry was
     * inserted |'d with INSERT_SUCCESS, or (2) the index of the duplicate in
     * the IN if the entry was found to be a duplicate.
     *
     * @throws EnvironmentFailureException if the node is full (it should have
     * been split earlier).
     */
    public final int insertEntry1(
        Node child,
        byte[] key,
        byte[] data,
        long childLsn,
        boolean blindInsertion) {

        return insertEntry1(
            child, key, data, childLsn, EntryStates.DIRTY_BIT,
            blindInsertion);
    }

    /**
     * Inserts a slot with the given key, lsn, state, and child node into this
     * IN, if a slot with the same key does not exist already. Assumes this
     * node is already latched by the caller.
     *
     * This returns a failure if there's a duplicate match. The caller must do
     * the processing to check if the entry is actually deleted and can be
     * overwritten. This is foisted upon the caller rather than handled in this
     * object because there may be some latch releasing/retaking in order to
     * check a child LN.
     *
     * @param data If the data portion of a record must be embedded in this
     * BIN, "data" stores the record's data. Null otherwise. See also comment
     * for the keyEntries field. 
     *
     * @return either (1) the index of location in the IN where the entry was
     * inserted |'d with INSERT_SUCCESS, or (2) the index of the duplicate in
     * the IN if the entry was found to be a duplicate.
     *
     * @throws EnvironmentFailureException if the node is full (it should have
     * been split earlier).
     */
    public final int insertEntry1(
        Node child,
        byte[] key,
        byte[] data,
        long childLsn,
        byte state,
        boolean blindInsertion) {

        /*
         * Search without requiring an exact match, but do let us know the
         * index of the match if there is one.
         */
        int index = findEntry(key, true, false);

        if (index >= 0 && (index & EXACT_MATCH) != 0) {

            /*
             * There is an exact match.  Don't insert; let the caller decide
             * what to do with this duplicate.
             */
            return index & ~IN.EXACT_MATCH;
        }

        /*
         * There was no key match, but if this is a bin delta, there may be an
         * exact match in the full bin. Mutate to full bin and search again.
         * However, if we know for sure that the key does not exist in the full
         * BIN, then don't mutate, unless there is no space in the delta to do
         * the insertion.
         */
        if (isBINDelta()) {

            BIN bin = (BIN)this;

            boolean doBlindInsertion = (nEntries < getMaxEntries());

            if (doBlindInsertion &&
                !blindInsertion &&
                bin.mayHaveKeyInFullBin(key)) {

                doBlindInsertion = false;
            }

            if (!doBlindInsertion) {

                mutateToFullBIN(true /*leaveFreeSlot*/);

                index = findEntry(key, true, false);

                if (index >= 0 && (index & EXACT_MATCH) != 0) {
                    return index & ~IN.EXACT_MATCH;
                }
            } else {
                getEvictor().incBinDeltaBlindOps();

                if (traceDeltas) {
                    LoggerUtils.envLogMsg(
                        traceLevel, getEnv(),
                        Thread.currentThread().getId() + "-" +
                        Thread.currentThread().getName() +
                        "-" + getEnv().getName() +
                        (blindInsertion ?
                         " Blind insertion in BIN-delta " :
                         " Blind put in BIN-delta ") +
                        getNodeId() + " nEntries = " +
                        nEntries + " max entries = " +
                        getMaxEntries() +
                        " full BIN entries = " +
                        bin.getFullBinNEntries() +
                        " full BIN max entries = " +
                        bin.getFullBinMaxEntries());
                }
            }
        }

        if (nEntries >= getMaxEntries()) {
            throw unexpectedState(
                getEnv(),
                "Node " + getNodeId() +
                " should have been split before calling insertEntry" +
                " is BIN-delta: " + isBINDelta() +
                " num entries: " + nEntries +
                " max entries: " + getMaxEntries());
        }

        /* There was no key match, so insert to the right of this entry. */
        index++;

        /* We found a spot for insert, shift entries as needed. */
        if (index < nEntries) {
            int oldSize = computeLsnOverhead();

            /* Adding elements to the LSN array can change the space used. */
            shiftEntriesRight(index);

            updateMemorySize(computeLsnOverhead() - oldSize);
        } else {
            nEntries++;
        }

        if (isBINDelta()) {
            ((BIN)this).incFullBinNEntries();
        }

        int oldSize = computeLsnOverhead();

        if (data == null || databaseImpl.isDeferredWriteMode()) {
            setTarget(index, child);
        }

        setLsnInternal(index, childLsn);

        boolean multiSlotChange = insertKey(index, key, data);

        /*
         * Do this after calling insert key to overwrite whatever state changes
         * were done by the insertEntry() call.
         */
        entryStates[index] = state;

        if (data != null) {
            setEmbeddedLN(index);
            if (data.length == 0) {
                setNoDataLN(index);
            }
        }

        adjustCursorsForInsert(index);

        updateMemorySize(oldSize,
                         getEntryInMemorySize(index) +
                         computeLsnOverhead());

        if (multiSlotChange) {
            updateMemorySize(inMemorySize, computeMemorySize());
        }

        setDirty(true);

        assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());

        return (index | INSERT_SUCCESS);
    }

    /**
     * Removes the slot at index from this IN.  Assumes this node is already
     * latched by the caller.
     *
     * @param index The index of the entry to delete from the IN.
     */
    public void deleteEntry(int index) {
        deleteEntry(index, true /*makeDirty*/, true /*validate*/);
    }

    /**
     * Variant that allows specifying whether the IN is dirtied and whether
     * validation takes place. 'validate' should be false only in tests.
     *
     * See BIN.compress and INCompressor for a discussion about why slots can
     * be deleted without dirtying the BIN, and why the next delta is
     * prohibited when the slot is dirty.
     */
    void deleteEntry(int index, boolean makeDirty, boolean validate) {

        assert !isBINDelta();
        assert index >= 0 && index < nEntries;
        assert !validate || validateSubtreeBeforeDelete(index);

        if (makeDirty) {
            setDirty(true);
        }

        if (isDirty(index)) {
            setProhibitNextDelta(true);
        }

        Node child = getTarget(index);

        final OffHeapCache ohCache = getEnv().getOffHeapCache();
        final int level = getNormalizedLevel();
        if (level == 1) {
            ohCache.freeLN((BIN) this, index);
        } else if (level == 2) {
            ohCache.freeBIN((BIN) child, this, index);
        }

        if (child != null && child.isIN()) {
            IN childIN = (IN)child;
            getEnv().getInMemoryINs().remove(childIN);
        }

        updateMemorySize(getEntryInMemorySize(index), 0);
        int oldLSNArraySize = computeLsnOverhead();

        /*
         * Do the actual deletion. Note: setTarget() must be called before
         * copyEntries() so that the hasCachedChildrenFlag will be properly
         * maintained.
         */
        setTarget(index, null);
        copyEntries(index + 1, index, nEntries - index - 1);
        nEntries--;

        /* cleanup what used to be the last entry */
        clearEntry(nEntries);

        /* setLsnInternal can mutate to an array of longs. */
        updateMemorySize(oldLSNArraySize, computeLsnOverhead());

        assert(isBIN() || hasCachedChildrenFlag() == hasCachedChildren());

        /*
         * Note that we don't have to adjust cursors for delete, since
         * there should be nothing pointing at this record.
         */
        traceDelete(Level.FINEST, index);
    }

    /**
     * WARNING: clearEntry() calls entryTargets.set() directly, instead of
     * setTarget(). As a result, the hasCachedChildren flag of the IN is not
     * updated here. The caller is responsible for updating this flag, if
     * needed.
     */
    void clearEntry(int idx) {

        entryTargets = entryTargets.set(idx, null, this);
        entryKeys = entryKeys.set(idx, null, this);
        offHeapBINIds = offHeapBINIds.set(idx, 0, this);
        setLsnInternal(idx, DbLsn.NULL_LSN);
        entryStates[idx] = 0;
    }

    /**
     * This method is called after the idx'th child of this node gets logged,
     * and changes position as a result. 
     *
     * @param newLSN The new on-disk position of the child.
     *
     * @param newVLSN The VLSN of the logrec at the new position.
     * For LN children only.
     *
     * @param newSize The size of the logrec at the new position.
     * For LN children only.
     */
    public final void updateEntry(
        int idx,
        long newLSN,
        long newVLSN,
        int newSize) {

        setLsn(idx, newLSN);

        if (isBIN()) {
            if (isEmbeddedLN(idx)) {
                ((BIN)this).setCachedVLSN(idx, newVLSN);
            } else {
                setLastLoggedSize(idx, newSize);
            }
        }

        setDirty(true);
    }

    /**
     * This method is called only from BIN.applyDelta(). It applies the info
     * extracted from a delta slot to the corresponding slot in the full BIN.
     *
     * Unlike other update methods, the LSN may be NULL_LSN if the KD flag is
     * set. This allows applying a BIN-delta with a NULL_LSN and KD, for an
     * invisible log entry for example.
     *
     * No need to do memory counting in this method because the BIN is not
     * yet attached to the tree.
     */
    final void applyDeltaSlot(
        int idx,
        Node node,
        long lsn,
        int lastLoggedSize,
        byte state,
        byte[] key,
        byte[] data) {

        assert(isBIN());
        assert(!isBINDelta());
        assert(lsn != DbLsn.NULL_LSN ||
               (state & EntryStates.KNOWN_DELETED_BIT) != 0);
        assert(node == null || data == null);
        assert(!getInListResident());

        ((BIN) this).freeOffHeapLN(idx);

        setLsn(idx, lsn, false/*check*/);
        setLastLoggedSize(idx, lastLoggedSize);
        setTarget(idx, node);

        updateLNSlotKey(idx, key, data);

        assert(isEmbeddedLN(idx) == isEmbeddedLN(state));
        assert(isNoDataLN(idx) == isNoDataLN(state));

        entryStates[idx] = state;

        setDirty(true);
    }

    /**
     * Update the idx slot of this BIN to reflect a record insertion in an
     * existing KD slot. It is called from CursorImpl.insertRecordInternal(),
     * after logging the insertion op.
     *
     * @param newLN The LN associated with the new record.
     *
     * @param newLSN The LSN of the insertion logrec.
     *
     * @param newSize The size of the insertion logrec.
     *
     * @param newKey The value for the record's key. It is equal to the current
     * key value in the slot, but may not be identical to that value if a
     * custom comparator is used.
     *
     * @param newData If the record's data must be embedded in this BIN, "data"
     * stores the record's data. Null otherwise. See also comment for the
     * keyEntries field.
     */
    public final void insertRecord(
        int idx,
        LN newLN,
        long newLSN,
        int newSize,
        byte[] newKey,
        byte[] newData,
        int expiration,
        boolean expirationInHours) {

        assert(isBIN());

        final BIN bin = (BIN) this;

        bin.freeOffHeapLN(idx); // old version of the LN is stale

        long oldSlotSize = getEntryInMemorySize(idx);

        setLsn(idx, newLSN);

        boolean multiSlotChange = updateLNSlotKey(idx, newKey, newData);

        if (isEmbeddedLN(idx)) {

            clearLastLoggedSize(idx);

            bin.setCachedVLSN(idx, newLN.getVLSNSequence());

            if (databaseImpl.isDeferredWriteMode()) {
                setTarget(idx, newLN);
            }
        } else {
            setTarget(idx, newLN);
            setLastLoggedSize(idx, newSize);
        }

        bin.setExpiration(idx, expiration, expirationInHours);

        if (multiSlotChange) {
            updateMemorySize(inMemorySize, computeMemorySize());
        } else {
            long newSlotSize = getEntryInMemorySize(idx);
            updateMemorySize(oldSlotSize, newSlotSize);
        }

        clearKnownDeleted(idx);
        clearPendingDeleted(idx);
        setDirty(true);

        assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
    }

    /**
     * Update the idx slot of this BIN to reflect an update of the associated
     * record. It is called from CursorImpl.updateRecordInternal(), after
     * logging the update op.
     *
     * @param oldMemSize If the child LN was cached before the update op, it has
     * already been updated in-place by the caller. In this case, oldMemSize
     * stores the size of the child LN before the update, and it is used to do
     * memory counting. Otherwise oldMemSize is 0 and the newly created LN has
     * not been attached to the tree; it will be attached later by the caller,
     * if needed.
     *
     * @param newLSN The LSN of the update logrec.
     *
     * @param newVLSN The VLSN of the update logrec.
     *
     * @param newSize The on-disk size of the update logrec.
     *
     * @param newKey The new value for the record's key. It is equal to the
     * current value, but may not be identical to the current value if a
     * custom comparator is used. It may be null, if the caller knows for
     * sure that the key does not change.
     *
     * @param newData If the record's data must be embedded in this BIN, "data"
     * stores the record's data. Null otherwise. See also comment for the
     * keyEntries field.
     */
    public final void updateRecord(
        int idx,
        long oldMemSize,
        long newLSN,
        long newVLSN,
        int newSize,
        byte[] newKey,
        byte[] newData,
        int expiration,
        boolean expirationInHours) {

        assert(isBIN());

        final BIN bin = (BIN) this;

        bin.freeOffHeapLN(idx); // old version of the LN is stale

        long oldSlotSize = getEntryInMemorySize(idx);

        setLsn(idx, newLSN);

        boolean multiSlotChange = updateLNSlotKey(idx, newKey, newData);

        if (isEmbeddedLN(idx)) {
            clearLastLoggedSize(idx);
            ((BIN)this).setCachedVLSN(idx, newVLSN);
        } else {
            setLastLoggedSize(idx, newSize);
        }

        bin.setExpiration(idx, expiration, expirationInHours);

        if (multiSlotChange) {
            updateMemorySize(inMemorySize, computeMemorySize());
        } else {
            /* Update mem size for key change. */
            long newSlotSize = getEntryInMemorySize(idx);
            updateMemorySize(oldSlotSize, newSlotSize);

            /* Update mem size for node change. */
            Node newLN = entryTargets.get(idx);
            long newMemSize =
                (newLN != null ? newLN.getMemorySizeIncludedByParent() : 0);
            updateMemorySize(oldMemSize, newMemSize);
        }

        setDirty(true);
    }

    /**
     * Update the idx slot slot of this BIN to reflect a deletion of the 
     * associated record. It is called from CursorImpl.deleteCurrentRecord(),
     * after logging the deletion op.
     *
     * @param oldMemSize If the child LN was cached before the deletion, it
     * has already been updated in-place by the caller (the ln contents have
     * been deleted). In this case, oldMemSize stores the in-memory size of
     * the child LN before the update, and it is used to do memory counting.
     * Otherwise oldMemSize is 0 and the newly created LN has not been attached
     * to the tree; it will be attached later by the caller, if needed.
     *
     * @param newLSN The LSN of the deletion logrec.
     *
     * @param newVLSN The VLSN of the deletion logrec.
     *
     * @param newSize The on-disk size of the deletion logrec.
     */
    public final void deleteRecord(
        int idx,
        long oldMemSize,
        long newLSN,
        long newVLSN,
        int newSize) {

        assert(isBIN());

        final BIN bin = (BIN) this;

        bin.freeOffHeapLN(idx); // old version of the LN is stale

        setLsn(idx, newLSN);

        if (isEmbeddedLN(idx)) {
            clearLastLoggedSize(idx);
            bin.setCachedVLSN(idx, newVLSN);
        } else {
            setLastLoggedSize(idx, newSize);
        }

        if (entryTargets.get(idx) != null) {
            /* Update mem size for node change. */
            assert(oldMemSize != 0);
            Node newLN = entryTargets.get(idx);
            long newMemSize = newLN.getMemorySizeIncludedByParent();
            updateMemorySize(oldMemSize, newMemSize);
        } else {
            assert(oldMemSize == 0);
        }

        setPendingDeleted(idx);
        setDirty(true);
    }

    /**
     * This method is used by the RecoveryManager to change the current version
     * of a record, either to a later version (in case of redo), or to an 
     * earlier version (in case of undo). The current version may or may not be
     * cached as a child LN of this BIN (it may be only in case of txn abort
     * during normal processing). If it is, it is evicted. The new version is
     * not attached to the in-memory tree, to save memory during crash
     * recovery.
     *
     * @param idx The BIN slot for the record.
     *
     * @param lsn The LSN of the new record version. It may be null in case of
     * undo, if the logrec that is being undone is an insertion and the record
     * did not exist at all in the DB before that insertion.
     *
     * @param knownDeleted True if the new version is a committed deletion.
     *
     * @param pendingDeleted True if the new version is a deletion, which 
     * may or may not be committed.
     *
     * @param key The key of the new version. It is null only if we are undoing
     * and the revert-to version was not embedded (in this case the key of the
     * revert-to version is not stored in the logrec). If it is null and the
     * DB allows key updates, the new record version is fetched from disk to
     * retrieve its key, so that the key values stored in the BIN slots are
     * always transactionally correct.
     *
     * @param data The data of the new version. It is non-null if and only if
     * the new version must be embedded in the BIN.
     *
     * @param vlsn The VLSN of the new version.
     *
     * @param logrecSize The on-disk size of the logrec corresponding to the
     * new version. It may be 0 (i.e. unknown) in case of undo. 
     */
    public final void recoverRecord(
        int idx,
        long lsn,
        boolean knownDeleted,
        boolean pendingDeleted,
        byte[] key,
        byte[] data,
        long vlsn,
        int logrecSize,
        int expiration,
        boolean expirationInHours) {

        assert(isBIN());

        BIN bin = (BIN) this;

        bin.freeOffHeapLN(idx); // old version of the LN is stale

        if (lsn == DbLsn.NULL_LSN) {

            /*
             * A NULL lsn means that we are undoing an insertion that was done
             * without slot reuse. To undo such an insertion we evict the 
             * current version (it may cached only in case of normal txn abort)
             * and set the KD flag in the slot. We also set the LSN to null to
             * ensure that the slot does not point to a logrec that does not
             * reflect the slot's current state. The slot can then be put on
             * the compressor for complete removal.
             */
            setKnownDeletedAndEvictLN(idx);

            setLsnInternal(idx, DbLsn.NULL_LSN);

            bin.queueSlotDeletion(idx);

            return;
        }

        if (key == null &&
            databaseImpl.allowsKeyUpdates() &&
            !knownDeleted) {

            try {
                WholeEntry wholeEntry = getEnv().getLogManager().
                    getLogEntryAllowInvisibleAtRecovery(
                        lsn, getLastLoggedSize(idx));

                LNLogEntry logrec = (LNLogEntry) wholeEntry.getEntry();
                logrec.postFetchInit(getDatabase());

                key = logrec.getKey();
                logrecSize = wholeEntry.getHeader().getEntrySize();

            } catch (FileNotFoundException e) {
                throw new EnvironmentFailureException(
                    getEnv(), EnvironmentFailureReason.LOG_FILE_NOT_FOUND,
                    makeFetchErrorMsg(null, lsn, idx), e);

            } catch (ErasedException e) {
                throw new EnvironmentFailureException(
                    getEnv(), EnvironmentFailureReason.LOG_CHECKSUM,
                    "LN is erased unexpectedly, implied corruption. " +
                    makeFetchErrorMsg(null, lsn, idx), e);
            }
        }

        long oldSlotSize = getEntryInMemorySize(idx);

        setLsn(idx, lsn);
        setTarget(idx, null);

        boolean multiSlotChange = updateLNSlotKey(idx, key, data);

        if (isEmbeddedLN(idx)) {
            clearLastLoggedSize(idx);
            bin.setCachedVLSN(idx, vlsn);
        } else {
            setLastLoggedSize(idx, logrecSize);
        }

        if (knownDeleted) {
            assert(!pendingDeleted);
            setKnownDeleted(idx);
            bin.queueSlotDeletion(idx);
        } else {
            clearKnownDeleted(idx);
            if (pendingDeleted) {
                setPendingDeleted(idx);
                bin.queueSlotDeletion(idx);
            } else {
                clearPendingDeleted(idx);
            }
        }

        bin.setExpiration(idx, expiration, expirationInHours);

        if (multiSlotChange) {
            updateMemorySize(inMemorySize, computeMemorySize());
        } else {
            long newSlotSize = getEntryInMemorySize(idx);
            updateMemorySize(oldSlotSize, newSlotSize);
        }

        setDirty(true);
    }

    /**
     * Update the cached-child and LSN properties of the idx-th slot. This
     * method is used by the RecoveryManager.recoverChildIN() to change the
     * version of a child IN, a later version The child IN may or may not be
     * already attached to the tree.
     */
    public final void recoverIN(
        int idx,
        Node node,
        long lsn,
        int lastLoggedSize) {

        long oldSlotSize = getEntryInMemorySize(idx);

        /*
         * If we are about to detach a cached child IN, make sure that it is
         * not in the INList. This is correct, because this method is called
         * during the recovery phase where the INList is disabled,
         */
        Node child = getTarget(idx);
        assert(child == null ||
               !((IN)child).getInListResident() ||
               child == node/* this is needed by a unit test*/);

        setLsn(idx, lsn);
        setLastLoggedSize(idx, lastLoggedSize);
        setTarget(idx, node);

        long newSlotSize = getEntryInMemorySize(idx);
        updateMemorySize(oldSlotSize, newSlotSize);

        setDirty(true);

        assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
    }

    /**
     * Attach the given node as the idx-th child of "this" node. If the child
     * node is an LN, update the key of the parent slot to the given key value,
     * if that value is non-null and an update is indeed necessary.
     *
     * This method is called after the child node has been either (a) fetched
     * in from disk and is not dirty, or (b) is a newly created instance that
     * will be written out later by something like a checkpoint. In either
     * case, the slot LSN does not need to be updated.
     *
     * Note: does not dirty the node unless the LN slot key is changed.
     */
    public final void attachNode(int idx, Node node, byte[] newKey) {

        assert !(node instanceof IN) || ((IN) node).isLatchExclusiveOwner();

        long oldSlotSize = getEntryInMemorySize(idx);

        /* Make sure we are not using this method to detach a cached child */
        assert(getTarget(idx) == null);

        setTarget(idx, node);

        boolean multiSlotChange = false;

        if (isBIN() && newKey != null) {
            assert(!haveEmbeddedData(idx));
            multiSlotChange = updateLNSlotKey(idx, newKey, null);
        }

        if (multiSlotChange) {
            updateMemorySize(inMemorySize, computeMemorySize());
        } else {
            long newSlotSize = getEntryInMemorySize(idx);
            updateMemorySize(oldSlotSize, newSlotSize);
        }

        assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
    }

    /*
     * Detach from the tree the child node at the idx-th slot.
     *
     * The most common caller of this method is the evictor. If the child
     * being evicted was dirty, it has just been logged and the lsn of the
     * slot must be updated.
     */
    public final void detachNode(int idx, boolean updateLsn, long newLsn) {

        long oldSlotSize = getEntryInMemorySize(idx);

        Node child = getTarget(idx);

        if (updateLsn) {
            setLsn(idx, newLsn);
            setDirty(true);
        }
        setTarget(idx, null);

        long newSlotSize = getEntryInMemorySize(idx);
        updateMemorySize(oldSlotSize, newSlotSize);

        if (child != null && child.isIN()) {
            getEnv().getInMemoryINs().remove((IN) child);
        }

        assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
    }

    /**
     * This method is used in DupConvert, where it is called to convert the
     * keys of an upper IN that has just been fetched from the log and is not
     * attached to in-memory tree yet.
     */
    public final void convertKey(int idx, byte[] newKey) {

        long oldSlotSize = getEntryInMemorySize(idx);

        boolean multiSlotChange = updateKey(idx, newKey, null);

        if (multiSlotChange) {
            updateMemorySize(inMemorySize, computeMemorySize());
        } else {
            long newSlotSize = getEntryInMemorySize(idx);
            updateMemorySize(oldSlotSize, newSlotSize);
        }

        setDirty(true);

        assert(isBIN() || hasCachedChildren() == hasCachedChildrenFlag());
    }

    void copyEntries(final int from, final int to, final int n) {

        entryTargets = entryTargets.copy(from, to, n, this);
        entryKeys = entryKeys.copy(from, to, n, this);
        offHeapBINIds = offHeapBINIds.copy(from, to, n, this);

        System.arraycopy(entryStates, from, entryStates, to, n);

        if (entryLsnLongArray == null) {
            final int fromOff = from << 2;
            final int toOff = to << 2;
            final int nBytes = n << 2;
            System.arraycopy(entryLsnByteArray, fromOff,
                entryLsnByteArray, toOff, nBytes);
        } else {
            System.arraycopy(entryLsnLongArray, from,
                entryLsnLongArray, to,
                n);
        }
    }

    /**
     * Return true if this node needs splitting.  For the moment, needing to be
     * split is defined by there being no free entries available.
     */
    public final boolean needsSplitting() {

        if (isBINDelta()) {
            BIN bin = (BIN)this;
            int fullBinNEntries = bin.getFullBinNEntries();
            int fullBinMaxEntries = bin.getFullBinMaxEntries();

            if (fullBinNEntries < 0) {
                /* fullBinNEntries is unknown in logVersions < 10 */
                mutateToFullBIN(false /*leaveFreeSlot*/);
            } else {
                assert(fullBinNEntries > 0);
                return ((fullBinMaxEntries - fullBinNEntries) < 1);
            }
        }

        return ((getMaxEntries() - nEntries) < 1);
    }

    /**
     * Split this into two nodes.  Parent IN is passed in parent and should be
     * latched by the caller.
     *
     * childIndex is the index in parent of where "this" can be found.
     */
    public final IN split(
        IN parent,
        int childIndex,
        IN grandParent,
        int maxEntries) {

        return splitInternal(parent, childIndex, grandParent, maxEntries, -1);
    }

    /**
     * Called when we know we are about to split on behalf of a key that is the
     * minimum (leftSide) or maximum (!leftSide) of this node.  This is
     * achieved by just forcing the split to occur either one element in from
     * the left or the right (i.e. splitIndex is 1 or nEntries - 1).
     */
    IN splitSpecial(
        IN parent,
        int parentIndex,
        IN grandParent,
        int maxEntriesPerNode,
        byte[] key,
        boolean leftSide) {

        int index = findEntry(key, false, false);

        if (leftSide && index == 0) {
            return splitInternal(
                parent, parentIndex, grandParent, maxEntriesPerNode, 1);

        } else if (!leftSide && index == (nEntries - 1)) {
            return splitInternal(
                parent, parentIndex, grandParent, maxEntriesPerNode,
                nEntries - 1);

        } else {
            return split(
                parent, parentIndex, grandParent, maxEntriesPerNode);
        }
    }

    final IN splitInternal(
        final IN parent,
        final int childIndex,
        final IN grandParent,
        final int maxEntries,
        int splitIndex)
        throws DatabaseException {

        assert(!isBINDelta());

        /*
         * Find the index of the existing identifierKey so we know which IN
         * (new or old) to put it in.
         */
        if (identifierKey == null) {
            throw unexpectedState();
        }

        final int idKeyIndex = findEntry(identifierKey, false, false);

        if (splitIndex < 0) {
            splitIndex = nEntries / 2;
        }

        /* Range of entries to copy to new sibling. */
        final int low, high;

        if (idKeyIndex < splitIndex) {

            /*
             * Current node (this) keeps left half entries.  Right half entries
             * will go in the new node.
             */
            low = splitIndex;
            high = nEntries;
        } else {

            /*
             * Current node (this) keeps right half entries.  Left half entries
             * will go in the new node.
             */
            low = 0;
            high = splitIndex;
        }

        final byte[] newIdKey = getKey(low);
        long parentLsn;

        /*
         * Ensure that max entries is large enough to hold the slots being
         * moved to the new sibling, with one spare slot for insertions. This
         * is important when the maxEntries param is less than nEntries in this
         * node, which can occur when the user reduces the fanout or when this
         * node has temporarily grown beyond its original fanout.
         */
        final IN newSibling = createNewInstance(
            newIdKey,
            Math.max(maxEntries, high - low + 1),
            level);

        newSibling.latch(CacheMode.UNCHANGED);

        try {
            boolean addedNewSiblingToCompressorQueue = false;
            final int newSiblingNEntries = (high - low);
            final boolean haveCachedChildren = hasCachedChildrenFlag();

            assert(isBIN() || haveCachedChildren == hasCachedChildren());

            final BIN bin = isBIN() ? (BIN) this : null;

            /**
             * Distribute entries among the split node and the new sibling.
             */
            for (int i = low; i < high; i++) {

                if (!addedNewSiblingToCompressorQueue &&
                    bin != null &&
                    bin.isDefunct(i)) {

                    addedNewSiblingToCompressorQueue = true;
                    getEnv().addToCompressorQueue((BIN) newSibling);
                }

                newSibling.appendEntryFromOtherNode(this, i);
                clearEntry(i);
            }

            if (low == 0) {
                shiftEntriesLeft(newSiblingNEntries);
            }

            nEntries -= newSiblingNEntries;
            setDirty(true);

            if (isUpperIN() && haveCachedChildren) {
                setHasCachedChildrenFlag(hasCachedChildren());
            }

            assert(isBIN() ||
                   hasCachedChildrenFlag() == hasCachedChildren());
            assert(isBIN() ||
                   newSibling.hasCachedChildrenFlag() ==
                   newSibling.hasCachedChildren());

            adjustCursors(newSibling, low, high);

            /*
             * If this node has no key prefix, calculate it now that it has
             * been split.  This must be done before logging, to ensure the
             * prefix information is made persistent [#20799].
             */
            byte[] newKeyPrefix = computeKeyPrefix(-1);
            recalcSuffixes(newKeyPrefix, null, null, -1);

            /* Apply compaction after prefixing [#20799]. */
            entryKeys = entryKeys.compact(this);

            /* Only recalc if there are multiple entries in newSibling. */
            if (newSibling.getNEntries() > 1) {
                byte[] newSiblingPrefix = newSibling.computeKeyPrefix(-1);
                newSibling.recalcSuffixes(newSiblingPrefix, null, null, -1);
                /* initMemorySize calls entryKeys.compact. */
                newSibling.initMemorySize();
            }

            assert idKeyIsSlotKey();
            assert newSibling.idKeyIsSlotKey();

            /*
             * Update size. newSibling and parent are correct, but this IN has
             * had its entries shifted and is not correct.
             *
             * Also, inMemorySize does not reflect changes that may have
             * resulted from key prefixing related changes, it needs to be
             * brought up to date, so update it appropriately for this and the
             * above reason.
             */
            EnvironmentImpl env = getEnv();
            INList inMemoryINs = env.getInMemoryINs();
            long oldMemorySize = inMemorySize;
            long newSize = computeMemorySize();
            updateMemorySize(oldMemorySize, newSize);

            /*
             * Parent refers to child through an element of the entries array.
             * Depending on which half of the BIN we copied keys from, we
             * either have to adjust one pointer and add a new one, or we have
             * to just add a new pointer to the new sibling.
             *
             * We must use the provisional logging for two reasons:
             *
             *   1) All three log entries must be read atomically. The parent
             *   must get logged last, as all referred-to children must precede
             *   it. Provisional entries guarantee that all three are processed
             *   as a unit. Recovery skips provisional entries, so the changed
             *   children are only used if the parent makes it out to the log.
             *
             *   2) We log all they way to the root to avoid the "great aunt"
             *   problem (see RecoveryManager.buildINs), and provisional
             *   logging is necessary during a checkpoint for levels less than
             *   maxFlushLevel.
             *
             * We prohibit compression during logging because there should be
             * at least one entry in each IN. Note the use of getKey(0) below.
             */
            long newSiblingLsn =
                newSibling.optionalLogProvisionalNoCompress(parent);

            long myNewLsn = optionalLogProvisionalNoCompress(parent);

            assert nEntries > 0;

            /*
             * When we update the parent entry, we make sure that we don't
             * replace the parent's key that points at 'this' with a key that
             * is > than the existing one.  Replacing the parent's key with
             * something > would effectively render a piece of the subtree
             * inaccessible.  So only replace the parent key with something
             * <= the existing one.  See tree/SplitTest.java for more details
             * on the scenario.
             */
            if (low == 0) {

                /*
                 * Change the original entry to point to the new child and add
                 * an entry to point to the newly logged version of this
                 * existing child.
                 */
                parent.prepareForSlotReuse(childIndex);

                parent.updateSplitSlot(
                    childIndex, newSibling, newSiblingLsn, newIdKey);

                boolean inserted = parent.insertEntry(
                    this, getKey(0), myNewLsn);
                assert inserted;
            } else {

                /*
                 * Update the existing child's LSN to reflect the newly logged
                 * version and insert new child into parent.
                 */
                parent.updateSplitSlot(childIndex, this, myNewLsn, getKey(0));

                boolean inserted = parent.insertEntry(
                    newSibling, newIdKey, newSiblingLsn);
                assert inserted;
            }

            inMemoryINs.add(newSibling);

            /*
             * Log the parent. Note that the root slot or grandparent slot is
             * not updated with the parent's LSN here; this is done by
             * Tree.forceSplit.
             */
            if (parent.isRoot()) {
                parentLsn = parent.optionalLog();
            } else {
                parentLsn = parent.optionalLogProvisional(grandParent);
            }

            /* Coordinate the split with an in-progress checkpoint. */
            env.getCheckpointer().coordinateSplitWithCheckpoint(newSibling);

            /*
             * Check whether either the old or the new sibling must be added
             * to the LRU (priority-1 LRUSet).
             */
            assert(!isDIN() && !isDBIN());

            if(isBIN() || !newSibling.hasCachedChildrenFlag()) {
                if (isUpperIN() && traceLRU) {
                    LoggerUtils.envLogMsg(
                        traceLevel, getEnv(),
                        "split-newSibling " +
                            Thread.currentThread().getId() + "-" +
                            Thread.currentThread().getName() +
                            "-" + getEnv().getName() +
                            " Adding UIN to LRU: " +
                            newSibling.getNodeId());
                }
                getEvictor().addBack(newSibling);
            }

            if (isUpperIN() &&
                haveCachedChildren &&
                !hasCachedChildrenFlag()) {
                if (traceLRU) {
                    LoggerUtils.envLogMsg(
                        traceLevel, getEnv(),
                        "split-oldSibling " +
                        Thread.currentThread().getId() + "-" +
                        Thread.currentThread().getName() +
                        "-" + getEnv().getName() +
                        " Adding UIN to LRU: " + getNodeId());
                }
                getEvictor().addBack(this);
            }

            /* Debug log this information. */
            traceSplit(Level.FINE, parent,
                       newSibling, parentLsn, myNewLsn,
                       newSiblingLsn, splitIndex, idKeyIndex, childIndex);
        } finally {
            newSibling.releaseLatch();
        }

        return newSibling;
    }

    /**
     * Used for moving entries between BINs during splits.
     */
    void appendEntryFromOtherNode(IN from, int fromIdx) {

        assert(!isBINDelta());

        final Node target = from.entryTargets.get(fromIdx);
        final int ohBinId = from.getOffHeapBINId(fromIdx);
        final boolean ohBinPri2 = from.isOffHeapBINPri2(fromIdx);
        final boolean ohBinDirty = from.isOffHeapBINDirty(fromIdx);
        final long lsn = from.getLsn(fromIdx);
        final byte state = from.entryStates[fromIdx];
        final byte[] key = from.getKey(fromIdx);
        final byte[] data = (from.haveEmbeddedData(fromIdx) ?
                             from.getData(fromIdx) : null);

        long oldSize = computeLsnOverhead();

        ++nEntries;

        int idx = nEntries - 1;

        /*
         * When calling setTarget for an IN child we must latch it, because
         * setTarget sets the parent.
         */
        if (target != null && target.isIN()) {
            final IN in = (IN) target;
            in.latchNoUpdateLRU(databaseImpl);
            setTarget(idx, target);
            in.releaseLatch();
        } else {
            setTarget(idx, target);
        }

        boolean multiSlotChange = insertKey(idx, key, data);

        /* setLsnInternal can mutate to an array of longs. */
        setLsnInternal(idx, lsn);

        entryStates[idx] = state;

        if (ohBinId >= 0) {
            setOffHeapBINId(idx, ohBinId, ohBinPri2, ohBinDirty);
            getOffHeapCache().setOwner(ohBinId, this);
        }

        if (multiSlotChange) {
            updateMemorySize(inMemorySize, computeMemorySize());
        } else {
            long newSize = getEntryInMemorySize(idx) + computeLsnOverhead();
            updateMemorySize(oldSize, newSize);
        }

        setDirty(true);
    }

    /**
     * Update a slot that is being split. The slot to be updated here is the
     * one that existed before the split.
     *
     * @param child The new child to be placed under the slot. May be the
     * newly created sibling or the pre-existing sibling.
     * @param lsn The new lsn of the child (the child was logged just before
     * calling this method, so its slot lsn must be updated)
     * @param key The new key for the slot. We should not actually update the
     * slot key, because its value is the lower bound of the key range covered
     * by the slot, and this lower bound does not change as a result of the
     * split (the new slot created as a result of the split is placed to the
     * right of the pre-existing slot). There is however one exception: the
     * key can be updated if "idx" is the 0-slot. The 0-slot key is not a true
     * lower bound; the actual lower bound for the 0-slot is the key in the
     * parent slot for this IN. So, in this case, if the given key is less
     * than the current one, it is better to update the key in order to better
     * approximate the real lower bound (and thus make the isKeyInBounds()
     * method more effective).
     */
    private void updateSplitSlot(
        int idx,
        IN child,
        long lsn,
        byte[] key) {

        assert(isUpperIN());

        long oldSize = getEntryInMemorySize(idx);

        setLsn(idx, lsn);
        setTarget(idx, child);

        if (idx == 0) {
            int s = entryKeys.compareKeys(
                key, keyPrefix, idx, haveEmbeddedData(idx),
                getKeyComparator());

            boolean multiSlotChange = false;
            if (s < 0) {
                multiSlotChange = updateKey(idx, key, null/*data*/);
            }

            if (multiSlotChange) {
                updateMemorySize(inMemorySize, computeMemorySize());
            } else {
                long newSize = getEntryInMemorySize(idx);
                updateMemorySize(oldSize, newSize);
            }
        } else {
            long newSize = getEntryInMemorySize(idx);
            updateMemorySize(oldSize, newSize);
        }

        setDirty(true);

        assert(hasCachedChildren() == hasCachedChildrenFlag());
    }

    /**
     * Shift entries to the right by one position, starting with (and
     * including) the entry at index. Increment nEntries by 1. Called
     * in insertEntry1()
     *
     * @param index - The position to start shifting from.
     */
    private void shiftEntriesRight(int index) {
        copyEntries(index, index + 1, nEntries - index);
        clearEntry(index);
        nEntries++;
        setDirty(true);
    }

    /**
     * Shift entries starting at the byHowMuch'th element to the left, thus
     * removing the first byHowMuch'th elements of the entries array.  This
     * always starts at the 0th entry. Caller is responsible for decrementing
     * nEntries.
     *
     * @param byHowMuch - The number of entries to remove from the left side
     * of the entries array.
     */
    private void shiftEntriesLeft(int byHowMuch) {
        copyEntries(byHowMuch, 0, nEntries - byHowMuch);
        for (int i = nEntries - byHowMuch; i < nEntries; i++) {
            clearEntry(i);
        }
        setDirty(true);
    }

    void adjustCursors(
        IN newSibling,
        int newSiblingLow,
        int newSiblingHigh) {
        /* Cursors never refer to IN's. */
    }

    void adjustCursorsForInsert(int insertIndex) {
        /* Cursors never refer to IN's. */
    }

    /**
     * Called prior to changing a slot to contain a different logical node.
     *
     * Necessary to support assertions for transient LSNs in shouldUpdateLsn.
     * Examples: LN slot reuse, and splits where a new node is placed in an
     * existing slot.
     *
     * Also needed to free the off-heap BIN associated with the old node.
     *
     * TODO: This method is no longer used for LN slot reuse, and freeing of
     * the off-heap BIN could be done by the only caller, splitInternal, and
     * then this method could be removed.
     */
    public void prepareForSlotReuse(int idx) {

        if (databaseImpl.isDeferredWriteMode()) {
            setLsn(idx, DbLsn.NULL_LSN, false/*check*/);
        }

        final OffHeapCache ohCache = getOffHeapCache();
        if (ohCache.isEnabled() && getNormalizedLevel() == 2) {
            ohCache.freeBIN((BIN) getTarget(idx), this, idx);
        }
    }

    /*
     * Get the current memory consumption of this node
     */
    public long getInMemorySize() {
        return inMemorySize;
    }

    /**
     * Compute the current memory consumption of this node, after putting
     * its keys in their compact representation, if possible.
     */
    private void initMemorySize() {
        entryKeys = entryKeys.compact(this);
        inMemorySize = computeMemorySize();
    }

    /**
     * Count up the memory usage attributable to this node alone. LNs children
     * are counted by their BIN parents, but INs are not counted by their
     * parents because they are resident on the IN list.  The identifierKey is
     * "intentionally" not kept track of in the memory budget.
     */
    public long computeMemorySize() {

        long calcMemorySize = getFixedMemoryOverhead();

        calcMemorySize += MemoryBudget.byteArraySize(entryStates.length);

        calcMemorySize += computeLsnOverhead();

        for (int i = 0; i < nEntries; i++) {
            calcMemorySize += getEntryInMemorySize(i);
        }

        if (keyPrefix != null) {
            calcMemorySize += MemoryBudget.byteArraySize(keyPrefix.length);
        }

        if (provisionalObsolete != null) {
            calcMemorySize += provisionalObsolete.getMemorySize();
        }

        calcMemorySize += entryTargets.calculateMemorySize();
        calcMemorySize += entryKeys.calculateMemorySize();

        if (offHeapBINIds != null) {
            calcMemorySize += offHeapBINIds.getMemorySize();
        }

        return calcMemorySize;
    }

    /*
     * Overridden by subclasses.
     */
    protected long getFixedMemoryOverhead() {
        return MemoryBudget.IN_FIXED_OVERHEAD;
    }

    /*
     * Compute the memory consumption for storing this node's LSNs
     */
    private int computeLsnOverhead() {
        return (entryLsnLongArray == null) ?
            MemoryBudget.byteArraySize(entryLsnByteArray.length) :
            MemoryBudget.ARRAY_OVERHEAD +
                (entryLsnLongArray.length *
                 MemoryBudget.PRIMITIVE_LONG_ARRAY_ITEM_OVERHEAD);
    }

    private long getEntryInMemorySize(int idx) {

        /*
         * Do not count state size here, since it is counted as overhead
         * during initialization.
         */
        long ret = 0;

        /*
         * Don't count the key size if the representation has already
         * accounted for it.
         */
        if (!entryKeys.accountsForKeyByteMemUsage()) {

            /*
             * Materialize the key object only if needed, thus avoiding the
             * object allocation cost when possible.
             */
            final byte[] key = entryKeys.get(idx);
            if (key != null) {
                ret += MemoryBudget.byteArraySize(key.length);
            }
        }

        final Node target = entryTargets.get(idx);
        if (target != null) {
            ret += target.getMemorySizeIncludedByParent();
        }
        return ret;
    }

    /**
     * Compacts the representation of the IN, if possible.
     *
     * Called by the evictor to reduce memory usage. Should not be called too
     * often (e.g., every CRUD operation), since this could cause lots of
     * memory allocations as the representations contract and expend, resulting
     * in expensive GC.
     *
     * @return number of bytes reclaimed.
     */
    public long compactMemory() {

        final long oldSize = inMemorySize;
        final INKeyRep oldKeyRep = entryKeys;

        entryTargets = entryTargets.compact(this);
        entryKeys = entryKeys.compact(this);
        offHeapBINIds = offHeapBINIds.compact(this, EMPTY_OFFHEAP_BIN_IDS);

        /*
         * Note that we only need to account for mem usage changes in the key
         * rep here, not the target rep.  The target rep, unlike the key rep,
         * updates its mem usage internally, and the responsibility for mem
         * usage of contained nodes is fixed -- it is always managed by the IN.
         *
         * When the key rep changes, the accountsForKeyByteMemUsage property
         * also changes. Recalc the size of the entire IN, because
         * responsibility for managing contained key byte mem usage has shifted
         * between the key rep and the IN parent.
         */
        if (entryKeys != oldKeyRep) {
            updateMemorySize(inMemorySize, computeMemorySize());
        }

        return oldSize - inMemorySize;
    }

    /**
     * Returns the amount of memory currently budgeted for this IN.
     */
    public long getBudgetedMemorySize() {
        return inMemorySize - accumulatedDelta;
    }

    /**
     * Called as part of a memory budget reset (during a checkpoint) to clear
     * the accumulated delta and return the total memory size.
     */
    public long resetAndGetMemorySize() {
        accumulatedDelta = 0;
        return inMemorySize;
    }

    protected void updateMemorySize(long oldSize, long newSize) {
        long delta = newSize - oldSize;
        updateMemorySize(delta);
    }

    /*
     * Called when a cached child is replaced by another cached child.
     */
    void updateMemorySize(Node oldNode, Node newNode) {
        long delta = 0;
        if (newNode != null) {
            delta = newNode.getMemorySizeIncludedByParent();
        }

        if (oldNode != null) {
            delta -= oldNode.getMemorySizeIncludedByParent();
        }
        updateMemorySize(delta);
    }

    /*
     * Change this.onMemorySize by the given delta and update the memory
     * budget for the cache, but only if the accummulated delta for this
     * node exceeds the ACCUMULATED_LIMIT threshold and this IN is actually
     * on the IN list. (For example, when we create new INs, they are
     * manipulated off the IN list before being added; if we updated the
     * environment wide cache then, we'd end up double counting.)
     */
    void updateMemorySize(long delta) {

        if (delta == 0) {
            return;
        }

        inMemorySize += delta;

        if (getInListResident()) {

            /*
             * This assertion is disabled if the environment is invalid to
             * avoid spurious assertions during testing of IO errors.  If the
             * environment is invalid, memory budgeting errors are irrelevant.
             * [#21929]
             */
            assert
                inMemorySize >= getFixedMemoryOverhead() ||
                !getEnv().isValid():
                "delta: " + delta + " inMemorySize: " + inMemorySize +
                " overhead: " + getFixedMemoryOverhead() +
                " computed: " + computeMemorySize() +
                " dump: " + toString() + assertPrintMemorySize();

            accumulatedDelta += delta;
            if (accumulatedDelta > ACCUMULATED_LIMIT ||
                accumulatedDelta < -ACCUMULATED_LIMIT) {
                updateMemoryBudget();
            }
        }
    }

    /**
     * Move the accumulated delta to the memory budget.
     */
    public void updateMemoryBudget() {
        final EnvironmentImpl env = getEnv();
        env.getInMemoryINs().memRecalcUpdate(this, accumulatedDelta);
        env.getMemoryBudget().updateTreeMemoryUsage(accumulatedDelta);
        accumulatedDelta = 0;
    }

    /*
     *  Utility method used during unit testing.
     */
    protected long printMemorySize() {

        final long inOverhead = getFixedMemoryOverhead();

        final long statesOverhead =
            MemoryBudget.byteArraySize(entryStates.length);

        final int lsnOverhead =  computeLsnOverhead();

        int entryOverhead = 0;
        for (int i = 0; i < nEntries; i++) {
            entryOverhead += getEntryInMemorySize(i);
        }

        final int keyPrefixOverhead =  (keyPrefix != null) ?
            MemoryBudget.byteArraySize(keyPrefix.length) : 0;

        final int provisionalOverhead = (provisionalObsolete != null) ?
            provisionalObsolete.getMemorySize() : 0;

        final long targetRepOverhead = entryTargets.calculateMemorySize();
        final long keyRepOverhead = entryKeys.calculateMemorySize();
        final long total = inOverhead + statesOverhead + lsnOverhead +
             entryOverhead + keyPrefixOverhead +  provisionalOverhead +
             targetRepOverhead + keyRepOverhead;

        final long offHeapBINIdOverhead = offHeapBINIds.getMemorySize();

        System.out.println(" nEntries:" + nEntries +
                           "/" + entryStates.length +
                           " in: " + inOverhead +
                           " states: " + statesOverhead +
                           " entry: " + entryOverhead +
                           " lsn: " + lsnOverhead +
                           " keyPrefix: " + keyPrefixOverhead +
                           " provisional: " + provisionalOverhead +
                           " targetRep(" + entryTargets.getType() + "): " +
                           targetRepOverhead +
                           " keyRep(" + entryKeys.getType() +"): " +
                           keyRepOverhead +
                           " offHeapBINIds: " + offHeapBINIdOverhead +
                           " Total: " + total +
                           " inMemorySize: " + inMemorySize);
        return total;
    }

    /* Utility method used to print memory size in an assertion. */
    private boolean assertPrintMemorySize() {
        printMemorySize();
        return true;
    }

    public boolean verifyMemorySize() {

        long calcMemorySize = computeMemorySize();
        if (calcMemorySize != inMemorySize) {

            String msg = "-Warning: Out of sync. Should be " +
                calcMemorySize + " / actual: " + inMemorySize +
                " node: " + getNodeId();
            LoggerUtils.envLogMsg(Level.INFO, getEnv(), msg);

            System.out.println(msg);
            printMemorySize();

            return false;
        }
        return true;
    }

    /**
     * Adds (increments) or removes (decrements) the cache stats for the key
     * and target representations.  Used when rep objects are being replaced
     * with a new instance, rather than by calling their mutator methods.
     * Specifically, it is called when mutating from full bin to bin delta
     * or vice-versa.
     */
    protected void updateRepCacheStats(boolean increment) {
        assert(isBIN());
        entryKeys.updateCacheStats(increment, this);
        entryTargets.updateCacheStats(increment, this);
    }

    protected int getCompactMaxKeyLength() {
        return getEnv().getCompactMaxKeyLength();
    }

    /**
     * Called when adding/removing this IN to/from the INList.
     */
    public void setInListResident(boolean resident) {

        if (!resident) {
            /* Decrement the stats before clearing its residency */
            entryTargets.updateCacheStats(false, this);
            entryKeys.updateCacheStats(false, this);
        }

        if (resident) {
            flags |= IN_RESIDENT_BIT;
        } else {
            flags &= ~IN_RESIDENT_BIT;
        }

        if (resident) {
            /* Increment the stats after setting its residency. */
            entryTargets.updateCacheStats(true, this);
            entryKeys.updateCacheStats(true, this);
        }
    }

    /**
     * Returns whether this IN is on the INList.
     */
    public boolean getInListResident() {
        return (flags & IN_RESIDENT_BIT) != 0;
    }

    public IN getPrevLRUNode() {
    	return prevLRUNode;
    }

    public void setPrevLRUNode(IN node) {
    	prevLRUNode = node;
    }

    public IN getNextLRUNode() {
    	return nextLRUNode;
    }

    public void setNextLRUNode(IN node) {
    	nextLRUNode = node;
    }

    /**
     * Try to compact or otherwise reclaim memory in this IN and return the
     * number of bytes reclaimed. For example, a BIN should evict LNs, if
     * possible.
     *
     * Used by the evictor to reclaim memory by some means short of evicting
     * the entire node.  If a positive value is returned, the evictor will
     * postpone full eviction of this node.
     */
    public long partialEviction() {
        return 0;
    }

    /**
     * Returns whether any child is non-null in the main or off-heap cache.
     */
    public boolean hasCachedChildren() {
        assert isLatchOwner();

        for (int i = 0; i < getNEntries(); i++) {
            if (entryTargets.get(i) != null) {
                return true;
            }
        }

        return false;
    }

    /**
     * Disallow delta on next log. Set to true (a) when we we delete a slot
     * from a BIN, (b) when the cleaner marks a BIN as dirty so that it will
     * be migrated during the next checkpoint.
     */
    public void setProhibitNextDelta(boolean val) {

        if (!isBIN()) {
            return;
        }

        if (val) {
            flags |= IN_PROHIBIT_NEXT_DELTA_BIT;
        } else {
            flags &= ~IN_PROHIBIT_NEXT_DELTA_BIT;
        }
    }

    public boolean getProhibitNextDelta() {
        return (flags & IN_PROHIBIT_NEXT_DELTA_BIT) != 0;
    }

    /*
     * Validate the subtree that we're about to delete.  Make sure there aren't
     * more than one valid entry on each IN and that the last level of the tree
     * is empty. Also check that there are no cursors on any bins in this
     * subtree. Assumes caller is holding the latch on this parent node.
     *
     * While we could latch couple down the tree, rather than hold latches as
     * we descend, we are presumably about to delete this subtree so
     * concurrency shouldn't be an issue.
     *
     * @return true if the subtree rooted at the entry specified by "index" is
     * ok to delete.
     *
     * Overriden by BIN class.
     */
    boolean validateSubtreeBeforeDelete(int index)
        throws DatabaseException {

        if (index >= nEntries) {

            /*
             * There's no entry here, so of course this entry is deletable.
             */
            return true;
        } else {
            IN child = fetchIN(index, CacheMode.UNCHANGED);

            boolean needToLatch = !child.isLatchExclusiveOwner();

            try {
                if (needToLatch) {
                    child.latch(CacheMode.UNCHANGED);
                }
                return child.isValidForDelete();
            } finally {
                if (needToLatch && isLatchOwner()) {
                    child.releaseLatch();
                }
            }
        }
    }

    /**
     * Check if this node fits the qualifications for being part of a deletable
     * subtree. It can only have one IN child and no LN children.
     *
     * Note: the method is overwritten by BIN and LN.
     * BIN.isValidForDelete() will not fetch any child LNs.
     * LN.isValidForDelete() simply returns false.
     *
     * We assume that this is only called under an assert.
     */
    @Override
    boolean isValidForDelete()
        throws DatabaseException {

        assert(!isBINDelta());

        /*
         * Can only have one valid child, and that child should be
         * deletable.
         */
        if (nEntries > 1) {            // more than 1 entry.
            return false;

        } else if (nEntries == 1) {    // 1 entry, check child
            IN child = fetchIN(0, CacheMode.UNCHANGED);
            boolean needToLatch = !child.isLatchExclusiveOwner();
            if (needToLatch) {
                child.latch(CacheMode.UNCHANGED);
            }

            boolean ret = false;
            try {
                if (child.isBINDelta()) {
                    return false;
                }

                ret = child.isValidForDelete();

            } finally {
                if (needToLatch) {
                    child.releaseLatch();
                }
            }
            return ret;
        } else {                       // 0 entries.
            return true;
        }
    }

    /**
     * Add self and children to this in-memory IN list. Called by recovery, can
     * run with no latching.
     */
    @Override
    final void rebuildINList(INList inList)
        throws DatabaseException {

        /*
         * Recompute your in memory size first and then add yourself to the
         * list.
         */
        initMemorySize();

        inList.add(this);

        boolean hasCachedChildren = false;

        /*
         * Add your children if they're resident. (LNs know how to stop the
         * flow).
         */
        for (int i = 0; i < nEntries; i++) {
            Node n = getTarget(i);
            if (n != null) {
                n.rebuildINList(inList);
                hasCachedChildren = true;
            }
            if (getOffHeapBINId(i) >= 0) {
                hasCachedChildren = true;
            }
        }

        if (isUpperIN()) {
            if (hasCachedChildren) {
                setHasCachedChildrenFlag(true);
            } else {
                setHasCachedChildrenFlag(false);
                if (!isDIN()) {
                    if (traceLRU) {
                        LoggerUtils.envLogMsg(
                            traceLevel, getEnv(),
                            "rebuildINList " +
                            Thread.currentThread().getId() +
                            "-" +
                            Thread.currentThread().getName() +
                            "-" + getEnv().getName() +
                            " Adding UIN to LRU: " +
                            getNodeId());
                    }
                    getEvictor().addBack(this);
                }
            }
        } else if (isBIN() && !isDBIN()) {
            getEvictor().addBack(this);
        }
    }

    /*
     * DbStat support.
     */
    void accumulateStats(TreeWalkerStatsAccumulator acc) {
        acc.processIN(this, getNodeId(), getLevel());
    }

    /**
     * Sets the last logged LSN, which for a BIN may be a delta.
     *
     * It is called from IN.postFetch/RecoveryInit(). If the logrec we have
     * just read was a BINDelta, this.lastFullVersion has already been set (in
     * BINDeltaLogEntry.readMainItem() or in OldBinDelta.reconstituteBIN()).
     * So, this method will set this.lastDeltaVersion. Otherwise, if the
     * logrec was a full BIN, this.lastFullVersion has not been set yet,
     * and it will be set here. In this case, this.lastDeltaVersion will
     * remain NULL.
     */
    public void setLastLoggedLsn(long lsn) {

        if (isBIN()) {
            if (getLastFullLsn() == DbLsn.NULL_LSN) {
                setLastFullLsn(lsn);
            } else {
                ((BIN)this).setLastDeltaLsn(lsn);
            }
        } else {
            setLastFullLsn(lsn);
        }
    }

    /**
     * Returns the LSN of the last last logged version of this IN, or NULL_LSN
     * if never logged.
     */
    public final long getLastLoggedLsn() {
        if (isBIN()) {
            return (getLastDeltaLsn() != DbLsn.NULL_LSN ?
                    getLastDeltaLsn() :
                    getLastFullLsn());
        }

        return getLastFullLsn();
    }

    /**
     * Sets the last full version LSN.
     */
    public final void setLastFullLsn(long lsn) {
        lastFullVersion = lsn;
    }

    /**
     * Returns the last full version LSN, or NULL_LSN if never logged.
     */
    public final long getLastFullLsn() {
        return lastFullVersion;
    }

    /**
     * Returns the last delta version LSN, or NULL_LSN if a delta was not last
     * logged. For BINs, it just returns the value of the lastDeltaVersion
     * field. Public for unit testing.
     */
    public long getLastDeltaLsn() {
        return DbLsn.NULL_LSN;
    }

    /*
     * Logging support
     */

    /**
     * When splits and checkpoints intermingle in a deferred write databases,
     * a checkpoint target may appear which has a valid target but a null LSN.
     * Deferred write dbs are written out in checkpoint style by either
     * Database.sync() or a checkpoint which has cleaned a file containing
     * deferred write entries. For example,
     *   INa
     *    |
     *   BINb
     *
     *  A checkpoint or Database.sync starts
     *  The INList is traversed, dirty nodes are selected
     *  BINb is bypassed on the INList, since it's not dirty
     *  BINb is split, creating a new sibling, BINc, and dirtying INa
     *  INa is selected as a dirty node for the ckpt
     *
     * If this happens, INa is in the selected dirty set, but not its dirty
     * child BINb and new child BINc.
     *
     * In a durable db, the existence of BINb and BINc are logged
     * anyway. But in a deferred write db, there is an entry that points to
     * BINc, but no logged version.
     *
     * This will not cause problems with eviction, because INa can't be
     * evicted until BINb and BINc are logged, are non-dirty, and are detached.
     * But it can cause problems at recovery, because INa will have a null LSN
     * for a valid entry, and the LN children of BINc will not find a home.
     *
     * To prevent this, search for all dirty children that might have been
     * missed during the selection phase, and write them out. It's not
     * sufficient to write only null-LSN children, because the existing sibling
     * must be logged lest LN children recover twice (once in the new sibling,
     * once in the old existing sibling.
     *
     * TODO:
     * Would the problem above be solved by logging dirty nodes using a tree
     * traversal (post-order), rather than using the dirty map?
     */
    public void logDirtyChildren()
        throws DatabaseException {

        assert(!isBINDelta());

        EnvironmentImpl envImpl = getDatabase().getEnv();

        /* Look for targets that are dirty. */
        for (int i = 0; i < getNEntries(); i++) {

            IN child = (IN) getTarget(i);

            if (child != null) {
                child.latch(CacheMode.UNCHANGED);
                try {
                    if (child.getDirty()) {
                        /* Ask descendants to log their children. */
                        child.logDirtyChildren();
                        long childLsn =
                            child.log(false, // allowDeltas
                                      true,  // isProvisional
                                      true,  // backgroundIO
                                      this); // parent

                        updateEntry(
                            i, childLsn, VLSN.NULL_VLSN_SEQUENCE,
                            0/*lastLoggedSize*/);
                    }
                } finally {
                    child.releaseLatch();
                }
            }
        }
    }

    public final long log() {
        return logInternal(
            this, null, false /*allowDeltas*/, true /*allowCompress*/,
            Provisional.NO, false /*backgroundIO*/, null /*parent*/);
    }

    public final long log(
        boolean allowDeltas,
        boolean isProvisional,
        boolean backgroundIO,
        IN parent) {

        return logInternal(
            this, null, allowDeltas, true /*allowCompress*/,
            isProvisional ? Provisional.YES : Provisional.NO,
            backgroundIO, parent);
    }

    public final long log(
        boolean allowDeltas,
        Provisional provisional,
        boolean backgroundIO,
        IN parent) {

        return logInternal(
            this, null, allowDeltas, true /*allowCompress*/, provisional,
            backgroundIO, parent);
    }

    final long optionalLog() {

        if (databaseImpl.isDeferredWriteMode()) {
            return getLastLoggedLsn();
        } else {
            return logInternal(
                this, null, false /*allowDeltas*/, true /*allowCompress*/,
                Provisional.NO, false /*backgroundIO*/, null /*parent*/);
        }
    }

    long optionalLogProvisional(IN parent) {
        return optionalLogProvisional(parent, true /*allowCompress*/);
    }

    long optionalLogProvisionalNoCompress(IN parent) {
        return optionalLogProvisional(parent, false /*allowCompress*/);
    }

    private long optionalLogProvisional(IN parent, boolean allowCompress) {

        if (databaseImpl.isDeferredWriteMode()) {
            return getLastLoggedLsn();
        } else {
            return logInternal(
                this, null, false /*allowDeltas*/, allowCompress,
                Provisional.YES, false /*backgroundIO*/, parent);
        }
    }

    public static long logEntry(
        INLogEntry logEntry,
        Provisional provisional,
        boolean backgroundIO,
        IN parent) {

        return logInternal(
            null, logEntry, true /*allowDeltas*/, false /*allowCompress*/,
            provisional, backgroundIO, parent);
    }

    /**
     * Bottleneck method for all IN logging.
     *
     * If 'node' is non-null, 'logEntry' must be null.
     * If 'node' is null, 'logEntry' and 'parent' must be non-null.
     *
     * When 'logEntry' is non-null we are logging an off-heap BIN, and it is
     * not resident in the main cache. The lastFull/DeltaLsns are not updated
     * here, and this must be done instead by the caller.
     *
     * When 'node' is non-null, 'parent' may or may not be null. It must be
     * non-null when logging provisionally, since obsolete LSNs are added to
     * the parent's collection.
     */
    private static long logInternal(
        final IN node,
        INLogEntry logEntry,
        final boolean allowDeltas,
        final boolean allowCompress,
        final Provisional provisional,
        final boolean backgroundIO,
        final IN parent) {

        assert node == null || node.isLatchExclusiveOwner();
        assert parent == null || parent.isLatchExclusiveOwner();
        assert node != null || parent != null;
        assert (node == null) != (logEntry == null);

        final DatabaseImpl dbImpl =
            (node != null) ? node.getDatabase() : parent.getDatabase();

        final EnvironmentImpl envImpl = dbImpl.getEnv();

        final boolean countObsoleteNow =
            provisional != Provisional.YES || dbImpl.isTemporary();

        final boolean isBin = (node != null) ?
            node.isBIN() : (parent.getNormalizedLevel() == 2);

        final BIN bin = (node != null && isBin) ? ((BIN) node) : null;

        final boolean isDelta;

        if (isBin) {
            if (logEntry != null) {
                /*
                 * When a logEntry is supplied (node/bin are null), the logic
                 * below is implemented by OffHeapCache.createBINLogEntry.
                 */
                isDelta = logEntry.isBINDelta();
            } else {
                /* Compress non-dirty slots before determining delta status. */
                if (allowCompress) {
                    envImpl.lazyCompress(bin, false /*compressDirtySlots*/);
                }

                isDelta = bin.isBINDelta() ||
                    (allowDeltas && bin.shouldLogDelta());

                /* Be sure that we didn't illegally mutate to a delta. */
                assert (!(isDelta && bin.isDeltaProhibited()));

                /* Also compress dirty slots, if we will not log a delta. */
                if (allowCompress && !isDelta) {
                    envImpl.lazyCompress(bin, true /*compressDirtySlots*/);
                }

                /*
                 * Write dirty LNs in deferred-write databases after
                 * compression to reduce total logging, at least for temp DBs.
                 */
                if (dbImpl.isDeferredWriteMode()) {
                    bin.logDirtyChildren();
                }

                logEntry = isDelta ?
                    (new BINDeltaLogEntry(bin)) :
                    (new INLogEntry<>(bin));
            }
        } else {
            assert node != null;

            isDelta = false;
            logEntry = new INLogEntry<>(node);
        }

        final LogParams params = new LogParams();
        params.entry = logEntry;
        params.provisional = provisional;
        params.repContext = ReplicationContext.NO_REPLICATE;
        params.nodeDb = dbImpl;
        params.backgroundIO = backgroundIO;

        /*
         * For delta logging:
         *  + Count lastDeltaVersion obsolete, if non-null.
         *  + Set lastDeltaVersion to newly logged LSN.
         *  + Leave lastFullVersion unchanged.
         *
         * For full version logging:
         *  + Count lastFullVersion and lastDeltaVersion obsolete, if non-null.
         *  + Set lastFullVersion to newly logged LSN.
         *  + Set lastDeltaVersion to null.
         */
        final long oldLsn =
            isDelta ? DbLsn.NULL_LSN : logEntry.getPrevFullLsn();

        final long auxOldLsn = logEntry.getPrevDeltaLsn();

        /*
         * Determine whether to count the prior version of an IN (as well as
         * accumulated provisionally obsolete LSNs for child nodes) obsolete
         * when logging the new version.
         *
         * True is set if we are logging the IN non-provisionally, since the
         * non-provisional version durably replaces the prior version and
         * causes all provisional children to also become durable.
         *
         * True is also set if the database is temporary. Since we never use a
         * temporary DB past recovery, prior versions of an IN are never used.
         * [#16928]
         */
        if (countObsoleteNow) {
            params.oldLsn = oldLsn;
            params.auxOldLsn = auxOldLsn;
            params.packedObsoleteInfo =
                (node != null) ? node.provisionalObsolete : null;
        }

        /* Log it. */
        final LogItem item = envImpl.getLogManager().log(params);

        if (node != null) {
            node.setDirty(false);
        }

        if (countObsoleteNow) {
            if (node != null) {
                node.discardProvisionalObsolete();
            }
        } else if (parent != null) {
            parent.trackProvisionalObsolete(node, oldLsn);
            parent.trackProvisionalObsolete(node, auxOldLsn);
            /*
             * TODO:
             * The parent is null and provisional is YES when evicting the root
             * of a DW DB. How does obsolete counting happen?
             */
        }

        if (bin != null) {
            /*
             * When a logEntry is supplied (node/bin are null), the logic
             * below is implemented by OffHeapCache.postBINLog.
             */
            if (isDelta) {
                bin.setLastDeltaLsn(item.lsn);
            } else {
                bin.setLastFullLsn(item.lsn);
                bin.setLastDeltaLsn(DbLsn.NULL_LSN);
            }

            bin.setProhibitNextDelta(false);

        } else if (node != null) {
            node.setLastFullLsn(item.lsn);
        }

        final Evictor evictor = envImpl.getEvictor();

        if (node != null && evictor.getUseDirtyLRUSet()) {

            /*
             * To capture all cases where a node needs to be moved to the
             * priority-1 LRUSet after being cleaned, we invoke moveToPri1LRU()
             * from IN.afterLog(). This includes the case where the node is
             * being logged as part of being evicted, in which case we don't
             * really want it to go back to the LRU. However, this is ok
             * because moveToPri1LRU() checks whether the node is actually
             * in the priority-2 LRUSet before moving it to the priority-1
             * LRUSet.
             */
            if (traceLRU && node.isUpperIN()) {
                LoggerUtils.envLogMsg(
                    traceLevel, envImpl,
                    Thread.currentThread().getId() + "-" +
                        Thread.currentThread().getName() +
                        "-" + envImpl.getName() +
                        " afterLogCommon(): " +
                        " Moving UIN to mixed LRU: " + node.getNodeId());
            }
            evictor.moveToPri1LRU(node);
        }

        return item.lsn;
    }

    /**
     * Adds the given obsolete LSN and any tracked obsolete LSNs for the given
     * child IN to this IN's tracking list.  This method is called to track
     * obsolete LSNs when a child IN is logged provisionally.  Such LSNs
     * cannot be considered obsolete until an ancestor IN is logged
     * non-provisionally.
     */
    void trackProvisionalObsolete(final IN childIN, final long obsoleteLsn) {

        final boolean moveChildInfo =
            (childIN != null && childIN.provisionalObsolete != null);

        final boolean addChildLsn = (obsoleteLsn != DbLsn.NULL_LSN);

        if (!moveChildInfo && !addChildLsn) {
            return;
        }

        final int oldMemSize = (provisionalObsolete != null) ?
             provisionalObsolete.getMemorySize() : 0;

        if (moveChildInfo) {
            if (provisionalObsolete != null) {
                /* Append child info to parent info. */
                provisionalObsolete.copyObsoleteInfo
                    (childIN.provisionalObsolete);
            } else {
                /* Move reference from child to parent. */
                provisionalObsolete = childIN.provisionalObsolete;
            }
            childIN.updateMemorySize(
                0 - childIN.provisionalObsolete.getMemorySize());
            childIN.provisionalObsolete = null;
        }

        if (addChildLsn) {
            if (provisionalObsolete == null) {
                provisionalObsolete = new PackedObsoleteInfo();
            }
            provisionalObsolete.addObsoleteInfo(obsoleteLsn);
        }

        updateMemorySize(oldMemSize,
                         (provisionalObsolete != null) ?
                         provisionalObsolete.getMemorySize() :
                         0);
    }

    /**
     * Discards the provisional obsolete tracking information in this node
     * after it has been counted in the live tracker.  This method is called
     * after this node is logged non-provisionally.
     */
    private void discardProvisionalObsolete()
        throws DatabaseException {

        if (provisionalObsolete != null) {
            updateMemorySize(0 - provisionalObsolete.getMemorySize());
            provisionalObsolete = null;
        }
    }

    /*
     * NOOP for upper INs. Overriden by BIN class.
     */
    public void mutateToFullBIN(boolean leaveFreeSlot) {
    }

    /**
     * Only write dirty slots when logging a BIN-delta.
     *
     * Don't write slots that are extinct when logging a full BIN, to support
     * erasure of extinct records. Skipping the extinct slots during logging
     * is necessary because extinct slots cannot be deleted by BIN.compress
     * when a cursor is present.
     *
     * Otherwise, when logging an IN, write all slots.
     */
    private int getNEntriesToWrite(boolean deltasOnly) {
        if (deltasOnly) {
            return getNDeltas();
        }
        if (!isBIN()) {
            return nEntries;
        }
        int n = 0;
        for (int i = 0; i < nEntries; i++) {
            if (canDeleteExtinctSlot(i)) {
                continue;
            }
            n += 1;
        }
        return n;
    }

    /**
     * Returns whether the given slot is extinct and can be deleted during
     * logging.
     *
     * Check KnownDeleted before checking for an extinct slot, to
     * reduce calls to the more expensive getExtinctionState method.
     * KnownDeleted is set on extinct slots by the ExtinctionScanner.
     */
    private boolean canDeleteExtinctSlot(int idx) {
        assert isBIN();

        final EnvironmentImpl envImpl = getEnv();

        /*
         * The check for isValid is necessary because we cannot call the
         * extinction filter until recovery is complete and we have the DB
         * name and type [#27022]. In the future we may wish to implement a
         * more comprehensive check in getExtinctionState.
         */
        return envImpl.isValid() &&
            isEntryKnownDeleted(idx) &&
            envImpl.getExtinctionState(databaseImpl, getKey(idx)) == EXTINCT;
    }

    public final int getNDeltas() {
        int n = 0;
        for (int i = 0; i < nEntries; i++) {
            if (!isDirty(i)) {
                continue;
            }
            n += 1;
        }
        return n;
    }

    /**
     * @see Node#getGenericLogType
     */
    @Override
    public final LogEntryType getGenericLogType() {
        return getLogType();
    }

    /**
     * Get the log type of this node.
     */
    public LogEntryType getLogType() {
        return LogEntryType.LOG_IN;
    }

    /**
     * @see Loggable#getLogSize
     *
     * Overrriden by DIN and DBIN classes.
     */
    @Override
    public int getLogSize() {
        return getLogSize(false);
    }

    public final int getLogSize(boolean deltasOnly) {

        BIN bin = (isBIN() ? (BIN)this : null);

        boolean haveVLSNCache = (bin != null && bin.isVLSNCachingEnabled());

        int size = 0;

        boolean haveExpiration = false;

        if (bin != null) {
            int base = bin.getExpirationBase();
            haveExpiration = (base != -1);
            size += LogUtils.getPackedIntLogSize(base);
        }

        size += LogUtils.getPackedLongLogSize(nodeId);
        size += LogUtils.getByteArrayLogSize(identifierKey); // identifier key

        if (keyPrefix != null) {
            size += LogUtils.getByteArrayLogSize(keyPrefix);
        }

        size += 1; // one byte for boolean flags

        final int nEntriesToWrite = getNEntriesToWrite(deltasOnly);

        final int maxEntriesToWrite =
            (!deltasOnly ?
             getMaxEntries() :
             bin.getDeltaCapacity(nEntriesToWrite));

        size += LogUtils.getPackedIntLogSize(nEntriesToWrite);
        size += LogUtils.getPackedIntLogSize(level);
        size += LogUtils.getPackedIntLogSize(maxEntriesToWrite);

        final boolean compactLsnsRep = (entryLsnLongArray == null);
        size += LogUtils.getBooleanLogSize();   // compactLsnsRep
        if (compactLsnsRep) {
            size += LogUtils.INT_BYTES;         // baseFileNumber
        }

        for (int i = 0; i < nEntries; i++) {    // entries

            /* See getNEntriesToWrite and canDeleteExtinctSlot. */
            if (deltasOnly) {
                if (!isDirty(i)) {
                    continue;
                }
            } else {
                if (bin != null && canDeleteExtinctSlot(i)) {
                    continue;
                }
            }

            size += LogUtils.getByteArrayLogSize(entryKeys.get(i)) + // key
                (compactLsnsRep ? LogUtils.INT_BYTES :
                 LogUtils.getLongLogSize()) +                       // LSN
                1;                                                  // state

            if (isLastLoggedSizeStored(i)) {
                size += LogUtils.getPackedIntLogSize(getLastLoggedSize(i));
            }

            if (haveVLSNCache && isEmbeddedLN(i)) {
                size += LogUtils.getPackedLongLogSize(bin.getCachedVLSN(i));
            }

            if (haveExpiration) {
                size +=
                    LogUtils.getPackedIntLogSize(bin.getExpirationOffset(i));
            }
        }

        if (deltasOnly) {
            size += LogUtils.getPackedIntLogSize(bin.getFullBinNEntries());
            size += LogUtils.getPackedIntLogSize(bin.getFullBinMaxEntries());

            size += bin.getBloomFilterLogSize();
        }

        return size;
    }

    /*
     * Overridden by DIN and DBIN classes.
     */
    @Override
    public void writeToLog(ByteBuffer logBuffer) {

        serialize(logBuffer, false /*deltasOnly*/, true /*clearDirtyBits*/);
    }

    public void writeToLog(ByteBuffer logBuffer, boolean deltasOnly) {

        serialize(logBuffer, deltasOnly, !deltasOnly /*clearDirtyBits*/);
    }

    /**
     * WARNING: In the case of BINs this method is not only used for logging
     * but also for off-heap caching. Therefore, this method should not have
     * side effects unless the clearDirtyBits param is true.
     */
    public final void serialize(ByteBuffer logBuffer,
                                boolean deltasOnly,
                                boolean clearDirtyBits) {

        assert(!deltasOnly || isBIN());

        BIN bin = (isBIN() ? (BIN)this : null);

        byte[] bloomFilter = (deltasOnly ? bin.createBloomFilter() : null);

        boolean haveExpiration = false;

        if (bin != null) {
            int base = bin.getExpirationBase();
            haveExpiration = (base != -1);
            LogUtils.writePackedInt(logBuffer, base);
        }

        LogUtils.writePackedLong(logBuffer, nodeId);

        LogUtils.writeByteArray(logBuffer, identifierKey);

        boolean hasKeyPrefix = (keyPrefix != null);
        boolean mayHaveLastLoggedSize = mayHaveLastLoggedSizeStored();
        boolean haveVLSNCache = (bin != null && bin.isVLSNCachingEnabled());

        byte booleans = (byte) (isRoot() ? 1 : 0);
        booleans |= (hasKeyPrefix ? 2 : 0);
        booleans |= (mayHaveLastLoggedSize ? 4 : 0);
        booleans |= (bloomFilter != null ? 8 : 0);
        booleans |= (haveVLSNCache ? 16 : 0);
        booleans |= (isExpirationInHours() ? 32 : 0);

        logBuffer.put(booleans);

        if (hasKeyPrefix) {
            LogUtils.writeByteArray(logBuffer, keyPrefix);
        }

        final int nEntriesToWrite = getNEntriesToWrite(deltasOnly);

        final int maxEntriesToWrite =
            (!deltasOnly ?
             getMaxEntries() :
             bin.getDeltaCapacity(nEntriesToWrite));
        /*
        if (deltasOnly) {
            BIN bin = (BIN)this;
            System.out.println(
                "Logging BIN-delta: " + getNodeId() +
                " is delta = " + isBINDelta() +
                " nEntries = " + nEntriesToWrite +
                " max entries = " + maxEntriesToWrite +
                " full BIN entries = " + bin.getFullBinNEntries() +
                " full BIN max entries = " + bin.getFullBinMaxEntries());
        }
        */
        LogUtils.writePackedInt(logBuffer, nEntriesToWrite);
        LogUtils.writePackedInt(logBuffer, level);
        LogUtils.writePackedInt(logBuffer, maxEntriesToWrite);

        /* true if compact representation. */
        boolean compactLsnsRep = (entryLsnLongArray == null);
        LogUtils.writeBoolean(logBuffer, compactLsnsRep);
        if (compactLsnsRep) {
            LogUtils.writeInt(logBuffer, (int) baseFileNumber);
        }

        for (int i = 0; i < nEntries; i++) {

            /* See getNEntriesToWrite and canDeleteExtinctSlot. */
            if (deltasOnly) {
                if (!isDirty(i)) {
                    continue;
                }
            } else {
                if (bin != null && canDeleteExtinctSlot(i)) {
                    continue;
                }
            }

            LogUtils.writeByteArray(logBuffer, entryKeys.get(i));

            /*
             * A NULL_LSN may be stored when an incomplete insertion occurs,
             * but in that case the KnownDeleted flag must be set. See
             * Tree.insert.  [#13126]
             */
            assert checkForNullLSN(i) :
                "logging IN " + getNodeId() + " with null lsn child " +
                " db=" + databaseImpl.getName() +
                " isDeferredWriteMode=" + databaseImpl.isDeferredWriteMode() +
                " isTemporary=" + databaseImpl.isTemporary();

            if (compactLsnsRep) {
                int offset = i << 2;
                int fileOffset = getFileOffset(offset);
                logBuffer.put(getFileNumberOffset(offset));
                logBuffer.put((byte) (fileOffset & 0xff));
                logBuffer.put((byte) ((fileOffset >>> 8) & 0xff));
                logBuffer.put((byte) ((fileOffset >>> 16) & 0xff));
            } else {
                LogUtils.writeLong(logBuffer, entryLsnLongArray[i]);
            }

            logBuffer.put(
                (byte) (entryStates[i] & EntryStates.CLEAR_TRANSIENT_BITS));

            if (clearDirtyBits) {
                entryStates[i] &= EntryStates.CLEAR_DIRTY_BIT;
            }

            if (isLastLoggedSizeStored(i)) {
                LogUtils.writePackedInt(logBuffer, getLastLoggedSize(i));
            }

            if (haveVLSNCache && isEmbeddedLN(i)) {
                LogUtils.writePackedLong(logBuffer, bin.getCachedVLSN(i));
            }

            if (haveExpiration) {
                LogUtils.writePackedInt(
                    logBuffer, bin.getExpirationOffset(i));
            }
        }

        if (deltasOnly) {
            LogUtils.writePackedInt(logBuffer, bin.getFullBinNEntries());
            LogUtils.writePackedInt(logBuffer, bin.getFullBinMaxEntries());

            if (bloomFilter != null) {
                BINDeltaBloomFilter.writeToLog(bloomFilter, logBuffer);
            }
        }
    }

    /*
     * Used for assertion to prevent writing a null lsn to the log.
     */
    private boolean checkForNullLSN(int index) {
        boolean ok;
        if (isBIN()) {
            ok = !(getLsn(index) == DbLsn.NULL_LSN &&
                   (entryStates[index] & EntryStates.KNOWN_DELETED_BIT) == 0);
        } else {
            ok = (getLsn(index) != DbLsn.NULL_LSN);
        }
        return ok;
    }

    /**
     * Returns whether the given serialized IN is a BIN that may have
     * expiration values.
     */
    public boolean mayHaveExpirationValues(
        ByteBuffer itemBuffer,
        int entryVersion) {

        if (!isBIN() || entryVersion < 12) {
            return false;
        }

        itemBuffer.mark();
        int expirationBase = LogUtils.readPackedInt(itemBuffer);
        itemBuffer.reset();

        return (expirationBase != -1);
    }

    @Override
    public void readFromLog(
        ByteBuffer itemBuffer,
        int entryVersion) {

        materialize(
            itemBuffer, entryVersion,
            false /*deltasOnly*/, true /*clearDirtyBits*/);
    }

    public void readFromLog(
        ByteBuffer itemBuffer,
        int entryVersion,
        boolean deltasOnly) {

        materialize(
            itemBuffer, entryVersion,
            deltasOnly, !deltasOnly /*clearDirtyBits*/);
    }

    /**
     * WARNING: In the case of BINs this method is used not only for logging
     * but also for off-heap caching. Therefore, this method should not have
     * side effects unless the clearDirtyBits param is true or an older log
     * version is passed (off-heap caching uses the current version).
     */
    public final void materialize(
        ByteBuffer itemBuffer,
        int entryVersion,
        boolean deltasOnly,
        boolean clearDirtyBits) {

        assert(!deltasOnly || isBIN());

        BIN bin = (isBIN() ? (BIN)this : null);

        boolean unpacked = (entryVersion < 6);

        boolean haveExpiration = false;

        if (bin != null && entryVersion >= 12) {
            int base = LogUtils.readPackedInt(itemBuffer);
            haveExpiration = (base != -1);
            bin.setExpirationBase(base);
        }

        nodeId = LogUtils.readLong(itemBuffer, unpacked);
        identifierKey = LogUtils.readByteArray(itemBuffer, unpacked);

        byte booleans = itemBuffer.get();

        setIsRootFlag((booleans & 1) != 0);

        if ((booleans & 2) != 0) {
            keyPrefix = LogUtils.readByteArray(itemBuffer, unpacked);
        }

        boolean mayHaveLastLoggedSize = ((booleans & 4) != 0);
        assert !(mayHaveLastLoggedSize && (entryVersion < 9));

        boolean hasBloomFilter = ((booleans & 8) != 0);
        assert(!hasBloomFilter || (entryVersion >= 10 && deltasOnly));

        boolean haveVLSNCache = ((booleans & 16) != 0);
        assert !(haveVLSNCache && (entryVersion < 11));

        setExpirationInHours((booleans & 32) != 0);

        nEntries = LogUtils.readInt(itemBuffer, unpacked);
        level = LogUtils.readInt(itemBuffer, unpacked);
        int length = LogUtils.readInt(itemBuffer, unpacked);

        entryTargets = INTargetRep.NONE;
        entryKeys = new INKeyRep.Default(length);
        baseFileNumber = -1;
        long storedBaseFileNumber = -1;
        if (disableCompactLsns) {
            entryLsnByteArray = null;
            entryLsnLongArray = new long[length];
        } else {
            entryLsnByteArray = new byte[length << 2];
            entryLsnLongArray = null;
        }
        entryStates = new byte[length];
        boolean compactLsnsRep = false;

        if (entryVersion > 1) {
            compactLsnsRep = LogUtils.readBoolean(itemBuffer);
            if (compactLsnsRep) {
                baseFileNumber = LogUtils.readInt(itemBuffer);
                storedBaseFileNumber = baseFileNumber;
            }
        }

        for (int i = 0; i < nEntries; i++) {

            entryKeys = entryKeys.set(
                i, LogUtils.readByteArray(itemBuffer, unpacked), this);

            long lsn;
            if (compactLsnsRep) {
                /* LSNs in compact form. */
                byte fileNumberOffset = itemBuffer.get();
                int fileOffset = (itemBuffer.get() & 0xff);
                fileOffset |= ((itemBuffer.get() & 0xff) << 8);
                fileOffset |= ((itemBuffer.get() & 0xff) << 16);
                if (fileOffset == THREE_BYTE_NEGATIVE_ONE) {
                    lsn = DbLsn.NULL_LSN;
                } else {
                    lsn = DbLsn.makeLsn
                        (storedBaseFileNumber + fileNumberOffset, fileOffset);
                }
            } else {
                /* LSNs in long form. */
                lsn = LogUtils.readLong(itemBuffer);              // LSN
            }

            setLsnInternal(i, lsn);

            byte entryState = itemBuffer.get();                   // state

            if (clearDirtyBits) {
                entryState &= EntryStates.CLEAR_DIRTY_BIT;
            }

            /*
             * The MIGRATE_BIT (now the transient OFFHEAP_DIRTY_BIT) was
             * accidentally written in a pre-JE 6 log version.
             */
            if (entryVersion < 9) {
                entryState &= EntryStates.CLEAR_TRANSIENT_BITS;
            }

            /*
             * A NULL_LSN is the remnant of an incomplete insertion and the
             * KnownDeleted flag should be set.  But because of bugs in prior
             * releases, the KnownDeleted flag may not be set.  So set it here.
             * See Tree.insert.  [#13126]
             */
            if (entryVersion < 9 && lsn == DbLsn.NULL_LSN) {
                entryState |= EntryStates.KNOWN_DELETED_BIT;
            }

            entryStates[i] = entryState;

            if (mayHaveLastLoggedSize && !isEmbeddedLN(i)) {
                setLastLoggedSizeUnconditional(
                    i, LogUtils.readPackedInt(itemBuffer));
            }

            if (haveVLSNCache && isEmbeddedLN(i)) {
                bin.setCachedVLSNUnconditional(
                    i, LogUtils.readPackedLong(itemBuffer));
            }

            if (haveExpiration) {
                bin.setExpirationOffset(i, LogUtils.readPackedInt(itemBuffer));
            }
        }

        if (deltasOnly) {
            setBINDelta(true);

            if (entryVersion >= 10) {
                bin.setFullBinNEntries(LogUtils.readPackedInt(itemBuffer));
                bin.setFullBinMaxEntries(LogUtils.readPackedInt(itemBuffer));

                if (hasBloomFilter) {
                    bin.bloomFilter = BINDeltaBloomFilter.readFromLog(
                        itemBuffer, entryVersion);
                }
            }
        }

        /* Dup conversion will be done by postFetchInit. */
        needDupKeyConversion = (entryVersion < 8);
    }

    /**
     * @see Loggable#logicalEquals
     * Always return false, this item should never be compared.
     */
    @Override
    public final boolean logicalEquals(Loggable other) {
        return false;
    }

    /**
     * @see Loggable#dumpLog
     */
    @Override
    public final void dumpLog(StringBuilder sb, boolean verbose) {
        sb.append(beginTag());

        sb.append("");

        sb.append(Key.dumpString(identifierKey, "idKey", 0));

        // isRoot
        sb.append("");

        // level
        sb.append("");

        if (keyPrefix != null) {
            sb.append(Key.dumpString(keyPrefix, "keyPrefix", 0));
        }

        // nEntries, length of entries array
        sb.append("");

        if (verbose) {
            for (int i = 0; i < nEntries; i++) {
                sb.append("");
                sb.append(Key.dumpString(getKey(i), 0));
                if (isEmbeddedLN(i)) {
                    sb.append(Key.dumpString(getData(i), "data", 0));
                }
                sb.append(DbLsn.toString(getLsn(i)));
                sb.append("");
            }
        }

        sb.append("");

        if (isBINDelta(false)) {
            if (bin.bloomFilter != null) {
                BINDeltaBloomFilter.dumpLog(bin.bloomFilter, sb, verbose);
            }
        }

        /* Add on any additional items from subclasses before the end tag. */
        dumpLogAdditional(sb);

        sb.append(endTag());
    }

    /**
     * Allows subclasses to add additional fields before the end tag. If they
     * just overload dumpLog, the xml isn't nested.
     */
    protected void dumpLogAdditional(StringBuilder sb) {
    }

    public String beginTag() {
        return BEGIN_TAG;
    }

    public String endTag() {
        return END_TAG;
    }

    /**
     * For unit test support:
     * @return a string that dumps information about this IN, without
     */
    @Override
    public String dumpString(int nSpaces, boolean dumpTags) {
        StringBuilder sb = new StringBuilder();
        if (dumpTags) {
            sb.append(TreeUtils.indent(nSpaces));
            sb.append(beginTag());
            sb.append('\n');
        }

        if (dumpTags) {
            sb.append(TreeUtils.indent(nSpaces));
            sb.append("");
        } else {
            sb.append(nodeId);
        }
        sb.append('\n');

        BIN bin = null;
        if (isBIN()) {
            bin = (BIN) this;
        }

        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append(identifierKey == null ?
                  "" :
                  Key.dumpString(identifierKey, 0));
        sb.append("");
        sb.append('\n');
        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append(keyPrefix == null ? "" : Key.dumpString(keyPrefix, 0));
        sb.append("\n");
        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append('\n');
        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append('\n');
        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append('\n');
        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append(
            "");
        if (bin != null) {
            sb.append(TreeUtils.indent(nSpaces + 2));
            sb.append("");
            sb.append(TreeUtils.indent(nSpaces + 2));
            sb.append("");
        }
        sb.append('\n');

        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append('\n');

        for (int i = 0; i < nEntries; i++) {
            sb.append(TreeUtils.indent(nSpaces + 4));
            sb.append("\n");
            if (getLsn(i) == DbLsn.NULL_LSN) {
                sb.append(TreeUtils.indent(nSpaces + 6));
                sb.append("");
            } else {
                sb.append(DbLsn.dumpString(getLsn(i), nSpaces + 6));
            }
            sb.append('\n');
            if (entryKeys.get(i) == null) {
                sb.append(TreeUtils.indent(nSpaces + 6));
                sb.append("");
            } else {
                sb.append(Key.dumpString(entryKeys.get(i), (nSpaces + 6)));
            }
            sb.append('\n');
            if (getOffHeapBINId(i) >= 0) {
                sb.append("\n");
            }
            if (bin != null && bin.getOffHeapLNId(i) != 0) {
                sb.append("\n");
            }
            if (entryTargets.get(i) == null) {
                sb.append(TreeUtils.indent(nSpaces + 6));
                sb.append("");
            } else {
                sb.append(entryTargets.get(i).dumpString(nSpaces + 6, true));
            }
            sb.append('\n');
            sb.append(TreeUtils.indent(nSpaces + 4));
            sb.append("");
            sb.append('\n');
        }

        sb.append(TreeUtils.indent(nSpaces + 2));
        sb.append("");
        sb.append('\n');
        if (dumpTags) {
            sb.append(TreeUtils.indent(nSpaces));
            sb.append(endTag());
        }
        return sb.toString();
    }

    private void dumpSlotState(StringBuilder sb, int i, BIN bin) {
        sb.append(" kd=\"").append(isEntryKnownDeleted(i));
        sb.append("\" pd=\"").append(isEntryPendingDeleted(i));
        sb.append("\" dirty=\"").append(isDirty(i));
        sb.append("\" embedded=\"").append(isEmbeddedLN(i));
        sb.append("\" noData=\"").append(isNoDataLN(i));
        if (bin != null) {
            sb.append("\" logSize=\"");
            sb.append(bin.getLastLoggedSizeUnconditional(i));
            long vlsn = bin.getCachedVLSN(i);
            if (!VLSN.isNull(vlsn)) {
                sb.append("\" vlsn=\"").append(vlsn);
            }
        }
        if (bin != null && bin.getExpiration(i) != 0) {
            sb.append("\" expires=\"");
            sb.append(TTL.formatExpiration(
                bin.getExpiration(i), bin.isExpirationInHours()));
        }
        sb.append("\"");
    }

    /**
     * Converts to an identifying string that is safe to output in a log.
     * Keys are not included for security/privacy reasons.
     */
    public String toSafeString(final int... slotIndexes) {

        final BIN bin = isBIN() ? (BIN) this : null;
        final StringBuilder sb = new StringBuilder();

        sb.append("IN nodeId=").append(getNodeId());
        sb.append(" lastLoggedLSN=");
        sb.append(DbLsn.getNoFormatString(getLastLoggedLsn()));
        sb.append(" lastFulLSN=");
        sb.append(DbLsn.getNoFormatString(getLastFullLsn()));
        sb.append(" level=").append(Integer.toHexString(getLevel()));
        sb.append(" flags=").append(Integer.toHexString(flags));
        sb.append(" isBINDelta=").append(isBINDelta());
        sb.append(" nSlots=").append(getNEntries());

        if (slotIndexes != null) {
            for (final int i : slotIndexes) {
                sb.append(" slot-").append(i).append(":[");
                sb.append("lsn=");
                sb.append(DbLsn.getNoFormatString(getLsn(i)));
                sb.append(" offset=");
                sb.append(DbLsn.getFileOffset(getLsn(i)));
                if (bin != null) {
                    sb.append(" offset+logSize=");
                    sb.append(DbLsn.getFileOffset(getLsn(i)) +
                        bin.getLastLoggedSizeUnconditional(i));
                }
                dumpSlotState(sb, i, bin);
                sb.append("]");
            }
        }

        return sb.toString();
    }

    @Override
    public String toString() {
        return dumpString(0, true);
    }

    public String shortClassName() {
        return "IN";
    }

    /**
     * Send trace messages to the java.util.logger. Don't rely on the logger
     * alone to conditionalize whether we send this message, we don't even want
     * to construct the message if the level is not enabled.
     */
    private void traceSplit(Level level,
                            IN parent,
                            IN newSibling,
                            long parentLsn,
                            long myNewLsn,
                            long newSiblingLsn,
                            int splitIndex,
                            int idKeyIndex,
                            int childIndex) {
        Logger logger = getEnv().getLogger();
        if (logger.isLoggable(level)) {
            StringBuilder sb = new StringBuilder();
            sb.append(TRACE_SPLIT);
            sb.append(" parent=");
            sb.append(parent.getNodeId());
            sb.append(" child=");
            sb.append(getNodeId());
            sb.append(" newSibling=");
            sb.append(newSibling.getNodeId());
            sb.append(" parentLsn = ");
            sb.append(DbLsn.getNoFormatString(parentLsn));
            sb.append(" childLsn = ");
            sb.append(DbLsn.getNoFormatString(myNewLsn));
            sb.append(" newSiblingLsn = ");
            sb.append(DbLsn.getNoFormatString(newSiblingLsn));
            sb.append(" splitIdx=");
            sb.append(splitIndex);
            sb.append(" idKeyIdx=");
            sb.append(idKeyIndex);
            sb.append(" childIdx=");
            sb.append(childIndex);
            LoggerUtils.logMsg(logger,
                               databaseImpl.getEnv(),
                               level,
                               sb.toString());
        }
    }

    /**
     * Send trace messages to the java.util.logger. Don't rely on the logger
     * alone to conditionalize whether we send this message, we don't even want
     * to construct the message if the level is not enabled.
     */
    private void traceDelete(Level level, int index) {
        Logger logger = databaseImpl.getEnv().getLogger();
        if (logger.isLoggable(level)) {
            StringBuilder sb = new StringBuilder();
            sb.append(TRACE_DELETE);
            sb.append(" in=").append(getNodeId());
            sb.append(" index=");
            sb.append(index);
            LoggerUtils.logMsg(logger,
                               databaseImpl.getEnv(),
                               level,
                               sb.toString());
        }
    }

    public final void setFetchINHook(TestHook hook) {
        fetchINHook = hook;
    }
}