All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.dbi.CursorImpl Maven / Gradle / Ivy

The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.dbi;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;

import com.sleepycat.je.CacheMode;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.DuplicateDataException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.LockConflictException;
import com.sleepycat.je.LockNotAvailableException;
import com.sleepycat.je.OperationResult;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.log.LogItem;
import com.sleepycat.je.log.LogUtils;
import com.sleepycat.je.log.ReplicationContext;
import com.sleepycat.je.tree.BIN;
import com.sleepycat.je.tree.BINBoundary;
import com.sleepycat.je.tree.IN;
import com.sleepycat.je.tree.Key;
import com.sleepycat.je.tree.LN;
import com.sleepycat.je.tree.SearchResult;
import com.sleepycat.je.tree.StorageSize;
import com.sleepycat.je.tree.TrackingInfo;
import com.sleepycat.je.tree.Tree;
import com.sleepycat.je.tree.TreeWalkerStatsAccumulator;
import com.sleepycat.je.txn.LockGrantType;
import com.sleepycat.je.txn.LockInfo;
import com.sleepycat.je.txn.LockManager;
import com.sleepycat.je.txn.LockResult;
import com.sleepycat.je.txn.LockType;
import com.sleepycat.je.txn.Locker;
import com.sleepycat.je.txn.LockerFactory;
import com.sleepycat.je.txn.WriteLockInfo;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.Pair;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.VLSN;

/**
 * A CursorImpl is the internal implementation of the cursor.
 */
public class CursorImpl implements Cloneable {

    /*
     * Debug switch, off by default. NOTE(review): not referenced in the part
     * of the class reviewed here -- confirm it is still used.
     */
    private static final boolean DEBUG = false;

    /* Values held by the status field of a cursor. */
    private static final byte CURSOR_NOT_INITIALIZED = 1;
    private static final byte CURSOR_INITIALIZED = 2;
    private static final byte CURSOR_CLOSED = 3;
    /*
     * Operation labels for trace messages. TRACE_DELETE is passed to trace()
     * by deleteCurrentRecord; TRACE_MOD and TRACE_INSERT are presumably used
     * the same way by the update and insert paths -- confirm.
     */
    private static final String TRACE_DELETE = "Delete";
    private static final String TRACE_MOD = "Mod:";
    private static final String TRACE_INSERT = "Ins:";

    /*
     * FOUND, EXACT_KEY and FOUND_LAST occupy distinct bits, so they can be
     * combined into a single int. NOTE(review): presumably they describe the
     * outcome of a search -- confirm against the search methods.
     */
    public static final int FOUND = 0x1;
    /* Exact match on the key portion. */
    public static final int EXACT_KEY = 0x2;
    /* Record found is the last one in the dbImpl. */
    public static final int FOUND_LAST = 0x4;

    /*
     * Allocate hashCode ids from this. [#13896]
     * Incremented without synchronization by getNextCursorId.
     */
    private static long lastAllocatedId = 0;

    /*
     * Unique id that we can return as a hashCode to prevent calls to
     * Object.hashCode(). [#13896]
     * It is the allocated long id narrowed to an int in the constructor.
     */
    private final int thisId;

    /* The dbImpl behind the handle. */
    private final DatabaseImpl dbImpl;

    /*
     * Owning transaction. Not final: cloneCursor replaces it in the clone
     * when non-transactional locks are not retained.
     */
    private Locker locker;

    /*
     * True if non-transactional locks are retained (not released
     * automatically) when the cursor is reset or closed.
     */
    private final boolean retainNonTxnLocks;

    /*
     * Whether this cursor is treated as a secondary cursor; set from the
     * constructor parameter of the same name.
     */
    private final boolean isSecondaryCursor;

    /*
     * Cursor location in the dbImpl, represented by a BIN and an index
     * in the BIN.  The bin is null if not established, and the index is
     * negative if not established.
     */
    private volatile BIN bin;
    private volatile int index;

    /* State of the cursor. See CURSOR_XXX above. */
    private byte status;

    /*
     * Cache mode used for latching and for cache mode-based eviction.
     * Initialized to CacheMode.DEFAULT and replaced by setCacheMode.
     */
    private CacheMode cacheMode;
    /* Whether criticalEviction may run; false until setAllowEviction(true). */
    private boolean allowEviction;
    /*
     * BIN remembered by afterNonStickyOp so that performPriorBINEviction can
     * evict it once the cursor is positioned on a different BIN.
     */
    private BIN priorBIN;

    /*
     * A cache of the record version for the operation at the current position.
     * Is null if the cursor is uninitialized.  For a secondary cursor, is the
     * version of the primary record.
     */
    private RecordVersion currentRecordVersion;

    /*
     * A cache of the storage size for the operation at the cursor position.
     * Both values are zero if the cursor is uninitialized. priStorageSize is
     * non-zero only if Cursor.readPrimaryAfterGet was called.
     */
    private int storageSize;
    private int priStorageSize;

    /* Number of secondary records written by a primary put or delete. */
    private int nSecWrites;

    /*
     * Per-thread accumulator for tree-walker statistics; created lazily by
     * maybeInitTreeStatsAccumulator. The type argument is required so that
     * getTreeStatsAccumulator can return the result of get() directly; with a
     * raw ThreadLocal that return statement does not compile.
     */
    private ThreadLocal<TreeWalkerStatsAccumulator> treeStatsAccumulatorTL;

    /*
     * Test hook. NOTE(review): neither assigned nor invoked in the part of the
     * class reviewed here -- confirm how tests install and trigger it.
     */
    private TestHook testHook;

    /**
     * Creates a cursor using the standard settings for an internal cursor:
     * non-transactional locks are retained (retainNonTxnLocks=true) and the
     * cursor is not a secondary cursor (isSecondaryCursor=false).
     *
     * @param database the database the cursor operates on.
     * @param locker the locker that owns the cursor.
     */
    public CursorImpl(DatabaseImpl database, Locker locker) {
        this(database,
             locker,
             true /*retainNonTxnLocks*/,
             false /*isSecondaryCursor*/);
    }

    /**
     * Creates an uninitialized cursor and registers it with the locker.
     *
     * Transactional locks are always retained when a cursor is reset or
     * closed.  Whether non-transactional locks are retained is controlled by
     * retainNonTxnLocks:
     *
     * - A user-created non-transactional Cursor normally uses a ThreadLocker
     *   and releases its locks on reset and close.  Passing true makes the
     *   ThreadLocker retain them; SecondaryCursor.readPrimaryAfterGet relies
     *   on this.
     *
     * - Internal (non-user) cursors often use a BasicLocker and retain locks.
     *   The caller then calls BasicLocker.operationEnd() itself after closing
     *   the cursor, and passes true so that closing the cursor does not
     *   release the BasicLocker's locks.
     *
     * - A BasicLocker is also used for NameLN operations while a Database
     *   handle is opened.  Database handle locks must be retained even when
     *   the Database is opened non-transactionally.
     *
     * @param dbImpl the database the cursor operates on.
     *
     * @param locker the owning locker; the new cursor is registered with it.
     *
     * @param retainNonTxnLocks is true if non-transactional locks should be
     * retained (not released automatically) when the cursor is reset or
     * closed.
     *
     * @param isSecondaryCursor whether to treat this cursor as a secondary
     * cursor, e.g., secondary records don't have record versions.
     */
    public CursorImpl(
        DatabaseImpl dbImpl,
        Locker locker,
        boolean retainNonTxnLocks,
        boolean isSecondaryCursor) {

        /* Bind the cursor to its database and owning locker. */
        this.dbImpl = dbImpl;
        this.locker = locker;

        this.retainNonTxnLocks = retainNonTxnLocks;
        this.isSecondaryCursor = isSecondaryCursor;

        /* The id doubles as the hash code; narrowing to int is intended. */
        thisId = (int) getNextCursorId();

        /* No position has been established yet. */
        bin = null;
        index = -1;

        locker.registerCursor(this);

        /*
         * DEFAULT only matters when the CursorImpl is used directly (mainly
         * for internal databases).  A CursorImpl created by a Cursor has
         * setCacheMode called on it.
         */
        cacheMode = CacheMode.DEFAULT;

        status = CURSOR_NOT_INITIALIZED;

        /*
         * No eviction is done here: the caller may be synchronized on the
         * Database instance (as in Database.openCursor()), and eviction may
         * be disabled after the cursor is constructed.
         */
    }

    /**
     * Performs a shallow copy and returns the new cursor.  The new cursor is
     * registered with its locker; when non-transactional locks are not
     * retained it gets a new non-transactional locker obtained from this
     * cursor's locker.
     *
     * The current BIN, if any, is latched while the copy is made and released
     * before returning.
     *
     * @param samePosition If true, this cursor's position is used for the new
     * cursor, and addCursor is called on the new cursor to register it with
     * the current BIN.  If false, the new cursor will be uninitialized.
     *
     * @return the new cursor; never null.
     *
     * @throws AssertionError if Object.clone fails, which cannot happen
     * because this class implements Cloneable.
     */
    public CursorImpl cloneCursor(final boolean samePosition) {

        assert assertCursorState(
            false /*mustBeInitialized*/, false /*mustNotBeInitialized*/);

        final CursorImpl ret;
        try {
            latchBIN();

            ret = (CursorImpl) super.clone();

            if (!retainNonTxnLocks) {
                ret.locker = locker.newNonTxnLocker();
            }

            ret.locker.registerCursor(ret);

            if (samePosition) {
                ret.addCursor();
            } else {
                ret.clear();
            }
        } catch (CloneNotSupportedException cannotOccur) {

            /*
             * Fail loudly rather than return null: callers dereference the
             * result, so a null would surface later as an unrelated NPE.
             */
            throw new AssertionError(
                "CursorImpl implements Cloneable", cannotOccur);
        } finally {
            releaseBIN();
        }

        /* Perform eviction before and after each cursor operation. */
        criticalEviction();

        return ret;
    }

    /*
     * Hands out the next hashCode id.  Deliberately unsynchronized: two
     * cursors ending up with the same hash code is harmless.
     */
    private static long getNextCursorId() {
        final long nextId = lastAllocatedId + 1;
        lastAllocatedId = nextId;
        return nextId;
    }

    /**
     * Returns the id allocated at construction, avoiding a call to
     * Object.hashCode().
     */
    @Override
    public int hashCode() {
        return this.thisId;
    }

    /**
     * @return the locker that owns this cursor.
     */
    public Locker getLocker() {
        return this.locker;
    }

    /**
     * @return the database this cursor operates on.
     */
    public DatabaseImpl getDb() {
        return this.dbImpl;
    }

    /**
     * Called on the new cursor after a cursor has been duplicated prior to
     * being moved.  Tells the new locker about the old one so that a preempted
     * lock taken by the old locker can be ignored. [#16513]
     *
     * @param closingCursor the old cursor that will be closed if the new
     * cursor is moved successfully.
     */
    public void setClosingLocker(CursorImpl closingCursor) {

        /* Cursors that retain locks never need a closing locker. */
        if (retainNonTxnLocks) {
            return;
        }

        /*
         * Different lockers mean the old one is closed when the operation
         * completes.  Currently that happens only for ReadCommitted cursors
         * and non-transactional cursors that do not retain locks.
         */
        final Locker oldLocker = closingCursor.locker;
        if (locker == oldLocker) {
            return;
        }

        locker.setClosingLocker(oldLocker);
    }

    /**
     * Called once a cursor move operation has completed.  Drops the
     * closingLocker so that the old, closed locker is no longer referenced.
     */
    public void clearClosingLocker() {
        final Locker owner = locker;
        owner.setClosingLocker(null);
    }

    /**
     * @return the cache mode currently set on this cursor.
     */
    public CacheMode getCacheMode() {
        return this.cacheMode;
    }

    /**
     * Sets the effective cache mode to use for the next operation.  Callers
     * never pass null, so the cacheMode field can be handed directly to
     * latching methods.
     *
     * @param mode the cache mode to use; stored as is, without a null check.
     *
     * @see #performCacheModeEviction
     */
    public void setCacheMode(final CacheMode mode) {
        this.cacheMode = mode;
    }

    /**
     * Installs the tree-walker statistics accumulator for the calling thread,
     * creating the thread-local holder on first use.
     *
     * @param tSA the accumulator to associate with the calling thread.
     */
    public void setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA) {
        if (treeStatsAccumulatorTL == null) {
            treeStatsAccumulatorTL = new ThreadLocal<>();
        }
        treeStatsAccumulatorTL.set(tSA);
    }

    /**
     * Creates the thread-local holder for the statistics accumulator if it
     * does not exist yet.
     */
    private void maybeInitTreeStatsAccumulator() {
        if (treeStatsAccumulatorTL != null) {
            return;
        }
        treeStatsAccumulatorTL = new ThreadLocal<>();
    }

    /**
     * Returns the calling thread's statistics accumulator.
     *
     * @return the accumulator set for the calling thread, or null if none was
     * set or if EnvironmentImpl reports a thread-local reference count that
     * is not positive.
     */
    private TreeWalkerStatsAccumulator getTreeStatsAccumulator() {
        if (EnvironmentImpl.getThreadLocalReferenceCount() <= 0) {
            return null;
        }
        maybeInitTreeStatsAccumulator();
        return treeStatsAccumulatorTL.get();
    }

    /**
     * Increments the LN count of the calling thread's statistics accumulator;
     * does nothing when no accumulator is available.
     */
    public void incrementLNCount() {
        final TreeWalkerStatsAccumulator accumulator =
            getTreeStatsAccumulator();
        if (accumulator == null) {
            return;
        }
        accumulator.incrementLNCount();
    }

    /**
     * @return the slot index of the cursor position; negative if no position
     * is established.
     */
    public int getIndex() {
        return this.index;
    }

    /**
     * @return the BIN of the cursor position; null if no position is
     * established.
     */
    public BIN getBIN() {
        return this.bin;
    }

    /**
     * Sets the slot index of the cursor position without any validation.
     *
     * @param idx the new slot index.
     */
    public void setIndex(int idx) {
        this.index = idx;
    }

    /**
     * Positions the cursor on slot 0 of the current BIN.  The caller must own
     * the BIN latch (checked by assertion only).
     */
    public void setOnFirstSlot() {
        assert bin.isLatchOwner();
        final int firstSlot = 0;
        index = firstSlot;
    }

    /**
     * Positions the cursor on the last slot of the current BIN, i.e. at
     * getNEntries() - 1.  The caller must own the BIN latch (checked by
     * assertion only).
     */
    public void setOnLastSlot() {
        assert bin.isLatchOwner();
        final int lastSlot = bin.getNEntries() - 1;
        index = lastSlot;
    }

    /**
     * @param bin the BIN to compare with, by identity.
     *
     * @return true if the cursor is currently positioned on the given BIN.
     */
    public boolean isOnBIN(BIN bin) {
        return bin == this.bin;
    }

    /**
     * Asserts that the cursor is positioned on the given BIN.  Has no effect
     * when assertions are disabled.
     *
     * @param bin the BIN the cursor is expected to be on.
     */
    public void assertBIN(BIN bin) {
        assert bin == this.bin :
            "nodeId=" + bin.getNodeId() + " cursor=" + dumpToString(true);
    }

    /**
     * @param other the cursor to compare with.
     *
     * @return true if both cursors are on the same BIN (by identity) and the
     * same slot index.
     */
    public boolean isOnSamePosition(CursorImpl other) {
        if (bin != other.bin) {
            return false;
        }
        return index == other.index;
    }

    /**
     * Sets the BIN of the cursor position.  Does not register the cursor with
     * the new BIN nor remove it from the old one.
     *
     * @param newBin the new BIN; may be null.
     */
    public void setBIN(BIN newBin) {

        /*
         * Historical note.  There used to be a check here, via
         * BIN.containsCursor, that the cursor had been removed from the prior
         * BIN [#16280].  containsCursor latches the prior BIN, so when newBin
         * was already latched (during an insert, for example) this thread
         * latched two BINs in arbitrary order, causing a rare latch deadlock.
         * The assertion was therefore removed [#21395].
         */
        this.bin = newBin;
    }

    /**
     * Latches the BIN the cursor is positioned on, if any.  Because the bin
     * field may change while this thread waits for the latch, the field is
     * re-checked after the latch is obtained; if it changed, the stale BIN is
     * unlatched and the attempt is repeated.  Returns without latching
     * anything when the cursor has no BIN.
     */
    public void latchBIN() {
        for (;;) {
            if (bin == null) {
                return;
            }

            final BIN candidate = bin;
            candidate.latch(cacheMode);

            /* Still the cursor's BIN?  Then the right latch is held. */
            if (candidate == bin) {
                return;
            }

            candidate.releaseLatch();
        }
    }

    /**
     * Releases the latch on the cursor's BIN if this thread owns it.  Does
     * nothing when the cursor has no BIN.
     */
    public void releaseBIN() {
        if (bin == null) {
            return;
        }
        bin.releaseLatchIfOwner();
    }

    /**
     * Registers this cursor with the given BIN.  The caller must hold the
     * BIN's latch exclusively (checked by assertion only).
     *
     * @param bin the BIN to register with; null is ignored.
     */
    void addCursor(BIN bin) {
        if (bin == null) {
            return;
        }
        assert bin.isLatchExclusiveOwner();
        bin.addCursor(this);
    }

    /**
     * Registers this cursor with the BIN it is currently positioned on, if
     * any.
     */
    void addCursor() {
        if (bin == null) {
            return;
        }
        addCursor(bin);
    }

    /**
     * Moves the cursor to the given BIN/index.  When the BIN changes, the
     * cursor is removed from the old BIN (which is latched and unlatched
     * here) and registered with the new one.  The old BIN must be unlatched
     * and the new BIN must be latched by the caller.
     *
     * @param newBin the BIN to position on.
     * @param newIndex the slot index to position on.
     */
    private void setPosition(BIN newBin, int newIndex) {

        final boolean binChanges = (bin != newBin);

        if (binChanges) {

            /* Detach from the old BIN, if there is one. */
            if (bin != null) {
                latchBIN();
                bin.removeCursor(this);
                bin.releaseLatch();
            }

            /* Attach to the new BIN. */
            setBIN(newBin);
            addCursor();
        }

        setIndex(newIndex);
    }

    /**
     * Returns the node id of the cursor's BIN without latching it; intended
     * for building trace messages.
     *
     * @return the BIN's node id, or -1 if the cursor has no BIN.
     */
    public long getCurrentNodeId() {
        /* Read the volatile field once so the null check stays valid. */
        final BIN current = bin;
        if (current == null) {
            return -1;
        }
        return current.getNodeId();
    }

    /**
     * Returns the LSN of the slot the given cursor is positioned on, latching
     * the cursor's BIN for the duration of the read.
     *
     * @param cursor the cursor to read from; may be null.
     *
     * @return DbLsn.NULL_LSN if cursor is null, otherwise the LSN at the
     * cursor's current position.
     */
    public static long getCurrentLsn(final CursorImpl cursor) {

        if (cursor == null) {
            return DbLsn.NULL_LSN;
        }

        cursor.latchBIN();
        try {
            return cursor.getCurrentLsn();
        } finally {
            /* Release the latch even if reading the LSN throws. */
            cursor.releaseBIN();
        }
    }

    /**
     * Returns the LSN stored in the slot at the cursor position.  The cursor
     * must be positioned on a valid slot and the caller must own the BIN
     * latch; both are checked by assertion only.
     *
     * @return the LSN of the current slot.
     */
    public long getCurrentLsn() {

        assert bin != null;
        assert bin.isLatchOwner();
        assert index >= 0;
        assert index < bin.getNEntries();

        final long slotLsn = bin.getLsn(index);
        return slotLsn;
    }

    /**
     * Returns the key at the current position, latching the BIN while it is
     * read.  Equivalent to getCurrentKey(false).
     */
    public byte[] getCurrentKey() {
        return getCurrentKey(false /*isLatched*/);
    }

    /**
     * Returns the key at the current position, regardless of whether the
     * record is defunct.  Does not lock.  The returned array is not a copy
     * and must be copied before it is handed to the user.
     *
     * The cursor must be initialized.
     *
     * TODO:
     * The returned byte array is normally, but not always, a copy, and is
     * then copied again into the user's DatabaseEntry.  If this method always
     * returned a copy, the extra copy into DatabaseEntry could be avoided.
     *
     * @param isLatched true if the caller already holds the BIN latch; if
     * false, the BIN is latched here and released before returning.
     *
     * @return the key of the current slot.
     */
    public byte[] getCurrentKey(boolean isLatched) {

        final boolean latchHere = !isLatched;

        if (latchHere) {
            latchBIN();
        }

        try {
            assert bin != null;
            assert index >= 0;
            assert index < bin.getNEntries();

            return bin.getKey(index);
        } finally {
            if (latchHere) {
                releaseBIN();
            }
        }
    }

    /**
     * Asks the BIN whether the slot at the cursor position is probably
     * expired.  The BIN is latched for the duration of the call.
     *
     * @return the result of BIN.isProbablyExpired for the current slot.
     */
    public boolean isProbablyExpired() {
        latchBIN();
        try {
            final boolean probablyExpired = bin.isProbablyExpired(index);
            return probablyExpired;
        } finally {
            releaseBIN();
        }
    }

    /**
     * Returns the expiration time of the slot at the cursor position,
     * converted by TTL.expirationToSystemTime.  The BIN is latched for the
     * duration of the call.
     *
     * @return the expiration as computed by TTL.expirationToSystemTime from
     * the slot's expiration value and the BIN's hours/days setting.
     */
    public long getExpirationTime() {
        latchBIN();
        try {
            final int expiration = bin.getExpiration(index);
            final boolean inHours = bin.isExpirationInHours();
            return TTL.expirationToSystemTime(expiration, inHours);
        } finally {
            releaseBIN();
        }
    }

    /**
     * Marks the cursor as initialized.
     */
    private void setInitialized() {
        this.status = CURSOR_INITIALIZED;
    }

    /**
     * @return true if this cursor has been closed.
     */
    public boolean isClosed() {
        return CURSOR_CLOSED == status;
    }

    /**
     * @return true if this cursor is in the not-initialized state.  A closed
     * cursor returns false.
     */
    public boolean isNotInitialized() {
        return CURSOR_NOT_INITIALIZED == status;
    }

    /**
     * @return true if the cursor's database reports itself as an internal
     * database.
     */
    public boolean isInternalDbCursor() {
        final boolean internalDb = dbImpl.isInternalDb();
        return internalDb;
    }

    /**
     * @return true if the cursor's database is configured for sorted
     * duplicates.
     */
    public boolean hasDuplicates() {
        final boolean sortedDuplicates = dbImpl.getSortedDuplicates();
        return sortedDuplicates;
    }

    /**
     * For a non-sticky cursor, called when the cursor is initialized and an
     * advancing operation (next/prev/skip) is about to be performed.  Unlike
     * a search or an insertion, such an operation does not reset the cursor
     * position first.
     */
    public void beforeNonStickyOp() {

        /*
         * For cache modes that evict the LN or BIN, the LN is evicted here,
         * before the position changes; we can assume that the position will
         * either change or the cursor will be reset.  The BIN is evicted
         * later.
         */
        final boolean modeEvictsLN =
            !(cacheMode == CacheMode.DEFAULT ||
              cacheMode == CacheMode.KEEP_HOT);

        if (modeEvictsLN) {
            latchBIN();
            try {
                performCacheModeLNEviction();
            } finally {
                releaseBIN();
            }
        }

        releaseNonTxnLocks();

        criticalEviction();
    }

    /**
     * For a non-sticky cursor, called after a successful operation.  Unlike
     * after a failed operation, the cursor position is not reset.
     */
    public void afterNonStickyOp() {

        /*
         * BIN eviction for a non-sticky cursor works on the prior BIN: it is
         * remembered here and evicted only after a later operation has moved
         * the cursor to a different BIN (or when the cursor is reset or
         * closed).
         */
        performPriorBINEviction();

        /* Remember the current BIN unless one is already being tracked. */
        if (null == priorBIN) {
            priorBIN = bin;
        }

        criticalEviction();
    }

    /**
     * Returns the cursor to the uninitialized state.  In contrast to close(),
     * the cursor remains usable afterwards.
     */
    public void reset() {

        /*
         * Order matters: the cursor must be removed from its BIN before the
         * BIN is evicted and before locks are released.
         */
        removeCursorAndPerformCacheEviction(null /*newCursor*/);
        releaseNonTxnLocks();

        /* Perform eviction before and after each cursor operation. */
        criticalEviction();
    }

    /**
     * Puts every piece of per-position state back to its uninitialized value.
     * Does not unregister the cursor from a BIN or release any latch.
     */
    private void clear() {
        /* Lifecycle state. */
        status = CURSOR_NOT_INITIALIZED;

        /* Position. */
        bin = null;
        index = -1;
        priorBIN = null;

        /* Values cached for the last operation. */
        currentRecordVersion = null;
        storageSize = 0;
        priStorageSize = 0;
        nSecWrites = 0;
    }

    /**
     * Releases the locker's non-transactional locks, unless this cursor was
     * created to retain them.
     */
    private void releaseNonTxnLocks() {
        if (retainNonTxnLocks) {
            return;
        }
        locker.releaseNonTxnLocks();
    }

    /**
     * Closes the cursor when no other cursor is kept open by the parent
     * Cursor object.  Equivalent to close(null).
     */
    public void close() {
        close(null /*newCursor*/);
    }

    /**
     * Closes the cursor: removes it from its BIN, performs cache mode-based
     * eviction, unregisters it from the locker, ends the locker's
     * non-transactional operation unless locks are retained, and marks the
     * cursor closed.
     *
     * @param newCursor is another cursor that is kept open by the parent
     * Cursor object, or null if no other cursor is kept open.
     */
    public void close(final CursorImpl newCursor) {

        assert assertCursorState(
            false /*mustBeInitialized*/, false /*mustNotBeInitialized*/);

        /*
         * Order matters: the cursor must be removed from its BIN before the
         * BIN is evicted and before locks are released.
         */
        removeCursorAndPerformCacheEviction(newCursor);

        locker.unRegisterCursor(this);

        final boolean endLockerOperation = !retainNonTxnLocks;
        if (endLockerOperation) {
            locker.nonTxnOperationEnd();
        }

        status = CURSOR_CLOSED;

        /* Perform eviction before and after each cursor operation. */
        criticalEviction();
    }

    /**
     * Detaches the cursor from its BIN, performs cache mode-based eviction
     * for the prior and current BIN, and clears the cursor state.  Returns
     * with the BIN unlatched.
     *
     * @param newCursor another cursor that remains open, used to decide
     * whether the position was moved away from; may be null.
     */
    private void removeCursorAndPerformCacheEviction(CursorImpl newCursor) {

        performPriorBINEviction();

        latchBIN();

        if (bin != null) {
            try {
                /* Must remove cursor before evicting BIN. */
                bin.removeCursor(this);
                performCacheModeEviction(newCursor); // may release latch
            } finally {
                releaseBIN();
                clear();
            }
            return;
        }

        /* No BIN, nothing latched: just ensure the state is uninitialized. */
        clear();
    }

    /**
     * Performs cache mode-based eviction for the prior BIN only.  Called
     * after a successful operation using a non-sticky cursor, and when the
     * cursor is reset or closed.  Nothing happens unless a prior BIN is
     * recorded and it differs from the current BIN.
     */
    private void performPriorBINEviction() {

        final BIN binToEvict = priorBIN;

        if (binToEvict == null || binToEvict == bin) {
            return;
        }

        /*
         * Forget the prior BIN first: it must not be processed twice, and a
         * null field is what allows a new prior BIN to be recorded.
         */
        priorBIN = null;

        /* DEFAULT, KEEP_HOT and EVICT_LN never evict a BIN. */
        final boolean modeSkipsBINEviction =
            cacheMode == CacheMode.DEFAULT ||
            cacheMode == CacheMode.KEEP_HOT ||
            cacheMode == CacheMode.EVICT_LN;

        if (modeSkipsBINEviction) {
            return;
        }

        binToEvict.latch(CacheMode.UNCHANGED);
        try {
            performCacheModeBINEviction(binToEvict);
        } finally {
            /* Eviction may already have released the latch. */
            binToEvict.releaseLatchIfOwner();
        }
    }

    /**
     * Enables or disables eviction during operations of this cursor.  A
     * cursor that is itself used to implement eviction (e.g., in some
     * UtilizationProfile and most DbTree and VLSNIndex methods) should not
     * perform eviction, whereas user cursors should.  Eviction is disabled by
     * default.
     *
     * @param allowed true to let criticalEviction run for this cursor.
     */
    public void setAllowEviction(boolean allowed) {
        this.allowEviction = allowed;
    }

    /**
     * Asks the environment to perform critical eviction (without background
     * IO), provided eviction is allowed for this cursor and the current cache
     * mode and BIN state call for it.
     */
    public void criticalEviction() {

        /* Internal cursors disable eviction; see setAllowEviction. */
        if (!allowEviction) {
            return;
        }

        /*
         * With UNCHANGED, EVICT_BIN or MAKE_COLD and a BIN that is not dirty,
         * an operation generally adds no net memory to the cache, so it
         * should neither perform critical eviction nor block while another
         * thread evicts.
         */
        final boolean dirtyBin = (bin != null && bin.getDirty());

        if (dirtyBin ||
            (cacheMode != CacheMode.UNCHANGED &&
             cacheMode != CacheMode.EVICT_BIN &&
             cacheMode != CacheMode.MAKE_COLD)) {
            dbImpl.getEnv().criticalEviction(false /*backgroundIO*/);
        }
    }

    /**
     * Performs CacheMode-based eviction for the position this cursor is
     * leaving.
     *
     * When several operations are performed, eviction for one operation takes
     * place at the end of the next operation, which calls close() or reset()
     * on the CursorImpl of the previous operation.  Eviction for the last
     * operation (also when there is only one) happens during Cursor.close(),
     * which calls CursorImpl.close().
     *
     * By default the CacheMode returned by DatabaseImpl.getCacheMode is used,
     * which applies the defaults the user specified for the Database or
     * Environment.  The user may override this with Cursor.setCacheMode, and
     * may change the mode before each operation.
     *
     * A per-operation CacheMode needs two fields.  Cursor.cacheMode is the
     * mode for the next operation.  CursorImpl.cacheMode is the mode used for
     * the previous operation; it drives eviction when that CursorImpl is
     * closed or reset.
     *
     * This method must be called with the BIN latched but may release it,
     * namely when the BIN is evicted.
     *
     * @param newCursor the cursor that remains open, or null.  When null, or
     * when it is on a different BIN or slot, this cursor's position counts as
     * moved away from.
     */
    private void performCacheModeEviction(final CursorImpl newCursor) {

        /* DEFAULT and KEEP_HOT evict neither the LN nor the BIN. */
        if (cacheMode == CacheMode.DEFAULT ||
            cacheMode == CacheMode.KEEP_HOT) {
            return;
        }

        /* Without a surviving cursor, everything has been moved away from. */
        final boolean movedOffBin =
            (newCursor == null) || (bin != newCursor.bin);
        final boolean movedOffLn =
            movedOffBin || (index != newCursor.index);

        if (movedOffLn) {
            performCacheModeLNEviction();
        }

        /* EVICT_LN stops short of evicting the BIN. */
        if (cacheMode == CacheMode.EVICT_LN) {
            return;
        }

        if (movedOffBin) {
            performCacheModeBINEviction(bin);
        }
    }

    /**
     * Performs the LN portion of CacheMode eviction.  The BIN is latched on
     * method entry and exit.  Must be called only for CacheMode.EVICT_LN,
     * EVICT_BIN, UNCHANGED and MAKE_COLD; any other mode trips an assertion
     * and evicts nothing.
     */
    private void performCacheModeLNEviction() {

        /* Whether eviction is limited to LNs that were fetched cold. */
        final boolean ifFetchedCold;

        switch (cacheMode) {
        case EVICT_LN:
        case EVICT_BIN:
            ifFetchedCold = false;
            break;
        case UNCHANGED:
        case MAKE_COLD:
            ifFetchedCold = true;
            break;
        default:
            assert false;
            return;
        }

        evictLN(true /*isLatched*/, ifFetchedCold);
    }

    /**
     * Performs the BIN portion of CacheMode eviction.  The BIN is latched on
     * method entry, but may or may not be latched on exit.  Must be called
     * only for CacheMode.EVICT_BIN, UNCHANGED and MAKE_COLD; any other mode
     * trips an assertion and evicts nothing.
     *
     * @param binToEvict the latched BIN to consider for eviction.
     */
    private void performCacheModeBINEviction(BIN binToEvict) {
        switch (cacheMode) {
        case UNCHANGED:
        case MAKE_COLD:
            /* These modes evict only a BIN that was fetched cold. */
            if (!binToEvict.getFetchedCold()) {
                break;
            }
            evictBIN(binToEvict, CacheMode.UNCHANGED);
            break;
        case EVICT_BIN:
            evictBIN(binToEvict, CacheMode.EVICT_BIN);
            break;
        default:
            assert false;
        }
    }

    /**
     * Hands the given BIN to the evictor for cache mode-based eviction.  The
     * BIN must already be latched; the latch is released inside the
     * doCacheModeEvict() call.
     *
     * @param binToEvict the latched BIN to evict.
     * @param cacheMode the cache mode passed on to the evictor.
     */
    private void evictBIN(BIN binToEvict, CacheMode cacheMode) {
        final EnvironmentImpl envImpl = dbImpl.getEnv();
        envImpl.getEvictor().doCacheModeEvict(binToEvict, cacheMode);
    }

    /**
     * Evicts the LN at the cursor position unconditionally, latching the BIN
     * for the duration of the call.
     */
    public void evictLN() {
        final boolean callerHoldsLatch = false;
        final boolean onlyIfFetchedCold = false;
        evictLN(callerHoldsLatch, onlyIfFetchedCold);
    }

    /**
     * Evicts the LN at the cursor position.  Nothing is evicted when the
     * cursor has no slot index (index is negative).
     *
     * @param isLatched true if the caller already holds the BIN latch; if
     * false, the BIN is latched here and released before returning.
     *
     * @param ifFetchedCold passed through to BIN.evictLN.
     */
    private void evictLN(boolean isLatched, boolean ifFetchedCold) {

        final boolean latchHere = !isLatched;

        try {
            if (latchHere) {
                latchBIN();
            }
            if (index < 0) {
                return;
            }
            bin.evictLN(index, ifFetchedCold);
        } finally {
            if (latchHere) {
                releaseBIN();
            }
        }
    }

    /**
     * Decides whether record data of the given size is embedded.
     *
     * @param data the record data; must not be null.
     *
     * @return true if the data is no longer than the environment's maximum
     * embedded LN size, the database does not use sorted duplicates, and the
     * database type is not internal.
     */
    private boolean shouldEmbedLN(byte[] data) {

        if (data.length > dbImpl.getEnv().getMaxEmbeddedLN()) {
            return false;
        }

        if (dbImpl.getSortedDuplicates()) {
            return false;
        }

        return !dbImpl.getDbType().isInternal();
    }

    /**
     * Delete the item pointed to by the cursor, by logging a deleted version
     * of its LN and updating the BIN slot via BIN.deleteRecord. The cursor
     * must be initialized. The BIN is latched here and released in the
     * finally block, so the method returns with nothing latched.
     *
     * @param repContext passed through to LN.optionalLog when the deleted LN
     * is logged.
     *
     * @return null if the item is already defunct (the record does not exist)
     * or if its expired LN was purged; otherwise the result created by
     * DbInternal.makeResult from the slot's expiration settings.
     */
    public OperationResult deleteCurrentRecord(ReplicationContext repContext) {

        assert assertCursorState(
            true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);

        final EnvironmentImpl envImpl = dbImpl.getEnv();
        final DbType dbType = dbImpl.getDbType();
        final long currLsn;
        final LogItem logItem;

        /*
         * Set to true on every normal exit, including the "nothing to delete"
         * returns; read by the finally block.
         */
        boolean  success = false;
        
        latchBIN();

        try {
            /*
             * Get a write lock. An uncontended lock is permitted because we
             * will log a new LN before releasing the BIN latch.
             */
            final LockStanding lockStanding = lockLN(
                LockType.WRITE, true /*allowUncontended*/, false /*noWait*/);

            /* Nothing to delete: undo the lock and report "not found". */
            if (!lockStanding.recordExists()) {
                revertLock(lockStanding);
                success = true;
                return null;
            }

            /* Capture the slot's current state before it is modified. */
            currLsn = lockStanding.lsn;
            assert(currLsn != DbLsn.NULL_LSN);
            final boolean currEmbeddedLN = bin.isEmbeddedLN(index);
            final int currLoggedSize = bin.getLastLoggedSize(index);
            final byte[] currKey = bin.getKey(index);

            final int expiration = bin.getExpiration(index);
            final boolean expirationInHours = bin.isExpirationInHours();

            /*
             * Must fetch LN if the LN is not embedded and any of the following
             * are true:
             *  - CLEANER_FETCH_OBSOLETE_SIZE is configured and lastLoggedSize
             *    is unknown
             *  - this database does not use the standard LN class and we
             *    cannot call DbType.createdDeletedLN further below
             * For other cases, we are careful not to fetch, in order to avoid
             * a random read during a delete operation.
             */
            LN ln;
            if ((currLoggedSize == 0 &&
                 !currEmbeddedLN &&
                 envImpl.getCleaner().getFetchObsoleteSize(dbImpl)) ||
                !dbType.mayCreateDeletedLN()) {

                ln = bin.fetchLN(index, cacheMode);
                if (ln == null) {
                    /* An expired LN was purged. */
                    revertLock(lockStanding);
                    success = true;
                    return null;
                }
            } else {
                /* May be null when the LN is not cached; handled below. */
                ln = bin.getLN(index, cacheMode);
            }

            /*
             * Make the existing LN deleted, if cached; otherwise, create a
             * new deleted LN (with ln.data == null), but do not attach it
             * to the tree yet.
             */
            long oldLNMemSize = 0;
            if (ln != null) {
                oldLNMemSize = ln.getMemorySizeIncludedByParent();
                ln.delete();
            } else {
                ln = dbType.createDeletedLN(envImpl);
            }

            /* Get a wli to log. */
            final WriteLockInfo wli = lockStanding.prepareForUpdate(bin, index);

            /*
             * Log the deleted record version and lock its new LSN. The key,
             * embedded-ness and expiration of the slot are carried over
             * unchanged as the "new" values.
             */
            logItem = ln.optionalLog(
                envImpl, dbImpl, locker, wli,
                currEmbeddedLN /*newEmbeddedLN*/, currKey /*newKey*/,
                expiration, expirationInHours,
                currEmbeddedLN, currLsn, currLoggedSize,
                false/*isInsertion*/, repContext);

            /*
             * Now update the parent BIN to reference the logrec written
             * above, set the PD flag on, and do the BIN memory counting.
             */
            bin.deleteRecord(
                index, oldLNMemSize, logItem.lsn,
                ln.getVLSNSequence(), logItem.size);

            /*
             * If the LN is not cached, we don't need to attach the LN to the
             * tree, because as long as the PD flag is on, the record's data
             * will  never be accessed. But for DW DBs, we must attach the LN
             * because no logrec was generated above, and as a result, the LN
             * must be in the tree so that a logrec will be generated when
             * a db.sync() occurs later (that logrec is needed for crash
             * recovery, because BINs are not replayed during crash recovery).
             *
             * If the LN child is cached, it is desirable to evict it because
             * as long as the PD flag is on, the record's data will  never be
             * accessed. But for DW DBs we should not evict the dirty LN since
             * it will be logged unnecessarily.
             */
            if (bin.getTarget(index) == null) {
                if (dbImpl.isDeferredWriteMode()) {
                    bin.attachNode(index, ln, null /*lnKey*/);
                }
            } else {
                if (!dbImpl.isDeferredWriteMode()) {
                    bin.evictLN(index);
                }
            }

            /* Cache record version/size for delete operation. */
            setCurrentVersion(ln.getVLSNSequence(), logItem.lsn);
            setStorageSize();

            locker.addDeleteInfo(bin);
            success = true;

            trace(Level.FINER, TRACE_DELETE, bin, index, currLsn, logItem.lsn);

            return DbInternal.makeResult(expiration, expirationInHours);

        } finally {

            /*
             * Count the call in the environment's BIN-delta delete counter
             * when it completed normally on a non-internal database while the
             * BIN is a BIN-delta. This must be checked before the latch is
             * released below.
             */
            if (success &&
                !dbImpl.isInternalDb() &&
                bin != null &&
                bin.isBINDelta()) {
                dbImpl.getEnv().incBinDeltaDeletes();
            }

            releaseBIN();
        }
    }

    /**
     * Replace the data of the record at the current cursor position and,
     * optionally, the key stored in its BIN slot.
     *
     * The cursor must be initialized. The BIN is latched for the duration of
     * the call and released before returning.
     *
     * @param key The new key value for the BIN slot S to be updated. Cannot
     * be partial. For a no-dups DB, it is null. For dups DBs it is a 2-part
     * key combining the current primary key of slot S with the original,
     * user-provided data. "key" (if not null) must compare equal to S.key
     * (otherwise DuplicateDataException is thrown), but the 2 keys may not
     * be identical if custom comparators are used. So, S.key will actually
     * be replaced by "key".
     *
     * @param data The new data to (perhaps partially) replace the data of the
     * LN associated with the BIN slot. For dups DBs it is EMPTY_DUPS_DATA.
     * Note: for dups DBs the original, user-provided "data" must not be
     * partial.
     *
     * @param expInfo Expiration info handed to updateRecordInternal as is.
     *
     * @param returnOldData To receive the old LN data (before the update).
     * It is needed only by DBs with indexes/triggers; will be null otherwise.
     * If non-null, its data is reset to null before the update is attempted.
     *
     * @param returnNewData To receive the full data of the updated LN.
     * It is needed only by DBs with indexes/triggers and only if "data" is
     * partial; will be null otherwise. Note: "returnNewData" may be different
     * than "data" only if "data" is partial. If non-null, its data is reset
     * to null before the update is attempted.
     *
     * @param repContext Replication context handed to updateRecordInternal
     * as is.
     *
     * @return OperationResult, or null if the locked record does not exist,
     * or if an expired LN was purged and a partial 'data' param was supplied.
     */
    public OperationResult updateCurrentRecord(
        DatabaseEntry key,
        DatabaseEntry data,
        ExpirationInfo expInfo,
        DatabaseEntry returnOldData,
        DatabaseEntry returnNewData,
        ReplicationContext repContext) {

        assert assertCursorState(
            true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);

        /* Reset the output entries before doing anything else. */
        if (returnOldData != null) {
            returnOldData.setData(null);
        }
        if (returnNewData != null) {
            returnNewData.setData(null);
        }

        boolean completed = false;

        latchBIN();

        try {
            /* Acquire a write lock on the record in the current slot. */
            final LockStanding standing = lockLN(
                LockType.WRITE, true /*allowUncontended*/, false /*noWait*/);

            final OperationResult opResult;

            if (standing.recordExists()) {
                final byte[] newKey =
                    (key == null) ? null : Key.makeKey(key);

                opResult = updateRecordInternal(
                    newKey, data, expInfo,
                    returnOldData, returnNewData,
                    standing, repContext);
            } else {
                /* The record does not exist: revert the lock, return null. */
                revertLock(standing);
                opResult = null;
            }

            completed = true;
            return opResult;

        } finally {

            /*
             * Bump the BIN-delta update counter only when the operation
             * completed without an exception, on a non-internal DB.
             */
            if (completed && !dbImpl.isInternalDb()) {
                if (bin != null && bin.isBINDelta()) {
                    dbImpl.getEnv().incBinDeltaUpdates();
                }
            }

            releaseBIN();
        }
    }

    /**
     * Insert the given record (key + LN) in the tree, unless a non-defunct
     * record with the same key is already present.
     *
     * The cursor must initially be uninitialized. The BIN latched by the
     * insertion attempt is released before returning.
     *
     * This method is called directly internally for putting tree map LNs
     * and file summary LNs.
     *
     * @param key the key of the record to insert.
     *
     * @param ln the LN to insert.
     *
     * @param blindInsertion passed through to insertRecordInternal.
     *
     * @param repContext passed through to insertRecordInternal.
     *
     * @return true if we inserted a slot or reused a slot having a defunct
     * record, or false if the insertion failed because a non-defunct record
     * exists with the given key.
     */
    public boolean insertRecord(
        byte[] key,
        LN ln,
        boolean blindInsertion,
        ReplicationContext repContext) {

        assert assertCursorState(
            false /*mustBeInitialized*/, true /*mustNotBeInitialized*/);
        if (LatchSupport.TRACK_LATCHES) {
            LatchSupport.expectBtreeLatchesHeld(0);
        }

        final boolean inserted;

        try {
            /*
             * The second element of the returned pair is the operation
             * result; it is non-null only when the insertion took place.
             */
            inserted = insertRecordInternal(
                key, ln, null /*expirationInfo*/, blindInsertion,
                null /*returnNewData*/, repContext).second() != null;
        } finally {
            releaseBIN();
        }

        return inserted;
    }

    /**
     * Insert or update a given record. The method searches for the record
     * using its key. It will perform an update if the record is found,
     * otherwise an insertion.
     *
     * The cursor must initially be uninitialized. The BIN latched by the
     * operation is released before returning.
     *
     * Called by all the Cursor.putXXX() ops, except putCurrent().
     *
     * @param key The new key value for the BIN slot S to be inserted/updated.
     * Cannot be partial. For dups DBs it is a 2-part key combining the
     * original, user-provided key and data. In case of update, "key" must
     * compare equal to S.key (otherwise DuplicateDataException is thrown),
     * but the 2 keys may not be identical if custom comparators are used.
     * So, S.key will actually be replaced by "key".
     *
     * @param data In case of update, the new data to (perhaps partially)
     * replace the data of the LN associated with the BIN slot. For dups DBs
     * it is EMPTY_DUPS_DATA. Note: for dups DBs the original, user-provided
     * "data" must not be partial.
     *
     * @param ln is normally a new LN node that is created for insertion, and
     * will be discarded if an update occurs.  However, HA will pass an
     * existing node.
     *
     * @param expInfo Expiration info passed to both the insertion attempt
     * and, if the record already exists, to the update.
     *
     * @param putMode OVERWRITE or NO_OVERWRITE. Any other mode results in an
     * EnvironmentFailureException (unexpected state).
     *
     * @param returnOldData To receive, in case of update, the old LN data
     * (before the update). It is needed only by DBs with indexes/triggers;
     * will be null otherwise.
     *
     * @param returnNewData To receive the full data of the new or updated LN.
     * It is needed only by DBs with indexes/triggers and only if "data" is
     * partial; will be null otherwise. Note: "returnNewData" may be different
     * than "data" only if "data" is partial.
     *
     * @param repContext Replication context passed to both the insertion
     * attempt and the update.
     *
     * @return OperationResult where isUpdate() distinguishes insertions and
     * updates. Is null if putMode is NO_OVERWRITE and a non-defunct record
     * with the given key already exists, or if an expired LN was purged and
     * a partial 'data' param was supplied.
     */
    public OperationResult insertOrUpdateRecord(
        final DatabaseEntry key,
        final DatabaseEntry data,
        final LN ln,
        final ExpirationInfo expInfo,
        final PutMode putMode,
        final DatabaseEntry returnOldData,
        final DatabaseEntry returnNewData,
        final ReplicationContext repContext) {

        assert key != null;
        assert data != null;
        assert ln != null;
        assert putMode != null;
        assert assertCursorState(
            false /*mustBeInitialized*/, true /*mustNotBeInitialized*/);
        if (LatchSupport.TRACK_LATCHES) {
            LatchSupport.expectBtreeLatchesHeld(0);
        }

        if (putMode != PutMode.OVERWRITE &&
            putMode != PutMode.NO_OVERWRITE) {
            throw EnvironmentFailureException.unexpectedState(
                putMode.toString());
        }

        boolean success = false;
        boolean inserted = false;

        final byte[] keyCopy = Key.makeKey(key);

        try {

            /*
             * Try to insert the key/data pair as a new record. Will succeed if
             * the record does not exist in the DB already. Otherwise,
             * insertRecordInternal() returns with the cursor registered on the
             * slot whose key is equal to "key", with the LSN of that slot
             * locked in WRITE mode, and with the containing BIN latched.
             *
             * The pair is fully parameterized so that first() and second()
             * are typed as LockStanding and OperationResult respectively.
             */
            final Pair<LockStanding, OperationResult> insertResult =
                insertRecordInternal(
                    keyCopy, ln, expInfo,
                    false /*blindInsertion*/,
                    returnNewData, repContext);

            if (insertResult.second() != null) {
                inserted = true;
                success = true;
                return insertResult.second();
            }

            /*
             * There is a non-defunct slot whose key is == "key". So, this is
             * going to be an update. Note: Cursor has been registered on the
             * existing slot by insertRecordInternal().
             */
            if (putMode == PutMode.NO_OVERWRITE) {
                /* The existing record must not be touched; report null. */
                success = true;
                return null;
            }

            /*
             * Update the non-defunct record at the cursor position. We have
             * optimized by preferring to take an uncontended lock. The
             * lockStanding var is guaranteed to be non-null in this case.
             * The BIN must remain latched when calling this method.
             */
            final OperationResult result = updateRecordInternal(
                keyCopy, data, expInfo,
                returnOldData, returnNewData,
                insertResult.first(), repContext);

            success = true;
            return result;

        } finally {

            /*
             * For non-internal DBs, count operations that completed without
             * an exception while the BIN is a delta. Anything that was not an
             * insertion is counted as an update.
             */
            if (success &&
                !dbImpl.isInternalDb() &&
                bin != null &&
                bin.isBINDelta()) {
                if (inserted) {
                    dbImpl.getEnv().incBinDeltaInserts();
                } else {
                    dbImpl.getEnv().incBinDeltaUpdates();
                }
            }

            releaseBIN();
        }
    }

    /**
     * Try to insert the key/data pair as a new record. Will succeed if a
     * non-defunct record does not exist already with the given key.
     *
     * The cursor must initially be uninitialized.
     *
     * On return, this.bin is latched.
     *
     * @param key the key of the slot to be inserted or reused.
     *
     * @param ln the LN to be logged and placed in the slot. Its data is
     * stored in the BIN slot itself when shouldEmbedLN() says so.
     *
     * @param expInfo expiration info for the new record; if null,
     * ExpirationInfo.DEFAULT is used.
     *
     * @param blindInsertion passed to BIN.insertEntry1(); should be true
     * only if it is already known that the key does not exist in the tree.
     *
     * @param returnNewData if non-null, receives the data of the inserted LN.
     *
     * @param repContext passed to the LN logging call.
     *
     * @return a non-null pair of LockStanding and OperationResult.
     *
     *   + LockStanding will be non-null if a slot with the given key already
     *     exists, whether or not we reuse the slot for this record (i.e.,
     *     whether or not the result is non-null). In other words, we always
     *     lock the record in an existing slot for the given key.
     *
     *   + OperationResult will be non-null if we inserted a slot or reused a
     *     slot having a defunct record, or null if the insertion failed
     *     because a non-defunct record exists with the given key.
     */
    private Pair<LockStanding, OperationResult> insertRecordInternal(
        final byte[] key,
        final LN ln,
        ExpirationInfo expInfo,
        final boolean blindInsertion,
        final DatabaseEntry returnNewData,
        final ReplicationContext repContext) {

        final EnvironmentImpl envImpl = dbImpl.getEnv();
        final Tree tree = dbImpl.getTree();
        WriteLockInfo wli;
        LockStanding lockStanding = null;
        final boolean isSlotReuse;
        final long currLsn;

        final boolean currEmbeddedLN;
        final boolean newEmbeddedLN;
        final byte[] data;

        /* The data goes into the BIN slot only if the LN is to be embedded. */
        if (shouldEmbedLN(ln.getData())) {
            data = ln.getData();
            newEmbeddedLN = true;
        } else {
            newEmbeddedLN = false;
            data = null;
        }

        if (expInfo == null) {
            expInfo = ExpirationInfo.DEFAULT;
        }

        /*
         * At this point, this cursor does not have a position so it cannot be
         * registered with the BIN that will be used. This is good because it
         * allows slot compression to occur before BIN splits (thus avoiding
         * splits if compression finds and removes any defunct slots). However,
         * if another cursor, including the one from which this was cloned, is
         * registered with the BIN, then splits won't be allowed. This is a
         * good reason to use non-sticky cursors for insertions, especially
         * sequential insertions since they will often end up in the same BIN.
         *
         * Find and latch the BIN that should contain the "key". On return from
         * the tree search, this.bin is latched, but "this" is still not
         * registered.
         */
        bin = tree.findBinForInsert(key, getCacheMode());

        /*
         * In the case where logging occurs before locking, allow lockers to
         * reject the operation (e.g., if writing on a replica) and also
         * prepare to undo in the (very unlikely) event that logging succeeds
         * but locking fails. Call this method BEFORE slot insertion, in case
         * it throws an exception which would leave the slot with a null LSN.
         *
         * For Txn, creates the writeInfo map (if not done already), and
         * inserts dbImpl in the undoDatabases map. Noop for other
         * non-HA lockers.
         */
        locker.preLogWithoutLock(dbImpl);

        /*
         * If the key exists already, insertEntry1() does not insert, but
         * returns the index of the existing key.
         *
         * If bin is a delta and it does not contain the key, then:
         * (a) if blindInsertion is false, insertEntry1() will mutate it to a
         * full BIN and check again if the key exists or not.
         * (b) if blindInsertion is true, insertEntry1() will not mutate the
         * delta; it will just insert the key into the delta. This is OK,
         * because blindInsertion will be true only if we know already that the
         * key does not exist in the tree.
         */
        int insertIndex = bin.insertEntry1(
            ln, key, data, DbLsn.NULL_LSN, blindInsertion);

        if ((insertIndex & IN.INSERT_SUCCESS) == 0) {
            /*
             * Key exists. Insertion was not successful. Register the cursor on
             * the existing slot. If the slot is defunct, the key does not
             * really exist and the slot can be reused to do an insertion.
             */
            isSlotReuse = true;

            setIndex(insertIndex);
            addCursor();
            setInitialized();

            /*
             * Lock the LSN for the existing LN slot, and check defunct-ness.
             * An uncontended lock request is permitted because we are holding
             * the bin latch. If no locker holds a lock on the slot, then no
             * lock is taken by this cursor either.
             */
            lockStanding = lockLN(
                LockType.WRITE, true /*allowUncontended*/, false /*noWait*/);
            assert(lockStanding != null);

            if (lockStanding.recordExists()) {
                /* A live record owns the key: report "not inserted". */
                return new Pair<>(lockStanding, null);
            }

            /*
             * The record in the current slot is defunct. Note: it may have
             * been made defunct by this.locker itself.
             */
            currLsn = lockStanding.lsn;
            currEmbeddedLN = bin.isEmbeddedLN(index);

            /*
             * Create a new WriteLockInfo or get an existing one for the LSN
             * of the current slot, and set its abortLSN and abortKD fields,
             * if needed, i.e, if it is not the current txn the one who created
             * this LSN. The abortLSN and abortKD fields of the wli will be
             * included in the new logrec.
             */
            wli = lockStanding.prepareForUpdate(bin, index);

        } else {
            /*
             * Register the cursor at the slot that has been successfully
             * inserted.
             */
            isSlotReuse = false;
            currEmbeddedLN = newEmbeddedLN;
            currLsn = DbLsn.NULL_LSN;

            /* Strip the success flag to obtain the plain slot index. */
            setIndex(insertIndex & ~IN.INSERT_SUCCESS);
            addCursor();
            setInitialized();

            /* Create a new WriteLockInfo */
            wli = LockStanding.prepareForInsert(bin);
        }

        /*
         * Log the new LN and lock the LSN of the new logrec in WRITE mode.
         * Note: in case of slot reuse, we pass NULL_LSN for the oldLsn param
         * because the old defunct LN is counted obsolete by other means.
         */
        LogItem logItem = null;
        try {
            logItem = ln.optionalLog(
                envImpl, dbImpl, locker, wli,
                newEmbeddedLN, key,
                expInfo.expiration, expInfo.expirationInHours,
                currEmbeddedLN, currLsn, 0/*currSize*/,
                true/*isInsertion*/, repContext);
        } finally {
            if (logItem == null && !isSlotReuse) {
                /*
                 * Possible buffer overflow, out-of-memory, or I/O exception
                 * during logging. The BIN entry will contain a NULL_LSN. To
                 * prevent an exception during a future fetchLN() call, we
                 * set the KD flag. We do not call BIN.deleteEntry because it
                 * does not adjust cursors. We do not add this entry to the
                 * compressor queue to avoid complexity (this situation is
                 * rare).
                 */
                bin.setKnownDeletedAndEvictLN(index);
            }
        }

        assert logItem != null;

        if (lockStanding == null) {
            /*
             * No slot reuse; straight insertion. Update LSN in BIN slot.
             * The LN is already in the slot.
             */
            bin.updateEntry(
                index, logItem.lsn, ln.getVLSNSequence(),
                logItem.size);

            bin.setExpiration(
                index, expInfo.expiration, expInfo.expirationInHours);

            /*
             * The following call accounts for extra marshaled memory, i.e.,
             * memory that was added to the LN as a side-effect of logging it.
             * This can happen for FileSummaryLN's only (it is a noop for
             * other kinds of LNs).
             *
             * To avoid violating assertions (e.g., in IN.changeMemorySize), we
             * must finish the memory adjustment while the BIN is still
             * latched. [#20069]
             *
             * This special handling does not apply to slot reuse, because the
             * updateEntry() version used in the slot reuse case will recalc
             * the BIN memory from scratch, and as a result, will take into
             * account the extra marshaled memory. [#20845]
             */
            if (bin.getTarget(index) == ln) {
                ln.addExtraMarshaledMemorySize(bin);
            }

        } else {

            /*
             * Slot reuse. When reusing a slot, the key is replaced in the BIN
             * slot. This ensures that the correct key value is used when the
             * new key is non-identical to the key in the slot but is
             * considered equal by the btree comparator.
             */
            bin.insertRecord(
                index, ln, logItem.lsn, logItem.size, key, data,
                expInfo.expiration, expInfo.expirationInHours);
        }

        if (returnNewData != null) {
            returnNewData.setData(null);
            ln.setEntry(returnNewData);
        }

        /* Cursor is positioned on new record. */
        setInitialized();

        /* Cache record version/size for insertion operation. */
        setCurrentVersion(ln.getVLSNSequence(), bin.getLsn(index));
        setStorageSize();

        /*
         * It is desirable to evict the LN in a duplicates DB because it will
         * never be fetched again. But for deferred-write DBs we should not
         * evict a dirty LN since it may be logged unnecessarily.
         */
        if (dbImpl.getSortedDuplicates() &&
            !dbImpl.isDeferredWriteMode() &&
            bin.getTarget(index) != null) {
            bin.evictLN(index);
        }

        traceInsert(Level.FINER, bin, logItem.lsn, index);

        return new Pair<>(
            lockStanding,
            DbInternal.makeResult(
                expInfo.expiration, expInfo.expirationInHours));
    }

    /**
     * Update the record where the cursor is currently positioned at. The
     * cursor is registered with this position, the associated bin is latched,
     * the BIN slot is not defunct, and it has been locked in WRITE mode.
     *
     * @param key if non-null, the key to store in the slot. It must compare
     * equal to the slot's current key according to dbImpl.getKeyComparator(),
     * otherwise DuplicateDataException is thrown. If null, the slot's current
     * key is the one that gets logged.
     *
     * @param data the new data. If it is a partial entry, it is resolved
     * against the record's current data.
     *
     * @param expInfo may be null. If non-null, it is given the slot's old
     * expiration time, and when its updateExpiration flag is set its
     * expiration values replace those of the slot.
     *
     * @param returnOldData if non-null, will be filled in with the
     * pre-existing record's data. However, if an expired LN was purged, it
     * will not be filled in and the caller should expect this; see {@link
     * Cursor#putNotify}.
     *
     * @param returnNewData if non-null, will be filled in with the data of
     * the updated LN.
     *
     * @param lockStanding the result of locking the current slot; its
     * recordExists() must be true.
     *
     * @param repContext passed to the LN logging call.
     *
     * @return OperationResult, or null if an expired LN was purged and a
     * partial 'data' param was supplied.
     *
     * @throws DuplicateDataException if "key" is non-null and does not
     * compare equal to the slot's current key.
     */
    private OperationResult updateRecordInternal(
        final byte[] key,
        final DatabaseEntry data,
        final ExpirationInfo expInfo,
        final DatabaseEntry returnOldData,
        final DatabaseEntry returnNewData,
        final LockStanding lockStanding,
        final ReplicationContext repContext) {

        assert(lockStanding.recordExists());

        final EnvironmentImpl envImpl = dbImpl.getEnv();
        final DbType dbType = dbImpl.getDbType();

        /* Snapshot the state of the slot before it is modified. */
        final long currLsn = lockStanding.lsn;
        assert(currLsn != DbLsn.NULL_LSN);
        final int currLoggedSize = bin.getLastLoggedSize(index);
        final byte[] currKey = bin.getKey(index);
        final byte[] currData;

        final boolean currEmbeddedLN = bin.isEmbeddedLN(index);
        final boolean newEmbeddedLN;

        final LogItem logItem;

        /*
         * Must fetch LN if it is not embedded and any of the following
         * are true:
         *  - returnOldData is non-null: data needs to be returned
         *  - data is a partial entry: needs to be resolved
         *  - CLEANER_FETCH_OBSOLETE_SIZE is configured and lastLoggedSize
         *    is unknown
         *  - this database does not use the standard LN class and we
         *    cannot call DbType.createUpdatedLN further below (this is
         *    the case for NameLNs, MapLNs, and FileSummaryLNs).
         * For other cases, we are careful not to fetch, in order to avoid
         * a random read during an update operation.
         */
        LN ln;
        if (returnOldData != null ||
            data.getPartial() ||
            (currLoggedSize == 0 &&
             !currEmbeddedLN &&
             envImpl.getCleaner().getFetchObsoleteSize(dbImpl)) ||
            !dbType.mayCreateUpdatedLN()) {

            if (currEmbeddedLN) {
                /* Embedded record: the current data lives in the BIN slot. */
                currData = bin.getData(index);
                ln = bin.getLN(index, cacheMode);
            } else {
                ln = bin.fetchLN(index, cacheMode);
                currData = (ln != null ? ln.getData() : null);
            }
        } else {
            /* No fetch: use the LN only if getLN() returns one. */
            ln = bin.getLN(index, cacheMode);
            currData = (ln != null ? ln.getData() : null);
        }

        final byte[] newData;
        if (data.getPartial()) {
            if (currData == null) {
                /* Expired LN was purged. Cannot use a partial entry. */
                return null;
            }
            newData = LN.resolvePartialEntry(data, currData);
        } else {
            newData = LN.copyEntryData(data);
        }

        /*
         * If the key is changed (according to the comparator), we assume
         * it is actually the data that has changed for a duplicate's DB.
         */
        if (key != null &&
            Key.compareKeys(
                currKey, key, dbImpl.getKeyComparator()) != 0) {
            throw new DuplicateDataException(
                "Can't replace a duplicate with new data that is not " +
                "equal to the existing data according to the duplicate " +
                " comparator.");
        }

        /* Hand back the old data, when requested and available. */
        if (returnOldData != null && currData != null) {
            returnOldData.setData(null);
            LN.setEntry(returnOldData, currData);
        }

        newEmbeddedLN = shouldEmbedLN(newData);

        /* Update the existing LN, if cached, else create new LN. */
        final long oldLNMemSize;
        if (ln != null) {
            oldLNMemSize = ln.getMemorySizeIncludedByParent();
            ln.modify(newData);
        } else {
            oldLNMemSize = 0;
            ln = dbType.createUpdatedLN(envImpl, newData);
        }

        final int oldExpiration = bin.getExpiration(index);
        final boolean oldExpirationInHours = bin.isExpirationInHours();

        /* Report the pre-update expiration time through expInfo. */
        if (expInfo != null) {
            expInfo.setOldExpirationTime(
                TTL.expirationToSystemTime(
                    oldExpiration, oldExpirationInHours));
        }

        final int expiration;
        final boolean expirationInHours;

        /*
         * Use the expiration from expInfo only when an expiration update was
         * requested; otherwise the slot keeps its old expiration.
         */
        if (expInfo != null && expInfo.updateExpiration) {
            if (expInfo.expiration != oldExpiration ||
                expInfo.expirationInHours != oldExpirationInHours) {
                expInfo.setExpirationUpdated(true);
            }
            expiration = expInfo.expiration;
            expirationInHours = expInfo.expirationInHours;
        } else {
            expiration = oldExpiration;
            expirationInHours = oldExpirationInHours;
        }

        /*
         * Create a new WriteLockInfo or get an existing one for the LSN
         * of the current slot, and set its abortLSN and abortKD fields,
         * if needed, i.e, if it is not the current txn the one who created
         * this LSN. The abortLSN and abortKD fields of the wli will be
         * included in the new logrec.
         */
        final WriteLockInfo wli = lockStanding.prepareForUpdate(bin, index);

        /* Log the new record version and lock its new LSN. */
        logItem = ln.optionalLog(
            envImpl, dbImpl, locker, wli,
            newEmbeddedLN, (key != null ? key : currKey),
            expiration, expirationInHours,
            currEmbeddedLN, currLsn, currLoggedSize,
            false/*isInsertion*/, repContext);

        /* Return a copy of resulting data, if requested. [#16932] */
        if (returnNewData != null) {
            returnNewData.setData(null);
            ln.setEntry(returnNewData);
        }

        /*
         * Update the parent BIN. Update the key, if changed. [#15704]
         */
        bin.updateRecord(
            index, oldLNMemSize, logItem.lsn, ln.getVLSNSequence(),
            logItem.size, key, (newEmbeddedLN ? newData : null),
            expiration, expirationInHours);

        /*
         * If the LN child is not cached, attach it to the tree if the DB
         * is a DW one or if the record is not embedded in the BIN. For
         * DW DBs, we must attach the LN even if the record is embedded,
         * because no logrec was generated above, and as a result, the LN
         * must be in the tree so that a logrec will be generated when
         * a db.sync() occurs later (that logrec is needed for crash
         * recovery, because BINs are not replayed during crash recovery).
         *
         * If the LN child is cached, it is desirable to evict it if the
         * record is embedded because it will never be fetched again.
         * But for DW DBs we should not evict a dirty LN since it will
         * be logged unnecessarily.
         */
        final boolean shouldCache =
            (dbImpl.isDeferredWriteMode() ||
             (!dbImpl.getSortedDuplicates() && !newEmbeddedLN));

        if (bin.getTarget(index) == null) {
            if (shouldCache) {
                bin.attachNode(index, ln, null /*lnKey*/);
            }
        } else {
            if (!shouldCache) {
                bin.evictLN(index);
            }
        }

        /* Cache record version/size for update operation. */
        setCurrentVersion(ln.getVLSNSequence(), logItem.lsn);
        setStorageSize();

        trace(Level.FINER, TRACE_MOD, bin, index, currLsn, logItem.lsn);

        return DbInternal.makeUpdateResult(expiration, expirationInHours);
    }

    /**
     * Move the cursor to the first or the last slot of the dbImpl's tree.
     * The slot may hold a defunct record; it is the caller's job to deal
     * with that.
     *
     * The cursor must initially be uninitialized.
     *
     * Returns with the target BIN latched! If anything is thrown, the BIN
     * is released before the throwable is propagated.
     *
     * @param first true to position at the first slot, false for the last.
     *
     * @return true if a first or last position is found, false if the
     * tree being searched is empty.
     */
    public boolean positionFirstOrLast(boolean first) {

        assert assertCursorState(
            false /*mustBeInitialized*/, true /*mustNotBeInitialized*/);

        try {
            bin = first ?
                dbImpl.getTree().getFirstNode(cacheMode) :
                dbImpl.getTree().getLastNode(cacheMode);

            /*
             * Getting a BIN back, even an empty one, counts as a position;
             * Cursor handles moving to the first non-defunct entry.
             */
            final boolean positioned = (bin != null);

            if (positioned) {

                final TreeWalkerStatsAccumulator statsAcc =
                    getTreeStatsAccumulator();

                if (bin.getNEntries() == 0) {
                    /* Empty BIN: there is no slot to point at. */
                    index = -1;
                } else {
                    index = first ? 0 : (bin.getNEntries() - 1);

                    /*
                     * Count the LN only if the slot is not defunct. The
                     * position is kept either way.
                     */
                    if (statsAcc != null) {
                        final boolean defunct =
                            bin.isEntryKnownDeleted(index) ||
                            bin.isEntryPendingDeleted(index);

                        if (!defunct) {
                            statsAcc.incrementLNCount();
                        }
                    }
                }
            }

            addCursor(bin);
            setInitialized();

            return positioned;

        } catch (final Throwable e) {
            /* Release latch on error. */
            releaseBIN();
            throw e;
        }
    }

    /**
     * Position this cursor on the slot whose key is the max key less or equal
     * to the given search key.
     *
     * To be more precise, let K1 be search key. The method positions the
     * cursor on the BIN that should contain K1. If the BIN does contain K1,
     * this.index is set to the containing slot. Otherwise, this.index is set
     * to the right-most slot whose key is < K1, or to -1 if K1 is < all
     * keys in the BIN.
     *
     * The cursor must initially be uninitialized.
     *
     * The method returns with the BIN latched, unless an exception is raised.
     *
     * The method returns an integer that encodes the search outcome: If the
     * FOUND bit is not set, the tree is completely empty (has no BINs). If
     * the FOUND bit is set, the EXACT_KEY bit says whether K1 was found or
     * not and the FOUND_LAST bit says whether the cursor is positioned to the
     * very last slot of the BTree (note that this state can only be counted
     * on as long as the BIN is latched). 
     *
     * Even if the search returns an exact result, the record may be defunct.
     * The caller must therefore check whether the cursor is positioned on a
     * defunct record.
     *
     * This method does not lock the record. The caller is expected to call
     * lockAndGetCurrent to perform locking.
     */
    public int searchRange(
        DatabaseEntry searchKey,
        Comparator comparator) {

        assert assertCursorState(
            false /*mustBeInitialized*/, true /*mustNotBeInitialized*/);

        boolean foundSomething = false;
        boolean foundExactKey = false;
        boolean foundLast = false;
        BINBoundary binBoundary = new BINBoundary();

        try {
            byte[] key = Key.makeKey(searchKey);

            bin = dbImpl.getTree().search(
                key, Tree.SearchType.NORMAL, binBoundary, cacheMode,
                comparator);

            if (bin != null) {

                foundSomething = true;
                if (bin.isBINDelta() && comparator != null) {

                    /*
                     * We must mutate a BIN delta if a non-null comparator is
                     * used. Otherwise, if we positioned the cursor on the
                     * delta using the non-null comparator, we would not be
                     * able to adjust its position correctly later when the
                     * delta gets mutated for some reason (because at that
                     * later time, the comparator used here would not be
                     * known).
                     */
                    bin.mutateToFullBIN(false /*leaveFreeSlot*/);
                }

                index = bin.findEntry(
                    key, true /*indicateIfExact*/, false/*exact*/, comparator);

                if (bin.isBINDelta() &&
                    (index < 0 ||
                     (index & IN.EXACT_MATCH) == 0 ||
                     binBoundary.isLastBin)) {

                    /*
                     * Note: if binBoundary.isLastBin, we must mutate the BIN
                     * in order to compute the foundLast flag below.
                     */
                    bin.mutateToFullBIN(false /*leaveFreeSlot*/);
                    index = bin.findEntry(key, true, false, comparator);
                }

                if (index >= 0) {
                    if ((index & IN.EXACT_MATCH) != 0) {
                        foundExactKey = true;
                        index &= ~IN.EXACT_MATCH;
                    }

                    foundLast = (binBoundary.isLastBin &&
                                 index == bin.getNEntries() - 1);
                }

                /*
                 * Must call addCursor after mutateToFullBIN() to avoid having
                 * to reposition "this" inside mutateToFullBIN(), which would
                 * be both unnecessary and wrong given that this.index could
                 * have the IN.EXACT_MATCH still on.
                 */
                addCursor(bin);
            }

            setInitialized();

            /* Return a multi-part status value */
            return ((foundSomething ? FOUND : 0) |
                    (foundExactKey ? EXACT_KEY : 0) |
                    (foundLast ? FOUND_LAST : 0));

        } catch (final Throwable e) {
            releaseBIN();
            throw e;
        }
    }

    /**
     * Convenience form of the four-argument searchExact that passes false
     * for both dirtyReadAll and dataRequested.
     *
     * @return true if the four-argument searchExact returned a non-null
     * LockStanding, false otherwise.
     */
    public boolean searchExact(DatabaseEntry searchKey, LockType lockType) {

        final LockStanding standing = searchExact(
            searchKey, lockType,
            false /*dirtyReadAll*/, false /*dataRequested*/);

        return (standing != null);
    }

    /**
     * Searches for the slot whose key equals the given search key and
     * positions this cursor there.
     *
     * If there is no such slot, or the slot does not hold a "valid" record,
     * null is returned. Otherwise the record is locked with the given lock
     * type (which may be NONE) and the LockStanding produced by the locking
     * operation is returned. Validity is decided by lockLNAndCheckDefunct()
     * based on the slot's KD/PD flags and the lockType and dirtyReadAll
     * parameters; see that method for the four cases considered.
     *
     * The cursor must initially be uninitialized.
     *
     * The method returns with the BIN latched, unless an exception is raised.
     *
     * Whenever the tree search returns a BIN, the cursor is registered with
     * that BIN, whether or not the key was found in it.
     *
     * @return the LockStanding for the found record, or null if no record was
     * found.
     */
    public LockStanding searchExact(
        final DatabaseEntry searchKey,
        final LockType lockType,
        final boolean dirtyReadAll,
        final boolean dataRequested) {

        assert assertCursorState(
            false /*mustBeInitialized*/, true /*mustNotBeInitialized*/);

        try {
            final byte[] keyBytes = Key.makeKey(searchKey);
            LockStanding result = null;

            bin = dbImpl.getTree().search(keyBytes, cacheMode);

            if (bin != null) {

                index = bin.findEntry(keyBytes, false, true /*exact*/);

                /*
                 * A miss in a BIN delta is not conclusive. Mutate to the
                 * full BIN and retry, but only when the delta says the key
                 * may be present in the full BIN.
                 */
                if (index < 0 &&
                    bin.isBINDelta() &&
                    bin.mayHaveKeyInFullBin(keyBytes)) {

                    bin.mutateToFullBIN(false /*leaveFreeSlot*/);
                    index = bin.findEntry(keyBytes, false, true /*exact*/);
                }

                addCursor(bin);

                if (index >= 0) {
                    result = lockLNAndCheckDefunct(
                        lockType, dirtyReadAll, dataRequested);
                }
            }

            setInitialized();
            return result;

        } catch (final Throwable e) {
            /* Do not leave the BIN latched when failing. */
            releaseBIN();
            throw e;
        }
    }

    /**
     * Locks the current record and copies it into the key and data
     * DatabaseEntry.
     *
     * Delegates to the six-argument form with dirtyReadAll=false,
     * isLatched=false and unlatch=true: this.bin must not be latched by the
     * caller, and it is unlatched again when this method returns.
     */
    public OperationResult lockAndGetCurrent(
        DatabaseEntry foundKey,
        DatabaseEntry foundData,
        final LockType lockType) {

        final OperationResult result = lockAndGetCurrent(
            foundKey, foundData, lockType,
            false /*dirtyReadAll*/, false /*isLatched*/, true /*unlatch*/);

        return result;
    }

    /**
     * Let S be the slot this cursor is currently positioned on. If S does
     * not hold a "valid" record, null is returned. Otherwise the record in S
     * is locked with the given lock type (which may be NONE), its key and
     * data are copied into the key and data DatabaseEntries, and the result
     * of getCurrent is returned. Whether S holds a "valid" record depends on
     * the slot's KD/PD flags, the lockType and dirtyReadAll parameters, and
     * whether the record has expired. For details see
     * {@link #lockLNAndCheckDefunct}.
     *
     * On entry, the isLatched param says whether this.bin is latched or not.
     * On return, this.bin is unlatched if the unlatch param is true or an
     * exception is thrown.
     *
     * @return OperationResult, or null if the slot holds no valid record or
     * the LN has been cleaned and cannot be fetched.
     */
    public OperationResult lockAndGetCurrent(
        DatabaseEntry foundKey,
        DatabaseEntry foundData,
        final LockType lockType,
        final boolean dirtyReadAll,
        final boolean isLatched,
        final boolean unlatch) {

        /* Remains false only when the try block is left by an exception. */
        boolean completed = false;

        try {
            assert assertCursorState(
                true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);

            assert checkAlreadyLatched(isLatched) : dumpToString(true);

            if (!isLatched) {
                latchBIN();
            }

            assert(bin.getCursorSet().contains(this));

            final TreeWalkerStatsAccumulator statsAcc =
                getTreeStatsAccumulator();

            /*
             * If we encounter a deleted slot, opportunistically add the BIN
             * to the compressor queue. We do not queue expired slots to avoid
             * frequent compression, especially in the CRUD path; we rely
             * instead on the evictor to perform expired slot compression.
             */
            if (index >= 0 &&
                index < bin.getNEntries() &&
                bin.isDeleted(index)) {
                bin.queueSlotDeletion(index);
            }

            /*
             * Check the KD flag in the BIN slot and make sure this isn't an
             * empty BIN. The BIN could be empty by virtue of the compressor
             * running the size of this BIN to 0 but not having yet removed
             * it from the tree.
             *
             * The index may be negative if we're at an intermediate stage in
             * a higher level operation (e.g., the starting search for a
             * range scan op), and we expect a higher level method to do a
             * next or prev operation after this returns KEYEMPTY. [#11700]
             */
            final boolean slotGone =
                (index < 0 ||
                 index >= bin.getNEntries() ||
                 bin.isEntryKnownDeleted(index));

            if (!slotGone) {

                assert TestHookExecute.doHookIfSet(testHook);

                final boolean dataRequested =
                    (foundData != null &&
                     (!foundData.getPartial() ||
                      foundData.getPartialLength() != 0));

                final LockStanding standing = lockLNAndCheckDefunct(
                    lockType, dirtyReadAll, dataRequested);

                if (standing != null) {
                    final OperationResult result =
                        getCurrent(foundKey, foundData);

                    completed = true;
                    return result;
                }
            }

            /* Node is no longer present, or its record is not valid. */
            if (statsAcc != null) {
                statsAcc.incrementDeletedLNCount();
            }

            completed = true;
            return null;

        } finally {
            if (!completed || unlatch) {
                releaseBIN();
            }
        }
    }

    /**
     * Let S be the slot where this cursor is currently positioned on. The
     * method locks S (i.e. its LSN), and depending on S's KD/PD flags and
     * expired status, it returns either null or the LockStanding obj that was
     * created by the locking op. The following 4 cases are considered. By
     * "defunct" below we mean S is KD/PD or expired.
     *
     * 1. If S is not defunct, return the LockStanding obj. In this case, we
     * know that S holds a valid (non-defunct) record.
     *
     * 2. If S is defunct, and the lock type is not NONE, return null. In this
     * case, we know that the record that used to be in S is definitely
     * defunct.
     *
     * 3. If S is defunct, the lock kind is NONE, and dirtyReadAll is false,
     * return null. This case corresponds to the READ_UNCOMMITTED LockMode.
     * The record in S is defunct, but the deleting txn may be active still,
     * and if it aborts later, the record will be restored. To avoid a
     * potentially blocking lock, in READ_UNCOMMITTED mode we consider the
     * record to be non-existing and return null.
     *
     * 4. If S is defunct, the lock kind is NONE, and dirtyReadAll is true,
     * lock the record in READ mode. This case corresponds to the
     * READ_UNCOMMITTED_ALL LockMode, which requires that we do not skip
     * "provisionally defunct" records. There are two sub-cases:
     *
     *  4a. If dataRequested is true, we wait until the deleting txn finishes.
     *      In this case the READ lock is blocking. If after the lock is
     *      granted S is still defunct, release the lock and return null.
     *      Otherwise, release the lock and return the LockStanding obj.
     *
     *  4b. If dataRequested is false, then we check whether the deleting txn
     *      is still open by requesting a non-blocking READ lock. If the lock
     *      is granted then the writing txn is closed or this cursor's locker
     *      is the writer, and we proceed as if the READ lock was granted in
     *      4a. If the lock is denied then the deleting txn is still open, and
     *      we return the LockStanding obj so that the record is not skipped.
     *
     * The BIN must be latched on entry and is latched on exit.
     *
     * @param dirtyReadAll is true if using LockMode.READ_UNCOMMITTED_ALL.
     *
     * @param dataRequested is true if the read operation should return the
     * record data, meaning that a blocking lock must be used for dirtyReadAll.
     * Is ignored if dirtyReadAll is false. Is always false for a dup DB,
     * since data is never requested for dup DB ops at the CursorImpl level.
     */
    private LockStanding lockLNAndCheckDefunct(
        final LockType lockType,
        final boolean dirtyReadAll,
        final boolean dataRequested) {

        assert (!dirtyReadAll || lockType == LockType.NONE);
        assert (!dataRequested || !dbImpl.getSortedDuplicates());

        final LockStanding firstStanding = lockLN(lockType);

        /* Case 1: the slot holds a live record. */
        if (firstStanding.recordExists()) {
            return firstStanding;
        }

        /* From here on the slot is known to be defunct. */

        if (lockType != LockType.NONE) {

            /*
             * Case 2: a real lock was taken on a defunct slot. The write that
             * made the record defunct was committed by another locker, or
             * was performed by this locker. Undo our lock and report
             * "no record".
             */
            revertLock(firstStanding);
            return null;
        }

        /* We're using dirty-read. The lockLN above did not actually lock. */

        if (!dirtyReadAll) {
            /* Case 3: READ_UNCOMMITTED -- skip defunct records, no locking. */
            return null;
        }

        /*
         * Case 4: READ_UNCOMMITTED_ALL -- get a read lock. Whether we can
         * request a no-wait or a blocking lock depends on the dataRequested
         * parameter.
         *
         * Although there is some redundant processing in the sense that lockLN
         * is called more than once (above and below), this is not considered a
         * performance issue because accessing defunct records is normally
         * infrequent. Deleted slots are normally compressed away quickly.
         */
        final boolean noWait = !dataRequested;

        final LockStanding readStanding = lockLN(
            LockType.READ, false /*allowUncontended*/, noWait);

        final boolean denied =
            (readStanding.lockResult.getLockGrant() == LockGrantType.DENIED);

        if (denied) {

            /*
             * The no-wait lock request was denied, which means the data is not
             * needed and the writing transaction is still open. The defunct
             * record should not be skipped in this case, according to the
             * definition of READ_UNCOMMITTED_ALL.
             */
            assert !readStanding.recordExists();
            return readStanding;
        }

        /* The read lock was only needed temporarily; give it back. */
        revertLock(readStanding);

        /*
         * If the record exists now, another txn aborted the deletion or
         * expiration time change while we waited. Otherwise the write was
         * committed by another locker, or has been performed by this locker.
         */
        return readStanding.recordExists() ? readStanding : null;
    }

    /**
     * Copies the record at the current slot into the given key and data
     * DatabaseEntry objects, and caches the record's version and storage
     * size in this cursor.
     *
     * As asserted below, the BIN must be latched exclusively by the caller
     * and the cursor must be positioned on an existing slot that is not
     * known-deleted.
     *
     * @param foundKey receives the slot key, unless it is null.
     *
     * @param foundData receives the record data, unless it is null or it is
     * a partial entry of length zero.
     *
     * @return OperationResult, or null if the LN has been cleaned and cannot
     * be fetched.
     */
    public OperationResult getCurrent(
        final DatabaseEntry foundKey,
        final DatabaseEntry foundData) {

        assert(bin.isLatchExclusiveOwner());
        assert(index >= 0 && index < bin.getNEntries());
        assert(!bin.isEntryKnownDeleted(index));

        final boolean lnAlwaysEmpty = dbImpl.isLNImmediatelyObsolete();
        final boolean lnEmbedded = bin.isEmbeddedLN(index);

        final boolean wantData =
            (foundData != null &&
             (!foundData.getPartial() || foundData.getPartialLength() != 0));

        /*
         * The LN is fetched only when the caller wants the data and the data
         * cannot be produced without it, i.e., the LN is neither known to be
         * empty nor embedded in the BIN slot.
         */
        LN fetchedLN = null;

        if (wantData && !lnAlwaysEmpty && !lnEmbedded) {
            fetchedLN = bin.fetchLN(index, cacheMode);
            if (fetchedLN == null) {
                /* An expired LN was purged. */
                return null;
            }
        }

        /* Return the data. */
        if (wantData) {

            final byte[] bytes;

            if (fetchedLN != null) {
                bytes = fetchedLN.getData();
            } else if (lnAlwaysEmpty || bin.isNoDataLN(index)) {
                bytes = LogUtils.ZERO_LENGTH_BYTE_ARRAY;
            } else {
                assert(lnEmbedded);
                bytes = bin.getData(index);
            }

            LN.setEntry(foundData, bytes);
        }

        /* Return the key */
        if (foundKey != null) {
            LN.setEntry(foundKey, bin.getKey(index));
        }

        /* Cache record version/size for fetch operation. */
        final long vlsn;

        if (fetchedLN != null) {
            vlsn = fetchedLN.getVLSNSequence();
        } else {
            vlsn = bin.getVLSN(index, false /*allowFetch*/, cacheMode);
        }

        setCurrentVersion(vlsn, bin.getLsn(index));
        setStorageSize();

        return DbInternal.makeResult(
            bin.getExpiration(index), bin.isExpirationInHours());
    }

    /**
     * Fetches the LN at the current slot without locking it. As asserted
     * below, the slot must not hold an embedded LN.
     *
     * @param isLatched says whether this.bin is already latched on entry; if
     * false, the BIN is latched here.
     *
     * @param unlatch if true, this.bin is unlatched on return. The BIN is
     * always unlatched when an exception is thrown.
     *
     * @return the value returned by BIN.fetchLN for the current slot.
     */
    public LN getCurrentLN(final boolean isLatched, final boolean unlatch) {

        /* Remains false only when the try block is left by an exception. */
        boolean fetched = false;

        try {
            assert assertCursorState(
                true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);
            assert checkAlreadyLatched(isLatched) : dumpToString(true);

            if (!isLatched) {
                latchBIN();
            }

            assert(bin.getCursorSet().contains(this));
            assert(!bin.isEmbeddedLN(index));

            final LN currentLN = bin.fetchLN(index, cacheMode);

            fetched = true;
            return currentLN;

        } finally {
            if (!fetched || unlatch) {
                releaseBIN();
            }
        }
    }

    /**
     * Locks the record at the current slot and retrieves its LN. The BIN is
     * unlatched on entry and exit.
     *
     * @return the fetched LN, or null if the lock attempt reports that the
     * record does not exist (in which case the lock is reverted).
     */
    public LN lockAndGetCurrentLN(final LockType lockType) {

        try {
            assert assertCursorState(
                true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);
            assert checkAlreadyLatched(false) : dumpToString(true);

            latchBIN();

            assert(bin.getCursorSet().contains(this));

            final LockStanding standing = lockLN(lockType);

            if (standing.recordExists()) {
                assert(!bin.isEmbeddedLN(index));
                return bin.fetchLN(index, cacheMode);
            }

            /* Nothing to return; give back whatever lockLN acquired. */
            revertLock(standing);
            return null;

        } finally {
            releaseBIN();
        }
    }

    /**
     * Returns the VLSN and LSN for the record at the current position.  Must
     * be called when the cursor is positioned on a record.
     *
     * If this method is called on a secondary cursor, the version of the
     * associated primary record is returned.  In that case, the allowFetch
     * parameter is ignored, and the version is available only if the primary
     * record was retrieved (see setPriInfo).
     *
     * @param allowFetch is true to fetch the LN to get the VLSN, or false to
     * return -1 for the VLSN if both the LN and VLSN are not cached.
     *
     * @throws IllegalStateException if the cursor is closed or uninitialized,
     * or this is a secondary cursor and the version is not cached.
     */
    public RecordVersion getCurrentVersion(boolean allowFetch) {

        /* Ensure cursor is open and initialized. */
        checkCursorState(
            true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);

        /*
         * For a secondary cursor, the cached version is all we have.
         * See setPriInfo.
         */
        if (isSecondaryCursor) {
            if (currentRecordVersion != null) {
                return currentRecordVersion;
            }
            throw new IllegalStateException(
                "Record version is available via a SecondaryCursor only " +
                "if the associated primary record was retrieved.");
        }

        /*
         * Use cached version if available.  The cached version is bypassed
         * only when it does not contain a VLSN, and fetching is allowed, and
         * VLSNs are preserved; in that case try to fetch it below.
         */
        if (currentRecordVersion != null) {

            final boolean tryFetchInstead =
                (currentRecordVersion.getVLSN() ==
                 VLSN.NULL_VLSN_SEQUENCE) &&
                allowFetch &&
                dbImpl.getEnv().getPreserveVLSN();

            if (!tryFetchInstead) {
                return currentRecordVersion;
            }
        }

        /* Get the VLSN from the BIN, create the version and cache it. */
        latchBIN();
        try {
            final long vlsn = bin.getVLSN(index, allowFetch, cacheMode);
            setCurrentVersion(vlsn, bin.getLsn(index));
        } finally {
            releaseBIN();
        }

        return currentRecordVersion;
    }

    /**
     * Replaces the cached record version with a new RecordVersion built from
     * the given VLSN and LSN.
     */
    private void setCurrentVersion(long vlsn, long lsn) {
        final RecordVersion version = new RecordVersion(vlsn, lsn);
        currentRecordVersion = version;
    }

    /**
     * Returns the estimated disk storage size for the record at the current
     * position. The size includes an estimation of the JE overhead for the
     * record, in addition to the user key/data sizes. But it does not include
     * obsolete overhead related to the record, i.e., space that could
     * potentially be reclaimed by the cleaner.
     *
     * <p>This method does not fetch the LN. Must be called when the
     * cursor is positioned on a record.</p>
     *
     * <p>When called on a secondary cursor that was used to return the primary
     * data, the size of the primary record is returned by this method.
     * Otherwise the size of the record at this cursor position is
     * returned.</p>
     *
     * @return the estimated storage size, or zero when the size is unknown
     * because a non-embedded LN is not resident and the LN was logged with a
     * JE version prior to 6.0.
     *
     * @throws IllegalStateException if the cursor is closed or uninitialized.
     * NOTE(review): the state check below is an assert statement, so it is
     * only evaluated when assertions are enabled -- confirm that callers
     * validate the cursor state themselves.
     *
     * @see StorageSize
     */
    public int getStorageSize() {

        assert assertCursorState(
            true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);

        /* A positive primary size (see setPriInfo) takes precedence. */
        return (priStorageSize > 0) ? priStorageSize : storageSize;
    }

    /**
     * Caches the value of StorageSize.getStorageSize for the current BIN
     * slot in the storageSize field.
     */
    private void setStorageSize() {
        storageSize = StorageSize.getStorageSize(bin, index);
    }

    /**
     * When the primary record is read during a secondary operation, this
     * method is called to copy the primary version and storage size here.
     * This allows the secondary cursor API to return the version and size of
     * the primary record. Note that a secondary record does not have a version
     * of its own.
     *
     * @param sourceCursor contains the primary info, but may be a primary or
     * secondary cursor.
     */
    public void setPriInfo(final CursorImpl sourceCursor) {
        currentRecordVersion = sourceCursor.currentRecordVersion;
        priStorageSize = sourceCursor.storageSize;
    }

    /**
     * Returns the number of secondary records written by the last put/delete
     * operation at the current cursor position.
     *
     * NOTE: this method does not work (returns 0) if primary deletions are
     * performed via a secondary (SecondaryDatabase/SecondaryCursor.delete).
     *
     * @return number of writes, or zero if a put/delete operation was not
     * performed.
     */
    public int getNSecondaryWrites() {
        return nSecWrites;
    }

    /**
     * Sets the value that getNSecondaryWrites returns.
     *
     * @param nWrites the number of secondary writes to record.
     */
    public void setNSecondaryWrites(final int nWrites) {
        nSecWrites = nWrites;
    }

    /**
     * Advance a cursor. Used so that verify can advance a cursor even in the
     * face of an exception [12932].
     *
     * @param key on return contains the key if available, or null.
     *
     * @param data on return contains the data if available, or null.
*/
    public boolean advanceCursor(DatabaseEntry key, DatabaseEntry data) {

        /* Remember where we started, to detect whether we moved at all. */
        final BIN oldBin = bin;
        final int oldIndex = index;

        /* Null data lets us tell below whether getNext filled in the key. */
        key.setData(null);
        data.setData(null);

        try {
            getNext(
                key, data, LockType.NONE, false /*dirtyReadAll*/,
                true /*forward*/, false /*isLatched*/,
                null /*rangeConstraint*/);
        } catch (DatabaseException ignored) {
            /*
             * Klockwork - ok. Deliberately ignored: the point of this method
             * is to keep advancing even when reading the record fails.
             */
        }

        /*
         * If the position changed, regardless of an exception, then we believe
         * that we have advanced the cursor.
         */
        if (bin != oldBin || index != oldIndex) {

            /*
             * Return the key from the BIN entries, if we were not able to
             * read it above. Slot 0 is a valid position (it is where the
             * cursor lands on the first step into a new BIN when moving
             * forward), so the lower bound must be inclusive. The upper
             * bound guards against an index that does not refer to an
             * existing slot.
             */
            if (key.getData() == null &&
                bin != null &&
                index >= 0 &&
                index < bin.getNEntries()) {
                LN.setEntry(key, bin.getKey(index));
            }
            return true;
        } else {
            return false;
        }
    }

    /**
     * Move the cursor forward and return the next "valid" record. Whether a
     * slot contains a "valid" record or not depends on the slot's KD/PD flags
     * and the lockType and dirtyReadAll parameters. Four cases are considered;
     * they are described in the lockLNAndCheckDefunct() method.
     *
     * This will cross BIN boundaries. On return, no latches are held. If no
     * exceptions, the cursor is registered with its new location.
     *
     * @param foundKey DatabaseEntry to use for returning key
     *
     * @param foundData DatabaseEntry to use for returning data
     *
     * @param forward if true, move forward, else move backwards
     *
     * @param isLatched if true, the bin that we're on is already
     * latched.
     *
     * @param rangeConstraint if non-null, is called to determine whether a key
     * is out of range.
*/
    public OperationResult getNext(
        DatabaseEntry foundKey,
        DatabaseEntry foundData,
        LockType lockType,
        boolean dirtyReadAll,
        boolean forward,
        boolean isLatched,
        RangeConstraint rangeConstraint) {

        assert assertCursorState(
            true /*mustBeInitialized*/, false /*mustNotBeInitialized*/);
        assert checkAlreadyLatched(isLatched) : dumpToString(true);

        OperationResult result = null;

        /* The BIN we are leaving; pinned while we look for its neighbor. */
        BIN anchorBIN = null;

        try {
            while (bin != null) {

                assert checkAlreadyLatched(isLatched) : dumpToString(true);

                if (!isLatched) {
                    latchBIN();
                    isLatched = true;
                }

                if (DEBUG) {
                    verifyCursor(bin);
                }

                bin.mutateToFullBIN(false /*leaveFreeSlot*/);

                /*
                 * Is there anything left on this BIN? Note that the index is
                 * advanced as a side effect of evaluating the condition.
                 */
                if ((forward && ++index < bin.getNEntries()) ||
                    (!forward && --index > -1)) {

                    /* Stop, returning null, at the first out-of-range key. */
                    if (rangeConstraint != null &&
                        !rangeConstraint.inBounds(bin.getKey(index))) {

                        result = null;
                        releaseBIN();
                        break;
                    }

                    /* Keep the BIN latched so that the loop can continue. */
                    OperationResult ret = lockAndGetCurrent(
                        foundKey, foundData, lockType, dirtyReadAll,
                        true /*isLatched*/, false /*unlatch*/);

                    if (LatchSupport.TRACK_LATCHES) {
                        LatchSupport.expectBtreeLatchesHeld(1);
                    }

                    /* A null ret means the slot was skipped; keep looping. */
                    if (ret != null) {
                        incrementLNCount();
                        releaseBIN();
                        result = ret;
                        break;
                    }

                } else {

                    /*
                     * Make sure that the current BIN will not be pruned away
                     * if it is or becomes empty after it gets unlatched by
                     * Tree.getNextBin() or Tree.getPrevBin(). The operation
                     * of these Tree methods relies on the current BIN not
                     * getting pruned.
                     */
                    anchorBIN = bin;
                    anchorBIN.pin();
                    bin.removeCursor(this);
                    bin = null;

                    final Tree tree = dbImpl.getTree();

                    /* SR #12736 Try to prune away oldBin */
                    assert TestHookExecute.doHookIfSet(testHook);

                    /*
                     * Start just outside the new BIN, so that the index
                     * increment/decrement at the top of the loop lands on
                     * its first/last slot.
                     */
                    if (forward) {
                        bin = tree.getNextBin(anchorBIN, cacheMode);
                        index = -1;
                    } else {
                        bin = tree.getPrevBin(anchorBIN, cacheMode);
                        if (bin != null) {
                            index = bin.getNEntries();
                        }
                    }
                    isLatched = true;

                    if (bin == null) {
                        /* No neighboring BIN: return null. */
                        if (LatchSupport.TRACK_LATCHES) {
                            LatchSupport.expectBtreeLatchesHeld(0);
                        }
                        result = null;
                        break;
                    } else {
                        if (LatchSupport.TRACK_LATCHES) {
                            LatchSupport.expectBtreeLatchesHeld(1);
                        }

                        /* Register with the new BIN, then drop the pin. */
                        addCursor();
                        anchorBIN.unpin();
                        anchorBIN = null;
                    }
                }
            }
        } finally {
            /* Only non-null if we exited while a BIN was still pinned. */
            if (anchorBIN != null) {
                anchorBIN.unpin();
            }
        }

        if (LatchSupport.TRACK_LATCHES) {
            LatchSupport.expectBtreeLatchesHeld(0);
        }

        return result;
    }

    /**
     * Used to detect phantoms during "get next" operations with serializable
     * isolation. If this method returns true, the caller should restart the
     * operation from the prior position.
     *
     * Something may have been added to the original cursor (cursorImpl) while
     * we were getting the next BIN. cursorImpl would have been adjusted
     * properly but we would have skipped a BIN in the process. This can
     * happen when all INs are unlatched in Tree.getNextBin. It can also
     * happen without a split, simply due to inserted entries in the previous
     * BIN.
     *
     * @return true if an unaccounted for insertion happened.
     *
     * TODO:
     * Unfortunately, this method doesn't cover all cases where a phantom may
     * have been inserted. Another case is described below.
     *
     *                          IN-0
     *                ----------------------
     *                |    | 50 | 100 |    |
     *                ----------------------
     *                 /       /    \     \
     *                        /      \
     *                   /----        ----\
     *          IN-1    /                  \    IN-2
     *          ----------------        ----------------
     *          | 60 | 70 | 80 |        |    |    |    |
     *          ----------------        ----------------
     *            /     |    \            /
     *           /      |     \          /
     *                 ----------------  -----------------
     *                 | 81 | 83 | 85 |  | 110 |    |    |
     *                 ----------------  -----------------
     *                      BIN-3              BIN-4
     *
     * Initially, the tree looks as above and a cursor (C) is located on the
     * last slot of BIN-3. For simplicity, assume no duplicates and no
     * serializable isolation. Also assume that C is a sticky cursor.
     *
     * 1. Thread 1 calls C.getNext(), which calls retrieveNextAllowPhantoms(),
     *    which duplicates C's cursorImpl, and calls dup.getNext().
     *
     *    dup.getNext() latches BIN-3, sets dup.binToBeRemoved to BIN-3 and
     *    then calls Tree.getNextBin(BIN-3).
     *
     *    Tree.getNextBin(BIN-3) does the following:
     *    - sets searchKey to 85
     *    - calls Tree.getParentINForChildIN(BIN-3).
     *    - Tree.getParentINForChildIN(BIN-3) unlatches BIN-3 and searches for
     *      BIN-3 parent, thus reaching IN-1.
     *    - IN-1.findEntry(85) sets "index" to 2,
     *    - "index" is incremented,
     *    - "moreEntriesThisIn" is set to false,
     *    - "next" is set to IN-1,
     *    - Tree.getParentINForChildIN(IN-1) is called and unlatches IN-1.
     *
     *    Assume at this point thread 1 loses the CPU.
     *
     * 2. Thread 2 inserts keys 90 and 95, causing a split of both BIN-3 and
     *    IN-1. So the tree now looks like this:
     *
     *                             IN-0
     *                  ---------------------------
     *                  |    | 50 | 80 | 100 |    |
     *                  ---------------------------
     *                   /       /    |     \    \
     *                          /     |      \
     *               /---------       |       ----------\
     *      IN-1    /                 |                  \    IN-2
     *             /          IN-5    |                   \
     *      -----------          -----------          ----------------
     *      | 60 | 70 |          | 80 | 90 |          |    |    |    |
     *      -----------          -----------          ----------------
     *        /     |              /     \              /
     *       /      |             /       \            /
     *      ----------------   -----------   -----------------
     *      | 81 | 83 | 85 |   | 90 | 95 |   | 110 |    |    |
     *      ----------------   -----------   -----------------
     *           BIN-3            BIN-6            BIN-4
     *
     *    Notice that C.cursorImpl still points to the last slot of BIN-3.
     *
     * 3. Thread 1 resumes:
     *
     *    - Tree.getParentINForChildIN(IN-1) reaches IN-0.
     *    - IN-0.findEntry(85) sets "index" to 2,
     *    - "index" is incremented,
     *    - "nextIN" is set to IN-2, which is latched.
     *    - Tree.searchSubTree(IN-2, LEFT) is called, and returns BIN-4.
     *    - BIN-4 is the result of Tree.getNextBin(BIN-3), i.e., BIN-6 was
     *      skipped
     *
     *    Now we are back in dup.getNext():
     *    - dup.bin is set to BIN-4, dup.index to -1, and dup is added to BIN-4
     *    - the while loop repeats, dup.index is set to 0, the 1st slot of
     *      BIN-4 is locked, and dup.getNext() returns SUCCESS.
     *
     *    Now we are back in C.retrieveNextAllowPhantoms():
     *    - C.checkForInsertion() is called
     *    - C.cursorImpl and dup are on different BINs, but the condition:
     *        origBIN.getNEntries() - 1 > origCursor.getIndex()
     *      is false, so C.checkForInsertion() returns false.
     *
     * The end result is that BIN-6 has been missed. This is not a "bug" for
     * non-serializable isolation, but the above scenario applies to
     * serializable isolation as well, and in that case, BIN-6 should really
     * not be missed. This could be solved by re-implementing
     * Tree.getNext/PrevBIN() to do a more "logical" kind of search.
     */
    public boolean checkForInsertion(
        final GetMode getMode,
        final CursorImpl dupCursor) {

        final CursorImpl origCursor = this;
        boolean forward = getMode.isForward();
        boolean ret = false;

        if (origCursor.bin != dupCursor.bin) {

            /*
             * We jumped to the next BIN during getNext().
             *
             * Be sure to operate on the BIN returned by latchBIN, not a cached
             * var [#21121].
             *
             * Note that a cursor BIN can change after the check above, but
             * that's not relevant; what we're trying to detect are BIN changes
             * during the operation that has already completed.
             *
             * Note that we can call isDefunct without locking. If we see a
             * non-committed defunct entry, we'll just iterate around in the
             * caller. So a false positive is ok.
             */
            origCursor.latchBIN();
            final BIN origBIN = origCursor.bin;

            /*
             * NOTE(review): this call runs before the try block, so the
             * latch taken above is not released here if it throws -- confirm
             * whether that is acceptable.
             */
            origBIN.mutateToFullBIN(false /*leaveFreeSlot*/);

            try {
                if (forward) {
                    if (origBIN.getNEntries() - 1 > origCursor.getIndex()) {

                        /*
                         * We were adjusted to something other than the
                         * last entry so some insertion happened.
                         */
                        for (int i = origCursor.getIndex() + 1;
                             i < origBIN.getNEntries();
                             i++) {
                            if (!origBIN.isDefunct(i)) {
                                /* See comment above about locking. */
                                ret = true;
                                break;
                            }
                        }
                    }
                } else {
                    if (origCursor.getIndex() > 0) {

                        /*
                         * We were adjusted to something other than the
                         * first entry so some insertion happened.
                         */
                        for (int i = 0; i < origCursor.getIndex(); i++) {
                            if (!origBIN.isDefunct(i)) {
                                /* See comment above about locking. */
                                ret = true;
                                break;
                            }
                        }
                    }
                }
            } finally {
                origCursor.releaseBIN();
            }
            return ret;
        }

        /* Both cursors are on the same BIN; nothing was skipped. */
        return false;
    }

    /**
     * Skips over entries until a boundary condition is satisfied, either
     * because maxCount is reached or RangeConstraint.inBounds returns false.
     *
     * If a maxCount is passed, this allows advancing the cursor quickly by N
     * entries. If a rangeConstraint is passed, this allows returning the
     * entry count after advancing until the predicate returns false, e.g., the
     * number of entries in a key range. In either case, the number of entries
     * advanced is returned.
     *
     * Optimized to scan using level two of the tree when possible, to avoid
     * calling getNextBin/getPrevBin for every BIN of the database. All BINs
     * beneath a level two IN can be skipped quickly, with the level two parent
     * IN latched, when all of its children BINs are resident and can be
     * latched without waiting. When a child BIN is not resident or latching
     * waits, we revert to the getNextBin/getPrevBin approach, to avoid keeping
     * the parent IN latched for long time periods.
     *
     * Although this method positions the cursor on the last non-defunct entry
     * seen (before the boundary condition is satisfied), because it does not
     * lock the LN it is possible that it is made defunct by another thread
     * after the BIN is unlatched.
* * @param forward is true to skip forward, false to skip backward. * * @param maxCount is the maximum number of non-defunct entries to skip, * and may be LTE zero if no maximum is enforced. * * @param rangeConstraint is a predicate that returns false at a position * where advancement should stop, or null if no predicate is enforced. * * @return the number of non-defunct entries that were skipped. */ public long skip( boolean forward, long maxCount, RangeConstraint rangeConstraint) { final CursorImpl c = cloneCursor(true /*samePosition*/); c.setCacheMode(CacheMode.UNCHANGED); try { return c.skipInternal(forward, maxCount, rangeConstraint, this); } catch (final Throwable e) { /* * Get more info on dbsim duplicate.conf failure when c.close below * throws because the BIN latch is already held. It should have * been released by skipInternal and therefore an unexpected * exception must have been throw and the error handling must be * incorrect. */ e.printStackTrace(System.out); throw e; } finally { c.close(); } } /** * Use this cursor to reference the current BIN in the traversal, to * prevent the current BIN from being compressed away. But set the given * finalPositionCursor (the 'user' cursor) position only at non-defunct * entries, since it should be positioned on a valid entry when this method * returns. */ private long skipInternal( boolean forward, long maxCount, RangeConstraint rangeConstraint, CursorImpl finalPositionCursor) { /* Start with the entry at the cursor position. */ final Tree tree = dbImpl.getTree(); latchBIN(); IN parent = null; BIN prevBin = null; BIN curBin = bin; int curIndex = getIndex(); long count = 0; boolean success = false; try { while (true) { curBin.mutateToFullBIN(false /*leaveFreeSlot*/); /* Skip entries in the current BIN. 
*/ count = skipEntries( forward, maxCount, rangeConstraint, finalPositionCursor, curBin, curIndex, count); if (count < 0) { curBin.releaseLatch(); success = true; return (- count); } /* * Get the parent IN at level two. The BIN is unlatched by * getParentINForChildIN. Before releasing the BIN latch, get * the search key for the last entry. */ final byte[] idKey = (curBin.getNEntries() == 0 ? curBin.getIdentifierKey() : (forward ? curBin.getKey(curBin.getNEntries() - 1) : curBin.getKey(0))); final SearchResult result = tree.getParentINForChildIN( curBin, false, /*useTargetLevel*/ true, /*doFetch*/ CacheMode.DEFAULT); parent = result.parent; if (!result.exactParentFound) { throw EnvironmentFailureException.unexpectedState( "Cannot get parent of BIN id=" + curBin.getNodeId() + " key=" + Arrays.toString(idKey)); } /* * Find and latch previous child BIN by matching idKey rather * than using result.index, as in Tree.getNextIN (see comments * there). */ int parentIndex = parent.findEntry(idKey, false, false); curBin = (BIN) parent.fetchIN(parentIndex, CacheMode.DEFAULT); curBin.latch(); if (forward ? (parentIndex < parent.getNEntries() - 1) : (parentIndex > 0)) { /* * There are more entries in the parent. Skip entries for * child BINs that are resident and can be latched no-wait. */ final int incr = forward ? 1 : (-1); for (parentIndex += incr;; parentIndex += incr) { prevBin = curBin; curBin = null; /* Break is no more entries in parent. */ if ((forward ? parentIndex >= parent.getNEntries() : parentIndex < 0)) { parent.releaseLatch(); break; } /* * Latch next child BIN, if cached and unlatched. * * Note that although 2 BINs are latched here, this * can't cause deadlocks because the 2nd latch is * no-wait. */ curBin = (BIN) parent.getTarget(parentIndex); if (curBin == null || !curBin.latchNoWait(CacheMode.DEFAULT)) { parent.releaseLatch(); break; } /* Unlatch the prev BIN */ prevBin.releaseLatch(); prevBin = null; /* Position at new BIN to prevent compression. 
*/ setPosition(curBin, -1); curBin.mutateToFullBIN(false /*leaveFreeSlot*/); /* Skip entries in new child BIN. */ count = skipEntries( forward, maxCount, rangeConstraint, finalPositionCursor, curBin, forward ? (-1) : curBin.getNEntries(), count); if (count < 0) { parent.releaseLatch(); curBin.releaseLatch(); success = true; return (- count); } } } else { /* No more entries in the parent. */ parent.releaseLatch(); prevBin = curBin; } /* * Only the prevBin is still latched here. Move to the next * BIN the "hard" way (i.e., via full tree searches). */ curBin = forward ? tree.getNextBin(prevBin, CacheMode.DEFAULT) : tree.getPrevBin(prevBin, CacheMode.DEFAULT); assert(!prevBin.isLatchOwner()); if (curBin == null) { success = true; return count; } prevBin = null; curIndex = forward ? (-1) : curBin.getNEntries(); /* Position at new BIN to prevent compression. */ setPosition(curBin, -1); } } finally { if (curBin != null && !success) { curBin.releaseLatchIfOwner(); } if (prevBin != null && !success) { prevBin.releaseLatchIfOwner(); } if (parent != null && !success) { parent.releaseLatchIfOwner(); } if (LatchSupport.TRACK_LATCHES) { LatchSupport.expectBtreeLatchesHeld(0); } } } /** * Skip entries in curBin from one past curIndex and onward. Returns * non-negative count if skipping should continue, or negative count if * bounds is exceeded. */ private long skipEntries( boolean forward, long maxCount, RangeConstraint rangeConstraint, CursorImpl finalPositionCursor, BIN curBin, int curIndex, long count) { assert(!curBin.isBINDelta()); final int incr = forward ? 1 : (-1); for (int i = curIndex + incr;; i += incr) { if (forward ? 
(i >= curBin.getNEntries()) : (i < 0)) { break; } if (rangeConstraint != null && !rangeConstraint.inBounds(curBin.getKey(i))) { return (- count); } if (!curBin.isDefunct(i)) { count += 1; finalPositionCursor.setPosition(curBin, i); if (maxCount > 0 && count >= maxCount) { return (- count); } } } return count; } /** * Returns the stack of ancestor TrackingInfo for the BIN at the cursor, or * null if a split occurs and the information returned would be * inconsistent. * * Used by CountEstimator. */ public List getAncestorPath() { /* * Search for parent of BIN, get TrackingInfo for ancestors. If the * exact parent is not found, a split occurred and null is returned. */ final List trackingList = new ArrayList<>(); latchBIN(); final BIN origBin = bin; final Tree tree = dbImpl.getTree(); final SearchResult result = tree.getParentINForChildIN( origBin, false, /*useTargetLevel*/ true /*doFetch*/, CacheMode.UNCHANGED, trackingList); if (!result.exactParentFound) { /* Must have been a split. */ return null; } /* * The parent was found and is now latched. If the child BIN does not * match the cursor's BIN, then a split occurred and null is returned. */ final long binLsn; try { if (origBin != result.parent.getTarget(result.index) || origBin != bin) { /* Must have been a split. */ return null; } binLsn = result.parent.getLsn(result.index); bin.latch(); } finally { result.parent.releaseLatch(); } /* * The child BIN is now latched. Subtract defunct entries from BIN's * total entries and adjust the index accordingly. Add TrackingInfo * for child BIN. 
*/ try { int binEntries = bin.getNEntries(); int binIndex = getIndex(); for (int i = bin.getNEntries() - 1; i >= 0; i -= 1) { if (bin.isDefunct(i)) { binEntries -= 1; if (i < binIndex) { binIndex -= 1; } } } final TrackingInfo info = new TrackingInfo( binLsn, bin.getNodeId(), binEntries, binIndex); trackingList.add(info); return trackingList; } finally { bin.releaseLatch(); } } /** * Search for the next key following the given key, and acquire a range * insert lock on it. If there are no more records following the given * key, lock the special EOF node for the dbImpl. */ public void lockNextKeyForInsert(DatabaseEntry key) { DatabaseEntry tempKey = new DatabaseEntry( key.getData(), key.getOffset(), key.getSize()); boolean lockedNextKey = false; boolean latched = true; try { while (true) { int searchResult = searchRange(tempKey, null /*comparator*/); if ((searchResult & FOUND) != 0 && (searchResult & FOUND_LAST) == 0) { /* * The search positioned "this" on the BIN that should * contain K1 and this BIN is now latched. If the BIN does * contain K1, this.index points to K1's slot. Otherwise, * this.index points to the right-most slot whose key is * < K1 (or this.index is -1 if K1 is < than all keys in * the BIN). Furthermore, "this" is NOT positioned on the * very last slot of the BTree. * * Call getNext() to advance "this" to the next *valid* * (i.e., not defunct) slot and lock that slot in * RANGE_INSERT mode. Normally, getNext() will move the * cursor to the 1st slot with a key K2 > K1. However, it * is possible that K2 <= K1 (see the comments in * Cursor.searchRangeAdvanceAndCheckKey() about how this * can happen. We handle this race condition by restarting * the search. 
*/ DatabaseEntry tempData = new DatabaseEntry(); tempData.setPartial(0, 0, true); OperationResult result = getNext( tempKey, tempData, LockType.RANGE_INSERT, false, true, true, null /*rangeConstraint*/); latched = false; if (result != null) { Comparator comparator = dbImpl.getKeyComparator(); int c = Key.compareKeys(tempKey, key, comparator); if (c <= 0) { tempKey.setData( key.getData(), key.getOffset(), key.getSize()); continue; } lockedNextKey = true; } } break; } } finally { if (latched) { releaseBIN(); } } /* Lock the EOF node if no next key was found. */ if (!lockedNextKey) { lockEof(LockType.RANGE_INSERT); } } /* * Locking */ /** * Holds the result of a lockLN operation. A lock may not actually be * held (getLockResult may return null) if an uncontended lock is allowed. */ public static class LockStanding { private long lsn; private boolean defunct; private LockResult lockResult; /** * Returns true if the record is not deleted or expired. */ boolean recordExists() { return !defunct; } /** * Called by update and delete ops, after lockLN() and before logging * the LN and updating the BIN. It returns a WriteLockInfo that is * meant to be passed to the LN logging method, where its info will * be included in the LN log entry and also copied into the new * WriteLockInfo that will be created for the new LSN. * * If the locker is not transactional, or the current LSN has not been * write-locked before by this locker, a new WriteLockInfo is created * here and its abortLsn and abortKD fields are set. (note: even though * lockLN() is called before prepareForUpdate(), it may not actually * acquire a lock because of the uncontended optimization). * * Otherwise, a WriteLockInfo exists already. It may have been created * by the lockLN() call during the current updating op, or a lockLN() * call during an earlier updating op by the same txn. In the later * case, the abortLsn and abortKD have been set already and should not * be overwritten here. 
*/ WriteLockInfo prepareForUpdate(BIN bin, int idx) { DatabaseImpl db = bin.getDatabase(); boolean abortKD = !recordExists(); byte[] abortKey = null; byte[] abortData = null; long abortVLSN = VLSN.NULL_VLSN.getSequence(); int abortExpiration = bin.getExpiration(idx); boolean abortExpirationInHours = bin.isExpirationInHours(); if (bin.isEmbeddedLN(idx)) { abortData = bin.getData(idx); abortVLSN = bin.getVLSN( idx, false/*allowFetch*/, null/*cacheMode*/); if (bin.getDatabase().allowsKeyUpdates()) { abortKey = bin.getKey(idx); } } WriteLockInfo wri = (lockResult == null ? null : lockResult.getWriteLockInfo()); if (wri == null) { wri = new WriteLockInfo(); wri.setAbortLsn(lsn); wri.setAbortKnownDeleted(abortKD); wri.setAbortKey(abortKey); wri.setAbortData(abortData); wri.setAbortVLSN(abortVLSN); wri.setAbortExpiration(abortExpiration, abortExpirationInHours); wri.setDb(db); } else { lockResult.setAbortInfo( lsn, abortKD, abortKey, abortData, abortVLSN, abortExpiration, abortExpirationInHours, db); } return wri; } /** * Creates WriteLockInfo that is appropriate for a newly inserted slot. * The return value is meant to be passed to an LN logging method and * copied into the WriteLockInfo for the new LSN. This method is * static because lockLN is never called prior to logging an LN for a * newly inserted slot. */ static WriteLockInfo prepareForInsert(BIN bin) { WriteLockInfo wri = new WriteLockInfo(); wri.setDb(bin.getDatabase()); return wri; } } /** Does not allow uncontended locks. See lockLN(LockType, boolean). */ public LockStanding lockLN(LockType lockType) throws LockConflictException { return lockLN(lockType, false /*allowUncontended*/, false /*noWait*/); } /** * Locks the LN at the cursor position. Attempts to use a non-blocking * lock to avoid unlatching/relatching. * * Retries if necessary, to handle the case where the LSN is changed while * the BIN is unlatched. 
Because it re-latches the BIN to check the LSN, * this serializes access to the LSN for locking, guaranteeing that two * lockers cannot obtain conflicting locks on the old and new LSNs. * * Preconditions: The BIN must be latched. * * Postconditions: The BIN is latched. * * LN Locking Rules * ---------------- * The lock ID for an LN is its LSN in the parent BIN slot. Because the * LSN changes when logging the LN, only two methods of locking an LN may * be used to support concurrent access: * * 1. This method may be called to lock the old LSN. For read operations, * that is all that is necessary. For write operations, the new LSN must * be locked after logging it, which is done by all the LN logging methods. * Be sure to pass a non-null locker to the LN logging method to lock the * LN, unless locking is not desired. * * 2. A non-blocking lock may be obtained on the old LSN (using * Locker.nonBlockingLock rather than this method), as long as the lock is * released before the BIN latch is released. In this case a null locker * is passed to the LN logging method; locking the new LSN is unnecessary * because no other thread can access the new LSN until the BIN latch is * released. * * The first method is used for all user operations. The second method is * used by the cleaner, when flushing dirty deferred-write LNs, and by * certain btree operations. * * Uncontended Lock Optimization * ----------------------------- * The allowUncontended param is passed as true for update and delete * operations as an optimization for the case where no lock on the old LSN * is held by any locker. In this case we don't need to lock the old LSN * at all, as long as we log the new LSN before releasing the BIN latch. * * 1. Latch BIN * 2. Determine that no lock/waiter exists for oldLsn * 3. Log LN and get lsn * 4. Lock lsn * 5. Update BIN * 6. Release BIN latch * * The oldLsn is never locked, saving operations on the lock table. 
The assumption is that another locker will first have to latch the BIN to
     * get oldLsn, before requesting a lock.
     *
     * A potential problem is that the other locker may release the BIN latch
     * before requesting the lock.
     *
     * This Operation              Another Operation
     * --------------              -----------------
     *                             Latch BIN, get oldLsn, release BIN latch
     * Step 1 and 2
     *                             Request lock for oldLsn, granted
     * Step 3 and 4
     *
     * Both operations now believe they have an exclusive lock, but they have
     * locks on different LSNs.
     *
     * However, this problem is handled as long as the other lock is performed
     * using a lockLN method in this class, which will release the lock and
     * retry if the LSN changes while acquiring the lock.  Because it
     * re-latches the BIN to check the LSN, this will serialize access to the
     * LSN for locking, guaranteeing that two conflicting locks cannot be
     * granted on the old and new LSNs.
     *
     * Deferred-Write Locking
     * ----------------------
     * When one of the LN optionalLog methods is called, a deferred-write LN is
     * dirtied but not actually logged.  In order to lock an LN that has been
     * inserted but not yet assigned a true LSN, a transient LSN is assigned.
     * These LSNs serve to lock the LN but never appear in the log.  See
     * LN.assignTransientLsn.
     *
     * A deferred-write LN is logged when its parent BIN is logged, or when the
     * LN is evicted.  This will replace transient LSNs with durable LSNs.  If
     * a lock is held by a cursor on a deferred-write LN when it is logged, the
     * same lock is acquired on the new LSN by the cursor.  See
     * lockAfterLsnChange.
     *
     * Cleaner Migration Locking
     * -------------------------
     * The cleaner takes a non-blocking read lock on the old LSN before
     * migrating/logging the LN, while holding the BIN latch.  It does not take
     * a lock on the new LSN, since it does not need to retain a lock after
     * releasing the BIN latch.
     *
     * Because a read, not write, lock is taken, other read locks may be held
     * during migration.
After logging, the cleaner calls lockAfterLsnChange
     * to lock the new LSN on behalf of other lockers.
     *
     * For more info on migration locking, see HandleLocker.
     *
     * Expired Record Locking
     * ----------------------
     * To support repeatable-read semantics when a record expires after being
     * locked, we must check whether a record was previously locked before
     * attempting to lock it. If it was previously locked, then it is treated
     * as not expired, even if its expiration time has passed.
     *
     * By "was previously locked" here we mean that any lock type is held, or
     * shared with its owner, by this cursor's locker. Since a read lock will
     * prevent modification of the expiration time, any lock type is adequate.
     * A shared lock is considered adequate to account for the case where
     * multiple lockers are used internally for a single virtual locker, as
     * seen by the user. This is the case when using a read-committed locker or
     * a thread-locker, for example.
     *
     * To avoid unnecessary added overhead, we do not check whether a record
     * was previously locked except when expiration is imminent, which is
     * defined as expiring within {@link
     * EnvironmentParams#ENV_TTL_MAX_TXN_TIME}. The ENV_TTL_MAX_TXN_TIME buffer
     * is used because the expiration time may pass while waiting for a lock.
     *
     * Another case to account for is when the expiration time of the record
     * changes while waiting for the lock. This can happen if the record is
     * updated or an update is aborted. In this case we can assume that the
     * record was not previously locked, since that would have prevented the
     * update.
     *
     * Note that when an uncontended lock applies, the expiration of the record
     * with the current LSN cannot change. It is possible that the update or
     * deletion requesting the uncontended lock will be aborted, and the LSN of
     * an expired record will be reinstated in the BIN, but this does not
     * create a special case.
*
     * Historical Notes
     * ----------------
     * In JE 4.1 and earlier, each LN had a node ID that was used for locking,
     * rather than using the LSN.  The node ID changed only if a deleted slot
     * was reused.  The node ID was stored in the LN, requiring that the LN be
     * fetched when locking the LN.  With LSN locking a fetch is not needed.
     *
     * When LN node IDs were used, deferred-write LNs were not assigned an LSN
     * until they were actually logged. Deferred-write LNs were initially
     * assigned a null LSN and transient LSNs were not needed.
     *
     * @param lockType the type of lock requested.
     *
     * @param allowUncontended is true to return immediately (no lock is taken)
     * when no locker holds or waits for the lock.
     *
     * @param noWait is true to perform a no-wait lock request while keeping
     * the BIN latched.  The caller must check the lock result to see whether
     * the lock was granted.
     *
     * @return all information about the lock; see LockStanding.
     *
     * @throws LockConflictException if the lsn is non-null, the lock is
     * contended, and a lock could not be obtained by blocking.
     */
    private LockStanding lockLN(
        final LockType lockType,
        final boolean allowUncontended,
        final boolean noWait)
        throws LockConflictException {

        final EnvironmentImpl envImpl = dbImpl.getEnv();

        final LockManager lockManager =
            envImpl.getTxnManager().getLockManager();

        final LockStanding standing = new LockStanding();
        standing.lsn = bin.getLsn(index);

        /* Check for a known-deleted null LSN. */
        if (standing.lsn == DbLsn.NULL_LSN) {
            assert bin.isEntryKnownDeleted(index);
            standing.defunct = true;
            return standing;
        }

        /* Short circuit when no locking is requested. */
        if (lockType == LockType.NONE) {
            standing.defunct = bin.isDefunct(index);
            return standing;
        }

        /*
         * We can avoid taking a lock if uncontended.  However, we must
         * call preLogWithoutLock to prevent logging on a replica, and as
         * good measure to prepare for undo.
         */
        if (allowUncontended &&
            lockManager.isLockUncontended(standing.lsn)) {
            assert verifyPendingDeleted(lockType);
            locker.preLogWithoutLock(dbImpl);
            standing.defunct = bin.isDefunct(index);
            return standing;
        }

        /*
         * If wasLockedAndExpiresSoon is true, we will treat the record as not
         * expired. If false, we will check for expiration after locking.
         */
        boolean wasLockedAndExpiresSoon = false;
        final int prevExpiration = bin.getExpiration(index);
        final boolean prevExpirationInHours = bin.isExpirationInHours();

        if (envImpl.expiresWithin(
            prevExpiration, prevExpirationInHours,
            dbImpl.getEnv().getTtlMaxTxnTime())) {

            if (lockManager.ownsOrSharesLock(locker, standing.lsn)) {
                wasLockedAndExpiresSoon = true;
            }
        }

        /*
         * Try a non-blocking lock first, to avoid unlatching.  If the default
         * is no-wait, use the standard lock method so
         * LockNotAvailableException is thrown; there is no need to try a
         * non-blocking lock twice.
         *
         * Even for dirty-read (LockType.NONE) we must call Locker.lock() since
         * it checks the locker state and may throw LockPreemptedException.
         *
         * NOTE(review): LockType.NONE already returned in the short circuit
         * above, so the dirty-read remark appears stale -- confirm.
         */
        if (locker.getDefaultNoWait()) {
            try {
                standing.lockResult = locker.lock(
                    standing.lsn, lockType, true /*noWait*/, dbImpl);

            } catch (LockNotAvailableException e) {
                /* The BIN latch is dropped before the exception propagates. */
                releaseBIN();
                throw e;

            } catch (LockConflictException e) {
                /* Any other conflict is treated as an unexpected condition. */
                releaseBIN();
                throw EnvironmentFailureException.unexpectedException(e);
            }
        } else {
            standing.lockResult = locker.nonBlockingLock(
                standing.lsn, lockType, false /*jumpAheadOfWaiters*/, dbImpl);
        }

        if (standing.lockResult.getLockGrant() != LockGrantType.DENIED) {
            /* Lock was granted while latched, no need to check LSN. */
            assert verifyPendingDeleted(lockType);

            /* A previously locked record is judged by isDeleted only. */
            standing.defunct = wasLockedAndExpiresSoon ?
                bin.isDeleted(index) : bin.isDefunct(index);

            return standing;
        }

        if (noWait) {
            /* We did not acquire the lock. */
            standing.defunct = wasLockedAndExpiresSoon ?
                bin.isDeleted(index) : bin.isDefunct(index);

            return standing;
        }

        /*
         * Unlatch, get a blocking lock, latch, and get the current LSN from
         * the slot.  If the LSN changes while unlatched, revert the lock and
         * repeat.
         */
        while (true) {

            /* Request a blocking lock. */
            releaseBIN();

            standing.lockResult = locker.lock(
                standing.lsn, lockType, false /*noWait*/, dbImpl);

            latchBIN();

            /* Check current LSN after locking. */
            final long newLsn = bin.getLsn(index);

            if (standing.lsn == newLsn) {

                /*
                 * If the expiration time changes while unlatched, then it
                 * could not have been previously locked.
                 */
                if (prevExpiration != bin.getExpiration(index) ||
                    prevExpirationInHours != bin.isExpirationInHours()) {
                    wasLockedAndExpiresSoon = false;
                }

                standing.defunct = wasLockedAndExpiresSoon ?
                    bin.isDeleted(index) : bin.isDefunct(index);

                assert verifyPendingDeleted(lockType);
                return standing;
            }

            /* The LSN changed, revert the lock and try again. */
            revertLock(standing);
            standing.lsn = newLsn;

            /* Check for a known-deleted null LSN. */
            if (newLsn == DbLsn.NULL_LSN) {
                assert bin.isEntryKnownDeleted(index);
                standing.defunct = true;
                return standing;
            }
        }
    }

    /**
     * After logging a deferred-write LN during eviction/checkpoint or a
     * migrated LN during cleaning, for every existing lock on the old LSN held
     * by another locker, we must lock the new LSN on behalf of that locker.
     *
     * This is done while holding the BIN latch so that the new LSN does not
     * change during the locking process.  The BIN must be latched on entry and
     * is left latched by this method.
     *
     * We release the lock on the oldLsn to prevent locks from accumulating
     * over time on a HandleLocker, as the cleaner migrates LNs, because
     * Database handle locks are legitimately very long-lived.  It is important
     * to first acquire all lsn locks and then release the oldLsn locks.
     * Releasing an oldLsn lock might allow another locker to acquire it, and
     * then acquiring another lsn lock may encounter a conflict.
[#20617] * * @see com.sleepycat.je.txn.HandleLocker * @see #lockLN */ public static void lockAfterLsnChange( DatabaseImpl dbImpl, long oldLsn, long newLsn, Locker excludeLocker) { final LockManager lockManager = dbImpl.getEnv().getTxnManager().getLockManager(); final Set owners = lockManager.getOwners(oldLsn); if (owners == null) { return; } /* Acquire lsn locks. */ for (LockInfo lockInfo : owners) { final Locker locker = lockInfo.getLocker(); if (locker != excludeLocker) { locker.lockAfterLsnChange(oldLsn, newLsn, dbImpl); } } /* Release oldLsn locks. */ for (LockInfo lockInfo : owners) { final Locker locker = lockInfo.getLocker(); if (locker != excludeLocker && locker.allowReleaseLockAfterLsnChange()) { locker.releaseLock(oldLsn); } } } /** * For debugging. Verify that a BINs cursor set refers to the BIN. */ private void verifyCursor(BIN bin) { if (!bin.getCursorSet().contains(this)) { throw new EnvironmentFailureException( dbImpl.getEnv(), EnvironmentFailureReason.UNEXPECTED_STATE, "BIN cursorSet is inconsistent"); } } /** * Calls checkCursorState and asserts false if an exception is thrown. * Otherwise returns true, so it can be called under an assertion. */ private boolean assertCursorState( boolean mustBeInitialized, boolean mustNotBeInitialized) { try { checkCursorState(mustBeInitialized, mustNotBeInitialized); return true; } catch (RuntimeException e) { assert false : e.toString() + " " + dumpToString(true); return false; // for compiler } } /** * Check that the cursor is open and optionally if it is initialized or * uninitialized. * * @throws IllegalStateException via all Cursor methods that call * Cursor.checkState (all get and put methods, plus more). 
*/
    public void checkCursorState(
        boolean mustBeInitialized,
        boolean mustNotBeInitialized) {

        switch (status) {

        case CURSOR_CLOSED:
            throw new IllegalStateException("Cursor has been closed.");

        case CURSOR_INITIALIZED:
            if (mustNotBeInitialized) {
                throw EnvironmentFailureException.unexpectedState(
                    "Cursor is initialized.");
            }
            /* Extra consistency check, performed in DEBUG mode only. */
            if (DEBUG && bin != null) {
                verifyCursor(bin);
            }
            break;

        case CURSOR_NOT_INITIALIZED:
            if (mustBeInitialized) {
                throw new IllegalStateException("Cursor not initialized.");
            }
            break;

        default:
            throw EnvironmentFailureException.unexpectedState(
                "Unknown cursor status: " + status);
        }
    }

    /**
     * Sanity check that the deleted state of a resident LN agrees with the
     * KD/PD flags of its slot.  Always returns true so that it can be invoked
     * from an assert statement; a mismatch is reported by the assertion
     * inside this method.
     *
     * @param lockType the lock type used by the calling operation.
     *
     * @return true, always.
     */
    private boolean verifyPendingDeleted(LockType lockType) {

        /*
         * Nothing can be verified when the LN is not locked or when the
         * cursor is not initialized.
         */
        if (lockType == LockType.NONE || bin == null || index < 0) {
            return true;
        }

        /* Nothing can be verified when the LN is not resident. */
        final LN residentLn = (LN) bin.getTarget(index);
        if (residentLn == null) {
            return true;
        }

        final boolean kd = bin.isEntryKnownDeleted(index);
        final boolean pd = bin.isEntryPendingDeleted(index);
        final boolean lnDeleted = residentLn.isDeleted();

        /*
         * A deleted LN requires KD or PD to be set.  A non-deleted LN
         * requires PD to be clear; KD may or may not be set since it is used
         * for various purposes (see IN.java).
         */
        assert (lnDeleted ? (kd || pd) : !pd) :
            "Deleted state mismatch LNDeleted = " + lnDeleted +
            " PD = " + pd + " KD = " + kd;

        return true;
    }

    /**
     * Reverts the lock recorded in the given standing, if there is one, and
     * clears the recorded lock result.
     *
     * @param standing the lock standing to revert.
     */
    public void revertLock(LockStanding standing) {
        if (standing.lockResult == null) {
            return;
        }
        revertLock(standing.lsn, standing.lockResult);
        standing.lockResult = null;
    }

    /**
     * Return this lock to its prior status. If the lock was just obtained,
     * release it.  If it was promoted, demote it.
*/ private void revertLock(long lsn, LockResult lockResult) { LockGrantType lockStatus = lockResult.getLockGrant(); if ((lockStatus == LockGrantType.NEW) || (lockStatus == LockGrantType.WAIT_NEW)) { locker.releaseLock(lsn); } else if ((lockStatus == LockGrantType.PROMOTION) || (lockStatus == LockGrantType.WAIT_PROMOTION)){ locker.demoteLock(lsn); } } /** * Locks the logical EOF node for the dbImpl. */ public void lockEof(LockType lockType) { locker.lock(dbImpl.getEofLsn(), lockType, false /*noWait*/, dbImpl); } /** * @throws EnvironmentFailureException if the underlying environment is * invalid. */ public void checkEnv() { dbImpl.getEnv().checkIfInvalid(); } /** * Callback object for traverseDbWithCursor. */ public interface WithCursor { /** * Called for each record in the dbImpl. * @return true to continue or false to stop the enumeration. */ boolean withCursor(CursorImpl cursor, DatabaseEntry key, DatabaseEntry data); } /** * Enumerates all records in a dbImpl non-transactionally and calls * the withCursor method for each record. Stops the enumeration if the * callback returns false. * * @param db DatabaseImpl to traverse. * * @param lockType non-null LockType for reading records. * * @param allowEviction should normally be true to evict when performing * multiple operations, but may be false if eviction is disallowed in a * particular context. * * @param withCursor callback object. 
*/
    public static void traverseDbWithCursor(
        DatabaseImpl db,
        LockType lockType,
        boolean allowEviction,
        WithCursor withCursor) {

        final DatabaseEntry key = new DatabaseEntry();
        final DatabaseEntry data = new DatabaseEntry();

        Locker locker = null;
        CursorImpl cursor = null;

        try {
            locker = LockerFactory.getInternalReadOperationLocker(db.getEnv());
            cursor = new CursorImpl(db, locker);
            cursor.setAllowEviction(allowEviction);

            /* Nothing to enumerate if the cursor cannot be positioned. */
            if (!cursor.positionFirstOrLast(true /*first*/)) {
                return;
            }

            OperationResult current = cursor.lockAndGetCurrent(
                key, data, lockType, false /*dirtyReadAll*/,
                true /*isLatched*/, true /*unlatch*/);

            while (true) {

                /*
                 * lockAndGetCurrent may have returned non-SUCCESS if the
                 * first record is defunct, in which case the callback is
                 * skipped and getNext below moves forward.
                 */
                if (current != null &&
                    !withCursor.withCursor(cursor, key, data)) {
                    break;
                }

                current = cursor.getNext(
                    key, data, lockType, false /*dirtyReadAll*/,
                    true /*forward*/, false /*isLatched*/,
                    null /*rangeConstraint*/);

                if (current == null) {
                    break;
                }
            }
        } finally {
            if (cursor != null) {
                cursor.releaseBIN();
                cursor.close();
            }

            if (locker != null) {
                locker.operationEnd();
            }
        }
    }

    /**
     * Prints the cursor to System.out for debugging purposes.  The BIN that
     * the cursor refers to is included if verbose is true.
     */
    public void dump(boolean verbose) {
        final String text = dumpToString(verbose);
        System.out.println(text);
    }

    /**
     * Prints the cursor to System.out for debugging purposes, in verbose
     * form.
     */
    public void dump() {
        final String text = dumpToString(true);
        System.out.println(text);
    }

    /*
     * dumper: maps a cursor status value to its symbolic name.
     */
    private String statusToString(byte status) {
        final String name;
        switch (status) {
        case CURSOR_NOT_INITIALIZED:
            name = "CURSOR_NOT_INITIALIZED";
            break;
        case CURSOR_INITIALIZED:
            name = "CURSOR_INITIALIZED";
            break;
        case CURSOR_CLOSED:
            name = "CURSOR_CLOSED";
            break;
        default:
            name = "UNKNOWN (" + Byte.toString(status) + ")";
            break;
        }
        return name;
    }

    /*
     * dumper: builds the debugging text for this cursor; the BIN dump is
     * included only when verbose is true and the cursor has a BIN.
     */
    public String dumpToString(boolean verbose) {
        final StringBuilder out = new StringBuilder();

        out.append("\n");
        if (verbose) {
            final String binDump = (bin == null) ? "" : bin.dumpString(2, true);
            out.append(binDump);
        }
        out.append("\n");

        return out.toString();
    }

    /*
     * For unit tests
     */
    public StatGroup getLockStats() {
        return locker.collectStats();
    }

    /**
     * Send trace messages to the java.util.logger. Don't rely on the logger
     * alone to conditionalize whether we send this message, we don't even want
     * to construct the message if the level is not enabled.
     */
    private void trace(
        Level level,
        String changeType,
        BIN theBin,
        int lnIndex,
        long oldLsn,
        long newLsn) {

        final EnvironmentImpl envImpl = dbImpl.getEnv();

        /* Skip building the message when it would not be logged. */
        if (!envImpl.getLogger().isLoggable(level)) {
            return;
        }

        final StringBuilder msg = new StringBuilder()
            .append(changeType)
            .append(" bin=")
            .append(theBin.getNodeId())
            .append(" lnIdx=")
            .append(lnIndex)
            .append(" oldLnLsn=")
            .append(DbLsn.getNoFormatString(oldLsn))
            .append(" newLnLsn=")
            .append(DbLsn.getNoFormatString(newLsn));

        LoggerUtils.logMsg(
            envImpl.getLogger(), envImpl, level, msg.toString());
    }

    /**
     * Send trace messages to the java.util.logger. Don't rely on the logger
     * alone to conditionalize whether we send this message, we don't even want
     * to construct the message if the level is not enabled.
     */
    private void traceInsert(
        Level level,
        BIN insertingBin,
        long lnLsn,
        int index) {

        final EnvironmentImpl envImpl = dbImpl.getEnv();

        /* Skip building the message when it would not be logged. */
        if (!envImpl.getLogger().isLoggable(level)) {
            return;
        }

        final StringBuilder msg = new StringBuilder()
            .append(TRACE_INSERT)
            .append(" bin=")
            .append(insertingBin.getNodeId())
            .append(" lnLsn=")
            .append(DbLsn.getNoFormatString(lnLsn))
            .append(" index=")
            .append(index);

        LoggerUtils.logMsg(
            envImpl.getLogger(), envImpl, level, msg.toString());
    }

    /* For unit testing only. */
    public void setTestHook(TestHook hook) {
        testHook = hook;
    }

    /*
     * Check that the target bin is latched. For use in assertions.  Returns
     * true when no latch is claimed or when the cursor has no BIN.
     */
    private boolean checkAlreadyLatched(boolean isLatched) {
        if (!isLatched || bin == null) {
            return true;
        }
        return bin.isLatchExclusiveOwner();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy