All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.tree.Tree Maven / Gradle / Ivy

The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.tree;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.sleepycat.je.CacheMode;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.dbi.DatabaseImpl;
import com.sleepycat.je.dbi.EnvironmentImpl;
import com.sleepycat.je.dbi.INList;
import com.sleepycat.je.latch.LatchContext;
import com.sleepycat.je.latch.LatchFactory;
import com.sleepycat.je.latch.LatchSupport;
import com.sleepycat.je.latch.LatchTable;
import com.sleepycat.je.latch.SharedLatch;
import com.sleepycat.je.log.Loggable;
import com.sleepycat.je.utilint.DbLsn;
import com.sleepycat.je.utilint.LoggerUtils;
import com.sleepycat.je.utilint.StatGroup;
import com.sleepycat.je.utilint.TestHook;
import com.sleepycat.je.utilint.TestHookExecute;
import com.sleepycat.je.utilint.VLSN;

/**
 * Tree implements the JE B+Tree.
 *
 * A note on tree search patterns:
 * There's a set of Tree.search* methods. Some clients of the tree use
 * those search methods directly, whereas other clients of the tree
 * tend to use methods built on top of search.
 *
 * The semantics of search* are
 *   they leave you pointing at a BIN or IN
 *   they don't tell you where the reference of interest is.
 * The semantics of the get* methods are:
 *   they leave you pointing at a BIN or IN
 *   they return the index of the slot of interest
 *   they traverse down to whatever level is needed
 *   they are built on top of search* methods.
 * For the future:
 * Over time, we need to clarify which methods are to be used by clients
 * of the tree. Preferably clients that call the tree use get*, although
 * their are cases where they need visibility into the tree structure.
 *
 * Also, search* should return the location of the slot to save us a
 * second binary search.
 *
 * Search Method Call Hierarchy
 * ----------------------------
 * getFirst/LastNode
 *  search
 *  CALLED BY:
 *   CursorImpl.getFirstOrLast
 *
 * getNext/PrevBin
 *  getParentINForChildIN
 *  searchSubTree
 *  CALLED BY:
 *   DupConvert
 *   CursorImpl.getNext
 *
 * getParentINForChildIN
 *  IN.findParent
 *  does not use shared latching
 *  CALLED BY:
 *   Checkpointer.flushIN (doFetch=false, targetLevel=-1)
 *   FileProcessor.processIN (doFetch=true, targetLevel=LEVEL)
 *   Evictor.evictIN (doFetch=true, targetLevel=-1)
 *   RecoveryManager.replaceOrInsertChild (doFetch=true, targetLevel=-1)
 *   getNext/PrevBin (doFetch=true, targetLevel=-1)
 *
 * search
 *  searchSubTree
 *  CALLED BY:
 *   CursorImpl.searchAndPosition
 *   INCompressor to find BIN
 *
 * searchSubTree
 *  uses shared grandparent latching
 *
 * getParentBINForChildLN
 *  searchSplitsAllowed
 *   CALLED BY:
 *    RecoveryManager.redo
 *    RecoveryManager.recoveryUndo
 *  search
 *   CALLED BY:
 *    RecoveryManager.abortUndo
 *    RecoveryManager.rollbackUndo
 *    FileProcessor.processLN
 *    Cleaner.processPendingLN
 *    UtilizationProfile.verifyLsnIsObsolete (utility)
 *
 * findBinForInsert
 *  searchSplitsAllowed
 *  CALLED BY:
 *   CursorImpl.putInternal
 *
 * searchSplitsAllowed
 *  uses shared non-grandparent latching
 *  CALLED BY:
 *   DupConvert (instead of findBinForInsert, which needs a cursor)
 *
 * Possible Shared Latching Improvements
 * -------------------------------------
 * By implementing shared latching for BINs we would get better concurrency in
 * these cases:
 *  Reads when LN is in cache, or LN is not needed (key-only op, e.g., dups)
 */
public final class Tree implements Loggable {

    /* For debug tracing */
    private static final String TRACE_ROOT_SPLIT = "RootSplit:";

    private DatabaseImpl database;

    private int maxTreeEntriesPerNode;

    private ChildReference root;

    /*
     * Latch that must be held when using/accessing the root node.  Protects
     * against the root being changed out from underneath us by splitRoot.
     * After the root IN is latched, the rootLatch can be released.
     */
    private SharedLatch rootLatch;

    /*
     * We don't need the stack trace on this so always throw a static and
     * avoid the cost of Throwable.fillInStack() every time it's thrown.
     * [#13354].
     */
    private static SplitRequiredException splitRequiredException =
        new SplitRequiredException();

    /* Stats */
    private StatGroup stats;

    private final ThreadLocal treeStatsAccumulatorTL =
        new ThreadLocal();

    /* For unit tests */
    private TestHook waitHook; // used for generating race conditions
    private TestHook searchHook; // [#12736]
    private TestHook ckptHook; // [#13897]
    private TestHook getParentINHook;
    private TestHook fetchINHook;

    /**
     * Embodies an enum for the type of search being performed.  NORMAL means
     * do a regular search down the tree.  LEFT/RIGHT means search down the
     * left/right side to find the first/last node in the tree.
     */
    public static class SearchType {
        /* Search types */
        public static final SearchType NORMAL = new SearchType();
        public static final SearchType LEFT   = new SearchType();
        public static final SearchType RIGHT  = new SearchType();

        /* No lock types can be defined outside this class. */
        private SearchType() {
        }
    }

    /*
     * Class that overrides ChildReference methods to enforce rules that apply
     * to the root.
     *
     * Overrides fetchTarget() so that if the rootLatch is not held exclusively
     * when the root is fetched, we upgrade it to exclusive. Also overrides
     * setter methods to assert that an exclusive latch is held.
     *
     * Overrides setDirty to dirty the DatabaseImpl, so that the MapLN will be
     * logged during the next checkpoint. This is critical when updating the
     * root LSN.
     */
     private class RootChildReference extends ChildReference {

        private RootChildReference() {
            super();
        }

        private RootChildReference(Node target, byte[] key, long lsn) {
            super(target, key, lsn);
        }

        /* Caller is responsible for releasing rootLatch. */
        @Override
        public Node fetchTarget(DatabaseImpl database, IN in)
            throws DatabaseException {

            if (getTarget() == null && !rootLatch.isExclusiveOwner()) {

                rootLatch.release();
                rootLatch.acquireExclusive();

                /*
                 * If the root field changed while unlatched then we have an
                 * invalid state and cannot continue. [#21686]
                 */
                if (this != root) {
                    throw EnvironmentFailureException.unexpectedState(
                        database.getEnv(),
                        "Root changed while unlatched, dbId=" +
                        database.getId());
                }
            }

            return super.fetchTarget(database, in);
        }

        @Override
        public void setTarget(Node target) {
            assert rootLatch.isExclusiveOwner();
            super.setTarget(target);
        }

        @Override
        public void clearTarget() {
            assert rootLatch.isExclusiveOwner();
            super.clearTarget();
        }

        @Override
        public void setLsn(long lsn) {
            assert rootLatch.isExclusiveOwner();
            super.setLsn(lsn);
        }

        @Override
        void updateLsnAfterOptionalLog(DatabaseImpl dbImpl, long lsn) {
            assert rootLatch.isExclusiveOwner();
            super.updateLsnAfterOptionalLog(dbImpl, lsn);
        }

        @Override
        void setDirty() {
            super.setDirty();
            database.setDirty();
        }
    }

    /**
     * Create a new tree.
     */
    public Tree(DatabaseImpl database) {
        init(database);
        setDatabase(database);
    }

    /**
     * Create a tree that's being read in from the log.
     */
    public Tree() {
        init(null);
        maxTreeEntriesPerNode = 0;
    }

    /**
     * constructor helper
     */
    private void init(DatabaseImpl database) {
        this.root = null;
        this.database = database;
    }

    /**
     * Set the database for this tree. Used by recovery when recreating an
     * existing tree.
     */
    public void setDatabase(DatabaseImpl database) {
        this.database = database;

        final EnvironmentImpl envImpl = database.getEnv();

        /*
         * The LatchContext for the root is special in that it is considered a
         * Btree latch (the Btree latch table is used), but the context is not
         * implemented by the IN class.
         */
        final LatchContext latchContext = new LatchContext() {
            @Override
            public int getLatchTimeoutMs() {
                return envImpl.getLatchTimeoutMs();
            }
            @Override
            public String getLatchName() {
                return "RootLatch";
            }
            @Override
            public LatchTable getLatchTable() {
                return LatchSupport.btreeLatchTable;
            }
            @Override
            public EnvironmentImpl getEnvImplForFatalException() {
                return envImpl;
            }
        };

        rootLatch = LatchFactory.createSharedLatch(
            latchContext, false /*exclusiveOnly*/);


        maxTreeEntriesPerNode = database.getNodeMaxTreeEntries();
    }

    /**
     * @return the database for this Tree.
     */
    public DatabaseImpl getDatabase() {
        return database;
    }

    /**
     * Called when latching a child and the parent is latched. Used to
     * opportunistically validate the parent pointer.
     */
    private static void latchChild(final IN parent,
                                   final IN child,
                                   final CacheMode cacheMode) {
        child.latch(cacheMode);

        if (child.getParent() != parent) {
            throw EnvironmentFailureException.unexpectedState();
        }
    }

    /**
     * Called when latching a child and the parent is latched. Used to
     * opportunistically validate the parent pointer.
     */
    private static void latchChildShared(final IN parent,
                                         final IN child,
                                         final CacheMode cacheMode) {
        child.latchShared(cacheMode);

        if (child.getParent() != parent) {
            throw EnvironmentFailureException.unexpectedState();
        }
    }

    public void latchRootLatchExclusive()
        throws DatabaseException {

        rootLatch.acquireExclusive();
    }

    public void releaseRootLatch()
        throws DatabaseException {

        rootLatch.release();
    }

    /**
     * Set the root for the tree. Should only be called within the root latch.
     */
    public void setRoot(ChildReference newRoot, boolean notLatched) {
        assert (notLatched || rootLatch.isExclusiveOwner());
        root = newRoot;
    }

    public ChildReference makeRootChildReference(
        Node target,
        byte[] key,
        long lsn) {

        return new RootChildReference(target, key, lsn);
    }

    private ChildReference makeRootChildReference() {
        return new RootChildReference();
    }

    /*
     * A tree doesn't have a root if (a) the root field is null, or (b) the
     * root is non-null, but has neither a valid target nor a valid LSN. Case
     * (b) can happen if the database is or was previously opened in deferred
     * write mode.
     *
     * @return false if there is no real root.
     */
    public boolean rootExists() {
        if (root == null) {
            return false;
        }

        if ((root.getTarget() == null) &&
            (root.getLsn() == DbLsn.NULL_LSN)) {
            return false;
        }

        return true;
    }

    /**
     * Perform a fast check to see if the root IN is resident.  No latching is
     * performed.  To ensure that the root IN is not loaded by another thread,
     * this method should be called while holding a write lock on the MapLN.
     * That will prevent opening the DB in another thread, and potentially
     * loading the root IN. [#13415]
     */
    public boolean isRootResident() {
        return root != null && root.getTarget() != null;
    }

    /**
     * Helper to obtain the root IN with shared root latching.  Optionally
     * updates the generation of the root when latching it.
     */
    public IN getRootIN(CacheMode cacheMode)
        throws DatabaseException {

        return getRootINInternal(cacheMode, false/*exclusive*/);
    }

    /**
     * Helper to obtain the root IN with exclusive root latching.  Optionally
     * updates the generation of the root when latching it.
     */
    public IN getRootINLatchedExclusive(CacheMode cacheMode)
        throws DatabaseException {

        return getRootINInternal(cacheMode, true/*exclusive*/);
    }

    private IN getRootINInternal(CacheMode cacheMode, boolean exclusive)
        throws DatabaseException {

        rootLatch.acquireShared();
        try {
            return getRootINRootAlreadyLatched(cacheMode, exclusive);
        } finally {
            rootLatch.release();
        }
    }

    /**
     * Helper to obtain the root IN, when the root latch is already held.
     */
    public IN getRootINRootAlreadyLatched(
        CacheMode cacheMode,
        boolean exclusive) {

        if (!rootExists()) {
            return null;
        }

        final IN rootIN = (IN) root.fetchTarget(database, null);

        if (exclusive) {
            rootIN.latch(cacheMode);
        } else {
            rootIN.latchShared(cacheMode);
        }
        return rootIN;
    }

    public IN getResidentRootIN(boolean latched)
        throws DatabaseException {

        IN rootIN = null;
        if (rootExists()) {
            rootIN = (IN) root.getTarget();
            if (rootIN != null && latched) {
                rootIN.latchShared(CacheMode.UNCHANGED);
            }
        }
        return rootIN;
    }

    public IN withRootLatchedExclusive(WithRootLatched wrl)
        throws DatabaseException {

        try {
            rootLatch.acquireExclusive();
            return wrl.doWork(root);
        } finally {
            rootLatch.release();
        }
    }

    public IN withRootLatchedShared(WithRootLatched wrl)
        throws DatabaseException {

        try {
            rootLatch.acquireShared();
            return wrl.doWork(root);
        } finally {
            rootLatch.release();
        }
    }

    /**
     * Get LSN of the rootIN. Obtained without latching, should only be
     * accessed while quiescent.
     */
    public long getRootLsn() {
        if (root == null) {
            return DbLsn.NULL_LSN;
        } else {
            return root.getLsn();
        }
    }

    /**
     * Cheaply calculates and returns the maximum possible number of LNs in the
     * btree.
     */
    public long getMaxLNs() {

        final int levels;
        final int topLevelSlots;
        rootLatch.acquireShared();
        try {
            IN rootIN = (IN) root.fetchTarget(database, null);
            levels = rootIN.getLevel() & IN.LEVEL_MASK;
            topLevelSlots = rootIN.getNEntries();
        } finally {
            rootLatch.release();
        }
        return (long) (topLevelSlots *
                       Math.pow(database.getNodeMaxTreeEntries(), levels - 1));
    }

    /**
     * Deletes a BIN specified by key from the tree. If the BIN resides in a
     * subtree that can be pruned away, prune as much as possible, so we
     * don't leave a branch that has no BINs.
     *
     * It's possible that the targeted BIN will now have entries, or will
     * have resident cursors. Either will prevent deletion (see exceptions).
     *
     * Unlike splits, IN deletion does not immediately log the subtree parent
     * or its ancestors. It is sufficient to simply dirty the subtree parent.
     * Logging is not necessary for correctness, and if a checkpoint does not
     * flush the subtree parent then recovery will add the BINs to the
     * compressor queue when redoing the LN deletions.
     *
     * @param idKey - the identifier key of the node to delete.
     *
     * @throws NodeNotEmptyException if the BIN is not empty. The deletion is
     * no longer possible.
     *
     * @throws CursorsExistException is the BIN has cursors. The deletion
     * should be retried later by the INCompressor.
     */
    public void delete(byte[] idKey)
        throws NodeNotEmptyException, CursorsExistException {

        final EnvironmentImpl envImpl = database.getEnv();
        final Logger logger = envImpl.getLogger();

        final List nodeLadder = searchDeletableSubTree(idKey);

        if (nodeLadder == null) {
            /*
             * The tree is empty, so do nothing.  Root compression is no
             * longer supported.  Root compression has no impact on memory
             * usage now that we evict the root IN.  It reduces log space
             * taken by INs for empty (but not removed) databases, yet
             * requires logging a INDelete and MapLN; this provides very
             * little benefit, if any.  Because it requires extensive
             * testing (which has not been done), this minor benefit is not
             * worth the cost.  And by removing it we no longer log
             * INDelete, which reduces complexity going forward. [#17546]
             */
            return;
        }

        /* Detach this subtree. */
        final SplitInfo detachPoint = nodeLadder.get(0);
        try {
            final IN branchParent = detachPoint.parent;
            final IN branchRoot = detachPoint.child;

            if (logger.isLoggable(Level.FINEST)) {
                LoggerUtils.envLogMsg(
                    Level.FINEST, envImpl,
                    "Tree.delete() " +
                    Thread.currentThread().getId() + "-" +
                    Thread.currentThread().getName() + "-" +
                    envImpl.getName() +
                    " Deleting child node: " + branchRoot.getNodeId() +
                    " from parent node: " + branchParent.getNodeId() +
                    " parent has " + branchParent.getNEntries() +
                    " children");
            }

            branchParent.deleteEntry(detachPoint.index);

            /*
             * Remove deleted INs from the INList/cache and count them as
             * provisionally obsolete. The parent is not logged immediately, so
             * we can't count them immediately obsolete. They will be counted
             * obsolete when an ancestor is logged non-provisionally. [#21348]
             */
            final INList inList = database.getEnv().getInMemoryINs();

            for (final SplitInfo info : nodeLadder) {

                final IN child = info.child;

                assert !child.isBINDelta(false);
                assert !(child.isUpperIN() && child.getNEntries() > 1);
                assert !(child.isBIN() && child.getNEntries() > 0);

                /*
                 * Remove child from cache. The branch root was removed by
                 * deleteEntry above.
                 */
                if (child != branchRoot) {
                    inList.remove(child);
                }

                /* Count full and delta versions as obsolete. */
                branchParent.trackProvisionalObsolete(
                    child, child.getLastFullLsn());

                branchParent.trackProvisionalObsolete(
                    child, child.getLastDeltaLsn());
            }

            if (logger.isLoggable(Level.FINE)) {
                LoggerUtils.envLogMsg(
                    Level.FINE, envImpl,
                    "SubtreeRemoval: subtreeRoot = " + branchRoot.getNodeId());
            }

        } finally {
            releaseNodeLadderLatches(nodeLadder);
        }
    }

    /**
     * Search down the tree using a key, but instead of returning the BIN that
     * houses that key, find the point where we can detach a deletable
     * subtree. A deletable subtree is a branch where each IN has one child,
     * and the bottom BIN has no entries and no resident cursors. That point
     * can be found by saving a pointer to the lowest node in the path with
     * more than one entry.
     *
     *              INa
     *             /   \
     *          INb    INc
     *          |       |
     *         INd     ..
     *         / \
     *      INe  ..
     *       |
     *     BINx (suspected of being empty)
     *
     * In this case, we'd like to prune off the subtree headed by INe. INd
     * is the parent of this deletable subtree.
     *
     * The method returns a list of parent/child/index structures. In this
     * example, the list will hold:
     *  INd/INe/index
     *  INe/BINx/index
     * All three nodes will be EX-latched.
     *
     * @return null if the entire Btree is empty, or a list of SplitInfo for
     * the branch to be deleted. If non-null is returned, the INs in the list
     * will be EX-latched; otherwise, no INs will be latched.
     *
     * @throws NodeNotEmptyException if the BIN is not empty.
     *
     * @throws CursorsExistException is the BIN has cursors.
     */
    private List searchDeletableSubTree(byte[] key)
        throws NodeNotEmptyException, CursorsExistException {

        assert (key!= null);

        IN parent = getRootINLatchedExclusive(CacheMode.UNCHANGED);

        if (parent == null) {
            /* Tree was never persisted. */
            return null;
        }

        final ArrayList nodeLadder = new ArrayList<>();

        try {
            IN child;
            IN pinedIN = null;

            do {
                if (parent.getNEntries() == 0) {
                    throw EnvironmentFailureException.unexpectedState(
                        "Found upper IN with 0 entries");
                }

                if (parent.getNEntries() > 1) {
                    /*
                     * A node with more than one entry is the lowest potential
                     * branch parent. Unlatch/discard ancestors of this parent.
                     */
                    for (final SplitInfo info : nodeLadder) {
                        info.parent.releaseLatch();
                    }
                    nodeLadder.clear();
                    pinedIN = null;
                } else if (parent.isPinned()) {
                    pinedIN = parent;
                }

                final int index = parent.findEntry(key, false, false);
                assert index >= 0;

                /* Get the child node that matches. */
                child = parent.fetchIN(index, CacheMode.UNCHANGED);

                latchChild(parent, child, CacheMode.UNCHANGED);

                nodeLadder.add(new SplitInfo(parent, child, index));

                /* Continue down a level */
                parent = child;
            } while (!parent.isBIN());

            if (pinedIN != null) {
                throw CursorsExistException.CURSORS_EXIST;
            }

            /*
             * See if there is a reason we can't delete this BIN -- i.e.
             * new items have been inserted, or a cursor exists on it.
             */
            assert (child.isBIN());
            final BIN bin = (BIN) child;

            if (bin.getNEntries() != 0) {
                throw NodeNotEmptyException.NODE_NOT_EMPTY;
            }

            if (bin.isBINDelta()) {
                throw EnvironmentFailureException.unexpectedState(
                    "Found BIN delta with 0 entries");
            }

            /*
             * This case can happen if we are keeping a cursor on an empty
             * BIN as we traverse.
             */
            if (bin.nCursors() > 0 || child.isPinned()) {
                throw CursorsExistException.CURSORS_EXIST;
            }

            if (nodeLadder.get(0).parent.getNEntries() <= 1) {
                /* The entire tree is empty. */
                releaseNodeLadderLatches(nodeLadder);
                return null;
            }

            return nodeLadder;

        } catch (Throwable e) {
            releaseNodeLadderLatches(nodeLadder);
            /* Release parent in case it was not added to nodeLadder. */
            parent.releaseLatchIfOwner();
            throw e;
        }
    }

    /**
     * Release latched acquired by searchDeletableSubTree. Each child is
     * latched, plus the parent of the first node (the branch parent).
     */
    private void releaseNodeLadderLatches(List nodeLadder)
        throws DatabaseException {

        if (nodeLadder.isEmpty()) {
            return;
        }

        nodeLadder.get(0).parent.releaseLatch();

        for (final SplitInfo info : nodeLadder) {
            info.child.releaseLatch();
        }

        nodeLadder.clear();
    }

    /**
     * Find the leftmost node (IN or BIN) in the tree.
     *
     * @return the leftmost node in the tree, null if the tree is empty.  The
     * returned node is latched and the caller must release it.
     */
    public BIN getFirstNode(CacheMode cacheMode)
        throws DatabaseException {

        BIN bin = search(
            null /*key*/, SearchType.LEFT, null /*binBoundary*/,
            cacheMode, null /*comparator*/);

        if (bin != null) {
            bin.mutateToFullBIN(false /*leaveFreeSlot*/);
        }

        return bin;
    }

    /**
     * Find the rightmost node (IN or BIN) in the tree.
     *
     * @return the rightmost node in the tree, null if the tree is empty.  The
     * returned node is latched and the caller must release it.
     */
    public BIN getLastNode(CacheMode cacheMode)
        throws DatabaseException {

        BIN bin = search(
            null /*key*/, SearchType.RIGHT, null /*binBoundary*/,
            cacheMode, null /*comparator*/);

        if (bin != null) {
            bin.mutateToFullBIN(false /*leaveFreeSlot*/);
        }

        return bin;
    }

    /**
     * Return a reference to the adjacent BIN.
     *
     * @param bin The BIN to find the next BIN for.  This BIN is latched.
     *
     * @return The next BIN, or null if there are no more.  The returned node
     * is latched and the caller must release it.  If null is returned, the
     * argument BIN remains latched.
     */
    public BIN getNextBin(BIN bin, CacheMode cacheMode)
        throws DatabaseException {

        return (BIN) getNextIN(bin, true, false, cacheMode);
    }

    /**
     * Return a reference to the previous BIN.
     *
     * @param bin The BIN to find the next BIN for.  This BIN is latched.
     *
     * @return The previous BIN, or null if there are no more.  The returned
     * node is latched and the caller must release it.  If null is returned,
     * the argument bin remains latched.
     */
    public BIN getPrevBin(BIN bin, CacheMode cacheMode)
        throws DatabaseException {

        return (BIN) getNextIN(bin, false, false, cacheMode);
    }

    /**
     * Returns the next IN in the tree before/after the given IN, and at the
     * same level.  For example, if a BIN is passed in the prevIn parameter,
     * the next BIN will be returned.
     *
     * TODO: A possible problem with this method is that we don't know for
     * certain whether it works properly in the face of splits.  There are
     * comments below indicating it does.  But the Cursor.checkForInsertion
     * method was apparently added because getNextBin/getPrevBin didn't work
     * properly, and may skip a BIN.  So at least it didn't work properly in
     * the distant past.  Archeology and possibly testing are needed to find
     * the truth.  Hopefully it does now work, and Cursor.checkForInsertion can
     * be removed.
     *
     * TODO: To eliminate EX latches on upper INs, a new getParentINForChildIN
     * is needed, which will return with both the parent and the grandparent
     * SH-latched. If we do this, then in Tree.getNextIN() the call to
     * searchSubtree() will be able to do grandparent latching, and the call
     * to parent.fetchIN(index) will also be replace with a local version of
     * grandparent latching. 
     */
    public IN getNextIN(
        IN prevIn,
        boolean forward,
        boolean latchShared,
        CacheMode cacheMode) {

        assert(prevIn.isLatchOwner());

        if (LatchSupport.TRACK_LATCHES) {
            LatchSupport.expectBtreeLatchesHeld(1);
        }

        prevIn.mutateToFullBIN(false /*leaveFreeSlot*/);

        /*
         * Use the right most key (for a forward progressing cursor) or the
         * left most key (for a backward progressing cursor) as the search key.
         * The reason is that the IN may get split while finding the next IN so
         * it's not safe to take the IN's identifierKey entry.  If the IN gets
         * split, then the right (left) most key will still be on the
         * resultant node.  The exception to this is that if there are no
         * entries, we just use the identifier key.
         */
        final byte[] searchKey;

        if (prevIn.getNEntries() == 0) {
            searchKey = prevIn.getIdentifierKey();
        } else if (forward) {
            searchKey = prevIn.getKey(prevIn.getNEntries() - 1);
        } else {
            searchKey = prevIn.getKey(0);
        }

        final int targetLevel = prevIn.getLevel();
        IN curr = prevIn;
        boolean currIsLatched = false;
        IN parent = null;
        IN nextIN = null;
        boolean nextINIsLatched = false;
        boolean normalExit = false;

        /*
         * Ascend the tree until we find a level that still has nodes to the
         * right (or left if !forward) of the path that we're on.  If we reach
         * the root level, we're done.
         */
        try {
            while (true) {

                /*
                 * Move up a level from where we are now and check to see if we
                 * reached the top of the tree.
                 */
                currIsLatched = false;

                if (curr.isRoot()) {
                    /* We've reached the root of the tree. */
                    curr.releaseLatch();

                    if (LatchSupport.TRACK_LATCHES) {
                        LatchSupport.expectBtreeLatchesHeld(0);
                    }
                    normalExit = true;
                    return null;
                }

                final SearchResult result = getParentINForChildIN(
                    curr, false, /*useTargetLevel*/
                    true, /*doFetch*/ cacheMode);

                if (result.exactParentFound) {
                    if (LatchSupport.TRACK_LATCHES) {
                        LatchSupport.expectBtreeLatchesHeld(1);
                    }
                    parent = result.parent;
                } else {
                    throw EnvironmentFailureException.unexpectedState(
                        "Failed to find parent for IN");
                }

                /*
                 * Figure out which entry we are in the parent. Add (subtract)
                 * 1 to move to the next (previous) one and check that we're
                 * still pointing to a valid child.  Don't just use the result
                 * of the parent.findEntry call in getParentNode, because we
                 * want to use our explicitly chosen search key.
                 */
                int index = parent.findEntry(searchKey, false, false);

                final boolean moreEntriesThisIn;

                if (forward) {
                    index++;
                    moreEntriesThisIn = (index < parent.getNEntries());
                } else {
                    moreEntriesThisIn = (index > 0);
                    index--;
                }

                if (moreEntriesThisIn) {

                    /*
                     * There are more entries to the right of the current path
                     * in parent.  Get the entry, and then descend down the
                     * left most path to an IN.
                     */
                    nextIN = parent.fetchIN(index, cacheMode);

                    if (LatchSupport.TRACK_LATCHES) {
                        LatchSupport.expectBtreeLatchesHeld(1);
                    }

                    if (nextIN.getLevel() == targetLevel) {

                        if (latchShared) {
                            latchChildShared(parent, nextIN, cacheMode);
                        } else {
                            latchChild(parent, nextIN, cacheMode);
                        }
                        nextINIsLatched = true;

                        nextIN.mutateToFullBIN(false /*leaveFreeSlot*/);

                        parent.releaseLatch();
                        parent = null; // to avoid falsely unlatching parent

                        final TreeWalkerStatsAccumulator treeStatsAccumulator =
                            getTreeStatsAccumulator();
                        if (treeStatsAccumulator != null) {
                            nextIN.accumulateStats(treeStatsAccumulator);
                        }

                        if (LatchSupport.TRACK_LATCHES) {
                            LatchSupport.expectBtreeLatchesHeld(1);
                        }

                        normalExit = true;
                        return nextIN;

                    } else {

                        /*
                         * We landed at a higher level than the target level.
                         * Descend down to the appropriate level.
                         */
                        assert(nextIN.isUpperIN());
                        nextIN.latch(cacheMode);
                        nextINIsLatched = true;

                        parent.releaseLatch();
                        parent = null; // to avoid falsely unlatching parent
                        nextINIsLatched = false;

                        final IN ret = searchSubTree(
                            nextIN, null, /*key*/
                            (forward ? SearchType.LEFT : SearchType.RIGHT),
                            targetLevel, latchShared, cacheMode,
                            null /*comparator*/);

                        if (LatchSupport.TRACK_LATCHES) {
                            LatchSupport.expectBtreeLatchesHeld(1);
                        }

                        if (ret.getLevel() == targetLevel) {
                            normalExit = true;
                            return ret;
                        } else {
                            throw EnvironmentFailureException.unexpectedState(
                                "subtree did not have a IN at level " +
                                targetLevel);
                        }
                    }
                }

                /* Nothing at this level. Ascend to a higher level. */
                curr = parent;
                currIsLatched = true;
                parent = null; // to avoid falsely unlatching parent below
            }
        } finally {
            if (!normalExit) {
                if (curr != null && currIsLatched) {
                    curr.releaseLatch();
                }

                if (parent != null) {
                    parent.releaseLatch();
                }

                if (nextIN != null && nextINIsLatched) {
                    nextIN.releaseLatch();
                }
            }
        }
    }

    /**
     * Search for the parent P of a given IN C (where C is viewed as a logical
     * node; not as a java obj). If found, P is returned latched exclusively.
     * The method is used when C has been accessed "directly", i.e., not via a
     * tree search, and we need to perform an operation on C that requires an
     * update to its parent. Such situations arise during eviction (C has been
     * accessed via the LRU list), checkpointing, and recovery (C has been read
     * from the log and is not attached to the in-memory tree).
     *
     * The method uses C's identifierKey to search down the tree until:
     *
     * (a) doFetch is false and we need to access a node that is not cached.
     * In this case, we are actually looking for the cached copies of both C
     * and its parent, so a cache miss on the path to C is considered a 
     * failure. This search mode is used by the evictor: to evict C (which has
     * been retrieved from the LRU), its parent must be found and EX-latched;
     * however, if the C has been evicted already by another thread, there is
     * nothing to do (C will GC-ed).
     * or
     * (b) We reach a node whose node id equals the C's node id. In this case,
     * we know for sure that C still belongs to the BTree and its parent has
     * been found.
     * or
     * (c) useTargetLevel is true and we reach a node P that is at one level
     * above C's level. We know that P contains a slot S whose corresponding
     * key range includes C's identifierKey. Since we haven't read the child
     * node under S to check its node id, we cannot know for sure that C is
     * still in the tree. Nevertheless, we consider this situation a success,
     * i.e., P is the parent node we are looking for. In this search mode,
     * after this method returns, the caller is expected to take further
     * action based on the info in slot S. For example, if C was created
     * by reading a log entry at LSN L, and the LSN at slot S is also L, then
     * we know P is the real parent (and we have thus saved a possible extra
     * I/O to refetch the C node from the log to check its node id). This
     * search mode is used by the cleaner.
     * or
     * (d) None of the above conditions occur and the bottom of the BTree is
     * reached. In this case, no parent exists (the child node is an old 
     * version of a node that has been removed from the BTree).
     *
     * @param child The child node for which to find the parent.  This node is
     * latched by the caller and is unlatched by this function before returning
     * to the caller.
     *
     * @param useTargetLevel If true, the search is considered successful if
     * a node P is reached at one level above C's level. P is the parent to
     * return to the caller.
     *
     * @param doFetch if false, stop the search if we run into a non-resident
     * child and assume that no parent exists.
     *
     * @param cacheMode The CacheMode for affecting the hotness of the nodes
     * visited during the search.
     *
     * @return a SearchResult object. If the parent has been found,
     * result.foundExactMatch is true, result.parent refers to that node, and
     * result.index is the slot for the child IN inside the parent IN.
     * Otherwise, result.foundExactMatch is false and  result.parent is null.
     */
    public SearchResult getParentINForChildIN(
        IN child,
        boolean useTargetLevel,
        boolean doFetch,
        CacheMode cacheMode)
        throws DatabaseException {

        return getParentINForChildIN(
            child, useTargetLevel, doFetch,
            cacheMode, null /*trackingList*/);
    }


    /**
     * This version of getParentINForChildIN does the same thing as the version
     * above, but also adds a "trackingList" param. If trackingList is not
     * null, the LSNs of the parents visited along the way are added to the
     * list, as a debug tracing mechanism. This is meant to stay in production,
     * to add information to the log.
     */
    public SearchResult getParentINForChildIN(
        IN child,
        boolean useTargetLevel,
        boolean doFetch,
        CacheMode cacheMode,
        List trackingList)
        throws DatabaseException {

        /* Sanity checks */
        if (child == null) {
            throw EnvironmentFailureException.unexpectedState(
                "getParentINForChildIN given null child node");
        }

        assert child.isLatchOwner();

        /*
         * Get information from child before releasing latch.
         */
        long targetId = child.getNodeId();
        byte[] targetKey = child.getIdentifierKey();
        int targetLevel = (useTargetLevel ? child.getLevel() : -1);
        int exclusiveLevel = child.getLevel() + 1;
        boolean requireExactMatch = true;

        child.releaseLatch();

        return getParentINForChildIN(
            targetId, targetKey, targetLevel,
            exclusiveLevel, requireExactMatch, doFetch,
            cacheMode, trackingList);
    }

    /**
     * This version of getParentINForChildIN() is the actual implementation
     * of the previous 2 versions (read the comments there), but it also
     * implements one additional use cases via the extra "requireExactMatch"
     * param.
     *
     * {@literal requireExactMatch == false && doFetch == false}
     * In this case we are actually looking for the lowest cached ancestor
     * of the C node. The method will always return a node (considered as the
     * "parent") unless the BTree is empty (has no nodes at all). The returned
     * node must be latched, but not necessarily in EX mode. This search mode
     * is used by the checkpointer.
     *
     * The exclusiveLevel param:
     * In general, if exclusiveLevel == L, nodes above L will be SH latched and
     * nodes at or below L will be EX-latched. In all current use cases, L is
     * set to 1 + C.level. Note that if doFetch == false, the normalized
     * exclusiveLevel must be {@literal >=} 2 so that loadIN can be called.
     */
    public SearchResult getParentINForChildIN(
        long targetNodeId,
        byte[] targetKey,
        int targetLevel,
        int exclusiveLevel,
        boolean requireExactMatch,
        boolean doFetch,
        CacheMode cacheMode,
        List trackingList)
        throws DatabaseException {

        /* Call hook before latching. No latches are held. */
        TestHookExecute.doHookIfSet(getParentINHook);

        assert doFetch || (exclusiveLevel & IN.LEVEL_MASK) >= 2;

        /*
         * SearchResult is initialized as follows:
         * exactParentFound = false;
         * parent = null; index = -1; childNotResident = false;
         */
        SearchResult result = new SearchResult();

        /* Get the tree root, SH-latched. */
        IN rootIN = getRootIN(cacheMode);

        if (rootIN == null) {
            return result;
        }

        /* If the root is the target node, there is no parent */
        assert(rootIN.getNodeId() != targetNodeId);
        assert(rootIN.getLevel() >= exclusiveLevel) :
            " rootLevel=" + rootIN.getLevel() +
            " exLevel=" + exclusiveLevel;

        IN parent = rootIN;
        IN child = null;
        boolean success = false;

        try {

            if (rootIN.getLevel() <= exclusiveLevel) {
                rootIN.releaseLatch();
                rootIN = getRootINLatchedExclusive(cacheMode);
                assert(rootIN != null);
                parent = rootIN;
            }

            while (true) {

                assert(parent.getNEntries() > 0);

                result.index = parent.findEntry(targetKey, false, false);

                if (trackingList != null) {
                    trackingList.add(new TrackingInfo(
                        parent.getLsn(result.index), parent.getNodeId(),
                        parent.getNEntries(), result.index));
                }

                assert TestHookExecute.doHookIfSet(searchHook);

                if (targetLevel > 0 && parent.getLevel() == targetLevel + 1) {
                    result.exactParentFound = true;
                    result.parent = parent;
                    break;
                }

                if (doFetch) {
                    child = parent.fetchINWithNoLatch(result, targetKey);

                    if (child == null) {
                        if (trackingList != null) {
                            trackingList.clear();
                        }
                        result.reset();

                        TestHookExecute.doHookIfSet(fetchINHook, child);

                        rootIN = getRootIN(cacheMode);
                        assert(rootIN != null);

                        if (rootIN.getLevel() <= exclusiveLevel) {
                            rootIN.releaseLatch();
                            rootIN = getRootINLatchedExclusive(cacheMode);
                            assert(rootIN != null);
                        }

                        parent = rootIN;
                        continue;
                    }
                } else {

                    /*
                     * We can only call loadIN if we have an EX-latch on the
                     * parent. However, calling loadIN is only necessary when
                     * the parent is at level 2, since UINs are not cached
                     * off-heap, and exclusiveLevel is currently always >= 2.
                     */
                    if (parent.getNormalizedLevel() == 2) {
                        child = parent.loadIN(result.index, cacheMode);
                    } else {
                        child = (IN) parent.getTarget(result.index);
                    }
                }

                assert(child != null || !doFetch);

                if (child == null) {
                    if (requireExactMatch) {
                        parent.releaseLatch();
                    } else {
                        result.parent = parent;
                    }
                    result.childNotResident = true;
                    break;
                }

                if (child.getNodeId() == targetNodeId) {
                    result.exactParentFound = true;
                    result.parent = parent;
                    break;
                }

                if (child.isBIN()) {
                    if (requireExactMatch) {
                        parent.releaseLatch();
                    } else {
                        result.parent = parent;
                    }
                    break;
                }

                /* We can search further down the tree. */
                if (child.getLevel() <= exclusiveLevel) {
                    latchChild(parent, child, cacheMode);
                } else {
                    latchChildShared(parent, child, cacheMode);
                }

                parent.releaseLatch();
                parent = child;
                child = null;
            }

            success = true;
            
        } finally {
            
            if (!success) {
                if (parent.isLatchOwner()) {
                    parent.releaseLatch();
                }

                if (child != null && child.isLatchOwner()) {
                    child.releaseLatch();
                }
            }
        }

        if (result.parent != null) {
            if (LatchSupport.TRACK_LATCHES) {
                LatchSupport.expectBtreeLatchesHeld(1);
            }
            assert((!doFetch && !requireExactMatch) ||
                   result.parent.isLatchOwner());
        }

        return result;
    }

    /**
     * Return a reference to the parent of this LN. This searches through the
     * tree and allows splits, if the splitsAllowed param is true. Set the
     * tree location to the proper BIN parent whether or not the LN child is
     * found. That's because if the LN is not found, recovery or abort will
     * need to place it within the tree, and so we must point at the
     * appropriate position.
     *
     * 

When this method returns with location.bin non-null, the BIN is * latched and must be unlatched by the caller. Note that location.bin may * be non-null even if this method returns false.

* * @param location a holder class to hold state about the location * of our search. Sort of an internal cursor. * * @param key key to navigate through main key * * @param splitsAllowed true if this method is allowed to cause tree splits * as a side effect. In practice, recovery can cause splits, but abort * can't. * * @param blindDeltaOps Normally, if this method lands on a BIN-delta and * the search key is not in that delta, it will mutate the delta to a full * BIN to make sure whether the search key exists in the tree or not. * However, by passing true for blindDeltaOps, the caller indicates that * it doesn't really care whether the key is in the tree or not: it is * going to insert the key in the BIN-delta, if not already there, * essentially overwritting the slot that may exist in the full BIN. So, * if blindDeltaOps is true, the method will not mutate a BIN-delta parent * (unless the BIN-delta has no space for a slot insertion). * * @param cacheMode The CacheMode for affecting the hotness of the tree. * * @return true if node found in tree. * If false is returned and there is the possibility that we can insert * the record into a plausible parent we must also set * - location.bin (may be null if no possible parent found) * - location.lnKey (don't need to set if no possible parent). */ public boolean getParentBINForChildLN( TreeLocation location, byte[] key, boolean splitsAllowed, boolean blindDeltaOps, CacheMode cacheMode) throws DatabaseException { /* * Find the BIN that either points to this LN or could be its * ancestor. */ location.reset(); BIN bin; int index; if (splitsAllowed) { bin = searchSplitsAllowed(key, cacheMode, null /*comparator*/); } else { bin = search(key, cacheMode); } if (bin == null) { return false; } try { while (true) { location.bin = bin; index = bin.findEntry( key, true /*indicateIfExact*/, false /*exactSearch*/); boolean match = (index >= 0 && (index & IN.EXACT_MATCH) != 0); index &= ~IN.EXACT_MATCH; location.index = index; location.lnKey = key; /* if (!match && bin.isBINDelta() && blindDeltaOps) { System.out.println( "Blind op on BIN-delta : " + bin.getNodeId() + " nEntries = " + bin.getNEntries() + " max entries = " + bin.getMaxEntries() + " full BIN entries = " + bin.getFullBinNEntries() + " full BIN max entries = " + bin.getFullBinMaxEntries()); } */ if (match) { location.childLsn = bin.getLsn(index); location.childLoggedSize = bin.getLastLoggedSize(index); location.isKD = bin.isEntryKnownDeleted(index); location.isEmbedded = bin.isEmbeddedLN(index); return true; } else { if (bin.isBINDelta() && (!blindDeltaOps || bin.getNEntries() >= bin.getMaxEntries())) { bin.mutateToFullBIN(splitsAllowed /*leaveFreeSlot*/); location.reset(); continue; } return false; } } } catch (RuntimeException e) { bin.releaseLatch(); location.bin = null; throw e; } } /** * Find the BIN that is relevant to the insert. If the tree doesn't exist * yet, then create the first IN and BIN. On return, the cursor is set to * the BIN that is found or created, and the BIN is latched. */ public BIN findBinForInsert(final byte[] key, final CacheMode cacheMode) { boolean rootLatchIsHeld = false; BIN bin = null; try { long logLsn; /* * We may have to try several times because of a small * timing window, explained below. */ while (true) { rootLatchIsHeld = true; rootLatch.acquireShared(); if (!rootExists()) { rootLatch.release(); rootLatch.acquireExclusive(); if (rootExists()) { rootLatch.release(); rootLatchIsHeld = false; continue; } final EnvironmentImpl env = database.getEnv(); final INList inMemoryINs = env.getInMemoryINs(); /* * This is an empty tree, either because it's brand new * tree or because everything in it was deleted. Create an * IN and a BIN. We could latch the rootIN here, but * there's no reason to since we're just creating the * initial tree and we have the rootLatch held. Remember * that referred-to children must be logged before any * references to their LSNs. */ IN rootIN = new IN(database, key, maxTreeEntriesPerNode, 2); rootIN.setIsRoot(true); rootIN.latch(cacheMode); /* First BIN in the tree, log provisionally right away. */ bin = new BIN(database, key, maxTreeEntriesPerNode, 1); bin.latch(cacheMode); logLsn = bin.optionalLogProvisionalNoCompress(rootIN); /* * Log the root right away. Leave the root dirty, because * the MapLN is not being updated, and we want to avoid * this scenario from [#13897], where the LN has no * possible parent. * provisional BIN * root IN * checkpoint start * LN is logged * checkpoint end * BIN is dirtied, but is not part of checkpoint */ boolean insertOk = rootIN.insertEntry(bin, key, logLsn); assert insertOk; logLsn = rootIN.optionalLog(); rootIN.setDirty(true); /*force re-logging, see [#13897]*/ root = makeRootChildReference(rootIN, new byte[0], logLsn); rootIN.releaseLatch(); /* Add the new nodes to the in memory list. */ inMemoryINs.add(bin); inMemoryINs.add(rootIN); env.getEvictor().addBack(bin); rootLatch.release(); rootLatchIsHeld = false; break; } else { rootLatch.release(); rootLatchIsHeld = false; /* * There's a tree here, so search for where we should * insert. However, note that a window exists after we * release the root latch. We release the latch because the * search method expects to take the latch. After the * release and before search, the INCompressor may come in * and delete the entire tree, so search may return with a * null. */ bin = searchSplitsAllowed(key, cacheMode); if (bin == null) { /* The tree was deleted by the INCompressor. */ continue; } else { /* search() found a BIN where this key belongs. */ break; } } } } finally { if (rootLatchIsHeld) { rootLatch.release(); } } /* testing hook to insert item into log. */ assert TestHookExecute.doHookIfSet(ckptHook); return bin; } /** * Do a key based search, permitting pre-emptive splits. Returns the * target node's parent. */ public BIN searchSplitsAllowed(byte[] key, CacheMode cacheMode) { return searchSplitsAllowed(key, cacheMode, null); } private BIN searchSplitsAllowed( byte[] key, CacheMode cacheMode, Comparator comparator) { BIN insertTarget = null; while (insertTarget == null) { rootLatch.acquireShared(); boolean rootLatched = true; boolean rootINLatched = false; boolean success = false; IN rootIN = null; /* * Latch the rootIN, check if it needs splitting. If so split it * and update the associated MapLN. To update the MapLN, we must * lock it, which implies that all latches must be released prior * to the lock, and as a result, the root may require splitting * again or may be split by another thread. So we must restart * the loop to get the latest root. */ try { if (!rootExists()) { return null; } rootIN = (IN) root.fetchTarget(database, null); if (rootIN.needsSplitting()) { rootLatch.release(); rootLatch.acquireExclusive(); if (!rootExists()) { return null; } rootIN = (IN) root.fetchTarget(database, null); if (rootIN.needsSplitting()) { splitRoot(cacheMode); rootLatch.release(); rootLatched = false; EnvironmentImpl env = database.getEnv(); env.getDbTree().optionalModifyDbRoot(database); continue; } } rootIN.latchShared(cacheMode); rootINLatched = true; success = true; } finally { if (!success && rootINLatched) { rootIN.releaseLatch(); } if (rootLatched) { rootLatch.release(); } } /* * Now, search the tree, doing splits if required. The rootIN * is latched in SH mode, but this.root is not latched. If any * splits are needed, this.root will first be latched exclusively * and will stay latched until all splits are done. */ try { assert(rootINLatched); insertTarget = searchSplitsAllowed( rootIN, key, cacheMode, comparator); if (insertTarget == null) { if (LatchSupport.TRACK_LATCHES) { LatchSupport.expectBtreeLatchesHeld(0); } database.getEnv().incRelatchesRequired(); } } catch (SplitRequiredException e) { /* * The last slot in the root was used at the point when this * thread released the rootIN latch in order to force splits. * Retry. SR [#11147]. */ continue; } } return insertTarget; } /** * Search the tree, permitting preemptive splits. * * When this returns, parent will be unlatched unless parent is the * returned IN. */ private BIN searchSplitsAllowed( IN rootIN, byte[] key, CacheMode cacheMode, Comparator comparator) throws SplitRequiredException { assert(rootIN.isLatchOwner()); if (!rootIN.isRoot()) { throw EnvironmentFailureException.unexpectedState( "A null or non-root IN was given as the parent"); } int index; IN parent = rootIN; IN child = null; boolean success = false; /* * Search downward until we hit a node that needs a split. In that * case, retreat to the top of the tree and force splits downward. */ try { do { if (parent.getNEntries() == 0) { throw EnvironmentFailureException.unexpectedState( "Found upper IN with 0 entries"); } index = parent.findEntry(key, false, false, comparator); assert index >= 0; child = parent.fetchINWithNoLatch(index, key); if (child == null) { return null; // restart the search } /* if child is a BIN, it is actually EX-latched */ latchChildShared(parent, child, cacheMode); /* * If we need to split, try compressing first and check again. * Mutate to a full BIN because compression has no impact on a * BIN-delta, and a full BIN is needed for splitting anyway. */ if (child.needsSplitting()) { child.mutateToFullBIN(false /*leaveFreeSlot*/); database.getEnv().lazyCompress( child, true /*compressDirtySlots*/); if (child.needsSplitting()) { child.releaseLatch(); parent.releaseLatch(); /* SR [#11144]*/ assert TestHookExecute.doHookIfSet(waitHook); /* * forceSplit may throw SplitRequiredException if it * finds that the root needs splitting. Allow the * exception to propagate up to the caller, who will * do the root split. Otherwise, restart the search * from the root IN again. */ rootIN = forceSplit(key, cacheMode); parent = rootIN; assert(rootIN.isLatchOwner()); if (!rootIN.isRoot()) { throw EnvironmentFailureException.unexpectedState( "A null or non-root IN was given as the parent"); } continue; } } /* Continue down a level */ parent.releaseLatch(); parent = child; child = null; } while (!parent.isBIN()); success = true; return (BIN)parent; } finally { if (!success) { if (child != null && child.isLatchOwner()) { child.releaseLatch(); } if (parent != child && parent.isLatchOwner()) { parent.releaseLatch(); } } } } /** * Do pre-emptive splitting: search down the tree until we get to the BIN * level, and split any nodes that fit the splittable requirement except * for the root. If the root needs splitting, a splitRequiredException * is thrown and the root split is handled at a higher level. * * Note that more than one node in the path may be splittable. For example, * a tree might have a level2 IN and a BIN that are both splittable, and * would be encountered by the same insert operation. * * Splits cause INs to be logged in all ancestors, including the root. This * is to avoid the "great aunt" problem described in * RecoveryManager.buildINs. * * INs below the root are logged provisionally; only the root is logged * non-provisionally. Provisional logging is necessary during a checkpoint * for levels less than maxFlushLevel. * * This method acquires and holds this.rootLatch in EX mode during its * whole duration (so splits are serialized). The rootLatch is released * on return. * * @return the tree root node, latched in EX mode. This may be different * than the tree root when this method was called, because no latches are * held on entering this method. * * All latches are released in case of exception. */ private IN forceSplit(byte[] key, CacheMode cacheMode) throws DatabaseException, SplitRequiredException { final ArrayList nodeLadder = new ArrayList(); boolean allLeftSideDescent = true; boolean allRightSideDescent = true; int index; IN parent; IN child = null; IN rootIN = null; /* * Latch the root in order to update the root LSN when we're done. * Latch order must be: root, root IN. We'll leave this method with the * original parent latched. */ rootLatch.acquireExclusive(); boolean success = false; try { /* The root IN may have been evicted. [#16173] */ rootIN = (IN) root.fetchTarget(database, null); parent = rootIN; parent.latch(cacheMode); /* * Another thread may have crept in and * - used the last free slot in the parent, making it impossible * to correctly propagate the split. * - actually split the root, in which case we may be looking at * the wrong subtree for this search. * If so, throw and retry from above. SR [#11144] */ if (rootIN.needsSplitting()) { throw splitRequiredException; } /* * Search downward to the BIN level, saving the information * needed to do a split if necessary. */ do { if (parent.getNEntries() == 0) { throw EnvironmentFailureException.unexpectedState( "Found upper IN with 0 entries"); } /* Look for the entry matching key in the current node. */ index = parent.findEntry(key, false, false); assert index >= 0; if (index != 0) { allLeftSideDescent = false; } if (index != (parent.getNEntries() - 1)) { allRightSideDescent = false; } /* * Get the child node that matches. We only need to work on * nodes in residence. */ child = parent.loadIN(index, cacheMode); if (child == null) { break; } latchChild(parent, child, cacheMode); nodeLadder.add(new SplitInfo(parent, child, index)); /* Continue down a level */ parent = child; } while (!parent.isBIN()); boolean startedSplits = false; /* * Process the accumulated nodes from the bottom up. Split each * node if required. If the node should not split, we check if * there have been any splits on the ladder yet. If there are none, * we merely release the node, since there is no update. If splits * have started, we need to propagate new LSNs upward, so we log * the node and update its parent. */ long lastParentForSplit = Node.NULL_NODE_ID; for (int i = nodeLadder.size() - 1; i >= 0; i -= 1) { final SplitInfo info = nodeLadder.get(i); child = info.child; parent = info.parent; index = info.index; /* Opportunistically split the node if it is full. */ if (child.needsSplitting()) { child.mutateToFullBIN(false /*leaveFreeSlot*/); final IN grandParent = (i > 0) ? nodeLadder.get(i - 1).parent : null; if (allLeftSideDescent || allRightSideDescent) { child.splitSpecial( parent, index, grandParent, maxTreeEntriesPerNode, key, allLeftSideDescent); } else { child.split( parent, index, grandParent, maxTreeEntriesPerNode); } lastParentForSplit = parent.getNodeId(); startedSplits = true; /* * If the DB root IN was logged, update the DB tree's child * reference. Now the MapLN is logically dirty. Be sure to * flush the MapLN if we ever evict the root. */ if (parent.isRoot()) { root.updateLsnAfterOptionalLog( database, parent.getLastLoggedLsn()); } } else { if (startedSplits) { final long newChildLsn; /* * If this child was the parent of a split, it's * already logged by the split call. We just need to * propagate the logging upwards. If this child is just * a link in the chain upwards, log it. */ if (lastParentForSplit == child.getNodeId()) { newChildLsn = child.getLastLoggedLsn(); } else { newChildLsn = child.optionalLogProvisional(parent); } parent.updateEntry( index, newChildLsn, VLSN.NULL_VLSN_SEQUENCE, 0/*lastLoggedSize*/); /* * The root is never a 'child' in nodeLadder so it must * be logged separately. */ if (parent.isRoot()) { final long newRootLsn = parent.optionalLog(); root.updateLsnAfterOptionalLog( database, newRootLsn); } } } child.releaseLatch(); child = null; } success = true; } finally { if (!success) { if (child != null) { child.releaseLatchIfOwner(); } for (SplitInfo info : nodeLadder) { info.child.releaseLatchIfOwner(); } if (rootIN != null) { rootIN.releaseLatchIfOwner(); } } rootLatch.release(); } return rootIN; } /** * Split the root of the tree. */ private void splitRoot(CacheMode cacheMode) throws DatabaseException { /* * Create a new root IN, insert the current root IN into it, and then * call split. */ EnvironmentImpl env = database.getEnv(); INList inMemoryINs = env.getInMemoryINs(); IN curRoot = null; curRoot = (IN) root.fetchTarget(database, null); curRoot.latch(cacheMode); long curRootLsn = 0; long logLsn = 0; IN newRoot = null; try { /* * Make a new root IN, giving it an id key from the previous root. */ byte[] rootIdKey = curRoot.getKey(0); newRoot = new IN(database, rootIdKey, maxTreeEntriesPerNode, curRoot.getLevel() + 1); newRoot.latch(cacheMode); newRoot.setIsRoot(true); curRoot.setIsRoot(false); /* * Make the new root IN point to the old root IN. Log the old root * provisionally, because we modified it so it's not the root * anymore, then log the new root. We are guaranteed to be able to * insert entries, since we just made this root. */ boolean logSuccess = false; try { curRootLsn = curRoot.optionalLogProvisional(newRoot); boolean inserted = newRoot.insertEntry( curRoot, rootIdKey, curRootLsn); assert inserted; logLsn = newRoot.optionalLog(); logSuccess = true; } finally { if (!logSuccess) { /* Something went wrong when we tried to log. */ curRoot.setIsRoot(true); } } inMemoryINs.add(newRoot); /* * Don't add the new root into the LRU because it has a cached * child. */ /* * Make the tree's root reference point to this new node. Now the * MapLN is logically dirty, but the change hasn't been logged. Be * sure to flush the MapLN if we ever evict the root. */ root.setTarget(newRoot); root.updateLsnAfterOptionalLog(database, logLsn); curRoot.split(newRoot, 0, null, maxTreeEntriesPerNode); root.setLsn(newRoot.getLastLoggedLsn()); } finally { /* FindBugs ignore possible null pointer dereference of newRoot. */ newRoot.releaseLatch(); curRoot.releaseLatch(); } database.getEnv().incRootSplits(); traceSplitRoot(Level.FINE, TRACE_ROOT_SPLIT, newRoot, logLsn, curRoot, curRootLsn); } public BIN search(byte[] key, CacheMode cacheMode) { return search(key, SearchType.NORMAL, null, cacheMode, null); } /** * Search the tree, starting at the root. Depending on search type either * (a) search for the BIN that *should* contain a given key, or (b) return * the right-most or left-most BIN in the tree. * * Preemptive splitting is not done during the search. * * @param key - the key to search for, or null if searchType is LEFT or * RIGHT. * * @param searchType - The type of tree search to perform. NORMAL means * we're searching for key in the tree. LEFT/RIGHT means we're descending * down the left or right side, resp. * * @param binBoundary - If non-null, information is returned about whether * the BIN found is the first or last BIN in the database. * * @return - the BIN that matches the criteria, if any. Returns null if * the root is null. BIN is latched (unless it's null) and must be * unlatched by the caller. In a NORMAL search, it is the caller's * responsibility to do the findEntry() call on the key and BIN to locate * the entry (if any) that matches key. */ public BIN search( byte[] key, SearchType searchType, BINBoundary binBoundary, CacheMode cacheMode, Comparator comparator) { IN rootIN = getRootIN(cacheMode); if (rootIN == null) { return null; } assert ((searchType != SearchType.LEFT && searchType != SearchType.RIGHT) || key == null); if (binBoundary != null) { binBoundary.isLastBin = true; binBoundary.isFirstBin = true; } boolean success = false; int index; IN parent = rootIN; IN child = null; TreeWalkerStatsAccumulator treeStatsAccumulator = getTreeStatsAccumulator(); try { if (treeStatsAccumulator != null) { parent.accumulateStats(treeStatsAccumulator); } do { if (parent.getNEntries() == 0) { throw EnvironmentFailureException.unexpectedState( "Upper IN with 0 entries"); } if (searchType == SearchType.NORMAL) { index = parent.findEntry(key, false, false, comparator); } else if (searchType == SearchType.LEFT) { index = 0; } else if (searchType == SearchType.RIGHT) { index = parent.getNEntries() - 1; } else { throw EnvironmentFailureException.unexpectedState( "Invalid value of searchType: " + searchType); } assert(index >= 0); if (binBoundary != null) { if (index != parent.getNEntries() - 1) { binBoundary.isLastBin = false; } if (index != 0) { binBoundary.isFirstBin = false; } } child = parent.fetchINWithNoLatch(index, key); if (child == null) { parent = getRootIN(cacheMode); assert(parent != null); if (treeStatsAccumulator != null) { parent.accumulateStats(treeStatsAccumulator); } continue; } /* Latch the child. Note: BINs are always latched exclusive. */ latchChildShared(parent, child, cacheMode); if (treeStatsAccumulator != null) { child.accumulateStats(treeStatsAccumulator); } parent.releaseLatch(); parent = child; child = null; } while (!parent.isBIN()); success = true; return (BIN)parent; } finally { if (!success) { /* * In [#14903] we encountered a latch exception below and the * original exception was lost. Print the stack trace and * allow the original exception to be thrown if this happens * again, to get more information about the problem. */ try { if (child != null && child.isLatchOwner()) { child.releaseLatch(); } if (parent != child && parent.isLatchOwner()) { parent.releaseLatch(); } } catch (Exception e) { LoggerUtils.traceAndLogException( database.getEnv(), "Tree", "searchSubTreeInternal", "", e); } } } } /* * Search for the given key in the subtree rooted at the given parent IN. * The search descends until the given target level, and the IN that * contains or covers the key is returned latched in EX or SH mode as * specified by the latchShared param. * * The method uses grandparent latching, but only if the parent is the * root of the whole Btree and it is SH-latched on entry. */ private IN searchSubTree( IN parent, byte[] key, SearchType searchType, int targetLevel, boolean latchShared, CacheMode cacheMode, Comparator comparator) { /* * If a an intermediate IN (e.g., from getNextIN) was * originally passed, it was latched exclusively. */ assert(parent != null && (parent.isRoot() || parent.isLatchExclusiveOwner())); if ((searchType == SearchType.LEFT || searchType == SearchType.RIGHT) && key != null) { /* * If caller is asking for a right or left search, they shouldn't * be passing us a key. */ throw EnvironmentFailureException.unexpectedState( "searchSubTree passed key and left/right search"); } assert(parent.isUpperIN()); assert(parent.isLatchOwner()); boolean success = false; int index; IN subtreeRoot = parent; IN child = null; IN grandParent = null; boolean childIsLatched = false; boolean grandParentIsLatched = false; boolean doGrandparentLatching = !parent.isLatchExclusiveOwner(); TreeWalkerStatsAccumulator treeStatsAccumulator = getTreeStatsAccumulator(); try { do { if (treeStatsAccumulator != null) { parent.accumulateStats(treeStatsAccumulator); } assert(parent.getNEntries() > 0); if (searchType == SearchType.NORMAL) { /* Look for the entry matching key in the current node. */ index = parent.findEntry(key, false, false, comparator); } else if (searchType == SearchType.LEFT) { /* Left search, always take the 0th entry. */ index = 0; } else if (searchType == SearchType.RIGHT) { /* Right search, always take the highest entry. */ index = parent.getNEntries() - 1; } else { throw EnvironmentFailureException.unexpectedState( "Invalid value of searchType: " + searchType); } assert(index >= 0); /* * Get the child IN. * * If the child is not cached and we are usimg grandparent * latching, then: * * (a) If "parent" is not the subtree root, is is always * SH-latched at this point. So, to fetch the child, we need to * unlatch the parent and relatch it exclusively. Because we * have the grandparent latch (in either SH or EX mode), the * parent will not be evicted or detached from the tree and the * index of the child within the parent won't change. After * the parent is EX-latched, we can release the grandparent so *. it won't be held while reading the child from the log. * * (b) If "parent" is the BTree root, it may be SH-latched. In * this case, since there is no grandparent, we must unlatch * the parent and relatch it in EX mode under the protection * of the rootLatch; then we restart the do-loop. * * (c) If "parent" is the subtree root, but not the root of * the full Btree, then it must be EX-latched already, and * we can just fetch the child. */ child = (IN) parent.getTarget(index); if (child == null && doGrandparentLatching) { if (parent != subtreeRoot) { assert(!parent.isLatchExclusiveOwner()); parent.releaseLatch(); parent.latch(cacheMode); grandParent.releaseLatch(); grandParentIsLatched = false; grandParent = null; doGrandparentLatching = false; } else if (parent.isRoot() && !parent.isLatchExclusiveOwner()) { parent.releaseLatch(); subtreeRoot = getRootINLatchedExclusive(cacheMode); parent = subtreeRoot; assert(parent != null); assert(grandParent == null); doGrandparentLatching = false; continue; } child = parent.fetchIN(index, cacheMode); } else if (child == null) { child = parent.fetchIN(index, CacheMode.UNCHANGED); } /* After fetching the child we can release the grandparent. */ if (grandParent != null) { grandParent.releaseLatch(); grandParentIsLatched = false; } /* Latch the child. Note: BINs are always latched exclusive. */ if (child.getLevel() == targetLevel) { if (latchShared) { child.latchShared(cacheMode); } else { child.latch(cacheMode); } } else if (doGrandparentLatching) { } else { latchChild(parent, child, cacheMode); } childIsLatched = true; child.mutateToFullBIN(false /*leaveFreeSlot*/); if (treeStatsAccumulator != null) { child.accumulateStats(treeStatsAccumulator); } /* Continue down a level */ if (doGrandparentLatching) { grandParent = parent; grandParentIsLatched = true; } else { parent.releaseLatch(); } parent = child; } while (!parent.isBIN() && parent.getLevel() != targetLevel); success = true; return child; } finally { if (!success) { /* * In [#14903] we encountered a latch exception below and the * original exception was lost. Print the stack trace and * allow the original exception to be thrown if this happens * again, to get more information about the problem. */ try { if (child != null && childIsLatched) { child.releaseLatch(); } if (parent != child) { parent.releaseLatch(); } } catch (Exception e) { LoggerUtils.traceAndLogException( database.getEnv(), "Tree", "searchSubTreeInternal", "", e); } } if (grandParent != null && grandParentIsLatched) { grandParent.releaseLatch(); } } } /** * rebuildINList is used by recovery to add all the resident nodes to the * IN list. */ public void rebuildINList() throws DatabaseException { INList inMemoryList = database.getEnv().getInMemoryINs(); if (root != null) { rootLatch.acquireShared(); try { Node rootIN = root.getTarget(); if (rootIN != null) { rootIN.rebuildINList(inMemoryList); } } finally { rootLatch.release(); } } } /** * Debugging check that all resident nodes are on the INList and no stray * nodes are present in the unused portion of the IN arrays. */ public void validateINList(IN parent) throws DatabaseException { if (parent == null) { parent = (IN) root.getTarget(); } if (parent != null) { INList inList = database.getEnv().getInMemoryINs(); if (!inList.contains(parent)) { throw EnvironmentFailureException.unexpectedState( "IN " + parent.getNodeId() + " missing from INList"); } for (int i = 0;; i += 1) { try { Node node = parent.getTarget(i); if (i >= parent.getNEntries()) { if (node != null) { throw EnvironmentFailureException.unexpectedState( "IN " + parent.getNodeId() + " has stray node " + node + " at index " + i); } byte[] key = parent.getKey(i); if (key != null) { throw EnvironmentFailureException.unexpectedState( "IN " + parent.getNodeId() + " has stray key " + key + " at index " + i); } } if (node instanceof IN) { validateINList((IN) node); } } catch (ArrayIndexOutOfBoundsException e) { break; } } } } /* * Logging support */ /** * @see Loggable#getLogSize */ public int getLogSize() { int size = 1; // rootExists if (root != null) { size += root.getLogSize(); } return size; } /** * @see Loggable#writeToLog */ public void writeToLog(ByteBuffer logBuffer) { byte booleans = (byte) ((root != null) ? 1 : 0); logBuffer.put(booleans); if (root != null) { root.writeToLog(logBuffer); } } /** * @see Loggable#readFromLog */ public void readFromLog(ByteBuffer itemBuffer, int entryVersion) { boolean rootExists = false; byte booleans = itemBuffer.get(); rootExists = (booleans & 1) != 0; if (rootExists) { root = makeRootChildReference(); root.readFromLog(itemBuffer, entryVersion); } } /** * @see Loggable#dumpLog */ public void dumpLog(StringBuilder sb, boolean verbose) { sb.append(""); if (root != null) { root.dumpLog(sb, verbose); } sb.append(""); } /** * @see Loggable#getTransactionId */ public long getTransactionId() { return 0; } /** * @see Loggable#logicalEquals * Always return false, this item should never be compared. */ public boolean logicalEquals(Loggable other) { return false; } private TreeWalkerStatsAccumulator getTreeStatsAccumulator() { if (EnvironmentImpl.getThreadLocalReferenceCount() > 0) { return treeStatsAccumulatorTL.get(); } else { return null; } } public void setTreeStatsAccumulator(TreeWalkerStatsAccumulator tSA) { treeStatsAccumulatorTL.set(tSA); } /* * Debugging stuff. */ public void dump() { System.out.println(dumpString(0)); } public String dumpString(int nSpaces) { StringBuilder sb = new StringBuilder(); sb.append(TreeUtils.indent(nSpaces)); sb.append(""); sb.append('\n'); if (root != null) { sb.append(DbLsn.dumpString(root.getLsn(), nSpaces)); sb.append('\n'); IN rootIN = (IN) root.getTarget(); if (rootIN == null) { sb.append(""); } else { sb.append(rootIN.toString()); } sb.append('\n'); } sb.append(TreeUtils.indent(nSpaces)); sb.append(""); return sb.toString(); } /** * Unit test support to validate subtree pruning. Didn't want to make root * access public. */ boolean validateDelete(int index) throws DatabaseException { rootLatch.acquireShared(); try { IN rootIN = (IN) root.fetchTarget(database, null); rootIN.latch(); try { return rootIN.validateSubtreeBeforeDelete(index); } finally { rootIN.releaseLatch(); } } finally { rootLatch.release(); } } /* For unit testing only. */ public void setWaitHook(TestHook hook) { waitHook = hook; } /* For unit testing only. */ public void setSearchHook(TestHook hook) { searchHook = hook; } /* For unit testing only. */ public void setCkptHook(TestHook hook) { ckptHook = hook; } /* For unit testing only. */ public void setGetParentINHook(TestHook hook) { getParentINHook = hook; } public void setFetchINHook(TestHook hook) { fetchINHook = hook; } /** * Send trace messages to the java.util.logger. Don't rely on the logger * alone to conditionalize whether we send this message, we don't even want * to construct the message if the level is not enabled. */ private void traceSplitRoot(Level level, String splitType, IN newRoot, long newRootLsn, IN oldRoot, long oldRootLsn) { Logger logger = database.getEnv().getLogger(); if (logger.isLoggable(level)) { StringBuilder sb = new StringBuilder(); sb.append(splitType); sb.append(" newRoot=").append(newRoot.getNodeId()); sb.append(" newRootLsn="). append(DbLsn.getNoFormatString(newRootLsn)); sb.append(" oldRoot=").append(oldRoot.getNodeId()); sb.append(" oldRootLsn="). append(DbLsn.getNoFormatString(oldRootLsn)); LoggerUtils.logMsg( logger, database.getEnv(), level, sb.toString()); } } private static class SplitInfo { IN parent; IN child; int index; SplitInfo(IN parent, IN child, int index) { this.parent = parent; this.child = child; this.index = index; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy