// com.swirlds.virtualmap.internal.ConcurrentNodeStatusTracker Maven / Gradle / Ivy
//
// Go to download
/*
 * Copyright (C) 2021-2024 Hedera Hashgraph, LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.swirlds.virtualmap.internal;

import java.util.BitSet;
import java.util.concurrent.ConcurrentHashMap;

/**
 * 
 * During reconnect, we have a thread that is sending nodes, and another thread
 * receiving information about the nodes the learner has. We need to keep track
 * of the nodes the learner has notified it knows about, so the sender thread
 * can skip those nodes. Notice that when one node is known by the learner, their
 * descendants are also known. At the moment the teacher receives the notification
 * that the learner has a node, the teacher might have already sent that node,
 * but still any descendant of that node is already known by the learner, and we
 * shouldn't send it or try to send the least possible amount of known nodes.
 * 
 * This class keeps track of the status of each node the learner has notified us
 * about. The status is classified as:
 * 
 *     UNKNOWN: We don't know if the learner knows or doesn't
 *     know about about a node
 *     
 *     KNOWN: The learner has notified us that it knows
 *     about the node, as well as all of its descendants
 *     
 *     NOT_KNOWN: The learner has notified us that it
 *     doesn't know about the node, but we can't make any assumption about
 *     the descendants
 *     
 * 
 *
 * This class is thread safe supporting multiple concurrent readers but only
 * one concurrent writer.
 */
public final class ConcurrentNodeStatusTracker {

    /**
     * Node status
     */
    public enum Status {
        UNKNOWN,
        KNOWN,
        NOT_KNOWN
    }

    /**
     * Maximum number of elements per BitSet.
     * Currently, at 2^30 to improve performance on divisions
     */
    private static final int LIMIT = 0x40000000;

    /**
     * To speed up division, instead of doing {@code n / LIMIT},
     * we can compute it as {@code n >> lg(LIMIT)}, i.e., as
     * {@code n >> RIGHT_SHIFTS_FOR_LIMIT_AS_DIVISOR}.
     */
    private static final int RIGHT_SHIFTS_FOR_LIMIT_AS_DIVISOR = 30;

    /**
     * Value to compute {@code n % LIMIT} with the & operator
     * to improve performance. Instead of {@code n % LIMIT},
     * we should use {@code n & DIVISOR}.
     */
    private static final int DIVISOR = LIMIT - 1;

    /**
     * Map to keep track of the status of the bits for which we know their status.
     * Given that each {@link BitSetGroup} supports up to {@value LIMIT} elements,
     * for each value, we compute the quotient {@code value / }{@value LIMIT}, which
     * provides the {@code bitsetIndex}, which is used as a key for this map, and
     * then the remainder {@code value % }{@value LIMIT} gives us the index at each
     * {@link BitSetGroup}.
     */
    private final ConcurrentHashMap statusBitSets;

    /**
     * Upper bound of the values to track
     */
    private final long capacity;

    /**
     * Creates a new {@link ConcurrentNodeStatusTracker} with a specified capacity.
     * The capacity is important because when a node is set as KNOWN
     * we iterate through all of its descendants, and we need to know when to stop.
     * The capacity specifies the maximum number of nodes in the tree.
     *
     * @param capacity
     * 		maximum number of nodes to track
     */
    public ConcurrentNodeStatusTracker(final long capacity) {
        this.capacity = capacity;
        this.statusBitSets = new ConcurrentHashMap<>();
        // we set the root as NOT_KNOW, otherwise there wouldn't be a
        // need to reconnect a VirtualMap
        this.setNodeStatus(0, 0, Status.NOT_KNOWN);
    }

    /**
     * For a node (specified as a long), we specified its status based
     * on the information provided by the learner. By default, the status
     * of each known is UNKNOWN, and setting a value with
     * a status to UNKNOWN wil throw an {@link IllegalArgumentException}
     * because it could be due to a bug that might try to set the status
     * of a node that already has a status set. Even though the current
     * status might be UNKNOWN setting it again to be
     * UNKNOWN would just unnecessary lock on the internal
     * {@link BitSet} used to track the values, causing an increase of
     * unnecessary contention.
     *
     * @param value
     * 		path of the node
     * @param status
     * 		the status the learner has reported
     * @throws IllegalArgumentException
     * 		if value is zero or less, or equal or greater than the capacity,
     * 		or if the status is UNKNOWN
     */
    public void set(final long value, final Status status) {
        if (value <= 0 || value >= capacity) {
            throw new IllegalArgumentException(
                    String.format("Value can only be between [0, %d), %d is illegal", capacity, value));
        }

        if (status == Status.UNKNOWN) {
            throw new IllegalArgumentException("Status can only be set to KNOWN or NOT_KNOWN");
        }

        final int index = getIndexInBitSetFor(value);
        final long bitSetIndex = getBitSetIndexFor(value);
        setNodeStatus(bitSetIndex, index, status);
    }

    /**
     * 
     * Gets the status of a node. If the current status of a node
     * is UNKNOWN, then we check the status of
     * its ascendants, until we find KNOWN or
     * UNKNOWN.
     * 
     * 
     * Notice that one ascendant could have a status of
     * NOT_KNOWN, and we would send
     * that node to the learner. Later on, we receive the notification
     * that the learner knows the node, so we sent that node
     * unnecessarily, but at the point of making the decision of
     * sending the node, UNKNOWN or NOT_KNOWN
     * yield the same result, i.e., we send the node anyway.
     * 
     * 
     * If one ascendant is KNOWN, then we return
     * KNOWN
     * 
     * 
     * In an UNKNOWN scenario the algorithmic complexity of
     * this method is {@code O(lg n)} where {@code n} is the {@code value}
     * provided. Otherwise, the method is in {@code O(1)}.
     * 
     * 
     * Currently, for each check of the status of a node, either direct
     * or its descendants, executes an atomic operation that blocks the
     * thread setting status.
     * 
     *
     * @param value
     * 		path of node to check
     * @return status of a node
     */
    public Status getStatus(long value) {
        if (value < 0 || value >= capacity) {
            throw new IllegalArgumentException(
                    String.format("Value can only be between [0, %d), %d is illegal", capacity, value));
        }

        Status status;
        do {
            final int index = getIndexInBitSetFor(value);
            final long bitSetIndex = getBitSetIndexFor(value);
            status = statusBitSets
                    .computeIfAbsent(bitSetIndex, k -> new BitSetGroup())
                    .getStatus(index);
            value = Path.getParentPath(value);
        } while (status == Status.UNKNOWN);

        return status;
    }

    /**
     * Get the status of a node as reported by the learner, or return UNKNOWN.
     * 
     * Unlike the getStatus(long value) method above, this method returns the actual
     * status of the requested node without traversing the tree to its parents.
     * If the learner hasn't reported a status for this particular node, this method
     * returns UNKNOWN.
     *
     * @param value path of node to check
     * @return status of the node, or UNKNOWN if its status has never been reported yet
     */
    public Status getReportedStatus(long value) {
        if (value < 0 || value >= capacity) {
            throw new IllegalArgumentException(
                    String.format("Value can only be between [0, %d), %d is illegal", capacity, value));
        }

        final int index = getIndexInBitSetFor(value);
        final long bitSetIndex = getBitSetIndexFor(value);
        return statusBitSets
                .computeIfAbsent(bitSetIndex, k -> new BitSetGroup())
                .getStatus(index);
    }

    /**
     * Atomically sets the status of a node represented by its value (path).
     * Currently, for the immediate use case, we are blocking for each
     * read and write, given that at much, we are using 2*{@value Integer#MAX_VALUE},
     * we only need 3 {@link BitSetGroup}s, and for much of the updates we
     * are going to be blocking.
     *
     * @param bitsetIndex
     * 		index of the {@link BitSetGroup}
     * @param valueIndex
     * 		index of the value inside the {@link BitSetGroup}
     * @param status
     * 		status to set
     */
    private void setNodeStatus(final long bitsetIndex, final int valueIndex, final Status status) {
        statusBitSets.compute(bitsetIndex, (k, bitsetGroup) -> {
            if (bitsetGroup == null) {
                bitsetGroup = new BitSetGroup();
            }

            bitsetGroup.setStatus(valueIndex, status);
            return bitsetGroup;
        });
    }

    /**
     * Computes the index of the BitSet this value
     * should be set in
     *
     * @param value
     * 		Value to set/retrieve
     * @return index of the BitSet this value should be set in
     */
    private static long getBitSetIndexFor(final long value) {
        return value >> RIGHT_SHIFTS_FOR_LIMIT_AS_DIVISOR;
    }

    /**
     * Computes the index in the BitSet this value
     * should be set in
     *
     * @param value
     * 		Value to set/retrieve
     * @return index in the BitSet this value should be set in
     */
    private static int getIndexInBitSetFor(final long value) {
        return (int) (value & DIVISOR);
    }

    /**
     * We use two {@link BitSet} to handle the 3 possible
     * states of a value:
     * 

     * UNKNOWN: if {@code knowns} is set to false.
     * 
     * KNOWN: if {@code knowns} is set to true and
     * {@code status} is also set to true.
     * 
     * NOT_KNOWN: if {@code knowns} is set to true
     * and {@code status} is not set.
     * 
     * 
     */
    private static final class BitSetGroup {
        private final BitSet status;
        private final BitSet knowns;

        private BitSetGroup() {
            this.status = new BitSet(LIMIT);
            this.knowns = new BitSet(LIMIT);
        }

        private void setStatus(final int index, final Status status) {
            this.knowns.set(index);
            if (status == Status.KNOWN) {
                this.status.set(index);
            }
        }

        private Status getStatus(final int index) {
            if (!this.knowns.get(index)) {
                return Status.UNKNOWN;
            }

            return status.get(index) ? Status.KNOWN : Status.NOT_KNOWN;
        }
    }
}