All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.heap.CopyOnWriteStateMap Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.heap;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.runtime.state.StateEntry;
import org.apache.flink.runtime.state.StateTransformationFunction;
import org.apache.flink.runtime.state.internal.InternalKvState;
import org.apache.flink.util.MathUtils;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import static org.apache.flink.util.CollectionUtil.MAX_ARRAY_SIZE;

/**
 * Implementation of Flink's in-memory state maps with copy-on-write support. This map does not support null values
 * for key or namespace.
 *
 * 

{@link CopyOnWriteStateMap} sacrifices some peak performance and memory efficiency for features like incremental * rehashing and asynchronous snapshots through copy-on-write. Copy-on-write tries to minimize the amount of copying by * maintaining version meta data for both, the map structure and the state objects. However, we must often proactively * copy state objects when we hand them to the user. * *

As for any state backend, user should not keep references on state objects that they obtained from state backends * outside the scope of the user function calls. * *

Some brief maintenance notes: * *

1) Flattening the underlying data structure from nested maps (namespace) -> (key) -> (state) to one flat map * (key, namespace) -> (state) brings certain performance trade-offs. In theory, the flat map has one less level of * indirection compared to the nested map. However, the nested map naturally de-duplicates namespace objects for which * #equals() is true. This leads to potentially a lot of redundant namespace objects for the flattened version. Those, * in turn, can again introduce more cache misses because we need to follow the namespace object on all operations to * ensure entry identities. Obviously, copy-on-write can also add memory overhead. So does the meta data to track * copy-on-write requirement (state and entry versions on {@link StateMapEntry}). * *

2) A flat map structure is a lot easier when it comes to tracking copy-on-write of the map structure. * *

3) Nested structure had the (never used) advantage that we can easily drop and iterate whole namespaces. This could * give locality advantages for certain access pattern, e.g. iterating a namespace. * *

4) Serialization format is changed from namespace-prefix compressed (as naturally provided from the old nested * structure) to making all entries self contained as (key, namespace, state). * *

5) Currently, a state map can only grow, but never shrinks on low load. We could easily add this if required. * *

6) Heap based state backends like this can easily cause a lot of GC activity. Besides using G1 as garbage collector, * we should provide an additional state backend that operates on off-heap memory. This would sacrifice peak performance * (due to de/serialization of objects) for a lower, but more constant throughput and potentially huge simplifications * w.r.t. copy-on-write. * *

7) We could try a hybrid of a serialized and object based backends, where key and namespace of the entries are both * serialized in one byte-array. * *

9) We could consider smaller types (e.g. short) for the version counting and think about some reset strategy before * overflows, when there is no snapshot running. However, this would have to touch all entries in the map. * *

This class was initially based on the {@link java.util.HashMap} implementation of the Android JDK, but is now heavily * customized towards the use case of map for state entries. * IMPORTANT: the contracts for this class rely on the user not holding any references to objects returned by this map * beyond the life cycle of per-element operations. Or phrased differently, all get-update-put operations on a mapping * should be within one call of processElement. Otherwise, the user must take care of taking deep copies, e.g. for * caching purposes. * * @param type of key. * @param type of namespace. * @param type of value. */ public class CopyOnWriteStateMap extends StateMap { /** * The logger. */ private static final Logger LOG = LoggerFactory.getLogger(HeapKeyedStateBackend.class); /** * Min capacity (other than zero) for a {@link CopyOnWriteStateMap}. Must be a power of two * greater than 1 (and less than 1 << 30). */ private static final int MINIMUM_CAPACITY = 4; /** * Max capacity for a {@link CopyOnWriteStateMap}. Must be a power of two >= MINIMUM_CAPACITY. */ private static final int MAXIMUM_CAPACITY = 1 << 30; /** * Default capacity for a {@link CopyOnWriteStateMap}. Must be a power of two, * greater than {@code MINIMUM_CAPACITY} and less than {@code MAXIMUM_CAPACITY}. */ public static final int DEFAULT_CAPACITY = 128; /** * Minimum number of entries that one step of incremental rehashing migrates from the old to the new sub-map. */ private static final int MIN_TRANSFERRED_PER_INCREMENTAL_REHASH = 4; /** * The serializer of the state. */ protected final TypeSerializer stateSerializer; /** * An empty map shared by all zero-capacity maps (typically from default * constructor). It is never written to, and replaced on first put. Its size * is set to half the minimum, so that the first resize will create a * minimum-sized map. */ private static final StateMapEntry[] EMPTY_TABLE = new StateMapEntry[MINIMUM_CAPACITY >>> 1]; /** * Empty entry that we use to bootstrap our {@link CopyOnWriteStateMap.StateEntryIterator}. */ private static final StateMapEntry ITERATOR_BOOTSTRAP_ENTRY = new StateMapEntry<>(new Object(), new Object(), new Object(), 0, null, 0, 0); /** * Maintains an ordered set of version ids that are still in use by unreleased snapshots. */ private final TreeSet snapshotVersions; /** * This is the primary entry array (hash directory) of the state map. If no incremental rehash is ongoing, this * is the only used table. **/ private StateMapEntry[] primaryTable; /** * We maintain a secondary entry array while performing an incremental rehash. The purpose is to slowly migrate * entries from the primary table to this resized table array. When all entries are migrated, this becomes the new * primary table. */ private StateMapEntry[] incrementalRehashTable; /** * The current number of mappings in the primary talbe. */ private int primaryTableSize; /** * The current number of mappings in the rehash table. */ private int incrementalRehashTableSize; /** * The next index for a step of incremental rehashing in the primary table. */ private int rehashIndex; /** * The current version of this map. Used for copy-on-write mechanics. */ private int stateMapVersion; /** * The highest version of this map that is still required by any unreleased snapshot. */ private int highestRequiredSnapshotVersion; /** * The last namespace that was actually inserted. This is a small optimization to reduce duplicate namespace objects. */ private N lastNamespace; /** * The {@link CopyOnWriteStateMap} is rehashed when its size exceeds this threshold. * The value of this field is generally .75 * capacity, except when * the capacity is zero, as described in the EMPTY_TABLE declaration * above. */ private int threshold; /** * Incremented by "structural modifications" to allow (best effort) * detection of concurrent modification. */ private int modCount; /** * Constructs a new {@code StateMap} with default capacity of {@code DEFAULT_CAPACITY}. * * @param stateSerializer the serializer of the key. */ CopyOnWriteStateMap(TypeSerializer stateSerializer) { this(DEFAULT_CAPACITY, stateSerializer); } /** * Constructs a new {@code StateMap} instance with the specified capacity. * * @param capacity the initial capacity of this hash map. * @param stateSerializer the serializer of the key. * @throws IllegalArgumentException when the capacity is less than zero. */ @SuppressWarnings("unchecked") private CopyOnWriteStateMap( int capacity, TypeSerializer stateSerializer) { this.stateSerializer = Preconditions.checkNotNull(stateSerializer); // initialized maps to EMPTY_TABLE. this.primaryTable = (StateMapEntry[]) EMPTY_TABLE; this.incrementalRehashTable = (StateMapEntry[]) EMPTY_TABLE; // initialize sizes to 0. this.primaryTableSize = 0; this.incrementalRehashTableSize = 0; this.rehashIndex = 0; this.stateMapVersion = 0; this.highestRequiredSnapshotVersion = 0; this.snapshotVersions = new TreeSet<>(); if (capacity < 0) { throw new IllegalArgumentException("Capacity: " + capacity); } if (capacity == 0) { threshold = -1; return; } if (capacity < MINIMUM_CAPACITY) { capacity = MINIMUM_CAPACITY; } else if (capacity > MAXIMUM_CAPACITY) { capacity = MAXIMUM_CAPACITY; } else { capacity = MathUtils.roundUpToPowerOfTwo(capacity); } primaryTable = makeTable(capacity); } // Public API from StateMap ------------------------------------------------------------------------------ /** * Returns the total number of entries in this {@link CopyOnWriteStateMap}. This is the sum of both sub-maps. * * @return the number of entries in this {@link CopyOnWriteStateMap}. */ @Override public int size() { return primaryTableSize + incrementalRehashTableSize; } @Override public S get(K key, N namespace) { final int hash = computeHashForOperationAndDoIncrementalRehash(key, namespace); final int requiredVersion = highestRequiredSnapshotVersion; final StateMapEntry[] tab = selectActiveTable(hash); int index = hash & (tab.length - 1); for (StateMapEntry e = tab[index]; e != null; e = e.next) { final K eKey = e.key; final N eNamespace = e.namespace; if ((e.hash == hash && key.equals(eKey) && namespace.equals(eNamespace))) { // copy-on-write check for state if (e.stateVersion < requiredVersion) { // copy-on-write check for entry if (e.entryVersion < requiredVersion) { e = handleChainedEntryCopyOnWrite(tab, hash & (tab.length - 1), e); } e.stateVersion = stateMapVersion; e.state = getStateSerializer().copy(e.state); } return e.state; } } return null; } @Override public boolean containsKey(K key, N namespace) { final int hash = computeHashForOperationAndDoIncrementalRehash(key, namespace); final StateMapEntry[] tab = selectActiveTable(hash); int index = hash & (tab.length - 1); for (StateMapEntry e = tab[index]; e != null; e = e.next) { final K eKey = e.key; final N eNamespace = e.namespace; if ((e.hash == hash && key.equals(eKey) && namespace.equals(eNamespace))) { return true; } } return false; } @Override public void put(K key, N namespace, S value) { final StateMapEntry e = putEntry(key, namespace); e.state = value; e.stateVersion = stateMapVersion; } @Override public S putAndGetOld(K key, N namespace, S state) { final StateMapEntry e = putEntry(key, namespace); // copy-on-write check for state S oldState = (e.stateVersion < highestRequiredSnapshotVersion) ? getStateSerializer().copy(e.state) : e.state; e.state = state; e.stateVersion = stateMapVersion; return oldState; } @Override public void remove(K key, N namespace) { removeEntry(key, namespace); } @Override public S removeAndGetOld(K key, N namespace) { final StateMapEntry e = removeEntry(key, namespace); return e != null ? // copy-on-write check for state (e.stateVersion < highestRequiredSnapshotVersion ? getStateSerializer().copy(e.state) : e.state) : null; } @Override public Stream getKeys(N namespace) { return StreamSupport.stream(spliterator(), false) .filter(entry -> entry.getNamespace().equals(namespace)) .map(StateEntry::getKey); } @Override public void transform( K key, N namespace, T value, StateTransformationFunction transformation) throws Exception { final StateMapEntry entry = putEntry(key, namespace); // copy-on-write check for state entry.state = transformation.apply( (entry.stateVersion < highestRequiredSnapshotVersion) ? getStateSerializer().copy(entry.state) : entry.state, value); entry.stateVersion = stateMapVersion; } // Private implementation details of the API methods --------------------------------------------------------------- /** * Helper method that is the basis for operations that add mappings. */ private StateMapEntry putEntry(K key, N namespace) { final int hash = computeHashForOperationAndDoIncrementalRehash(key, namespace); final StateMapEntry[] tab = selectActiveTable(hash); int index = hash & (tab.length - 1); for (StateMapEntry e = tab[index]; e != null; e = e.next) { if (e.hash == hash && key.equals(e.key) && namespace.equals(e.namespace)) { // copy-on-write check for entry if (e.entryVersion < highestRequiredSnapshotVersion) { e = handleChainedEntryCopyOnWrite(tab, index, e); } return e; } } ++modCount; if (size() > threshold) { doubleCapacity(); } return addNewStateMapEntry(tab, key, namespace, hash); } /** * Helper method that is the basis for operations that remove mappings. */ private StateMapEntry removeEntry(K key, N namespace) { final int hash = computeHashForOperationAndDoIncrementalRehash(key, namespace); final StateMapEntry[] tab = selectActiveTable(hash); int index = hash & (tab.length - 1); for (StateMapEntry e = tab[index], prev = null; e != null; prev = e, e = e.next) { if (e.hash == hash && key.equals(e.key) && namespace.equals(e.namespace)) { if (prev == null) { tab[index] = e.next; } else { // copy-on-write check for entry if (prev.entryVersion < highestRequiredSnapshotVersion) { prev = handleChainedEntryCopyOnWrite(tab, index, prev); } prev.next = e.next; } ++modCount; if (tab == primaryTable) { --primaryTableSize; } else { --incrementalRehashTableSize; } return e; } } return null; } // Iteration ------------------------------------------------------------------------------------------------------ @Nonnull @Override public Iterator> iterator() { return new StateEntryIterator(); } // Private utility functions for StateMap management ------------------------------------------------------------- /** * @see #releaseSnapshot(StateMapSnapshot) */ @VisibleForTesting void releaseSnapshot(int snapshotVersion) { // we guard against concurrent modifications of highestRequiredSnapshotVersion between snapshot and release. // Only stale reads of from the result of #releaseSnapshot calls are ok. synchronized (snapshotVersions) { Preconditions.checkState(snapshotVersions.remove(snapshotVersion), "Attempt to release unknown snapshot version"); highestRequiredSnapshotVersion = snapshotVersions.isEmpty() ? 0 : snapshotVersions.last(); } } /** * Creates (combined) copy of the table arrays for a snapshot. This method must be called by the same Thread that * does modifications to the {@link CopyOnWriteStateMap}. */ @VisibleForTesting @SuppressWarnings("unchecked") StateMapEntry[] snapshotMapArrays() { // we guard against concurrent modifications of highestRequiredSnapshotVersion between snapshot and release. // Only stale reads of from the result of #releaseSnapshot calls are ok. This is why we must call this method // from the same thread that does all the modifications to the map. synchronized (snapshotVersions) { // increase the map version for copy-on-write and register the snapshot if (++stateMapVersion < 0) { // this is just a safety net against overflows, but should never happen in practice (i.e., only after 2^31 snapshots) throw new IllegalStateException("Version count overflow in CopyOnWriteStateMap. Enforcing restart."); } highestRequiredSnapshotVersion = stateMapVersion; snapshotVersions.add(highestRequiredSnapshotVersion); } StateMapEntry[] table = primaryTable; // In order to reuse the copied array as the destination array for the partitioned records in // CopyOnWriteStateMapSnapshot.TransformedSnapshotIterator, we need to make sure that the copied array // is big enough to hold the flattened entries. In fact, given the current rehashing algorithm, we only // need to do this check when isRehashing() is false, but in order to get a more robust code(in case that // the rehashing algorithm may changed in the future), we do this check for all the case. final int totalMapIndexSize = rehashIndex + table.length; final int copiedArraySize = Math.max(totalMapIndexSize, size()); final StateMapEntry[] copy = new StateMapEntry[copiedArraySize]; if (isRehashing()) { // consider both maps for the snapshot, the rehash index tells us which part of the two maps we need final int localRehashIndex = rehashIndex; final int localCopyLength = table.length - localRehashIndex; // for the primary table, take every index >= rhIdx. System.arraycopy(table, localRehashIndex, copy, 0, localCopyLength); // for the new table, we are sure that two regions contain all the entries: // [0, rhIdx[ AND [table.length / 2, table.length / 2 + rhIdx[ table = incrementalRehashTable; System.arraycopy(table, 0, copy, localCopyLength, localRehashIndex); System.arraycopy(table, table.length >>> 1, copy, localCopyLength + localRehashIndex, localRehashIndex); } else { // we only need to copy the primary table System.arraycopy(table, 0, copy, 0, table.length); } return copy; } int getStateMapVersion() { return stateMapVersion; } /** * Allocate a table of the given capacity and set the threshold accordingly. * * @param newCapacity must be a power of two */ private StateMapEntry[] makeTable(int newCapacity) { if (newCapacity < MAXIMUM_CAPACITY) { threshold = (newCapacity >> 1) + (newCapacity >> 2); // 3/4 capacity } else { if (size() > MAX_ARRAY_SIZE) { throw new IllegalStateException("Maximum capacity of CopyOnWriteStateMap is reached and the job " + "cannot continue. Please consider scaling-out your job or using a different keyed state backend " + "implementation!"); } else { LOG.warn("Maximum capacity of 2^30 in StateMap reached. Cannot increase hash map size. This can " + "lead to more collisions and lower performance. Please consider scaling-out your job or using a " + "different keyed state backend implementation!"); threshold = MAX_ARRAY_SIZE; } } @SuppressWarnings("unchecked") StateMapEntry[] newMap = (StateMapEntry[]) new StateMapEntry[newCapacity]; return newMap; } /** * Creates and inserts a new {@link StateMapEntry}. */ private StateMapEntry addNewStateMapEntry( StateMapEntry[] table, K key, N namespace, int hash) { // small optimization that aims to avoid holding references on duplicate namespace objects if (namespace.equals(lastNamespace)) { namespace = lastNamespace; } else { lastNamespace = namespace; } int index = hash & (table.length - 1); StateMapEntry newEntry = new StateMapEntry<>( key, namespace, null, hash, table[index], stateMapVersion, stateMapVersion); table[index] = newEntry; if (table == primaryTable) { ++primaryTableSize; } else { ++incrementalRehashTableSize; } return newEntry; } /** * Select the sub-table which is responsible for entries with the given hash code. * * @param hashCode the hash code which we use to decide about the table that is responsible. * @return the index of the sub-table that is responsible for the entry with the given hash code. */ private StateMapEntry[] selectActiveTable(int hashCode) { return (hashCode & (primaryTable.length - 1)) >= rehashIndex ? primaryTable : incrementalRehashTable; } /** * Doubles the capacity of the hash table. Existing entries are placed in * the correct bucket on the enlarged table. If the current capacity is, * MAXIMUM_CAPACITY, this method is a no-op. Returns the table, which * will be new unless we were already at MAXIMUM_CAPACITY. */ private void doubleCapacity() { // There can only be one rehash in flight. From the amount of incremental rehash steps we take, this should always hold. Preconditions.checkState(!isRehashing(), "There is already a rehash in progress."); StateMapEntry[] oldMap = primaryTable; int oldCapacity = oldMap.length; if (oldCapacity == MAXIMUM_CAPACITY) { return; } incrementalRehashTable = makeTable(oldCapacity * 2); } /** * Returns true, if an incremental rehash is in progress. */ @VisibleForTesting boolean isRehashing() { // if we rehash, the secondary table is not empty return EMPTY_TABLE != incrementalRehashTable; } /** * Computes the hash for the composite of key and namespace and performs some steps of incremental rehash if * incremental rehashing is in progress. */ private int computeHashForOperationAndDoIncrementalRehash(K key, N namespace) { if (isRehashing()) { incrementalRehash(); } return compositeHash(key, namespace); } /** * Runs a number of steps for incremental rehashing. */ @SuppressWarnings("unchecked") private void incrementalRehash() { StateMapEntry[] oldMap = primaryTable; StateMapEntry[] newMap = incrementalRehashTable; int oldCapacity = oldMap.length; int newMask = newMap.length - 1; int requiredVersion = highestRequiredSnapshotVersion; int rhIdx = rehashIndex; int transferred = 0; // we migrate a certain minimum amount of entries from the old to the new table while (transferred < MIN_TRANSFERRED_PER_INCREMENTAL_REHASH) { StateMapEntry e = oldMap[rhIdx]; while (e != null) { // copy-on-write check for entry if (e.entryVersion < requiredVersion) { e = new StateMapEntry<>(e, stateMapVersion); } StateMapEntry n = e.next; int pos = e.hash & newMask; e.next = newMap[pos]; newMap[pos] = e; e = n; ++transferred; } oldMap[rhIdx] = null; if (++rhIdx == oldCapacity) { //here, the rehash is complete and we release resources and reset fields primaryTable = newMap; incrementalRehashTable = (StateMapEntry[]) EMPTY_TABLE; primaryTableSize += incrementalRehashTableSize; incrementalRehashTableSize = 0; rehashIndex = 0; return; } } // sync our local bookkeeping the with official bookkeeping fields primaryTableSize -= transferred; incrementalRehashTableSize += transferred; rehashIndex = rhIdx; } /** * Perform copy-on-write for entry chains. We iterate the (hopefully and probably) still cached chain, replace * all links up to the 'untilEntry', which we actually wanted to modify. */ private StateMapEntry handleChainedEntryCopyOnWrite( StateMapEntry[] tab, int mapIdx, StateMapEntry untilEntry) { final int required = highestRequiredSnapshotVersion; StateMapEntry current = tab[mapIdx]; StateMapEntry copy; if (current.entryVersion < required) { copy = new StateMapEntry<>(current, stateMapVersion); tab[mapIdx] = copy; } else { // nothing to do, just advance copy to current copy = current; } // we iterate the chain up to 'until entry' while (current != untilEntry) { //advance current current = current.next; if (current.entryVersion < required) { // copy and advance the current's copy copy.next = new StateMapEntry<>(current, stateMapVersion); copy = copy.next; } else { // nothing to do, just advance copy to current copy = current; } } return copy; } @SuppressWarnings("unchecked") private static StateMapEntry getBootstrapEntry() { return (StateMapEntry) ITERATOR_BOOTSTRAP_ENTRY; } /** * Helper function that creates and scrambles a composite hash for key and namespace. */ private static int compositeHash(Object key, Object namespace) { // create composite key through XOR, then apply some bit-mixing for better distribution of skewed keys. return MathUtils.bitMix(key.hashCode() ^ namespace.hashCode()); } /** * Creates a snapshot of this {@link CopyOnWriteStateMap}, to be written in checkpointing. The snapshot integrity * is protected through copy-on-write from the {@link CopyOnWriteStateMap}. Users should call * {@link #releaseSnapshot(StateMapSnapshot)} after using the returned object. * * @return a snapshot from this {@link CopyOnWriteStateMap}, for checkpointing. */ @Nonnull @Override public CopyOnWriteStateMapSnapshot stateSnapshot() { return new CopyOnWriteStateMapSnapshot<>(this); } /** * Releases a snapshot for this {@link CopyOnWriteStateMap}. This method should be called once a snapshot is no more needed, * so that the {@link CopyOnWriteStateMap} can stop considering this snapshot for copy-on-write, thus avoiding unnecessary * object creation. * * @param snapshotToRelease the snapshot to release, which was previously created by this state map. */ @Override public void releaseSnapshot(StateMapSnapshot> snapshotToRelease) { CopyOnWriteStateMapSnapshot copyOnWriteStateMapSnapshot = (CopyOnWriteStateMapSnapshot) snapshotToRelease; Preconditions.checkArgument(copyOnWriteStateMapSnapshot.isOwner(this), "Cannot release snapshot which is owned by a different state map."); releaseSnapshot(copyOnWriteStateMapSnapshot.getSnapshotVersion()); } @VisibleForTesting Set getSnapshotVersions() { return snapshotVersions; } // Meta data setter / getter and toString ----------------------------------------------------- public TypeSerializer getStateSerializer() { return stateSerializer; } // StateMapEntry ------------------------------------------------------------------------------------------------- /** * One entry in the {@link CopyOnWriteStateMap}. This is a triplet of key, namespace, and state. Thereby, key and * namespace together serve as a composite key for the state. This class also contains some management meta data for * copy-on-write, a pointer to link other {@link StateMapEntry}s to a list, and cached hash code. * * @param type of key. * @param type of namespace. * @param type of state. */ @VisibleForTesting protected static class StateMapEntry implements StateEntry { /** * The key. Assumed to be immumap and not null. */ @Nonnull final K key; /** * The namespace. Assumed to be immumap and not null. */ @Nonnull final N namespace; /** * The state. This is not final to allow exchanging the object for copy-on-write. Can be null. */ @Nullable S state; /** * Link to another {@link StateMapEntry}. This is used to resolve collisions in the * {@link CopyOnWriteStateMap} through chaining. */ @Nullable StateMapEntry next; /** * The version of this {@link StateMapEntry}. This is meta data for copy-on-write of the map structure. */ int entryVersion; /** * The version of the state object in this entry. This is meta data for copy-on-write of the state object itself. */ int stateVersion; /** * The computed secondary hash for the composite of key and namespace. */ final int hash; StateMapEntry(StateMapEntry other, int entryVersion) { this(other.key, other.namespace, other.state, other.hash, other.next, entryVersion, other.stateVersion); } StateMapEntry( @Nonnull K key, @Nonnull N namespace, @Nullable S state, int hash, @Nullable StateMapEntry next, int entryVersion, int stateVersion) { this.key = key; this.namespace = namespace; this.hash = hash; this.next = next; this.entryVersion = entryVersion; this.state = state; this.stateVersion = stateVersion; } public final void setState(@Nullable S value, int mapVersion) { // naturally, we can update the state version every time we replace the old state with a different object if (value != state) { this.state = value; this.stateVersion = mapVersion; } } @Nonnull @Override public K getKey() { return key; } @Nonnull @Override public N getNamespace() { return namespace; } @Nullable @Override public S getState() { return state; } @Override public final boolean equals(Object o) { if (!(o instanceof CopyOnWriteStateMap.StateMapEntry)) { return false; } StateEntry e = (StateEntry) o; return e.getKey().equals(key) && e.getNamespace().equals(namespace) && Objects.equals(e.getState(), state); } @Override public final int hashCode() { return (key.hashCode() ^ namespace.hashCode()) ^ Objects.hashCode(state); } @Override public final String toString() { return "(" + key + "|" + namespace + ")=" + state; } } // For testing ---------------------------------------------------------------------------------------------------- @Override public int sizeOfNamespace(Object namespace) { int count = 0; for (StateEntry entry : this) { if (null != entry && namespace.equals(entry.getNamespace())) { ++count; } } return count; } // StateEntryIterator --------------------------------------------------------------------------------------------- @Override public InternalKvState.StateIncrementalVisitor getStateIncrementalVisitor(int recommendedMaxNumberOfReturnedRecords) { return new StateIncrementalVisitorImpl(recommendedMaxNumberOfReturnedRecords); } /** * Iterator over state entry chains in a {@link CopyOnWriteStateMap}. */ class StateEntryChainIterator implements Iterator> { StateMapEntry[] activeTable; private int nextMapPosition; private final int maxTraversedMapPositions; StateEntryChainIterator() { this(Integer.MAX_VALUE); } StateEntryChainIterator(int maxTraversedMapPositions) { this.maxTraversedMapPositions = maxTraversedMapPositions; this.activeTable = primaryTable; this.nextMapPosition = 0; } @Override public boolean hasNext() { return size() > 0 && (nextMapPosition < activeTable.length || activeTable == primaryTable); } @Override public StateMapEntry next() { StateMapEntry next; // consider both sub-tables to cover the case of rehash while (true) { // current is empty // try get next in active table or // iteration is done over primary and rehash table // or primary was swapped with rehash when rehash is done next = nextActiveMapPosition(); if (next != null || nextMapPosition < activeTable.length || activeTable == incrementalRehashTable || activeTable != primaryTable) { return next; } else { // switch to rehash (empty if no rehash) activeTable = incrementalRehashTable; nextMapPosition = 0; } } } private StateMapEntry nextActiveMapPosition() { StateMapEntry[] tab = activeTable; int traversedPositions = 0; while (nextMapPosition < tab.length && traversedPositions < maxTraversedMapPositions) { StateMapEntry next = tab[nextMapPosition++]; if (next != null) { return next; } traversedPositions++; } return null; } } /** * Iterator over state entries in a {@link CopyOnWriteStateMap} which does not tolerate concurrent modifications. */ class StateEntryIterator implements Iterator> { private final StateEntryChainIterator chainIterator; private StateMapEntry nextEntry; private final int expectedModCount; StateEntryIterator() { this.chainIterator = new StateEntryChainIterator(); this.expectedModCount = modCount; this.nextEntry = getBootstrapEntry(); advanceIterator(); } @Override public boolean hasNext() { return nextEntry != null; } @Override public StateEntry next() { if (modCount != expectedModCount) { throw new ConcurrentModificationException(); } if (!hasNext()) { throw new NoSuchElementException(); } return advanceIterator(); } StateMapEntry advanceIterator() { StateMapEntry entryToReturn = nextEntry; StateMapEntry next = nextEntry.next; if (next == null) { next = chainIterator.next(); } nextEntry = next; return entryToReturn; } } /** * Incremental visitor over state entries in a {@link CopyOnWriteStateMap}. */ class StateIncrementalVisitorImpl implements InternalKvState.StateIncrementalVisitor { private final StateEntryChainIterator chainIterator; private final Collection> chainToReturn = new ArrayList<>(5); StateIncrementalVisitorImpl(int recommendedMaxNumberOfReturnedRecords) { chainIterator = new StateEntryChainIterator(recommendedMaxNumberOfReturnedRecords); } @Override public boolean hasNext() { return chainIterator.hasNext(); } @Override public Collection> nextEntries() { if (!hasNext()) { return null; } chainToReturn.clear(); for (StateMapEntry nextEntry = chainIterator.next(); nextEntry != null; nextEntry = nextEntry.next) { chainToReturn.add(nextEntry); } return chainToReturn; } @Override public void remove(StateEntry stateEntry) { CopyOnWriteStateMap.this.remove(stateEntry.getKey(), stateEntry.getNamespace()); } @Override public void update(StateEntry stateEntry, S newValue) { CopyOnWriteStateMap.this.put(stateEntry.getKey(), stateEntry.getNamespace(), newValue); } } }