/*
* Copyright (C) The SmoothieMap Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.timeandspace.smoothie;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import com.google.errorprone.annotations.DoNotCall;
import io.timeandspace.collect.Equivalence;
import io.timeandspace.collect.ObjCollection;
import io.timeandspace.collect.map.KeyValue;
import io.timeandspace.collect.map.ObjObjMap;
import io.timeandspace.collect.ObjSet;
import io.timeandspace.smoothie.InterleavedSegments.FullCapacitySegment;
import io.timeandspace.smoothie.InterleavedSegments.IntermediateCapacitySegment;
import org.checkerframework.checker.index.qual.NonNegative;
import org.checkerframework.checker.index.qual.Positive;
import org.checkerframework.checker.nullness.qual.EnsuresNonNull;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.checkerframework.common.value.qual.IntRange;
import org.jetbrains.annotations.Contract;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.BiPredicate;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import static io.timeandspace.smoothie.BitSetAndState.SEGMENT_ORDER_UNIT;
import static io.timeandspace.smoothie.BitSetAndState.allocCapacity;
import static io.timeandspace.smoothie.BitSetAndState.clearAllocBit;
import static io.timeandspace.smoothie.BitSetAndState.clearBitSet;
import static io.timeandspace.smoothie.BitSetAndState.extractBitSetForIteration;
import static io.timeandspace.smoothie.BitSetAndState.freeAllocIndexClosestTo;
import static io.timeandspace.smoothie.BitSetAndState.incrementSegmentOrder;
import static io.timeandspace.smoothie.BitSetAndState.isBulkOperationPlaceholderBitSetAndState;
import static io.timeandspace.smoothie.BitSetAndState.isFullCapacity;
import static io.timeandspace.smoothie.BitSetAndState.isInflatedBitSetAndState;
import static io.timeandspace.smoothie.BitSetAndState.lowestFreeAllocIndex;
import static io.timeandspace.smoothie.BitSetAndState.makeBitSetAndStateForPrivatelyPopulatedContinuousSegment;
import static io.timeandspace.smoothie.BitSetAndState.makeBulkOperationPlaceholderBitSetAndState;
import static io.timeandspace.smoothie.BitSetAndState.makeInflatedBitSetAndState;
import static io.timeandspace.smoothie.BitSetAndState.makeNewBitSetAndState;
import static io.timeandspace.smoothie.BitSetAndState.segmentOrder;
import static io.timeandspace.smoothie.BitSetAndState.segmentSize;
import static io.timeandspace.smoothie.BitSetAndState.setAllocBit;
import static io.timeandspace.smoothie.BitSetAndState.setLowestAllocBit;
import static io.timeandspace.smoothie.InterleavedSegment_BitSetAndStateArea.getBitSetAndState;
import static io.timeandspace.smoothie.InterleavedSegment_BitSetAndStateArea.setBitSetAndState;
import static io.timeandspace.smoothie.HashTable.HASH_TABLE_GROUPS_MASK;
import static io.timeandspace.smoothie.HashTable.matchFull;
import static io.timeandspace.smoothie.HashTable.setSlotEmpty;
import static io.timeandspace.smoothie.InterleavedSegments.allocIndexBoundaryForLocalAllocation;
import static io.timeandspace.smoothie.InterleavedSegments.allocOffset;
import static io.timeandspace.smoothie.InterleavedSegments.allocateNewSegmentWithoutSettingBitSetAndSet;
import static io.timeandspace.smoothie.InterleavedSegments.createNewSegment;
import static io.timeandspace.smoothie.InterleavedSegments.grow;
import static io.timeandspace.smoothie.InterleavedSegments.dataGroupFromTagGroupOffset;
import static io.timeandspace.smoothie.InterleavedSegments.dataGroupOffset;
import static io.timeandspace.smoothie.InterleavedSegments.tagGroupOffset;
import static io.timeandspace.smoothie.InterleavedSegments.readDataGroupAtOffset;
import static io.timeandspace.smoothie.InterleavedSegments.readTagGroupAtOffset;
import static io.timeandspace.smoothie.InterleavedSegments.writeDataGroupAtOffset;
import static io.timeandspace.smoothie.InterleavedSegments.writeTagAndData;
import static io.timeandspace.smoothie.HashTable.INFLATED_SEGMENT__MARKER_DATA_GROUP;
import static io.timeandspace.smoothie.HashTable.matchEmpty;
import static io.timeandspace.smoothie.HashTable.addGroupIndex;
import static io.timeandspace.smoothie.HashTable.extractAllocIndex;
import static io.timeandspace.smoothie.HashTable.firstAllocIndex;
import static io.timeandspace.smoothie.HashTable.lowestMatchingSlotIndex;
import static io.timeandspace.smoothie.HashTable.HASH_TABLE_GROUPS;
import static io.timeandspace.smoothie.HashTable.HASH_TABLE_SLOTS;
import static io.timeandspace.smoothie.HashTable.baseGroupIndex;
import static io.timeandspace.smoothie.HashTable.makeData;
import static io.timeandspace.smoothie.HashTable.match;
import static io.timeandspace.smoothie.HashTable.shouldStopProbing;
import static io.timeandspace.smoothie.InflatedSegmentQueryContext.COMPUTE_IF_PRESENT_ENTRY_REMOVED;
import static io.timeandspace.smoothie.InflatedSegmentQueryContext.Node;
import static io.timeandspace.smoothie.LongMath.clearLowestNBits;
import static io.timeandspace.smoothie.LongMath.clearLowestSetBit;
import static io.timeandspace.smoothie.ObjectSize.classSizeInBytes;
import static io.timeandspace.smoothie.ObjectSize.hashMapSizeInBytes;
import static io.timeandspace.smoothie.ObjectSize.objectSizeInBytes;
import static io.timeandspace.smoothie.OutboundOverflowCounts.addOutboundOverflowCountsPerGroup;
import static io.timeandspace.smoothie.OutboundOverflowCounts.computeOutboundOverflowCount_perGroupChanges;
import static io.timeandspace.smoothie.OutboundOverflowCounts.decrementOutboundOverflowCountsPerGroup;
import static io.timeandspace.smoothie.OutboundOverflowCounts.incrementOutboundOverflowCountsPerGroup;
import static io.timeandspace.smoothie.OutboundOverflowCounts.outboundOverflowCount_groupForChange;
import static io.timeandspace.smoothie.OutboundOverflowCounts.outboundOverflowCount_markGroupForChange;
import static io.timeandspace.smoothie.OutboundOverflowCounts.subtractOutboundOverflowCountsPerGroupAndUpdateAllGroups;
import static io.timeandspace.smoothie.IsFullCapacitySegmentBitSet.bitSetArrayLengthFromSegmentsArrayLength;
import static io.timeandspace.smoothie.Segments.valueOffsetFromAllocOffset;
import static io.timeandspace.smoothie.SmoothieMap.Segment.HASH__BASE_GROUP_INDEX_BITS;
import static io.timeandspace.smoothie.SmoothieMap.Segment.TAG_HASH_BITS;
import static io.timeandspace.smoothie.SmoothieMap.Segment.MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING;
import static io.timeandspace.smoothie.SmoothieMap.Segment.checkAllocIndex;
import static io.timeandspace.smoothie.SmoothieMap.Segment.eraseKeyAndValueAtOffset;
import static io.timeandspace.smoothie.SmoothieMap.Segment.tagBits;
import static io.timeandspace.smoothie.SmoothieMap.Segment.readKeyAtOffset;
import static io.timeandspace.smoothie.SmoothieMap.Segment.readKeyCheckedAtIndex;
import static io.timeandspace.smoothie.SmoothieMap.Segment.readValueAtOffset;
import static io.timeandspace.smoothie.SmoothieMap.Segment.readValueCheckedAtIndex;
import static io.timeandspace.smoothie.SmoothieMap.Segment.writeEntry;
import static io.timeandspace.smoothie.SmoothieMap.Segment.writeValueAtOffset;
import static io.timeandspace.smoothie.UnsafeUtils.ARRAY_INT_BASE_OFFSET_AS_LONG;
import static io.timeandspace.smoothie.UnsafeUtils.ARRAY_INT_INDEX_SCALE_AS_LONG;
import static io.timeandspace.smoothie.UnsafeUtils.ARRAY_OBJECT_BASE_OFFSET_AS_LONG;
import static io.timeandspace.smoothie.UnsafeUtils.ARRAY_OBJECT_INDEX_SHIFT;
import static io.timeandspace.smoothie.UnsafeUtils.U;
import static io.timeandspace.smoothie.UnsafeUtils.acquireFence;
import static io.timeandspace.smoothie.UnsafeUtils.getFieldOffset;
import static io.timeandspace.smoothie.UnsafeUtils.storeStoreFence;
import static io.timeandspace.smoothie.Utils.checkNonNull;
import static io.timeandspace.smoothie.Utils.duplicateArray;
import static io.timeandspace.smoothie.Utils.nonNullOrThrowCme;
import static io.timeandspace.smoothie.Utils.verifyEqual;
import static io.timeandspace.smoothie.Utils.verifyIsPowerOfTwo;
import static io.timeandspace.smoothie.Utils.verifyNonNull;
import static io.timeandspace.smoothie.Utils.verifyThat;
import static java.lang.Integer.numberOfTrailingZeros;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static sun.misc.Unsafe.ARRAY_OBJECT_INDEX_SCALE;
/**
* Unordered {@code Map} with worst {@link #put(Object, Object) put} latencies more than 100 times
* smaller than in ordinary hash table implementations like {@link HashMap} and very low footprint
* per entry. SmoothieMap may also operate in the {@linkplain OptimizationObjective#LOW_GARBAGE
* "low-garbage"} or the {@linkplain OptimizationObjective#FOOTPRINT "footprint"} modes.
*
* SmoothieMap is created using a builder: {@link #newBuilder()
* SmoothieMap.newBuilder().build()}. See possible configurations in the documentation for {@link
* SmoothieMapBuilder}.
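*
* <p>For example (an illustrative sketch of the no-configuration path mentioned above; see {@link
* SmoothieMapBuilder} for the available configurations):
* <pre>{@code
* SmoothieMap<String, Integer> map = SmoothieMap.<String, Integer>newBuilder().build();
* map.put("one", 1);
* }</pre>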
*
*
* <p>Unlike {@link HashMap}, but like {@link java.util.concurrent.ConcurrentHashMap}, {@code
* Map.of()} immutable Maps, and Guava's ImmutableMaps, SmoothieMap does not support null keys
* and values. An attempt to put a null key or value, or to query a null key or value (e. g. via
* {@link #get(Object) get(null)}), leads to a {@link NullPointerException}.
*
*
* <p>{@code SmoothieMap} supports pluggable key and value equivalences, which can be
* configured in the builder via the {@link SmoothieMapBuilder#keyEquivalence(Equivalence)} and
* {@link SmoothieMapBuilder#valueEquivalence(Equivalence)} methods.
*
*
* <p>Functional additions to the {@code Map} interface implemented by SmoothieMap are described
* in the documentation for the {@link ObjObjMap} interface. SmoothieMap also provides {@link
* #sizeInBytes()} to track the footprint of the map.
*
*
* <p>Note that this implementation is not synchronized. If multiple threads access a
* {@code SmoothieMap} concurrently, and at least one of the threads modifies the map structurally,
* it must be synchronized externally. (A structural modification is any operation that adds
* or deletes one or more mappings; merely changing the value associated with a key that an instance
* already contains is not a structural modification.) This is typically accomplished by
* synchronizing on some object that naturally encapsulates the map.
*
*
* <p>If no such object exists, the map should be "wrapped" using the {@link
* Collections#synchronizedMap Collections.synchronizedMap} method. This is best done at creation
* time, to prevent accidental unsynchronized access to the map:
* <pre>{@code
* Map m = Collections.synchronizedMap(smoothieMap);
* }</pre>
*
* <p>In terms of performance, prefer calling bulk methods like {@link #forEach(BiConsumer)} over
* iterating the {@code SmoothieMap} via {@code Iterator}, including for-each style iteration over
* the map's collection views. In particular, if you need to remove entries during iteration (i. e.
* call {@link Iterator#remove()}) and the hash codes of the key objects are not cached on their
* side (as they are cached, for example, in the {@link String} class), try to express your logic
* using the {@link #removeIf(BiPredicate)} method instead.
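*
* <p>For example (an illustrative sketch; {@code shouldRemove} stands for an arbitrary,
* hypothetical key-value predicate):
* <pre>{@code
* // Preferred over iterating entrySet() and calling Iterator.remove():
* map.removeIf((key, value) -> shouldRemove(key, value));
* }</pre>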
*
* @param <K> the type of keys maintained by this map
* @param <V> the type of mapped values
*
* @author Roman Leventov
*/
public class SmoothieMap<K, V> implements ObjObjMap<K, V> {
private static final long SIZE_IN_BYTES = classSizeInBytes(SmoothieMap.class);
private static final long KEY_SET__SIZE_IN_BYTES = classSizeInBytes(SmoothieMap.KeySet.class);
private static final long VALUES__SIZE_IN_BYTES = classSizeInBytes(SmoothieMap.Values.class);
private static final long ENTRY_SET__SIZE_IN_BYTES =
classSizeInBytes(SmoothieMap.EntrySet.class);
/**
* If these values are changed the documentation for {@link
* SmoothieMapBuilder#reportPoorHashCodeDistribution} should be updated.
*/
static final double POOR_HASH_CODE_DISTRIB__BENIGN_OCCASION__MAX_PROB__MAX = 0.2;
static final double POOR_HASH_CODE_DISTRIB__BENIGN_OCCASION__MAX_PROB__MIN = 0.00001;
/**
* Creates a new {@link SmoothieMapBuilder}.
*
* @param <K> the type of keys in SmoothieMap(s) to be created
* @param <V> the type of values in SmoothieMap(s) to be created
* @return a new {@link SmoothieMapBuilder}
*/
@Contract(value = " -> new", pure = true)
public static <K, V> SmoothieMapBuilder<K, V> newBuilder() {
return SmoothieMapBuilder.create();
}
/**
* {@link #segmentsArray} is always power-of-two sized. An array in Java cannot have length
* 2^31 because that is greater than {@link Integer#MAX_VALUE}, so 30 is the maximum order.
*/
static final int MAX_SEGMENTS_ARRAY_ORDER = 30;
private static final int MAX_SEGMENTS_ARRAY_LENGTH = 1 << MAX_SEGMENTS_ARRAY_ORDER;
static final int SEGMENT_MAX_ALLOC_CAPACITY = 48;
/** {@link #SEGMENT_MAX_ALLOC_CAPACITY} = 48 = 16 * 3 */
static final int MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE = 16;
@CompileTimeConstant
static final int MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE_DIVISION_SHIFT = 4;
static {
verifyIsPowerOfTwo(MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE, "");
verifyEqual(MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE_DIVISION_SHIFT,
numberOfTrailingZeros(MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE));
}
/**
* Using 30 for {@link ContinuousSegments} because statistically it leads to lower expected
* SmoothieMap's memory footprint (if {@link #splitBetweenTwoNewSegments} is false; if
* {@link #splitBetweenTwoNewSegments}, the result may be different, e. g. 32 or 34). Using 32
* for {@link InterleavedSegments} because it's only very marginally worse than 30, but {@link
* IntermediateCapacitySegment} can be fully symmetric with 4 allocation slots surrounding each
* hash table group. Also, the probability of calling {@link
* FullCapacitySegment#swapContentsDuringSplit} is lower
* (see [Swap segments] in {@link #doSplit}) which is good because {@link
* FullCapacitySegment#swapContentsDuringSplit} is relatively
* more expensive than {@link
* ContinuousSegments.SegmentBase#swapContentsDuringSplit} and allocates.
*
* TODO recompute and provide here exact expected memory footprint of a SmoothieMap in case of
* intermediate capacity = 30 and 32
*/
static final int SEGMENT_INTERMEDIATE_ALLOC_CAPACITY = 32;
/**
* The probability of [Swap segments] after the [fromSegment iteration] loop is
* 1 - CDF[BinomialDistribution[48, 0.5],
* MIN_ENTRIES_IN_INTERMEDIATE_CAPACITY_SEGMENT_AFTER_SPLIT_FOR_SWAPPING - 1]
* = ~ 23.5% (Continuous segments)
* = ~ 9.7% (Interleaved segments)
*
* It makes {@link ContinuousSegments.SegmentBase#swapContentsDuringSplit} (which is called
* within [Swap segments]) not actually be {@link RarelyCalledAmortizedPerSegment} because
* {@link RarelyCalledAmortizedPerSegment} requires the probability of being called per split
* to be less than 10%.
*
* The value is different for Continuous segments both because {@link
* #SEGMENT_INTERMEDIATE_ALLOC_CAPACITY} is less than for Interleaved segments (30 vs. 32) and
* because {@link ContinuousSegments.SegmentBase#swapContentsDuringSplit} is cheaper than {@link
* InterleavedSegments#swapContentsDuringSplit} so it should be more tolerable for it to be
* called more frequently.
*/
private static final int MIN_ENTRIES_IN_INTERMEDIATE_CAPACITY_SEGMENT_AFTER_SPLIT_FOR_SWAPPING =
29;
private static final long FULL_CAPACITY_SEGMENT_SIZE_IN_BYTES =
objectSizeInBytes(createNewSegment(SEGMENT_MAX_ALLOC_CAPACITY, 0));
private static final long INTERMEDIATE_CAPACITY_SEGMENT_SIZE_IN_BYTES =
objectSizeInBytes(createNewSegment(SEGMENT_INTERMEDIATE_ALLOC_CAPACITY, 0));
private static final long
MAP_AVERAGE_SEGMENTS_SATURATION_SEGMENT_CAPACITY_POWER_OF_TWO_COMPONENTS =
((long) SEGMENT_MAX_ALLOC_CAPACITY / MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE) *
MAX_SEGMENTS_ARRAY_LENGTH;
/**
* Returns the minimum of {@link #MAX_SEGMENTS_ARRAY_ORDER} and
* log2(ceilingPowerOfTwo(divideCeiling(size, {@link Segment#SEGMENT_MAX_ALLOC_CAPACITY}))).
* The given size must be positive.
*/
static int doComputeAverageSegmentOrder(long size) {
assert size > 0;
// The implementation of this method aims to avoid integral division and branches. The idea
// is that instead of dividing the size by SEGMENT_MAX_ALLOC_CAPACITY = 48, the size is
// first divided by 16 (that is replaced with right shift) and then additionally by 3, that
// is replaceable with multiplication and right shift too (a bit twiddling hack).
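// For example (illustrative numbers, not part of the original comment): for size = 1000,
// saturatedSize = 1047, 1047 >>> 4 = 65, 65 / 3 = 21 = divideCeiling(1000, 48), and the
// method returns 32 - numberOfLeadingZeros(21 - 1) = 5 = log2(ceilingPowerOfTwo(21)).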
// saturatedSize is needed for proper ceiling division by SEGMENT_MAX_ALLOC_CAPACITY.
long saturatedSize = size + SEGMENT_MAX_ALLOC_CAPACITY - 1;
// TODO simplify and move MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE_DIVISION_SHIFT
// inside the division, i. e.
// `>>> (33 + MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE_DIVISION_SHIFT)`
long segmentCapacityPowerOfTwoComponents = min(
// [Replacing division with shift]
saturatedSize >>> MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE_DIVISION_SHIFT,
MAP_AVERAGE_SEGMENTS_SATURATION_SEGMENT_CAPACITY_POWER_OF_TWO_COMPONENTS
);
// The following line is an obscure form of
// `int averageSegments = (int) (segmentCapacityPowerOfTwoComponents / 3);`, where 3 is
// the constant equal to SEGMENT_MAX_ALLOC_CAPACITY /
// MAX_ALLOC_CAPACITY_POWER_OF_TWO_COMPONENT_SIZE.
// The following magic constants work only if the argument is between 0 and 2^31 + 2^30,
// i. e. assuming that
// MAP_AVERAGE_SEGMENTS_SATURATION_SEGMENT_CAPACITY_POWER_OF_TWO_COMPONENTS = 3 * 2^30 =
// 2^31 + 2^30.
//
// Similar replacement of division is done in InterleavedSegments' allocOffset methods.
int averageSegments = (int) ((segmentCapacityPowerOfTwoComponents * 2863311531L) >>> 33);
return Integer.SIZE - Integer.numberOfLeadingZeros(averageSegments - 1);
}
/**
* The order of any segment must not become more than {@link #computeAverageSegmentOrder(long)}
* plus this value, nor more than {@link #segmentsArray}'s order. If a segment has the maximum
* allowed order and its size exceeds {@link #SEGMENT_MAX_ALLOC_CAPACITY}, it is inflated
* instead of being split (see {@link #makeSpaceAndInsert}).
*
* This constant's value of 1 formalizes the "{@link #segmentsArray} may be doubled, but not
* quadrupled above the average segments" principle, described in the Javadoc comment for {@link
* InflatedSegmentQueryContext}.
*/
static final int MAX_SEGMENT_ORDER_DIFFERENCE_FROM_AVERAGE = 1;
static int maxSplittableSegmentOrder(int averageSegmentOrder) {
// Since MAX_SEGMENT_ORDER_DIFFERENCE_FROM_AVERAGE = 1, this statement is currently
// logically equivalent to `return averageSegmentOrder`.
return averageSegmentOrder + MAX_SEGMENT_ORDER_DIFFERENCE_FROM_AVERAGE - 1;
}
private static class SegmentsArrayLengthAndNumSegments {
final int segmentsArrayLength;
final int numSegments;
private SegmentsArrayLengthAndNumSegments(int segmentsArrayLength, int numSegments) {
this.segmentsArrayLength = segmentsArrayLength;
this.numSegments = numSegments;
}
}
/**
* @return a power of 2 number of segments
*/
private static SegmentsArrayLengthAndNumSegments chooseInitialSegmentsArrayLength(
SmoothieMapBuilder<?, ?> builder) {
long minPeakSize = builder.minPeakSize();
if (minPeakSize == SmoothieMapBuilder.UNKNOWN_SIZE) {
return new SegmentsArrayLengthAndNumSegments(1, 1);
}
return chooseInitialSegmentsArrayLengthInternal(minPeakSize);
}
private static SegmentsArrayLengthAndNumSegments chooseInitialSegmentsArrayLengthInternal(
long minPeakSize) {
verifyThat(minPeakSize >= 0);
if (minPeakSize <= SEGMENT_MAX_ALLOC_CAPACITY) {
return new SegmentsArrayLengthAndNumSegments(1, 1);
}
if (minPeakSize <= 2 * SEGMENT_MAX_ALLOC_CAPACITY) {
return new SegmentsArrayLengthAndNumSegments(2, 2);
}
// TODO something more smart. For example, when minPeakSize / SEGMENT_MAX_ALLOC_CAPACITY
// is just over a power of two N, it's better to choose initialNumSegments = N / 2,
// initialSegmentsArrayLength = N * 2.
int initialNumSegments = (int) Math.min(MAX_SEGMENTS_ARRAY_LENGTH,
LongMath.floorPowerOfTwo(minPeakSize / SEGMENT_MAX_ALLOC_CAPACITY));
int initialSegmentsArrayLength = (int) Math.min(MAX_SEGMENTS_ARRAY_LENGTH,
((long) initialNumSegments) * 2L);
return new SegmentsArrayLengthAndNumSegments(
initialSegmentsArrayLength, initialNumSegments);
}
// See MathDecisions in test/
static final int MIN_ROUNDED_UP_AVERAGE_ENTRIES_PER_SEGMENT = 32;
static final int MAX_ROUNDED_UP_AVERAGE_ENTRIES_PER_SEGMENT = 63;
// See MathDecisions in test/
private static final long[] SEGMENTS_QUADRUPLING_FROM_REF_SIZE_4 = {
17237966, 20085926, 23461869, 27467051, 32222765, 101478192, 118705641, 139126526,
163353618, 192120413, 226305341, 266960817, 825529841, 971366784, 1144556172,
1350385115, 1595184608, 1886539115, 2233536926L, 2647074163L, 1244014982, 598555262,
294588684, 148182403, 76120369, 39902677, 21329967, 11619067, 6445637, 3639219,
2089996, 1220217,
};
private static final long[] SEGMENTS_QUADRUPLING_FROM_REF_SIZE_8 = {
6333006, 7437876, 8753429, 10321069, 12190537, 37874373, 44596145, 52597103, 62128040,
73490002, 87044486, 266960817, 315348276, 372980187, 441670897, 523597560, 621373710,
738137712, 2233536926L, 2647074163L, 1244014982, 598555262, 294588684, 148182403,
76120369, 39902677, 21329967, 11619067, 6445637, 3639219, 2089996, 1220217,
};
private static final long[] SEGMENTS_QUADRUPLING_FROM = ARRAY_OBJECT_INDEX_SCALE == 4 ?
SEGMENTS_QUADRUPLING_FROM_REF_SIZE_4 : SEGMENTS_QUADRUPLING_FROM_REF_SIZE_8;
/**
* @return 0 - default, 1 - doubling, 2 - quadrupling
*/
private static int chooseUpFrontScale(long expectedSize, int segments) {
// if only one segment, no possibility to "skew" assuming given expectedSize is precise
if (segments == 1)
return 0;
int roundedUpAverageEntriesPerSegment =
max((int) roundedUpDivide(expectedSize, segments),
MIN_ROUNDED_UP_AVERAGE_ENTRIES_PER_SEGMENT);
assert roundedUpAverageEntriesPerSegment <= MAX_ROUNDED_UP_AVERAGE_ENTRIES_PER_SEGMENT;
int indexInSegmentsQuadruplingFromArray =
roundedUpAverageEntriesPerSegment - MIN_ROUNDED_UP_AVERAGE_ENTRIES_PER_SEGMENT;
if (segments * 4L <= MAX_SEGMENTS_ARRAY_LENGTH &&
indexInSegmentsQuadruplingFromArray < SEGMENTS_QUADRUPLING_FROM.length &&
segments >= SEGMENTS_QUADRUPLING_FROM[indexInSegmentsQuadruplingFromArray]) {
return 2; // quadrupling
} else {
if (segments * 2L <= MAX_SEGMENTS_ARRAY_LENGTH) {
return 1; // doubling
} else {
return 0;
}
}
}
private static long roundedUpDivide(long dividend, long divisor) {
return (dividend + divisor - 1) / divisor;
}
// See MathDecisions in test/
private static final byte[] ALLOC_CAPACITIES_REF_SIZE_4 = {
42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 61, 62, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
};
private static final byte[] ALLOC_CAPACITIES_REF_SIZE_8 = {
41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 62, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
};
private static final byte[] ALLOC_CAPACITIES = ARRAY_OBJECT_INDEX_SCALE == 4 ?
ALLOC_CAPACITIES_REF_SIZE_4 : ALLOC_CAPACITIES_REF_SIZE_8;
private static int chooseAllocCapacity(long expectedSize, int segments) {
int averageEntriesPerSegment = max((int) roundedUpDivide(expectedSize, segments),
MIN_ROUNDED_UP_AVERAGE_ENTRIES_PER_SEGMENT);
return ALLOC_CAPACITIES[
averageEntriesPerSegment - MIN_ROUNDED_UP_AVERAGE_ENTRIES_PER_SEGMENT];
}
private static int order(int numSegments) {
verifyIsPowerOfTwo(numSegments, "num segments");
return numberOfTrailingZeros(numSegments);
}
/**
* The number of the lowest bit in hash codes that is used (along with all the higher bits) to
* locate a segment (in {@link #segmentsArray}) for a key (see {@link #segmentLookupBits} and
* {@link #segmentBySegmentLookupBits}).
*
* The lowest {@link Segment#HASH__BASE_GROUP_INDEX_BITS} bits are used to locate the
* first lookup group within a segment, the following {@link Segment#TAG_HASH_BITS} bits are
* stored in the tag groups.
*/
@CompileTimeConstant
static final int HASH__SEGMENT_LOOKUP_SHIFT = HASH__BASE_GROUP_INDEX_BITS + TAG_HASH_BITS;
/**
* The number of bits to shift the (masked) hash to the right to obtain the byte offset in {@link
* #segmentsArray}.
*/
private static final int HASH__SEGMENT_ARRAY_OFFSET_SHIFT =
HASH__SEGMENT_LOOKUP_SHIFT - ARRAY_OBJECT_INDEX_SHIFT;
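// An illustrative note (not from the original source): with compressed oops,
// ARRAY_OBJECT_INDEX_SHIFT = 2, so (hash & segmentLookupMask) >>> HASH__SEGMENT_ARRAY_OFFSET_SHIFT
// equals segmentIndex << 2, i. e. the byte offset of the segment reference from the array base,
// as it is used in segmentBySegmentLookupBits().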
private static final
AtomicIntegerFieldUpdater<SmoothieMap> SEGMENT_STRUCTURE_MODIFICATION_STAMP_UPDATER =
AtomicIntegerFieldUpdater.newUpdater(
SmoothieMap.class, "segmentStructureModStamp");
private static final long MOD_COUNT_FIELD_OFFSET;
static {
MOD_COUNT_FIELD_OFFSET = getFieldOffset(SmoothieMap.class, "modCount");
}
/**
* See https://probablydance.com/2018/06/16/fibonacci-
* hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
* for an explanation of why this constant is good.
*/
static final long LONG_PHI_MAGIC = -7046029254386353131L;
/**
* This field is a {@link java.util.concurrent.locks.StampedLock}-inspired stamp which protects
* a SmoothieMap from concurrent modifications to the segment structure (in {@link
* #growSegmentsArray} and {@link #replaceInSegmentsArray}) which may lead to wrong memory
* access and corruption or a JVM crash.
*
* This field is used via {@link #SEGMENT_STRUCTURE_MODIFICATION_STAMP_UPDATER}.
*
* Unlike {@link java.util.concurrent.locks.StampedLock} where "locked" stamps are odd and
* unlocked stamps are even, a locked segmentStructureModStamp is negative and an unlocked
* segmentStructureModStamp is positive. This is done to allow {@link
* #acquireSegmentStructureModStamp()} called in hot methods while
* [Reading consistent segment and isFullCapacitySegment values] to compile in fewer machine
* instructions (or fused uops; see je/jz/jne).
*/
private volatile int segmentStructureModStamp = 0;
private volatile long segmentLookupMask;
@Nullable Object segmentsArray;
private long size;
private int modCount;
@MonotonicNonNull InflatedSegmentQueryContext inflatedSegmentQueryContext;
/**
* The last value returned from a {@link #doComputeAverageSegmentOrder} call. Updated
* transparently in {@link #computeAverageSegmentOrder}. The average segment order is computed
* and used only in the contexts related to SmoothieMap's growth, namely {@link #doSplit}
* (triggered when a segment grows too large) and {@link #splitInflated} (triggered when the
* average segment order grows large enough for an inflated segment to not be considered outlier
* anymore). It means that if no entries are inserted into a SmoothieMap or more entries are
* deleted from a SmoothieMap than inserted the value stored in averageSegmentOrder_lastComputed
* could become stale, much larger than the actual average segment order. It's updated when a
* SmoothieMap starts to grow again (in the next {@link #doSplit} call), so there shouldn't be
* any "high watermark" effects, unless entries are inserted into a SmoothieMap in an artificial
* order, for example, making all insertions to fall into already inflated segments, while
* removals happen from ordinary segments. See also the comment for {@link
* InflatedSegment#shouldBeSplit}, and the comments inside that method.
*/
byte averageSegmentOrder_lastComputed;
/** Mirror field: {@link SmoothieMapBuilder#allocateIntermediateSegments}. */
private boolean allocateIntermediateSegments;
/**
* If this flag is true then during {@link #splitAndInsert} instead of moving half (on average)
* of the entries from the old segment into a newly allocated segment, we allocate two new
* intermediate-capacity segments which allows extra memory savings at the cost of higher memory
* churn during the growth of the SmoothieMap.
*
* Mirror field: {@link SmoothieMapBuilder#splitBetweenTwoNewSegments}.
*/
private boolean splitBetweenTwoNewSegments;
/**
* An int[] array which is an {@link IsFullCapacitySegmentBitSet}. Value 1 at any index in
* the bit set indicates that the segment at the same index in {@link #segmentsArray} is a
* {@link FullCapacitySegment}; value 0 indicates an {@link IntermediateCapacitySegment}.
*
* The type of this field is Object rather than int[] to avoid class checks when
* [Avoid normal array access].
*
* No scalarization of this field is applied when the number of segments in the SmoothieMap is
* small (like it is done for {@link #segmentCountsByOrder}) because {@link
* IsFullCapacitySegmentBitSet#getValue} is on the hot point access path via {@link
* #isFullCapacitySegment} and it must be branchless.
*
* This field might be set to null if {@link #allocateIntermediateSegments} is false for a
* SmoothieMap. This would reduce SmoothieMap's memory footprint by a little and the number of
* memory accesses on the read path (which are likely to be in L1). On the other hand, it would
* require an extra null check on the read path and a branch which would have a potential to
* be unpredictable if there are several frequently used SmoothieMaps in the JVM with different
* {@link #allocateIntermediateSegments} setting. TODO compare the approaches
*/
private Object isFullCapacitySegmentBitSet;
/** Mirror field: {@link SmoothieMapBuilder#doShrink}. */
private boolean doShrink;
private @MonotonicNonNull ObjSet<K> keySet;
private @MonotonicNonNull Collection<V> values;
private @MonotonicNonNull ObjSet<Map.Entry<K, V>> entrySet;
/**
* Creates a new, empty {@code SmoothieMap}.
*/
SmoothieMap(SmoothieMapBuilder<K, V> builder) {
this.allocateIntermediateSegments = builder.allocateIntermediateCapacitySegments();
this.splitBetweenTwoNewSegments = builder.splitBetweenTwoNewSegments();
this.doShrink = builder.doShrink();
SegmentsArrayLengthAndNumSegments initialSegmentsArrayLengthAndNumSegments =
chooseInitialSegmentsArrayLength(builder);
initArrays(initialSegmentsArrayLengthAndNumSegments);
}
private void initArrays(
SegmentsArrayLengthAndNumSegments initialSegmentsArrayLengthAndNumSegments) {
Object[] segmentsArray = initSegmentsArray(initialSegmentsArrayLengthAndNumSegments);
updateSegmentLookupMask(segmentsArray.length);
// Ensure that no thread sees null in the segmentsArray field and nulls as segmentsArray's
// elements. The latter could lead to a segfault.
U.storeFence();
}
/**
* Returns the approximate footprint of this {@code SmoothieMap} instance in the heap of the JVM
* process, in bytes. Does not include the footprints of the keys and values stored in
* the {@code SmoothieMap}.
*
* @return the approximate footprint of this {@code SmoothieMap} proper
*/
public final long sizeInBytes() {
return smoothieMapClassSizeInBytes() +
objectSizeInBytes(segmentsArray) +
objectSizeInBytes(isFullCapacitySegmentBitSet) +
totalSizeOfSegmentsInBytes() +
(inflatedSegmentQueryContext != null ?
inflatedSegmentQueryContext.sizeInBytes() : 0) +
// Not adding hashTableHalfPopulationStats because it's a debug-only feature.
(keySet != null ? KEY_SET__SIZE_IN_BYTES : 0) +
(values != null ? VALUES__SIZE_IN_BYTES : 0) +
(entrySet != null ? ENTRY_SET__SIZE_IN_BYTES : 0);
}
long smoothieMapClassSizeInBytes() {
return SIZE_IN_BYTES;
}
/**
* Returns {@code true} if the two given key objects should be considered equal for this {@code
* SmoothieMap}.
*
* This method should obey general equivalence relation rules (see {@link
* Object#equals(Object)} documentation for details), and also should be consistent with {@link
* #keyHashCode(Object)} method in this class (i. e. these methods should be overridden together).
*
*
* <p>It is guaranteed that the first specified key is non-null and the arguments are not
* identical (!=); in particular, this means that to get {@link IdentityHashMap} behaviour it's
* OK to override this method to just return {@code false}:
* class IdentitySmoothieMap<K, V> extends SmoothieMap<K, V> {
*
* @Override
* protected boolean keysEqual(Object queriedKey, K internalKey) {
* return false;
* }
*
* @Override
* protected long keyHashCode(Object key) {
* return System.identityHashCode(key) * LONG_PHI_MAGIC;
* }
* }
*
* This method accepts a raw {@code Object} argument, because the {@link Map} interface allows
* checking the presence of a raw key, e. g. {@link #get(Object)}, without a {@link
* ClassCastException}.
* If you want to subclass parameterized {@code SmoothieMap}, you should cast the arguments to
* the key parameter class yourself, e. g.:
* class DomainToIpMap extends SmoothieMap<String, Integer> {
*
* @Override
* protected boolean keysEqual(Object queriedDomain, String domainInMap) {
* return ((String) queriedDomain).equalsIgnoreCase(domainInMap));
* }
*
* @Override
* protected long keyHashCode(Object domain) {
* return LongHashFunction.xx_r39().hashChars(((String) domain).toLowerCase());
* }
* }
*
* Default implementation is {@code queriedKey.equals(internalKey)}.
*
* @param queriedKey the first key to compare, that is passed to queries like {@link #get},
* but might also be a key, that is already stored in the map
* @param internalKey the second key to compare, guaranteed that this key is already stored
* in the map
* @return {@code true} if the given keys should be considered equal for this map, {@code false}
* otherwise
* @see #keyHashCode(Object)
*/
boolean keysEqual(Object queriedKey, K internalKey) {
return queriedKey.equals(internalKey);
}
@Override
public Equivalence<K> keyEquivalence() {
return Equivalence.defaultEquality();
}
/**
* Returns hash code for the given key.
*
* This method should obey general hash code contract (see {@link Object#hashCode()}
* documentation for details), also should be consistent with {@link #keysEqual(Object, Object)}
* method in this class (i. e. these methods should be overridden together).
*
*
* <p>The returned hash codes MUST be distributed well in the whole {@code long} range,
* because the {@code SmoothieMap} implementation uses high bits of the returned value. When
* overriding this method, if you are not sure that your hash codes are distributed well, it is
* recommended to multiply the result by {@link #LONG_PHI_MAGIC} at the end, to spread the low
* bits of the values to the high bits.
*
* TODO update javadoc
*
* Ensures that the lowest {@link #HASH__SEGMENT_LOOKUP_SHIFT} of the result are affected by all
* input hash bits, i. e. partial avalanche effect. To achieve full avalanche effect (all bits
* of the result are affected by all input hash bits) considerably more steps are required,
* e. g. see the finalization procedure of xxHash. Having the lowest {@link
* #HASH__SEGMENT_LOOKUP_SHIFT} bits of the result distributed well is critical because those
* bits are responsible for SmoothieMap's efficiency (number of collisions and unnecessary key
* comparisons) within (ordinary) segments, that is not reported to a callback provided to
* {@link SmoothieMapBuilder#reportPoorHashCodeDistribution}, because there are checks that only
* catch higher level, inter-segment anomalies (see {@link
* HashCodeDistribution#checkAndReportTooLargeInflatedSegment0},
* (TODO link to num inflated segments method) for more details).
* because of two types of intra-segment distribution problems - slot concentration and stored
* hash collisions
* TODO detect slot concentration?
*
*
* <p>See other examples of overriding this method in the documentation for the {@link
* #keysEqual(Object, Object)} method.
*
* @param key the key (queried or already stored in the map) to compute hash code for
* @return the hash code for the given key
* @see #keysEqual(Object, Object)
*/
long keyHashCode(Object key) {
return defaultKeyHashCode(key);
}
static long defaultKeyHashCode(Object key) {
return intToLongHashCode(key.hashCode());
}
static long intToLongHashCode(int intHashCode) {
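// An explanatory note (not from the original source): the multiplication by LONG_PHI_MAGIC below
// spreads the input bits towards the high end of the long, and the following xor-shift folds the
// highest HASH__SEGMENT_LOOKUP_SHIFT bits back into the lowest HASH__SEGMENT_LOOKUP_SHIFT bits,
// which are used for in-segment group selection and tag bits (see HASH__SEGMENT_LOOKUP_SHIFT).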
long x = ((long) intHashCode) * LONG_PHI_MAGIC;
return x ^ (x >>> (Long.SIZE - HASH__SEGMENT_LOOKUP_SHIFT));
}
/**
* To be used in {@link #hashCode()}, {@link EntrySet#hashCode()} and {@link KeySet#hashCode()}.
* @see #valueHashCodeForAggregateHashCodes
*/
int keyHashCodeForAggregateHashCodes(Object key) {
return key.hashCode();
}
ToLongFunction<K> getKeyHashFunction() {
return DefaultHashFunction.instance();
}
/**
* Returns {@code true} if the two given value objects should be considered equal for this map.
* This equivalence is used in the implementations of {@link #containsValue(Object)}, {@link
* #containsEntry(Object, Object)}, {@link #remove(Object, Object)}, {@link #replace(Object,
* Object, Object)} methods, and the methods of the {@linkplain #values() values} collection.
*
* This method should obey general equivalence relation rules (see {@link
* Object#equals(Object)} documentation for details).
*
*
* <p>It is guaranteed that the first specified value is non-null and the arguments are not
* identical (!=).
*
*
* <p>This method accepts a raw {@code Object} argument, because the {@link Map} interface allows
* checking the presence of a raw value, e. g. {@link #containsValue(Object)}, without a {@link
* ClassCastException}. If you want to subclass parameterized {@code SmoothieMap}, you should
* cast the arguments to the value parameter class yourself, e. g.:
* class IpToDomainMap extends SmoothieMap<Integer, String> {
*
* @Override
* protected boolean valuesEqual(Object d1, String d2) {
* return ((String) d1).equalsIgnoreCase(d2);
* }
* }
*
* Default implementation is {@code queriedValue.equals(internalValue)}.
*
* @param queriedValue the first value to compare, that is passed to queries like {@link
* #containsValue(Object)}
* @param internalValue the second value to compare, this value is already stored in the map
* @return {@code true} if the given values should be considered equal for this map
*/
boolean valuesEqual(Object queriedValue, V internalValue) {
return queriedValue.equals(internalValue);
}
@Override
public Equivalence<V> valueEquivalence() {
return Equivalence.defaultEquality();
}
/**
* To be used in {@link #hashCode()} and {@link EntrySet#hashCode()}.
* @see #keyHashCodeForAggregateHashCodes
*/
int valueHashCodeForAggregateHashCodes(V value) {
return value.hashCode();
}
private int getInitialSegmentAllocCapacity(int segmentOrder) {
if (allocateIntermediateSegments) {
return SEGMENT_INTERMEDIATE_ALLOC_CAPACITY;
} else {
return SEGMENT_MAX_ALLOC_CAPACITY;
}
}
//region segmentOrderStats-related methods
//endregion
//region hashTableHalfPopulationStats-related methods
//endregion
private Object[] initSegmentsArray(
SegmentsArrayLengthAndNumSegments segmentsArrayLengthAndNumSegments) {
Object[] segmentsArray = new Object[segmentsArrayLengthAndNumSegments.segmentsArrayLength];
int numCreatedSegments = segmentsArrayLengthAndNumSegments.numSegments;
int segmentsOrder = order(numCreatedSegments);
int segmentAllocCapacity = getInitialSegmentAllocCapacity(segmentsOrder);
for (int i = 0; i < numCreatedSegments; i++) {
segmentsArray[i] = createNewSegment(segmentAllocCapacity, segmentsOrder);
}
duplicateArray(segmentsArray, segmentsArray.length, numCreatedSegments);
this.segmentsArray = segmentsArray;
int[] isFullCapacityBitSet = IsFullCapacitySegmentBitSet.allocate(segmentsArray.length);
if (segmentAllocCapacity == SEGMENT_MAX_ALLOC_CAPACITY) {
IsFullCapacitySegmentBitSet.setAll(isFullCapacityBitSet);
}
this.isFullCapacitySegmentBitSet = isFullCapacityBitSet;
return segmentsArray;
}
private void updateSegmentLookupMask(int segmentsArrayLength) {
verifyIsPowerOfTwo(segmentsArrayLength, "segments array length");
segmentLookupMask = ((long) segmentsArrayLength - 1) << HASH__SEGMENT_LOOKUP_SHIFT;
}
/**
* Returns a non-negative number if upon returning from this method {@link #segmentsArray} has
* sufficient capacity to hold segments of order equal to priorSegmentOrder + 1, or a negative
* number if the maximum capacity is reached (in other words, if priorSegmentOrder is equal to
* {@link #MAX_SEGMENTS_ARRAY_ORDER}).
*
* If this method returns a non-negative number, it's 0 or 1 depending on whether a modCount
* increment has happened during the method call.
*
* Negative integer return contract: this method documents to return a negative value rather
* than exactly -1 to force clients to use comparison with zero (< 0, or >= 0) rather than
* exact comparison with -1 (== -1, != -1) because the former requires fewer machine
* instructions (see jz/je/jne).
*/
@AmortizedPerSegment
private int tryEnsureSegmentsArrayCapacityForSplit(int priorSegmentOrder) {
// Computing the current segmentsArray length from segmentLookupMask (a volatile variable)
// to ensure that if this method returns early in [The current capacity is sufficient]
// branch below, later reads of segmentsArray will observe a segmentsArray of at least the
// ensured capacity. This is not strictly required to avoid memory corruption because in
// replaceInSegmentsArray() (the only method where writes to segmentsArray happen after
// calling to tryEnsureSegmentsArrayCapacityForSplit()) the length of the array is used as
// the loop bound anyway, but provides a little more confidence. Also reading the length of
// segmentsArray directly is not guaranteed to be faster than computing it from
// segmentLookupMask because the former incurs an extra data dependency. While reading
// segmentsArray field just once and passing it into both
// tryEnsureSegmentsArrayCapacityForSplit() and (through a chain of methods) to
// replaceInSegmentsArray() is possible, it means adding a parameter to a number of methods
// that is cumbersome (for a method annotated @AmortizedPerSegment, i. e. shouldn't be
// optimized _that_ hard) and has its cost too, which is not guaranteed to be lower than
// the cost of reading from the segmentsArray field twice.
long visibleSegmentsArrayLength =
(this.segmentLookupMask >>> HASH__SEGMENT_LOOKUP_SHIFT) + 1;
// Needs to be a long, because if priorSegmentOrder = MAX_SEGMENTS_ARRAY_ORDER == 30,
// requiredSegmentsArrayLength = 2^31 will overflow as an int.
long requiredSegmentsArrayLength = 1L << (priorSegmentOrder + 1);
if (visibleSegmentsArrayLength >= requiredSegmentsArrayLength) { // [Positive likely branch]
// The current capacity is sufficient:
return 0; // Didn't increment modCount in the course of this method call.
} else {
// [Positive likely branch]
if (requiredSegmentsArrayLength <= MAX_SEGMENTS_ARRAY_LENGTH) {
// Code in a rarely taken branch is extracted as a method, see
// [Reducing bytecode size of a hot method]
// TODO check whether this is actually a good trick
growSegmentsArray((int) requiredSegmentsArrayLength);
return 1; // Incremented modCount in growSegmentsArray().
} else {
// Not succeeded to ensure capacity because MAX_SEGMENTS_ARRAY_LENGTH capacity is
// reached.
return -1;
}
}
}
@AmortizedPerOrder
private void growSegmentsArray(int requiredSegmentsArrayLength) {
// Protecting growSegmentsArray() with
// beginSegmentStructureModification..endSegmentStructureModification not only to detect
// concurrent modifications but also because it's very hard to prove that no race between
// concurrent tryEnsureSegmentsArrayCapacityForSplit() calls (which may lead to accessing
// segmentsArray's elements beyond the array's length at [Avoid normal array access] in
// segmentBySegmentLookupBits()) is possible. For this reason and, additionally, since this
// method is AmortizedPerOrder (hence isn't performance-critical) the protection is always
// on rather than only when Interleaved segments with Supported intermediateSegments are
// used.
int lockedStamp = beginSegmentStructureModification();
try {
Object[] oldSegments = getNonNullSegmentsArrayOrThrowCme();
// Check the length again, after an equivalent check in
// tryEnsureSegmentsArrayCapacityForSplit(). Sort of double-checked locking.
if (oldSegments.length < requiredSegmentsArrayLength) {
modCount++;
// [Unimportant order of isFullCapacitySegmentBitSet and segmentsArray updates].
// Here, isFullCapacitySegmentBitSet is updated before segmentsArray only for
// consistency with replaceInSegmentsArray().
int[] oldIsFullCapacitySegmentBitSet = (int[]) this.isFullCapacitySegmentBitSet;
this.isFullCapacitySegmentBitSet = IsFullCapacitySegmentBitSet.duplicate(
oldIsFullCapacitySegmentBitSet, oldSegments.length,
requiredSegmentsArrayLength);
Object[] newSegments = Arrays.copyOf(oldSegments, requiredSegmentsArrayLength);
duplicateArray(newSegments, newSegments.length, oldSegments.length);
// Ensures that no thread can see nulls as segmentsArray's elements. This fence is
// not needed when Interleaved segments with Supported intermediateSegments are used
// because [Reading consistent segment and isFullCapacitySegment values] guarantees
// against reading inconsistent values from segmentsArray (such as nulls) already.
this.segmentsArray = newSegments;
// It's critical to update segmentLookupMask after assigning the new segments array
// into segmentsArray field and the new isFullCapacitySegment bit set into
// isFullCapacitySegmentBitSet field (see the code right above) to provide a
// happens-before (segmentLookupMask is volatile) with the code in
// segmentBySegmentLookupBits() and isFullCapacitySegment() via segmentLookupBits()
// (both segmentBySegmentLookupBits() and isFullCapacitySegment() accept
// hash_segmentLookupBits as a parameter that should be computed in
// segmentLookupBits()) and thus guarantee impossibility of an illegal out of bounds
// access to an array.
updateSegmentLookupMask(newSegments.length);
} else {
throw new ConcurrentModificationException();
}
}
finally {
endSegmentStructureModification(lockedStamp);
}
}
@HotPath
private long segmentLookupBits(long hash) {
return hash & segmentLookupMask;
}
@HotPath
private Object segmentBySegmentLookupBits(long hash_segmentLookupBits) {
long segmentArrayOffset = hash_segmentLookupBits >>> HASH__SEGMENT_ARRAY_OFFSET_SHIFT;
@Nullable Object segmentsArray = this.segmentsArray;
if (segmentsArray == null) {
throwIseSegmentsArrayNull();
}
// Avoid normal array access: normal array access incurs
// - an extra data dependency between reading the array reference and a reference to a
// specific Segment;
// - the array header cache line reading and refreshing it in L1;
// - bound checks;
// - a class check that `segmentsArray` is indeed an array of objects.
return U.getObject(segmentsArray,
ARRAY_OBJECT_BASE_OFFSET_AS_LONG + segmentArrayOffset);
}
@HotPath
private int isFullCapacitySegment(long hash_segmentLookupBits) {
return IsFullCapacitySegmentBitSet.getValue(isFullCapacitySegmentBitSet,
hash_segmentLookupBits >>> HASH__SEGMENT_LOOKUP_SHIFT);
}
@AmortizedPerSegment
private int isFullCapacitySegmentByIndex(int segmentIndex) {
return IsFullCapacitySegmentBitSet.getValue(
isFullCapacitySegmentBitSet, (long) segmentIndex);
}
/** @deprecated in order not to forget to remove calls from production code */
@Deprecated
final int debugSegmentsArrayLength() {
//noinspection ConstantConditions: suppress nullability warnings during debug
return ((Object[]) segmentsArray).length;
}
/** @deprecated in order not to forget to remove calls from production code */
@Deprecated
final Segment<K, V> debugSegmentByIndex(int segmentIndex) {
//noinspection unchecked,ConstantConditions: suppress nullability warnings during debug
return (Segment<K, V>) ((Object[]) segmentsArray)[segmentIndex];
}
/** This method should be called only during bulk operations and from iterators. */
private static <K, V> Segment<K, V> segmentCheckedByIndex(
@Nullable Object[] segmentsArray, int segmentIndex) {
// [Not avoiding normal array access]
@Nullable Object segment = segmentsArray[segmentIndex];
if (segment == null) {
throw new ConcurrentModificationException();
}
//noinspection unchecked
return (Segment<K, V>) segment;
}
static int firstSegmentIndexByHashAndOrder(long hash, int segmentOrder) {
// In native implementation, BEXTR instruction (see en.wikipedia.org/wiki/
// Bit_Manipulation_Instruction_Sets#BMI1_(Bit_Manipulation_Instruction_Set_1)) can be used.
return ((int) (hash >>> HASH__SEGMENT_LOOKUP_SHIFT)) & ((1 << segmentOrder) - 1);
}
private static int firstSegmentIndexByIndexAndOrder(@NonNegative int segmentIndex,
@IntRange(from = 0, to = MAX_SEGMENTS_ARRAY_ORDER) int segmentOrder) {
return segmentIndex & ((1 << segmentOrder) - 1);
}
private static int siblingSegmentIndex(int segmentIndex, int segmentOrder) {
return segmentIndex ^ (1 << (segmentOrder - 1));
}
/**
* Specifically to be called from {@link #split}.
* @param firstSiblingsSegmentIndex the first (smallest) index in {@link #segmentsArray} where
* the yet unsplit segment (with the order equal to newSegmentOrder - 1) is stored.
* @param newSegmentOrder the order of the two new sibling segments
* @param chooseLower should be equal to 1 if this method should return the first index for the
* lower segment among two siblings, value 0 means that the first index for the higher
* segment among two siblings should be returned.
* @return the first index for the lower or the higher segment among the two new siblings, as
* chosen.
*/
private static int chooseFirstSiblingSegmentIndex(
int firstSiblingsSegmentIndex, int newSegmentOrder, int chooseLower) {
int n = newSegmentOrder - 1;
// Using the last algorithm from this answer: https://stackoverflow.com/a/47990 because it's
// simpler than the first algorithm, but the data dependency chain is equally long in our
// case because we have to convert chooseLower to x by `1 - chooseLower` operation.
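// Illustrative example (not from the original comment): with firstSiblingsSegmentIndex = 5
// (0b101) and newSegmentOrder = 3 (so n = 2), chooseLower = 1 gives x = 0 and the result
// (5 & ~0b100) | (0 << 2) = 1 (bit n cleared), while chooseLower = 0 gives x = 1 and the
// result 1 | (1 << 2) = 5 (bit n set).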
int x = 1 - chooseLower;
//noinspection UnnecessaryLocalVariable - using the same variable name as in the source
int number = firstSiblingsSegmentIndex;
return (number & ~(1 << n)) | (x << n);
}
/**
* @param firstReplacedSegmentIndex the first (smallest) index in {@link #segmentsArray} where
* the replacement segment should be stored instead of the replaced segment.
* @param replacementSegmentOrder the order of the replacement segment. Must be equal to or
* greater than the order of the replaced segment.
*/
@AmortizedPerSegment
private void replaceInSegmentsArray(Object[] segmentsArray,
int firstReplacedSegmentIndex, int replacementSegmentOrder, Object replacementSegment,
boolean replacedSegment_isFullCapacity) {
modCount++;
int step = 1 << replacementSegmentOrder;
// beginSegmentStructureModification..endSegmentStructureModification are fairly expensive
// (CAS operations and barriers), so avoiding them whenever only segmentsArray, but not
// isFullCapacitySegmentBitSet should be updated, including when Interleaved segments with
// Supported intermediateSegments are used. In particular, we avoid doing
// beginSegmentStructureModification..endSegmentStructureModification when intermediate
// segments are supported but allocateIntermediateSegments is false for a SmoothieMap in
// which case isFullCapacitySegmentBitSet is never updated in replaceInSegmentsArray().
//
// lockedStamp = 0 is not a valid "locked stamp" value (see
// isLockedSegmentStructureModStamp()), so it can be used as the default value to check
// against in the finally block below.
int lockedStamp = 0;
try {
// Updating isFullCapacitySegmentBitSet if needed.
// Unimportant order of isFullCapacitySegmentBitSet and segmentsArray updates: the order
// in which isFullCapacitySegmentBitSet and segmentsArray are updated isn't important
// since they are both protected with
// beginSegmentStructureModification..endSegmentStructureModification and both reads
// are verified while [Reading consistent segment and isFullCapacitySegment values].
// In replaceInSegmentsArray() isFullCapacitySegmentBitSet is updated first because it
// is more convenient: a single `if (isFullCapacitySegmentValue_needFlip) {}` branch is
// needed.
int[] isFullCapacitySegmentBitSet = (int[]) this.isFullCapacitySegmentBitSet;
if (isFullCapacitySegmentBitSet.length !=
bitSetArrayLengthFromSegmentsArrayLength(segmentsArray.length)) {
// There should be growSegmentsArray() happening concurrently.
throwGenericCme();
}
// There is no specific reason why `instanceof FullCapacitySegment` is used rather than
// `BitSetAndState.isFullCapacity(getBitSetAndState(replacementSegment))` here. Both
// ways should work because replacementSegment's bitSetAndState is expected to be
// properly initialized already.
boolean replacementSegment_isFullCapacity =
replacementSegment instanceof FullCapacitySegment;
boolean isFullCapacitySegmentValue_needFlip =
replacedSegment_isFullCapacity ^ replacementSegment_isFullCapacity;
if (isFullCapacitySegmentValue_needFlip) {
lockedStamp = beginSegmentStructureModification();
for (int segmentIndex = firstReplacedSegmentIndex;
segmentIndex < segmentsArray.length;
segmentIndex += step) {
// Cannot just flip a bit in isFullCapacitySegmentBitSet which would not require
// `value` variable and would be computationally simpler than setValue() because
// due to potential concurrent modifications the replaced segment (and changed
// values in isFullCapacitySegmentBitSet) may not correspond to
// replacedSegment_isFullCapacity by the time
// beginSegmentStructureModification() is called. So flipping bits may result in
// inconsistency between segmentsArray and isFullCapacitySegmentBitSet.
int value = replacementSegment_isFullCapacity ? 1 : 0;
IsFullCapacitySegmentBitSet.setValue(
isFullCapacitySegmentBitSet, segmentIndex, value);
}
}
// Updating segmentsArray.
for (int segmentIndex = firstReplacedSegmentIndex; segmentIndex < segmentsArray.length;
segmentIndex += step) {
// Not avoiding normal array access: couldn't [Avoid normal array access] because
// unless segmentsArray (and isFullCapacitySegmentBitSet) is read just once across
// all paths in put(), remove() etc. methods and passed all the way down as local
// parameter to replaceInSegmentsArray() (which is called in splitAndInsert(),
// tryShrink3(), and other methods) which is likely not practical because it
// contributes to bytecode size and machine operations on the hot paths of methods
// like put() and remove() then a memory corrupting race is possible because
// segmentsArray is not volatile and the second read of this field (e. g. the one
// performed in getNonNullSegmentsArrayOrThrowCme() to obtain an array version to be
// passed into this method) might see an _earlier_ version of the array with smaller
// length. See
// https://shipilev.net/blog/2016/close-encounters-of-jmm-kind/#wishful-hb-actual
// explaining how that is possible.
//
// When Interleaved segments with Supported intermediateSegments are used the
// condition explained above is not possible because there is a loadLoad fence
// imposed in validateSegmentStructureModStamp() between the reads of segmentsArray
// variable, however, we don't avoid normal array access in this case either for
// simplicity, consistency between the cases and a little extra "backup"
// confidence in the safety of this code, also considering that this is an
// @AmortizedPerSegment method.
segmentsArray[segmentIndex] = replacementSegment;
}
}
finally {
if (lockedStamp != 0) {
endSegmentStructureModification(lockedStamp);
}
}
}
/**
* Should be called in point access and segment transformation methods, except in {@link
* #segmentBySegmentLookupBits} (see the comment for {@link #throwIseSegmentsArrayNull}).
*/
private Object[] getNonNullSegmentsArrayOrThrowCme() {
Object @Nullable [] segmentsArray = (Object @Nullable []) this.segmentsArray;
if (segmentsArray == null) {
throwCmeSegmentsArrayNull();
}
return segmentsArray;
}
/**
* Should be called in the beginning of bulk iteration methods.
*/
private Object[] getNonNullSegmentsArrayOrThrowIse() {
Object @Nullable [] segmentsArray = (Object @Nullable []) this.segmentsArray;
if (segmentsArray == null) {
throwIseSegmentsArrayNull();
}
return segmentsArray;
}
/**
* Reducing bytecode size of a hot method: extracting exception construction and throwing as
* a method in order to reduce the bytecode size of a hot method ({@link
* #segmentBySegmentLookupBits} here), ultimately making SmoothieMap friendlier for inlining,
* because inlining thresholds and limits are defined in terms of the numbers of bytecodes in
* Hotspot JVM.
*
* When {@link #segmentsArray} is found to be null, this method should be called only from
* {@link #segmentBySegmentLookupBits} (among point access and segment transformation methods)
* because {@link #segmentBySegmentLookupBits} is first called on all map query paths, so that
* if a SmoothieMap is mistakenly accessed after calling {@link #moveToMapWithShrunkArray()} an
* IllegalStateException is thrown. In other methods {@link #getNonNullSegmentsArrayOrThrowCme()
* } should be called instead to throw a ConcurrentModificationException because {@link
* #segmentsArray} might be found to be null in other point access and segment transformation
* methods only if the map is accessed concurrently.
*/
@Contract(" -> fail")
private static void throwIseSegmentsArrayNull() {
throw new IllegalStateException(
"Old map object shouldn't be accessed after explicit shrinking"
);
}
/** [Reducing bytecode size of a hot method] */
@Contract(" -> fail")
private static void throwCmeSegmentsArrayNull() {
throw new ConcurrentModificationException(
"Explicit shrinking is done concurrently with some other " +
"modification operations on a map"
);
}
/** [Reducing bytecode size of a hot method] */
@Contract(" -> fail")
private static void throwGenericCme() {
throw new ConcurrentModificationException("Concurrent map update is in progress");
}
final void incrementSize() {
modCount++;
size++;
}
final void decrementSize() {
modCount++;
size--;
}
/**
* Makes at least an Opaque-level read to allow making modCount checks more robust in the face of
* operation reorderings performed by the JVM.
*/
final int getModCountOpaque() {
// It should be VarHandle's Opaque mode. In the absence of that in the sun.misc.Unsafe
// API, a volatile read is used, which costs almost as little on x86.
return U.getIntVolatile(this, MOD_COUNT_FIELD_OFFSET);
}
final void checkModCountOrThrowCme(int expectedModCount) {
// Intentionally makes Opaque read of modCount rather than plain read (that is, accessing
// modCount field directly) to make the modCount check more robust in the face of operation
// reorderings performed by the JVM.
int actualModCount = getModCountOpaque();
if (expectedModCount != actualModCount) {
throw new ConcurrentModificationException();
}
}
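// Usage sketch (hypothetical bulk-read method, not actual SmoothieMap code): callers
// capture the mod count before traversing and re-check it afterwards, so that a
// concurrent structural modification surfaces as a ConcurrentModificationException
// rather than as silently inconsistent results:
//
//   int expectedModCount = getModCountOpaque();
//   ... iterate over segments and entries ...
//   checkModCountOrThrowCme(expectedModCount);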
private static boolean isLockedSegmentStructureModStamp(int stamp) {
return stamp < 0;
}
private int beginSegmentStructureModification() {
int stamp = segmentStructureModStamp;
int lockedStamp = Integer.MIN_VALUE | stamp;
if (isLockedSegmentStructureModStamp(stamp) ||
!SEGMENT_STRUCTURE_MODIFICATION_STAMP_UPDATER.compareAndSet(
this, stamp, lockedStamp)) {
throwGenericCme();
}
// CAS above has the memory semantics of volatile read + volatile write (see
// VarHandle.compareAndSet() specification, referred from the Javadocs for
// AtomicIntegerFieldUpdater). Volatile read is an equivalent of a read + an acquire fence
// after the read, which is a LoadLoad + a LoadStore fence (see VarHandle.acquireFence()).
// Adding StoreStore fence here adds up to a _release_ fence (see VarHandle.releaseFence())
// between beginSegmentStructureModification() and the subsequent modifications to segment
// structure (segmentsArray or isFullCapacitySegmentBitSet) in growSegmentsArray() or
// replaceInSegmentsArray(). This means that if a partially-updated segment structure state
// is observed on the read path TODO insert link to read path
// then validateSegmentStructureModStamp() must observe the locked stamp in the
// segmentStructureModStamp field.
// See also the "Algorithmic notes" in j.u.c.l.StampedLock source code in OpenJDK 9.
storeStoreFence();
return lockedStamp;
}
private void endSegmentStructureModification(int lockedStamp) {
verifyThat(isLockedSegmentStructureModStamp(lockedStamp));
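// lockedStamp has the sign bit set (see beginSegmentStructureModification()). Adding 1
// advances the version in the lower 31 bits, and masking with Integer.MAX_VALUE clears
// the sign ("locked") bit, publishing a new unlocked stamp.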
segmentStructureModStamp = (lockedStamp + 1) & Integer.MAX_VALUE;
}
private int acquireSegmentStructureModStamp() {
int stamp = segmentStructureModStamp;
if (isLockedSegmentStructureModStamp(stamp)) {
throwGenericCme();
}
return stamp;
}
private void validateSegmentStructureModStamp(int stamp) {
// Using acquireFence() following j.u.c.l.StampedLock.validate() code in OpenJDK 9 although
// it's not clear why loadLoadFence() wouldn't suffice.
acquireFence();
if (stamp != segmentStructureModStamp) {
throwGenericCme();
}
}
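// Analogy (descriptive note, not SmoothieMap code): the acquire/validate pair above
// mirrors the optimistic-read idiom of java.util.concurrent.locks.StampedLock:
//
//   long stamp = lock.tryOptimisticRead();
//   ... racy reads of segmentsArray and isFullCapacitySegmentBitSet ...
//   if (!lock.validate(stamp)) { ... fail ... }
//
// The difference is that SmoothieMap never blocks or retries: an already-locked stamp in
// acquireSegmentStructureModStamp() or a changed stamp in
// validateSegmentStructureModStamp() is treated as a concurrent modification and is
// reported via throwGenericCme().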
private InflatedSegmentQueryContext<K, V> getInflatedSegmentQueryContext() {
@MonotonicNonNull InflatedSegmentQueryContext<K, V> context = inflatedSegmentQueryContext;
if (context == null) {
context = new InflatedSegmentQueryContext<>(this);
inflatedSegmentQueryContext = context;
}
return context;
}
/** @see #averageSegmentOrder_lastComputed */
@AmortizedPerSegment
final int computeAverageSegmentOrder(long size) {
int averageSegmentOrder_prevComputed = (int) averageSegmentOrder_lastComputed;
int averageSegmentOrder = doComputeAverageSegmentOrder(size);
// Guarding unlikely write: it's unlikely that averageSegmentOrder_lastComputed actually
// needs to be updated. Guarding the write (which is done in updateAverageSegmentOrder())
// should be preferable when a GC algorithm with expensive write barriers is used.
// [Positive likely branch]
if (averageSegmentOrder == averageSegmentOrder_prevComputed) {
return averageSegmentOrder;
} else {
// [Rarely taken branch is extracted as a method]
updateAverageSegmentOrder(averageSegmentOrder_prevComputed, averageSegmentOrder);
return averageSegmentOrder;
}
}
@AmortizedPerOrder
private void updateAverageSegmentOrder(
int averageSegmentOrder_prevComputed, int newAverageSegmentOrder) {
averageSegmentOrder_lastComputed = (byte) newAverageSegmentOrder;
}
@Override
public final int size() {
return (int) min(size, Integer.MAX_VALUE);
}
@Override
public final long sizeAsLong() {
return size;
}
@Override
public final boolean isEmpty() {
return size == 0;
}
//region Map API point access methods
@Override
public final boolean containsKey(Object key) {
// TODO specialize to avoid access into value's memory: see [Protecting null comparisons].
return getInternalKey(key) != null;
}
@Override
public final boolean containsEntry(Object key, Object value) {
checkNonNull(value);
@Nullable V internalVal = get(key);
//noinspection ObjectEquality: identity comparison is intended
boolean valuesIdentical = internalVal == value;
// Avoiding `internalVal != null` check before valuesIdentical check for the same reason as
// [Protecting null comparisons].
return valuesIdentical || (internalVal != null && valuesEqual(value, internalVal));
}
@Override
public final V getOrDefault(Object key, V defaultValue) {
@Nullable V internalVal = get(key);
// TODO specialize or implement get as getOrDefault(null) to avoid access into value's
// memory: see [Protecting null comparisons].
return internalVal != null ? internalVal : defaultValue;
}
@CanIgnoreReturnValue
@Override
public final @Nullable V remove(Object key) {
checkNonNull(key);
long hash = keyHashCode(key);
long hash_segmentLookupBits = segmentLookupBits(hash);
// Reading consistent segment and isFullCapacitySegment values: they reside in different
// arrays, so to read consistent values the reads are confined between a stamp acquisition
// and validation, a-la j.u.c.l.StampedLock idiom.
// TODO add `Flag intermediateSegments` JPSG generation dimension value and access
// isFullCapacitySegmentBitSet and segmentStructureModStamp conditionally.
// See the doc comment for isFullCapacitySegmentBitSet for more details.
int segmentStructureModStamp = acquireSegmentStructureModStamp();
Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
return removeImpl(segment,
isFullCapacitySegment,
key, hash, null);
}
@Override
public final boolean remove(Object key, Object value) {
checkNonNull(key);
checkNonNull(value);
long hash = keyHashCode(key);
long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
int segmentStructureModStamp = acquireSegmentStructureModStamp();
Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
// TODO `== value` may be better than `!= null` (if the method also returns the
// corresponding object) for the same reason as [Protecting null comparisons]. Or, the
// method should be specialized. However, `== value` may not be possible due to the current
// contract of removeImpl(): see InflatedSegmentQueryContext.removeOrReplaceEntry().
return removeImpl(segment,
isFullCapacitySegment,
key, hash, value) != null;
}
@Override
public final V replace(K key, V value) {
checkNonNull(key);
checkNonNull(value);
long hash = keyHashCode(key);
long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
int segmentStructureModStamp = acquireSegmentStructureModStamp();
Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
return replaceImpl(segment,
isFullCapacitySegment,
key, hash, null, value);
}
@Override
public final boolean replace(K key, V oldValue, V newValue) {
checkNonNull(key);
checkNonNull(oldValue);
checkNonNull(newValue);
long hash = keyHashCode(key);
long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
int segmentStructureModStamp = acquireSegmentStructureModStamp();
Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
// TODO `== oldValue` may be better than `!= null` (if the method also returns the
// corresponding object) for the same reason as [Protecting null comparisons]. Or, the
// method should be specialized. However, `== oldValue` may not be possible due to the
// current contract of replaceImpl(): see
// InflatedSegmentQueryContext.removeOrReplaceEntry().
return replaceImpl(segment,
isFullCapacitySegment,
key, hash, oldValue, newValue) != null;
}
@CanIgnoreReturnValue
@Override
public final @Nullable V put(K key, V value) {
checkNonNull(key);
checkNonNull(value);
long hash = keyHashCode(key);
long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
int segmentStructureModStamp = acquireSegmentStructureModStamp();
Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
return putImpl(segment,
isFullCapacitySegment,
key, hash, value, false /* onlyIfAbsent */);
}
@Override
public final @Nullable V putIfAbsent(K key, V value) {
checkNonNull(key);
checkNonNull(value);
long hash = keyHashCode(key);
return internalPutIfAbsent(key, hash, value);
}
@HotPath
private @Nullable V internalPutIfAbsent(K key, long hash, V value) {
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
int segmentStructureModStamp = acquireSegmentStructureModStamp();
Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
return putImpl(segment,
isFullCapacitySegment,
key, hash, value, true /* onlyIfAbsent */);
}
//endregion
//region Implementations of point access methods
@Override
public final @Nullable V get(Object key) {
checkNonNull(key);
final long hash = keyHashCode(key);
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
final int segmentStructureModStamp = acquireSegmentStructureModStamp();
final Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
final int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
// bitMask loop:
// TODO compare with int-indexed loop with bitCount(bitMask) limit
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
long allocOffset = allocOffset(firstAllocIndex(dataGroup, bitMask)
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
return readValueAtOffset(segment, allocOffset);
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
// TODO provide separate getLikelyPresent() and getLikelyAbsent() methods
if (shouldStopProbing(dataGroup)) {
return null;
}
// InflatedSegment checking after unsuccessful key search: the key search above was
// destined to be unsuccessful in an inflated segment, but we don't check whether the
// segment is inflated or not in the beginning to declutter the hot path as much as
// possible. This is enabled by the fact that InflatedSegment inherits Segment and thus
// has a hash table. See also the Javadoc for InflatedSegment.
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) { // [Positive likely branch]
// Quadratic probing:
// The alternative to quadratic probing is double hashing: we can extract two bits
// out of the key's hash and use (these_two_bits << 1) + 1 as the step in the
// probing chain, like it is done in F14: https://github.com/facebook/folly/blob/
// e988905d/folly/container/detail/F14Table.h#L1337-L1345.
// Advantages of double hashing:
// - Less clustering (TODO evaluate)
// - No additional operations with the step on each probing loop iteration (like
// the following `groupIndexStep += 1` with quadratic probing).
// Disadvantages:
// - A relatively expensive computation of the step value. This computation is
// likely to not be needed at all because the probing will likely finish at the
// first group. The CPU probably won't be able to overlap this computation in its
// pipeline during the first iteration because, due to the high register pressure, the
// computed step value is pushed to the stack. There are two ways to avoid making
// this expensive computation on the first iteration of the probing loop:
// 1) Unroll the first iteration of the probing loop. But it would increase the
// size and the complexity of the methods (both in computational terms, i. e.
// icache trashing, quality of compilation into machine code by JVM, branch
// predictability) considerably. This doesn't seem like a good deal.
// 2) Recompute the step from hash value on each iteration of the probing loop.
// This approach is potentially viable since the chance that the probing chain
// will be longer than two groups (when the repetitive step computations will
// start to make the difference) is very low. TODO evaluate this approach
// - A simplified, no-action approach to handling missing opportunities of shifting
// back unnecessarily overflown entries during doSplit() is not possible with
// double hashing because that approach relies on the quadratic probing scheme: see
// [fromSegment iteration].
//
// The maximum load factor in SmoothieMap is lower than in F14
// (SEGMENT_MAX_ALLOC_CAPACITY / HASH_TABLE_SLOTS = 0.75 vs. 12/14 ~= 0.86). Also,
// thanks to the "smoothness" of SmoothieMap there are no sizes when the whole map
// is highly populated (but rather only a proportion of segments). These factors
// are in favor of quadratic probing, although it's still not a clear win and proper
// benchmarking and precise evaluation should be done. TODO compare the approaches.
//
// Double hashing could also allow measuring outbound overflow counts separately for
// steps 1 and 3 and for steps 5 and 7 in two 4-bit registers, instead of measuring the
// total outbound overflow count for a group in a single 8-bit register (see
// ContinuousSegment_BitSetAndStateArea.outboundOverflowCountsPerGroup), which would
// allow more granular breaking from the probing loop. TODO work out this idea
groupIndexStep += 1;
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// Break from the loop when visited all groups in the hash table: this may happen
// when every group in the hash table has outbound overflow count greater than 1,
// yet many of the groups are empty enough (after removals from the segment) so that
// the total number of entries is less than SEGMENT_MAX_ALLOC_CAPACITY. This may
// eventually happen in a segment after a period of entry insertions and removals
// since no kind of "shift deletion" is performed upon removals. See also
// https://github.com/facebook/folly/blob/e988905d/
// folly/container/detail/F14Table.h#L1399-L1402.
// TODO remove this condition in a specialized version of a Map that doesn't support
// removes
return null;
}
} else {
return getInflated(segment, key, hash);
}
}
}
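// Illustrative sketch (a hypothetical helper, not used by SmoothieMap): the probing
// loops above add an ever-growing step (1, 2, 3, ...) to the group index, so the offsets
// visited from baseGroupIndex are the triangular numbers 0, 1, 3, 6, ... Assuming the
// group count is a power of two and addGroupIndex() wraps modulo that count, this
// schedule visits every group exactly once before groupIndexStep reaches
// HASH_TABLE_GROUPS, which is what [Break from the loop when visited all groups in the
// hash table] relies on.
@SuppressWarnings("unused")
private static boolean quadraticProbingVisitsAllGroupsSketch(int numGroups) {
// numGroups must be a power of two for the masking below to act as a modulo operation.
boolean[] visited = new boolean[numGroups];
int groupIndex = 0;
for (int step = 0; step < numGroups; step++) {
groupIndex = (groupIndex + step) & (numGroups - 1);
visited[groupIndex] = true;
}
for (boolean groupVisited : visited) {
if (!groupVisited) {
return false;
}
}
return true; // holds for any power-of-two numGroups
}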
final int countCollisionKeyComparisons(Object segment, Object key, long hash) {
verifyNonNull(segment);
verifyNonNull(key);
// Can't use branchless isFullCapacitySegment(segmentLookupBits(hash)) here because segment
// object is already given and there is no segmentStructureModStamp to validate with, as in
// [Reading consistent segment and isFullCapacitySegment values].
final int isFullCapacitySegment = segment instanceof FullCapacitySegment ? 1 : 0;
int numCollisionKeyComparisons = 0;
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
// [bitMask loop]
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
long allocOffset = allocOffset(firstAllocIndex(dataGroup, bitMask)
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
return numCollisionKeyComparisons;
} else {
numCollisionKeyComparisons++;
}
}
if (shouldStopProbing(dataGroup)) {
throw new IllegalStateException("Expected the key to be in the map");
}
// [InflatedSegment checking after unsuccessful key search]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) { // [Positive likely branch]
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
throw new IllegalStateException("Expected the key to be in the map");
}
} else {
throw new IllegalStateException("Expected an ordinary segment");
}
}
}
final void aggregateKeySearchStats(Object key, KeySearchStats keySearchStats) {
checkNonNull(key);
final long hash = keyHashCode(key);
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
final int segmentStructureModStamp = acquireSegmentStructureModStamp();
final Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
final int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
int collisionChainGroupLength = 0;
int numCollisionKeyComparisons = 0;
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
keySearch:
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
// bitMask loop:
// TODO compare with int-indexed loop with bitCount(bitMask) limit
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
long allocOffset = allocOffset(firstAllocIndex(dataGroup, bitMask)
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
break keySearch;
} else {
numCollisionKeyComparisons++;
}
}
if (shouldStopProbing(dataGroup)) {
break keySearch;
}
// [InflatedSegment checking after unsuccessful key search]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) { // [Positive likely branch]
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
collisionChainGroupLength++;
} else {
// [Break from the loop when visited all groups in the hash table]
break keySearch;
}
} else {
// Not aggregating any stats of searches in inflated segments.
return;
}
}
keySearchStats.aggregate(collisionChainGroupLength, numCollisionKeyComparisons);
}
/**
* Shallow xxxInflated() methods: this method (and other xxxInflated() methods) just casts the
* segment object to {@link InflatedSegment} and delegates to its method. It's done to reduce
* the bytecode size of {@link #get(Object)} as much as possible, see
* [Reducing bytecode size of a hot method].
*/
private @Nullable V getInflated(Object segment, Object key, long hash) {
@SuppressWarnings("unchecked")
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.get(this, key, hash);
}
@Override
public final @Nullable K getInternalKey(Object key) {
checkNonNull(key);
final long hash = keyHashCode(key);
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
final int segmentStructureModStamp = acquireSegmentStructureModStamp();
final Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
final int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
long allocOffset = allocOffset(firstAllocIndex(dataGroup, bitMask)
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
return internalKey;
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
return null;
}
// [InflatedSegment checking after unsuccessful key search]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) { // [Positive likely branch]
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
return null;
}
} else {
return getInternalKeyInflated(segment, key, hash);
}
}
}
/** [Shallow xxxInflated() methods] */
private @Nullable K getInternalKeyInflated(Object segment, Object key, long hash) {
@SuppressWarnings("unchecked")
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.getInternalKey(this, key, hash);
}
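// Usage sketch (hypothetical variables, not actual SmoothieMap code): getInternalKey()
// can serve as a key canonicalization/interning primitive, returning the instance
// already stored in the map instead of an equal duplicate:
//
//   K canonical = map.getInternalKey(candidate);
//   if (canonical == null) {
//       canonical = candidate; // not present in the map; keep the candidate itself
//   }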
@Override
public final @Nullable V computeIfPresent(
K key, BiFunction<? super K, ? super V, ? extends @Nullable V> remappingFunction) {
checkNonNull(key);
checkNonNull(remappingFunction);
final long hash = keyHashCode(key);
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
final int segmentStructureModStamp = acquireSegmentStructureModStamp();
final Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
final int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
// Inlined lowestMatchingSlotIndex: computing the number of trailing zeros directly
// and then calling lowestMatchingSlotIndexFromTrailingZeros() amounts to an inlined
// lowestMatchingSlotIndex(). It's inlined because the trailingZeros value is also
// needed for extractAllocIndex().
int trailingZeros = Long.numberOfTrailingZeros(bitMask);
long allocIndex = extractAllocIndex(dataGroup, trailingZeros);
long allocOffset = allocOffset(allocIndex
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
V internalVal = readValueAtOffset(segment, allocOffset);
@Nullable V newValue = remappingFunction.apply(key, internalVal);
if (newValue != null) {
writeValueAtOffset(segment, allocOffset, newValue);
} else {
// Computing outboundOverflowCount_perGroupDecrements in the end: not
// computing outboundOverflowCount_perGroupDecrements along with the key
// search loop as in remove() because it's considered generally unlikely
// that remappingFunction returns null, so avoiding premature computations
// which can turn out to be unnecessary. Since the majority of key search
// loops are expected to have just one iteration, the main cost
// contribution of the remove()'s approach is establishing an extra variable
// outside of the loop which likely leads to a stack push because of very
// high register pressure in the method.
// TODO provide separate computeIfPresentLikelyRemove() method
long outboundOverflowCount_perGroupDecrements =
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, groupIndex);
// [Reusing local variable]
dataGroup = setSlotEmpty(dataGroup, trailingZeros);
removeAtSlot(hash, segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupDecrements, dataGroupOffset,
dataGroup, allocIndex, allocOffset);
}
return newValue;
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
return null;
}
// [InflatedSegment checking after unsuccessful key search]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) { // [Positive likely branch]
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
return null;
}
} else {
return computeIfPresentInflated(segment, key, hash, remappingFunction);
}
}
}
/** [Shallow xxxInflated() methods] */
private @Nullable V computeIfPresentInflated(Object segment,
K key, long hash,
BiFunction<? super K, ? super V, ? extends @Nullable V> remappingFunction) {
@SuppressWarnings("unchecked")
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.computeIfPresent(this, key, hash, remappingFunction);
}
/**
* The common implementation for
* {@link #remove(Object)}: matchValue == null
* {@link #remove(Object, Object)}: matchValue != null
*
* @param matchValue if non-null the entry's value should be equal to matchValue for removal.
* @return a value removed in the map, or null if no change was made. If matchValue is null the
* returned value is the internal value removed in the map. If matchValue is non-null the
* returned value could be either the internal value _or_ the matchValue itself.
*/
private @Nullable V removeImpl(Object segment,
int isFullCapacitySegment,
Object key, long hash, @Nullable Object matchValue) {
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
// Compare with precomputed outboundOverflowCount_perGroupChanges approach:
// [Precomputed outboundOverflowCount_perGroupChanges] might turn out to be more effective
// so that [Computing outboundOverflowCount_perGroupDecrements in the end] should be used
// instead of computing outboundOverflowCount_perGroupDecrements along with the key search
// loop. TODO compare the approaches
long outboundOverflowCount_perGroupDecrements = 0;
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
// [Inlined lowestMatchingSlotIndex]
int trailingZeros = Long.numberOfTrailingZeros(bitMask);
long allocIndex = extractAllocIndex(dataGroup, trailingZeros);
long allocOffset = allocOffset(allocIndex
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
V internalVal = readValueAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean valuesIdentical = internalVal == matchValue;
// Avoiding `matchValue == null` check before valuesIdentical check for the same
// reason as [Protecting null comparisons].
if (valuesIdentical || matchValue == null ||
valuesEqual(matchValue, internalVal)) {
// [Reusing local variable]
dataGroup = setSlotEmpty(dataGroup, trailingZeros);
removeAtSlot(hash, segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupDecrements, dataGroupOffset,
dataGroup, allocIndex, allocOffset);
return internalVal;
} else {
return null;
}
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
return null;
}
// [InflatedSegment checking after unsuccessful key search]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) { // [Positive likely branch]
outboundOverflowCount_perGroupDecrements = outboundOverflowCount_markGroupForChange(
outboundOverflowCount_perGroupDecrements, groupIndex);
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
return null;
}
} else {
return removeOrReplaceInflated(segment, key, hash, matchValue, null);
}
}
}
/**
* The difference of this method from {@link #removeImpl} is that {@link #removeAtSlotNoShrink}
* is called inside the method instead of {@link #removeAtSlot}.
*
* @param allocIndexToRemove the alloc index of the entry being removed; should be matched
* instead of key and matchValue as in {@link #removeImpl}.
*/
private void removeDuringIterationFromOrdinarySegment(
Segment<K, V> segment, long allocIndexToRemove) {
// Not storing isFullCapacitySegment during iteration: because of various race conditions
// possible from abusing Iterator (see the comment for checkAllocIndex()), it doesn't make
// sense to store isFullCapacitySegment in a field in Iterator object: it has to be
// re-checked against the local `segment` variable every time it is used, anyways.
final int isFullCapacitySegment = segment instanceof FullCapacitySegment ? 1 : 0;
final Object key = readKeyCheckedAtIndex(segment, allocIndexToRemove
, (long) isFullCapacitySegment);
final long hash = keyHashCode(key);
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
// TODO [Compare with precomputed outboundOverflowCount_perGroupChanges approach]
long outboundOverflowCount_perGroupDecrements = 0;
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
// [Inlined lowestMatchingSlotIndex]
int trailingZeros = Long.numberOfTrailingZeros(bitMask);
long allocIndex = extractAllocIndex(dataGroup, trailingZeros);
if (allocIndex == allocIndexToRemove) {
dataGroup = setSlotEmpty(dataGroup, trailingZeros); // [Reusing local variable]
// An alternative to reading bitSetAndState here is reading it in advance,
// outside of the loop to avoid a data dependency stall before the call to
// removeAtSlotNoShrink(): the loop doesn't have any normal outcome other than
// removing an entry, so this read of bitSetAndState must be always useful.
// However, because of high register pressure the value is likely to be
// immediately pushed to stack and then has to be read from the stack. And the
// cache line containing segment's bitSetAndState should likely be already in L1
// anyway because bitSetAndState is read in the beginning of an iteration over
// a segment.
long bitSetAndState = segment.bitSetAndState;
long allocOffset = allocOffset(allocIndex
, (long) isFullCapacitySegment);
// It's possible to implement shrinking during iteration, but it would be
// more complex than shrinking during ordinary removeAtSlot(), involving a
// procedure similar to compactEntriesDuringSegmentSwap().
// TODO implement shrinking during iteration
bitSetAndState = removeAtSlotNoShrink(bitSetAndState, segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupDecrements, dataGroupOffset, dataGroup,
allocIndex, allocOffset);
segment.bitSetAndState = bitSetAndState;
return;
}
}
if (shouldStopProbing(dataGroup)) {
break; // to throwing ConcurrentModificationException
}
outboundOverflowCount_perGroupDecrements = outboundOverflowCount_markGroupForChange(
outboundOverflowCount_perGroupDecrements, groupIndex);
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
if (groupIndex == baseGroupIndex) {
break; // to throwing ConcurrentModificationException
}
}
// There is no hash table entry pointing to allocIndexToRemove with the same hash tag bits
// as for the provided hash. It means some concurrent modification should be happening to
// the segment.
throw new ConcurrentModificationException();
}
/**
* The common implementation for
* {@link #replace(Object, Object)}: matchValue == null
* {@link #replace(Object, Object, Object)}: matchValue != null
*
* @param matchValue if non-null the entry's value should be equal to matchValue for
* replacement.
* @return a value replaced in the map, or null if no change was made. If matchValue is null the
* returned value is the internal value replaced in the map. If matchValue is non-null the
* returned value could be either the internal value _or_ the matchValue itself.
*/
private @Nullable V replaceImpl(Object segment,
int isFullCapacitySegment,
Object key, long hash, @Nullable Object matchValue,
V replacementValue) {
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
long dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
long allocIndex = firstAllocIndex(dataGroup, bitMask);
long allocOffset = allocOffset(allocIndex
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
V internalVal = readValueAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean valuesIdentical = internalVal == matchValue;
// Avoiding `matchValue == null` check before valuesIdentical check for the same
// reason as [Protecting null comparisons].
if (valuesIdentical || matchValue == null ||
valuesEqual(matchValue, internalVal)) {
writeValueAtOffset(segment, allocOffset, replacementValue);
return internalVal;
} else {
return null;
}
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
return null;
}
// [InflatedSegment checking after unsuccessful key search]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) { // [Positive likely branch]
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
return null;
}
} else {
return removeOrReplaceInflated(segment, key, hash, matchValue, replacementValue);
}
}
}
/**
* [Shallow xxxInflated() methods]
*
* If the given replacementValue is null the method removes an entry from the inflated segment,
* otherwise replaces the value corresponding to the key.
*
* @param matchValue if non-null the entry's value should be equal to matchValue for removal or
* replacement.
* @return a value removed or replaced in the map, or null if no change was made. If matchValue
* is null, the returned value is the internal value removed or replaced in the map. If
* matchValue is non-null, the returned value could be the internal value _or_ the matchValue
* itself.
*/
@SuppressWarnings("unchecked")
private @Nullable V removeOrReplaceInflated(Object segment, Object key, long hash,
@Nullable Object matchValue, @Nullable V replacementValue) {
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.removeOrReplace(
this, (K) key, hash, (V) matchValue, replacementValue);
}
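// Descriptive summary of the matchValue/replacementValue convention shared by
// removeImpl(), replaceImpl(), and removeOrReplaceInflated() (restating the Javadocs
// above in one place):
//   matchValue == null, replacementValue == null -> unconditional remove(key)
//   matchValue != null, replacementValue == null -> remove(key, value)
//   matchValue == null, replacementValue != null -> replace(key, value)
//   matchValue != null, replacementValue != null -> replace(key, oldValue, newValue)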
@HotPath
private @Nullable V putImpl(Object segment,
int isFullCapacitySegment,
K key, long hash, V value, boolean onlyIfAbsent) {
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
long groupIndex = baseGroupIndex;
long dataGroup;
long emptyBitMask;
// TODO [Compare with precomputed outboundOverflowCount_perGroupChanges approach]
long outboundOverflowCount_perGroupIncrements;
toInsertNewEntry:
{
long groupIndexStep = 0;
toFindEmptySlot:
{
keySearch:
while (true) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup); ; ) {
// Positive likely branch: the following condition is in a separate if block
// rather than the loop condition (as in all other operations: find(),
// compute(), etc.) in order to make it positive, so that it's more likely
// that JIT compiles the code with the assumption that this branch is taken
// (i. e. that bitMask is 0), which is what we really expect during Map.put()
// or putIfAbsent().
// TODO check bytecode output of javac
// TODO check if this even makes sense
// TODO check a different approach, with an unrolled check and then a
// do-while
if (bitMask == 0) {
break;
}
long allocOffset = allocOffset(firstAllocIndex(dataGroup, bitMask)
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
V internalVal = readValueAtOffset(segment, allocOffset);
if (!onlyIfAbsent) {
writeValueAtOffset(segment, allocOffset, value);
}
return internalVal;
}
bitMask = clearLowestSetBit(bitMask);
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
// Fast-path empty slot search: this is a fast-path condition to avoid
// re-reading the dataGroup (even though from L1) in [Find empty slot] loop.
// This is a likely branch because during puts the hash table is expected to
// be only half-full on average:
// SEGMENT_MAX_ALLOC_CAPACITY / 2 / HASH_TABLE_SLOTS = 37.5% full.
if (groupIndexStep == 0) { // [Positive likely branch]
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
outboundOverflowCount_perGroupIncrements = 0;
break toInsertNewEntry;
} else {
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_groupForChange(groupIndex);
// The first iteration of [Quadratic probing] inlined
groupIndexStep = 1;
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
break toFindEmptySlot;
}
} else {
break keySearch; // to [Reset groupIndex]
}
}
// [InflatedSegment checking after unsuccessful key search]
// [Positive likely branch]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) {
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
break keySearch; // to [Reset groupIndex]
}
} else {
return putInflated(segment, key, hash, value, onlyIfAbsent);
}
} // end of keySearch loop
// Reset groupIndex:
groupIndexStep = 0;
outboundOverflowCount_perGroupIncrements = 0;
groupIndex = baseGroupIndex;
// Fall-through to [Find empty slot]
} // end of toFindEmptySlot block
// Find empty slot: an alternative to finding an empty slot for insertion in a separate
// loop is to merge this loop with the above loop, but that would greatly increase the
// number of local variables (variables like emptySlotFound, emptySlotGroupIndex,
// insertionSlotIndexWithinGroup would be needed), which is critical because the
// register pressure is already very high. TODO compare the approaches
//
// The decision to keep this loop separate from the [keySearch] loop partially
// undermines the idea behind separate outboundOverflowCounts: see the comment for
// ContinuousSegment_BitSetAndStateArea.outboundOverflowCountsPerGroup. However, in a
// specialization of put() and similar methods for the case when removes cannot happen
// from a SmoothieMap the outboundOverflowCount_perGroupIncrements logic can be merged
// into the key search loop, similarly to how this is done with
// outboundOverflowCount_perGroupDecrements in methods such as remove().
// TODO implement this specialization
//noinspection InfiniteLoopStatement: https://youtrack.jetbrains.com/issue/IDEA-207495
while (true) {
long dataGroupOffset = dataGroupOffset(groupIndex
, (long) isFullCapacitySegment);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
break toInsertNewEntry; // to [Insert new entry]
}
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_markGroupForChange(
outboundOverflowCount_perGroupIncrements, groupIndex);
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
// No break condition in a loop searching for an empty slot: unlike for key search
// loops (see [Break from the loop when visited all groups in the hash table]) it's
// not possible to visit all groups in a segment and to not find an empty slot
// because SEGMENT_MAX_ALLOC_CAPACITY is less than HASH_TABLE_SLOTS.
}
} // end of toInsertNewEntry block
// Insert new entry:
int insertionSlotIndexWithinGroup = lowestMatchingSlotIndex(emptyBitMask);
insert(segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupIncrements, key, hash, value, groupIndex, dataGroup,
insertionSlotIndexWithinGroup);
return null;
}
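// Control-flow summary of putImpl() above (descriptive note): the keySearch loop either
// returns the existing value (updating it unless onlyIfAbsent), delegates to
// putInflated(), or breaks out to insert a new entry; the empty slot is then found either
// on the fast path within the current group ([Fast-path empty slot search]) or by the
// separate [Find empty slot] loop, after which [Insert new entry] stores the mapping via
// insert().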
/** [Shallow xxxInflated() methods] */
private @Nullable V putInflated(Object segment, K key, long hash, V value,
boolean onlyIfAbsent) {
@SuppressWarnings("unchecked")
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.put(this, key, hash, value, onlyIfAbsent);
}
@Override
public final @Nullable V computeIfAbsent(
K key, Function<? super K, ? extends @Nullable V> mappingFunction) {
checkNonNull(key);
checkNonNull(mappingFunction);
final long hash = keyHashCode(key);
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
final int segmentStructureModStamp = acquireSegmentStructureModStamp();
final Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
final int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
@Nullable V value;
long groupIndex = baseGroupIndex;
long dataGroup;
long emptyBitMask;
// TODO [Compare with precomputed outboundOverflowCount_perGroupChanges approach]
long outboundOverflowCount_perGroupIncrements;
toInsertNewEntry:
{
long groupIndexStep = 0;
toFindEmptySlot:
{
keySearch:
while (true) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
long allocOffset = allocOffset(firstAllocIndex(dataGroup, bitMask)
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
return readValueAtOffset(segment, allocOffset);
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
value = mappingFunction.apply(key);
if (value != null) { // [Positive likely branch]
// [Fast-path empty slot search]
if (groupIndexStep == 0) { // [Positive likely branch]
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
outboundOverflowCount_perGroupIncrements = 0;
break toInsertNewEntry;
} else {
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_groupForChange(groupIndex);
// The first iteration of [Quadratic probing] inlined
groupIndexStep = 1;
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
break toFindEmptySlot;
}
} else {
break keySearch; // to [Reset groupIndex]
}
} else {
return null; // mappingFunction returned null, not recording any value.
}
}
// [InflatedSegment checking after unsuccessful key search]
// [Positive likely branch]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) {
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
value = mappingFunction.apply(key);
if (value != null) {
break keySearch; // to [Reset groupIndex]
} else {
// mappingFunction returned null, not recording any value.
return null;
}
}
} else {
return computeIfAbsentInflated(segment, key, hash, mappingFunction);
}
} // end of keySearch loop
// Reset groupIndex:
groupIndexStep = 0;
outboundOverflowCount_perGroupIncrements = 0;
groupIndex = baseGroupIndex;
// Fall-through to [Find empty slot]
} // end of toFindEmptySlot block
// [Find empty slot]
//noinspection InfiniteLoopStatement: https://youtrack.jetbrains.com/issue/IDEA-207495
while (true) {
long dataGroupOffset = dataGroupOffset(groupIndex
, (long) isFullCapacitySegment);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
break toInsertNewEntry; // to [Insert new entry]
}
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_markGroupForChange(
outboundOverflowCount_perGroupIncrements, groupIndex);
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
// [No break condition in a loop searching for an empty slot]
}
} // end of toInsertNewEntry block
// Insert new entry:
int insertionSlotIndexWithinGroup = lowestMatchingSlotIndex(emptyBitMask);
insert(segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupIncrements, key, hash, value, groupIndex, dataGroup,
insertionSlotIndexWithinGroup);
return value;
}
/** [Shallow xxxInflated() methods] */
private @Nullable V computeIfAbsentInflated(Object segment,
K key, long hash, Function<? super K, ? extends @Nullable V> mappingFunction) {
@SuppressWarnings("unchecked")
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.computeIfAbsent(this, key, hash, mappingFunction);
}
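// Usage sketch (hypothetical map and values, assuming e.g. a SmoothieMap<String,
// List<String>>): like java.util.Map.computeIfAbsent(), the method above returns the
// existing or newly computed value and records nothing when mappingFunction returns
// null, so the common multimap idiom applies unchanged:
//
//   map.computeIfAbsent("orders", k -> new ArrayList<>()).add(order);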
@Override
public final @Nullable V compute(K key,
BiFunction<? super K, ? super @Nullable V, ? extends @Nullable V> remappingFunction) {
checkNonNull(key);
checkNonNull(remappingFunction);
final long hash = keyHashCode(key);
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
final int segmentStructureModStamp = acquireSegmentStructureModStamp();
final Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
final int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
@Nullable V newValue;
long groupIndex = baseGroupIndex;
long dataGroup;
long emptyBitMask;
// TODO [Compare with precomputed outboundOverflowCount_perGroupChanges approach]
long outboundOverflowCount_perGroupIncrements;
toInsertNewEntry:
{
long groupIndexStep = 0;
toFindEmptySlot:
{
keySearch:
while (true) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
// [Inlined lowestMatchingSlotIndex]
int trailingZeros = Long.numberOfTrailingZeros(bitMask);
long allocIndex = extractAllocIndex(dataGroup, trailingZeros);
long allocOffset = allocOffset(allocIndex
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
V oldValue = readValueAtOffset(segment, allocOffset);
newValue = remappingFunction.apply(key, oldValue);
if (newValue != null) {
writeValueAtOffset(segment, allocOffset, newValue);
} else {
// [Computing outboundOverflowCount_perGroupDecrements in the end]
// TODO provide separate computeLikelyRemove() method
long outboundOverflowCount_perGroupDecrements =
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, groupIndex);
// [Reusing local variable]
dataGroup = setSlotEmpty(dataGroup, trailingZeros);
removeAtSlot(hash, segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupDecrements, dataGroupOffset,
dataGroup, allocIndex, allocOffset);
}
return newValue;
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
newValue = remappingFunction.apply(key, null);
if (newValue != null) { // [Positive likely branch]
// [Fast-path empty slot search]
if (groupIndexStep == 0) { // [Positive likely branch]
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
outboundOverflowCount_perGroupIncrements = 0;
break toInsertNewEntry;
} else {
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_groupForChange(groupIndex);
// The first iteration of [Quadratic probing] inlined
groupIndexStep = 1;
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
break toFindEmptySlot;
}
} else {
break keySearch; // to [Reset groupIndex]
}
} else {
// remappingFunction returned null, not recording any value.
return null;
}
}
// [InflatedSegment checking after unsuccessful key search]
// [Positive likely branch]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) {
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
newValue = remappingFunction.apply(key, null);
if (newValue != null) {
break keySearch; // to [Reset groupIndex]
} else {
// remappingFunction returned null, not recording any value.
return null;
}
}
} else {
return computeInflated(segment, key, hash, remappingFunction);
}
} // end of keySearch loop
// Reset groupIndex:
groupIndexStep = 0;
outboundOverflowCount_perGroupIncrements = 0;
groupIndex = baseGroupIndex;
// Fall-through to [Find empty slot]
} // end of toFindEmptySlot block
// [Find empty slot]
//noinspection InfiniteLoopStatement: https://youtrack.jetbrains.com/issue/IDEA-207495
while (true) {
long dataGroupOffset = dataGroupOffset(groupIndex
, (long) isFullCapacitySegment);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
break toInsertNewEntry; // to [Insert new entry]
}
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_markGroupForChange(
outboundOverflowCount_perGroupIncrements, groupIndex);
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
// [No break condition in a loop searching for an empty slot]
}
} // end of toInsertNewEntry block
// Insert new entry:
int insertionSlotIndexWithinGroup = lowestMatchingSlotIndex(emptyBitMask);
insert(segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupIncrements, key, hash, newValue, groupIndex,
dataGroup, insertionSlotIndexWithinGroup);
return newValue;
}
/** [Shallow xxxInflated() methods] */
private @Nullable V computeInflated(Object segment, K key, long hash,
BiFunction<? super K, ? super @Nullable V, ? extends @Nullable V> remappingFunction) {
@SuppressWarnings("unchecked")
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.compute(this, key, hash, remappingFunction);
}
@Override
public final @Nullable V merge(K key, V value,
BiFunction<? super V, ? super V, ? extends @Nullable V> remappingFunction) {
checkNonNull(key);
checkNonNull(value);
checkNonNull(remappingFunction);
final long hash = keyHashCode(key);
final long hash_segmentLookupBits = segmentLookupBits(hash);
// [Reading consistent segment and isFullCapacitySegment values]
final int segmentStructureModStamp = acquireSegmentStructureModStamp();
final Object segment = segmentBySegmentLookupBits(hash_segmentLookupBits);
final int isFullCapacitySegment = isFullCapacitySegment(hash_segmentLookupBits);
validateSegmentStructureModStamp(segmentStructureModStamp);
final long baseGroupIndex = baseGroupIndex(hash);
final long hashTagBits = tagBits(hash);
long groupIndex = baseGroupIndex;
long dataGroup;
long emptyBitMask;
// TODO [Compare with precomputed outboundOverflowCount_perGroupChanges approach]
long outboundOverflowCount_perGroupIncrements;
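// Control flow note: toInsertNewEntry and toFindEmptySlot below are labeled statement
// blocks, not loops. `break toFindEmptySlot` jumps to the code immediately following the end
// of the toFindEmptySlot block (the [Find empty slot] loop), and `break toInsertNewEntry`
// jumps to the code following the toInsertNewEntry block (the [Insert new entry] code),
// mirroring the structure of the compute() method above.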
toInsertNewEntry:
{
long groupIndexStep = 0;
toFindEmptySlot:
{
keySearch:
while (true) {
long tagGroupOffset = tagGroupOffset(groupIndex
, (long) isFullCapacitySegment);
long tagGroup = readTagGroupAtOffset(segment, tagGroupOffset);
long dataGroupOffset = dataGroupFromTagGroupOffset(tagGroupOffset);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
for (long bitMask = match(tagGroup, hashTagBits, dataGroup);
bitMask != 0L;
bitMask = clearLowestSetBit(bitMask)) {
// [Inlined lowestMatchingSlotIndex]
int trailingZeros = Long.numberOfTrailingZeros(bitMask);
long allocIndex = extractAllocIndex(dataGroup, trailingZeros);
long allocOffset = allocOffset(allocIndex
, (long) isFullCapacitySegment);
K internalKey = readKeyAtOffset(segment, allocOffset);
//noinspection ObjectEquality: identity comparison is intended
boolean keysIdentical = internalKey == key;
if (keysIdentical || keysEqual(key, internalKey)) {
V internalVal = readValueAtOffset(segment, allocOffset);
@Nullable V newValue = remappingFunction.apply(internalVal, value);
if (newValue != null) {
writeValueAtOffset(segment, allocOffset, newValue);
} else {
// [Computing outboundOverflowCount_perGroupDecrements in the end]
// TODO provide separate mergeLikelyRemove() method
long outboundOverflowCount_perGroupDecrements =
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, groupIndex);
// [Reusing local variable]
dataGroup = setSlotEmpty(dataGroup, trailingZeros);
removeAtSlot(hash, segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupDecrements, dataGroupOffset,
dataGroup, allocIndex, allocOffset);
}
return newValue;
}
}
// Likelihood of this branch depends on SmoothieMap's use case.
if (shouldStopProbing(dataGroup)) {
// [Fast-path empty slot search]
if (groupIndexStep == 0) { // [Positive likely branch]
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
outboundOverflowCount_perGroupIncrements = 0;
break toInsertNewEntry;
} else {
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_groupForChange(groupIndex);
// The first iteration of [Quadratic probing] inlined
groupIndexStep = 1;
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
break toFindEmptySlot;
}
} else {
break keySearch; // to [Reset groupIndex]
}
}
// [InflatedSegment checking after unsuccessful key search]
// [Positive likely branch]
if (dataGroup != INFLATED_SEGMENT__MARKER_DATA_GROUP) {
groupIndexStep += 1; // [Quadratic probing]
if (groupIndexStep != HASH_TABLE_GROUPS) { // [Positive likely branch]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
} else {
// [Break from the loop when visited all groups in the hash table]
break keySearch; // to [Reset groupIndex]
}
} else {
return mergeInflated(segment, key, hash, value, remappingFunction);
}
} // end of keySearch loop
// Reset groupIndex:
groupIndexStep = 0;
outboundOverflowCount_perGroupIncrements = 0;
groupIndex = baseGroupIndex;
// Fall-through to [Find empty slot]
} // end of toFindEmptySlot block
// [Find empty slot]
//noinspection InfiniteLoopStatement: https://youtrack.jetbrains.com/issue/IDEA-207495
while (true) {
long dataGroupOffset = dataGroupOffset(groupIndex
, (long) isFullCapacitySegment);
dataGroup = readDataGroupAtOffset(segment, dataGroupOffset);
emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
break toInsertNewEntry; // to [Insert new entry]
}
outboundOverflowCount_perGroupIncrements =
outboundOverflowCount_markGroupForChange(
outboundOverflowCount_perGroupIncrements, groupIndex);
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
// [No break condition in a loop searching for an empty slot]
}
} // end of toInsertNewEntry block
// Insert new entry:
int insertionSlotIndexWithinGroup = lowestMatchingSlotIndex(emptyBitMask);
insert(segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupIncrements, key, hash, value, groupIndex, dataGroup,
insertionSlotIndexWithinGroup);
return value;
}
/** [Shallow xxxInflated() methods] */
private @Nullable V mergeInflated(Object segment, K key, long hash, V value,
BiFunction<? super V, ? super V, ? extends @Nullable V> remappingFunction) {
@SuppressWarnings("unchecked")
InflatedSegment<K, V> inflatedSegment = (InflatedSegment<K, V>) segment;
return inflatedSegment.merge(this, key, hash, value, remappingFunction);
}
//endregion
//region insert() and the family of makeSpaceAndInsert() methods called from it
@HotPath
private void insert(Object segment,
int isFullCapacitySegment,
long outboundOverflowCounts_perGroupIncrements, K key, long hash, V value,
long groupIndex, long dataGroup, int insertionSlotIndexWithinGroup) {
long bitSetAndState = getBitSetAndState(segment);
int allocCapacity = allocCapacity(bitSetAndState);
int allocIndex = freeAllocIndexClosestTo(bitSetAndState,
allocIndexBoundaryForLocalAllocation((int) groupIndex
, isFullCapacitySegment)
, allocCapacity);
if (allocIndex < allocCapacity) { // [Positive likely branch]
doInsert(segment,
isFullCapacitySegment,
outboundOverflowCounts_perGroupIncrements, key, hash, value,
groupIndex, dataGroup, insertionSlotIndexWithinGroup, bitSetAndState,
allocIndex);
} else {
@SuppressWarnings("unchecked") Segment seg = (Segment) segment;
makeSpaceAndInsert(allocCapacity, seg, outboundOverflowCounts_perGroupIncrements, key,
hash, value, groupIndex, dataGroup, insertionSlotIndexWithinGroup,
bitSetAndState);
}
}
@HotPath
private void doInsert(Object segment,
int isFullCapacitySegment,
long outboundOverflowCounts_perGroupIncrements,
K key, long hash, V value, long groupIndex, long dataGroup,
int insertionSlotIndexWithinGroup, long bitSetAndState, int allocIndex) {
incrementSize();
if (outboundOverflowCounts_perGroupIncrements != 0) { // Unlikely branch
incrementOutboundOverflowCountsPerGroup(segment,
isFullCapacitySegment,
outboundOverflowCounts_perGroupIncrements);
}
bitSetAndState = setAllocBit(bitSetAndState, allocIndex);
setBitSetAndState(segment, bitSetAndState);
// The tag can also be passed down from the methods calling insert() (like put()) but
// it's chosen not to do so because of the high register pressure in those methods.
// Compare with a similar tradeoff in makeSpaceAndInsert(): see @apiNote to that method.
// TODO compare the approaches
byte tag = (byte) tagBits(hash);
writeEntry(segment,
(long) isFullCapacitySegment,
key, tag, value, groupIndex, dataGroup, insertionSlotIndexWithinGroup, allocIndex);
}
/**
* Makes space for an extra entry by means of either {@link #growCapacityAndInsert}, {@link
* #splitAndInsert}, or {@link #inflateAndInsert}.
*
* @apiNote
* allocCapacity could have been re-extracted from bitSetAndState instead of being passed as a
* parameter to this method, which would reduce the bytecode size of {@link #insert} (that is
* good: see [Reducing bytecode size of a hot method]) and might even be cheaper than pushing
* variables that are in registers onto the stack and then popping them from the stack (which
* happens when so many arguments are passed to the method). Similarly,
* when so many arguments are passed to the method). Similarly,
* outboundOverflowCounts_perGroupIncrements could be computed within this method from
* groupIndex and {@link HashTable#baseGroupIndex}(hash) if
* [Precomputed outboundOverflowCount_perGroupChanges] is implemented.
*
* Compare with a similar tradeoff in {@link #doInsert} itself where `tag` can be passed into
* the method instead of recomputing it inside the method.
* TODO compare the approaches
*/
@AmortizedPerSegment
final void makeSpaceAndInsert(int allocCapacity, Segment<K, V> segment,
long outboundOverflowCounts_perGroupIncrements, K key, long hash, V value,
long groupIndex, long dataGroup, int insertionSlotIndexWithinGroup,
long bitSetAndState) {
if (isBulkOperationPlaceholderBitSetAndState(bitSetAndState)) {
throw new ConcurrentModificationException();
}
// ### First route: check if the segment is intermediate-capacity and should be grown to
// ### full size.
// This branch could more naturally be placed in insert() (and then allocCapacity shouldn't
// be passed into makeSpaceAndInsert()) but there is an objective to make bytecode size of
// insert() as small as possible, see [Reducing bytecode size of a hot method]. TODO compare
if (allocCapacity < SEGMENT_MAX_ALLOC_CAPACITY) {
growCapacityAndInsert(segment, outboundOverflowCounts_perGroupIncrements, key, hash,
value, groupIndex, dataGroup, insertionSlotIndexWithinGroup, bitSetAndState);
return;
}
// Need to read modCount here rather than inside methods splitAndInsert() and
// inflateAndInsert() so that it is done before calling to
// tryEnsureSegmentsArrayCapacityForSplit() that may update the modCount field (and is a
// bulky method that needs to be surrounded with modCount read and check).
int modCount = getModCountOpaque();
// ### Second route: split or inflate the segment.
int segmentOrder = segmentOrder(bitSetAndState);
// InflatedSegment.shouldBeSplit() refers to and depends on the following call to
// computeAverageSegmentOrder().
// Using `size + 1` as the argument for computeAverageSegmentOrder() because we are in the
// process of inserting a new entry. If just `size` were used, it would be possible that
// a segment is inflated and then immediately split afterwards.
int averageSegmentOrder = computeAverageSegmentOrder(size + 1);
boolean acceptableOrderAfterSplitting =
segmentOrder < averageSegmentOrder + MAX_SEGMENT_ORDER_DIFFERENCE_FROM_AVERAGE;
int modCountAddition;
// In principle, we can still split to-become-outlier segments or segments that are
// already outliers as long as their order is less than segmentsArray's order (which
// might happen to be greater than averageSegmentOrder +
// MAX_SEGMENT_ORDER_DIFFERENCE_FROM_AVERAGE if the SmoothieMap used to be larger and
// has shrunk in size since). But it's not done because we don't want to disturb the
// poor hash code distribution detection (see HashCodeDistribution), as well as
// the functionality of moveToMapWithShrunkArray().
if (acceptableOrderAfterSplitting &&
(modCountAddition =
tryEnsureSegmentsArrayCapacityForSplit(segmentOrder)) >= 0) {
// Matches the modCount field increment performed in
// tryEnsureSegmentsArrayCapacityForSplit().
modCount += modCountAddition;
splitAndInsert(modCount, segment, key, hash, value, bitSetAndState, segmentOrder);
} else {
inflateAndInsert(modCount, segmentOrder, segment, bitSetAndState, key, hash, value);
}
}
@AmortizedPerSegment
private void growCapacityAndInsert(Segment<K, V> oldSegment,
long outboundOverflowCounts_perGroupIncrements, K key, long hash, V value,
long groupIndex, long dataGroup, int insertionSlotIndexWithinGroup,
long bitSetAndState) {
int modCount = getModCountOpaque();
// The old segment's bitSetAndState is never reset back to an operational value after this
// statement.
oldSegment.bitSetAndState = makeBulkOperationPlaceholderBitSetAndState(bitSetAndState);
// ### Create a new segment.
int newAllocCapacity = SEGMENT_MAX_ALLOC_CAPACITY;
Segment<?, ?> newSegment = grow(oldSegment, bitSetAndState, newAllocCapacity);
// Reusing local variable: it's better to reuse an existing local variable than to introduce
// a new variable because of a risk of using a wrong variable in the code below.
bitSetAndState = newSegment.bitSetAndState;
// ### Replace references from oldSegment to newSegment in segmentsArray.
int segmentOrder = segmentOrder(bitSetAndState);
int firstSegmentIndex = firstSegmentIndexByHashAndOrder(hash, segmentOrder);
boolean oldSegment_isFullCapacity = false;
replaceInSegmentsArray(
getNonNullSegmentsArrayOrThrowCme(), firstSegmentIndex, segmentOrder, newSegment
, oldSegment_isFullCapacity);
modCount++; // Matches the modCount field increment performed in replaceInSegmentsArray().
// ### Insert the new entry.
int allocIndex = freeAllocIndexClosestTo(bitSetAndState,
FullCapacitySegment.allocIndexBoundaryForLocalAllocation((int) groupIndex)
, newAllocCapacity);
int newSegment_isFullCapacity = 1;
// No point in specializing doInsert() for full-capacity segments because we are in an
// AmortizedPerSegment method.
doInsert(newSegment,
newSegment_isFullCapacity,
outboundOverflowCounts_perGroupIncrements, key, hash, value,
groupIndex, dataGroup, insertionSlotIndexWithinGroup, bitSetAndState, allocIndex);
modCount++; // Matches the modCount field increment performed in doInsert().
checkModCountOrThrowCme(modCount);
}
/**
* Precondition for calling this method: {@link #tryEnsureSegmentsArrayCapacityForSplit} is
* called with {@code priorSegmentOrder} as the argument and returned a non-negative result.
*/
@AmortizedPerSegment
private void splitAndInsert(int modCount, Segment<K, V> fromSegment, K key, long hash, V value,
long fromSegment_bitSetAndState, int priorSegmentOrder) {
if (!splitBetweenTwoNewSegments) {
split(modCount, fromSegment, hash, fromSegment_bitSetAndState, priorSegmentOrder);
} else {
splitBetweenTwoNewSegments(
modCount, fromSegment, hash, fromSegment_bitSetAndState, priorSegmentOrder);
}
// ### Insert the new entry into fromSegment or intoSegment: calling into
// internalPutIfAbsent(), which accesses the segmentsArray (and the bit set with
// isFullCapacity flags) even though both fromSegment and intoSegment could be available as
// local variables (if split() was inlined into splitAndInsert()), because choosing between
// fromSegment and intoSegment and determining whether the chosen segment has full capacity
// in an ad-hoc manner would likely result in more branches than internalPutIfAbsent(). Note
// that accessing segmentsArray (and the bit set) should read from L1 because this path with
// the same key and hash has already been taken in the beginning of the operation that
// initiated this splitAndInsert() call: in other words, higher in the stack. Also, calling
// to internalPutIfAbsent() is simpler than an alternative ad-hoc segment choice logic.
internalPutIfAbsentDuringSplit(key, hash, value);
}
private void internalPutIfAbsentDuringSplit(K key, long hash, V value) {
if (internalPutIfAbsent(key, hash, value) != null) {
throw new ConcurrentModificationException(
"New entry shouldn't replace existing one during split");
}
}
private void split(int modCount, Segment<K, V> fromSegment, long hash,
long fromSegment_bitSetAndState, int priorSegmentOrder) {
// The point of incrementing modCount early is that concurrent calls to other methods have a
// chance to catch a concurrent modification. We increment modCount here because splitting
// procedure changes the contents of fromSegment structurally (in doSplit()) before calling
// to replaceInSegmentsArray(). This is a unique case among other *AndInsert() methods:
// inflateAndInsert() and splitBetweenTwoNewSegmentsAndInsert() call
// replaceInSegmentsArray() as their first structural modification. growCapacityAndInsert()
// populates the new segment, but doesn't alter the contents of the old segment (apart from
// setting its bitSetAndState to a bulk operation placeholder value) until the call to
// replaceInSegmentsArray() as well.
modCount++;
// Parallel modCount field increment: increment modCount field on itself rather than
// assigning the local variable to still be able to capture the discrepancy and throw a
// ConcurrentModificationException in the end of this method.
this.modCount++;
// The bitSetAndState of fromSegment is reset back to an operational value inside doSplit(),
// closer to the end of the method.
fromSegment.bitSetAndState =
makeBulkOperationPlaceholderBitSetAndState(fromSegment_bitSetAndState);
int siblingSegmentsOrder = priorSegmentOrder + 1;
// ### Create a new segment and split entries between fromSegment and the new segment.
int intoSegmentAllocCapacity = getInitialSegmentAllocCapacity(siblingSegmentsOrder);
// intoSegment's bitSetAndState is written and [Safe segment publication] is ensured inside
// doSplit(), closer to the end of the method.
Segment<K, V> intoSegment =
allocateNewSegmentWithoutSettingBitSetAndSet(intoSegmentAllocCapacity);
int siblingSegmentsQualificationBitIndex =
HASH__SEGMENT_LOOKUP_SHIFT + siblingSegmentsOrder - 1;
long fromSegmentIsHigher = doSplit(fromSegment,
fromSegment_bitSetAndState, intoSegment, intoSegmentAllocCapacity,
siblingSegmentsOrder, siblingSegmentsQualificationBitIndex);
// storeFence() is called inside doSplit() to make the publishing of intoSegment safe.
// ### Publish intoSegment (the new segment) to segmentsArray.
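// fromSegmentIsHigher is either 0 or `1 << siblingSegmentsQualificationBitIndex` (see the
// Javadoc for doSplit()), so the shift below yields intoSegmentIsLower as plain 0 or 1: it
// is 1 exactly when intoSegment ended up as the lower-index sibling.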
int intoSegmentIsLower = (int) (fromSegmentIsHigher >>>
siblingSegmentsQualificationBitIndex);
int firstSiblingSegmentsIndex =
firstSegmentIndexByHashAndOrder(hash, priorSegmentOrder);
int firstIntoSegmentIndex = chooseFirstSiblingSegmentIndex(
firstSiblingSegmentsIndex, siblingSegmentsOrder, intoSegmentIsLower);
// The logic of makeSpaceAndInsert() guarantees that splitAndInsert() is called only with
// full-capacity segments.
boolean fromSegment_isFullCapacity = true;
replaceInSegmentsArray(getNonNullSegmentsArrayOrThrowCme(), firstIntoSegmentIndex,
siblingSegmentsOrder, intoSegment
, fromSegment_isFullCapacity);
modCount++; // Matches the modCount field increment performed in replaceInSegmentsArray().
// Unlike other similar methods which may be called from makeSpaceAndInsert(), namely
// growCapacityAndInsert() and inflateAndInsert(), we check the modCount here before the
// "insert" part of the method because the insertion might cause fromSegment or intoSegment
// to be inflated or split itself if during the splitting all or almost all entries went to
// one of the segments. There is no cheap way to detect if that has happened in order to
// additionally increment the local copy of modCount. (Compare with the similar problem in
// splitBetweenTwoNewSegments() and splitInflated().) After all, the main point is making a
// modCount check after bulky operations: doSplit() and replaceInSegmentsArray() which are
// called above. Including the last point update (the internalPutIfAbsent() call below) in
// the scope of the modCount check is not necessary.
checkModCountOrThrowCme(modCount);
}
/**
* Distributes entries between fromSegment and intoSegment.
*
* fromSegment_bitSetAndState includes the prior segment order, which is equal to newSegmentOrder
* - 1. It's incremented inside this method and is written into fromSegment's {@link
* Segment#bitSetAndState}. intoSegment's bitSetAndState is also valid after this method
* returns.
*
* @return [boolean as long], 0 if fromSegment (as a Java object rather than a conceptual
* SmoothieMap's segment: these two things differ after [Swap segments] inside this method) is
* the lower-index one of the two sibling segments after the split, or
* `1 << siblingSegmentsQualificationBitIndex` if fromSegment object is the higher-index
* segment. See the definition of swappedSegmentsInsideLoop variable inside the method.
*
* @apiNote siblingSegmentsQualificationBitIndex can be re-computed inside the method from
* newSegmentOrder instead of being passed as a parameter. There is the same "pass-or-recompute"
* tradeoff as in {@link #doInsert} and {@link #makeSpaceAndInsert}. However, in doSplit() an
* additional factor in favor of passing siblingSegmentsQualificationBitIndex is that it is then
* computed only once, in {@link #splitAndInsert}, hence fewer implicit dependencies and a lower
* probability of making mistakes.
*/
@SuppressWarnings({"UnnecessaryLabelOnBreakStatement", "UnnecessaryLabelOnContinueStatement"})
@AmortizedPerSegment
final long doSplit(Segment<K, V> fromSegment, long fromSegment_bitSetAndState,
Segment<K, V> intoSegment, int intoSegment_allocCapacity, int newSegmentOrder,
int siblingSegmentsQualificationBitIndex) {
// Updating fromSegment_bitSetAndState's segment order early in this method because it's
// hard to track different segment orders of fromSegment and intoSegment and their possible
// buggy interactions in InterleavedSegments.swapContentsDuringSplit().
fromSegment_bitSetAndState = incrementSegmentOrder(fromSegment_bitSetAndState);
// ### Defining variables that will be used in and after [fromSegment iteration].
int intoSegment_currentSize = 0;
long intoSegment_bitSetAndState =
makeNewBitSetAndState(intoSegment_allocCapacity, newSegmentOrder);
long fromSegment_outboundOverflowCount_perGroupDeductions = 0;
long intoSegment_outboundOverflowCount_perGroupAdditions = 0;
// boolean as long: this method splits the entries between fromSegment and intoSegment so
// that entries that should go into the lower-index conceptual SmoothieMap's segment (that
// is, their hash codes have 0 in siblingSegmentsQualificationBitIndex bit) remain in
// fromSegment and intoSegment corresponds to the higher-index conceptual segment. After
// [Swap segments], "Java object shells" of the two sibling segments swap. This is the
// information which should be returned from this method: see its Javadoc. Instead of
// having `boolean swappedSegments` and returning
// `swappedSegments ? siblingSegmentsQualificationBit : 0L` this variable stores
// siblingSegmentsQualificationBit directly (which means "true"; 0 means "false") to avoid
// a branch in the return statement.
long swappedSegments = 0;
long siblingSegmentsQualificationBit = 1L << siblingSegmentsQualificationBitIndex;
// TODO check that Hotspot compiles this expression into branchless code.
// TODO check if Hotspot emits more optimal machine code when a variable is kept as long and
// casted down to int when needed or vice versa.
int intoSegment_isFullCapacity =
intoSegment_allocCapacity == SEGMENT_MAX_ALLOC_CAPACITY ? 1 : 0;
int fromSegment_isFullCapacity = 1;
// ### fromSegment iteration: iterating all entries in the hash table, while moving the
// proper entries to intoSegment and reducing (if possible) unnecessary "group overflows"
// for entries that should remain in fromSegment, that is, moving the entries to (or, at
// least, closer to) their respective base groups (see HashTable.baseGroupIndex()).
//
// Reducing group overflows is expected to be effective starting from the second iteration
// of the loop, because about half of the entries in the previous group are expected to be
// moved out to intoSegment, leaving holes for entries in the current iteration group which
// have overflown by one group; such entries should be just single percents of the total
// population of entries (see [Non-overflown entry]), and entries overflown by two or more
// steps in the group probing chain are orders of magnitude rarer still. Note: this
// reasoning depends on the current [Quadratic probing] chain, which is baseGroupIndex +
// [0, 1, 3, 6, 2, 7, 5, 4] in the positive direction, as well as on the following loop.
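// A brief illustration of that chain (assuming HASH_TABLE_GROUPS = 8, consistent with the
// 8-element chain above, and addGroupIndex() wrapping modulo HASH_TABLE_GROUPS): the step
// sizes 1, 2, 3, ..., 7 produce cumulative offsets 0, 1, 3, 6, 10, 15, 21, 28 from
// baseGroupIndex, which modulo 8 are 0, 1, 3, 6, 2, 7, 5, 4, visiting every group exactly
// once before the probing wraps around.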
//
// However, on the first iteration of the following loop, overflown entries can't be shifted
// back in their group probing chains. One way to fix this is to make HASH_TABLE_GROUPS + 1
// steps in the loop rather than just HASH_TABLE_GROUPS, but that's a relatively expensive
// solution because the remaining entries in the first iteration group would be checked (in
// particular, their hash code is computed) twice in the loop. There is no opportunity for
// a "split phase" between the 0-th and 8-th iterations because we don't know a priori which
// entries should remain in fromSegment and which should move to intoSegment.
//
// A cheaper approach is taken here: missed opportunities for reducing the probing chain
// length of entries ending up in the first group iterated in the following loop are just
// tolerated. However, the first group in the iteration is chosen differently at random each
// time to prevent overflown entries from "accruing" in group 0 (if it were always used as
// the start) over a series of generational segment splits.
//
// `size & HASH_TABLE_GROUPS_MASK` is effectively a pseudo-random value (after splitting the
// first segment in the SmoothieMap with the order of 0), yet it is deterministic when the
// map is populated with exactly the same data (e. g. in tests). (Compare with
// [Randomized choice of the segment to shrink into].)
long iterGroupIndexStart = size & HASH_TABLE_GROUPS_MASK;
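// (Assuming HASH_TABLE_GROUPS_MASK == HASH_TABLE_GROUPS - 1, i. e. a power-of-two group
// count, the expression above yields a start group index in [0, HASH_TABLE_GROUPS).)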
// [Branchless hash table iteration]: this instance of [Branchless hash table iteration] has
// yet different properties than Segment.removeIf() and compactEntriesDuringSegmentSwap()
// because the hash table should be SEGMENT_MAX_ALLOC_CAPACITY / HASH_TABLE_SLOTS = 75%
// full here. So if byte-by-byte hash table iteration is better than the branchless
// approach in any one of doSplit(), Segment.removeIf(), and
// compactEntriesDuringSegmentSwap() methods, it should be in doSplit().
// TODO compare the approaches for doSplit().
// On the other hand, this instance of [Branchless hash table iteration] has exactly the
// same properties as in doSplitBetweenTwoNewSegments().
// [Int-indexed loop to avoid a safepoint poll]
for (int extraGroupIndex = 0; extraGroupIndex < HASH_TABLE_GROUPS; extraGroupIndex++) {
long iterGroupIndex = addGroupIndex(iterGroupIndexStart, (long) extraGroupIndex);
long iterDataGroupOffset = dataGroupOffset(iterGroupIndex
, (long) fromSegment_isFullCapacity);
long iterDataGroup = readDataGroupAtOffset(fromSegment, iterDataGroupOffset);
// TODO compare with int-indexed loop with bitCount(bitMask) limit. It may be executed
// without mispredicted branches on modern CPUs. However, this might be ineffective
// because of possible update of iterBitMask in [Swap segments], meaning that the loop
// limit might change in the course of the loop itself, which may inhibit CPU branch
// prediction behaviour.
groupIteration:
for (long iterBitMask = matchFull(iterDataGroup);
iterBitMask != 0L;
iterBitMask = clearLowestSetBit(iterBitMask)) {
long fromSegment_allocIndex;
// [Inlined lowestMatchingSlotIndex]
int iterTrailingZeros = Long.numberOfTrailingZeros(iterBitMask);
fromSegment_allocIndex = extractAllocIndex(iterDataGroup, iterTrailingZeros);
long fromSegment_allocOffset = allocOffset(fromSegment_allocIndex
, (long) fromSegment_isFullCapacity);
K key = readKeyAtOffset(fromSegment, fromSegment_allocOffset);
long hash = keyHashCode(key);
long baseGroupIndex = baseGroupIndex(hash);
final boolean entryShouldRemainInFromSegment =
(hash & siblingSegmentsQualificationBit) == 0;
if (entryShouldRemainInFromSegment) { // 50-50 unpredictable branch
// ### The entry remains in fromSegment.
// Non-overflown entry:
// If iterGroupIndex equals baseGroupIndex then the condition of
// `if (newGroupIndex == iterGroupIndex)` inside the [Find empty slot] loop
// below must be true before the first iteration (although the loop is organized
// so that this condition is not checked before the first iteration). In fact,
// the condition below and the condition inside the loop can be both replaced
// with a single loop condition. This is not done to preserve
// [Positive likely branch], similarly to what is done in putImpl().
//
// This branch should be usually taken with the maximum load factor of
// SEGMENT_MAX_ALLOC_CAPACITY / HASH_TABLE_SLOTS = 0.75 and therefore CPU
// should predict this branch well.
// TODO evaluate exactly the probability of this branch (should be about 98%?)
if (iterGroupIndex == baseGroupIndex) { // [Positive likely branch]
continue groupIteration;
}
// iterGroupIndex != baseGroupIndex: an unlikely path
// [Find empty slot]
for (long newGroupIndex = baseGroupIndex, groupIndexStep = 0; ; ) {
long dataGroupOffset = dataGroupOffset(newGroupIndex
, (long) fromSegment_isFullCapacity);
long dataGroup = readDataGroupAtOffset(fromSegment, dataGroupOffset);
long emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
int newSlotIndexWithinGroup = lowestMatchingSlotIndex(emptyBitMask);
// Calling makeData() here should be faster than doing something like
// extractDataByte(iterDataGroup, iterTrailingZeros) because makeData()
// is one bitwise op and a cast, while extractDataByte() would be two
// bitwise ops and a cast. Keeping the value when it was used inside the
// extractAllocIndex(iterDataGroup, iterTrailingZeros) call above to
// obtain allocIndex is also likely to be worse because of high register
// pressure: the kept data value is likely going to be pushed to the
// stack.
byte data = makeData(dataGroup, fromSegment_allocIndex);
writeTagAndData(fromSegment,
(long) fromSegment_isFullCapacity,
newGroupIndex, newSlotIndexWithinGroup,
(byte) tagBits(hash), data);
// An entry at the slot is shifted backwards closer to baseGroupIndex.
// The difference between outboundOverflowCount changes should be
// subtracted. XOR operation is such difference.
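// (A sketch of why XOR works here, assuming computeOutboundOverflowCount_perGroupChanges()
// marks each group probed strictly between its two arguments: the groups marked for
// (baseGroupIndex, newGroupIndex) form a prefix of those marked for
// (baseGroupIndex, iterGroupIndex) along the same probe chain, so XOR-ing the two masks
// leaves exactly the groups that the entry no longer overflows past.)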
fromSegment_outboundOverflowCount_perGroupDeductions +=
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, newGroupIndex) ^
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, iterGroupIndex);
// Empty the iteration slot:
iterDataGroup = setSlotEmpty(iterDataGroup, iterTrailingZeros);
// Not doing anything with the corresponding tag group, i. e. leaving
// "garbage" in the tag byte of the emptied slot.
continue groupIteration;
}
groupIndexStep += 1; // [Quadratic probing]
newGroupIndex = addGroupIndex(newGroupIndex, groupIndexStep);
if (newGroupIndex == iterGroupIndex) { // [Positive likely branch]
// if newGroupIndex has reached iterGroupIndex then doing nothing
// and proceeding to the next iteration of the groupIteration loop,
// as well as in `if (iterGroupIndex == baseGroupIndex)` case above.
continue groupIteration;
}
}
}
else {
// ### The entry is moved to intoSegment.
// Read the value and purge the entry from fromSegment before [Swap segments]
// because fromSegment_allocIndex and fromSegment_allocOffset will be invalid
// after segment swap for Interleaved segments: see the documentation for
// InterleavedSegments.swapContentsDuringSplit().
V value = readValueAtOffset(fromSegment, fromSegment_allocOffset);
// #### Purge the entry from fromSegment.
{
eraseKeyAndValueAtOffset(fromSegment, fromSegment_allocOffset);
fromSegment_bitSetAndState =
clearAllocBit(fromSegment_bitSetAndState, fromSegment_allocIndex);
// Empty the iteration slot:
iterDataGroup = setSlotEmpty(iterDataGroup, iterTrailingZeros);
// Not doing anything with the corresponding tag group, i. e. leaving
// "garbage" in the tag byte of the emptied slot.
}
// Swap segments:
// #### Swap fromSegment's and intoSegment's contents if the intoSegment is
// #### full.
// The probability of taking the following branch once during the
// [fromSegment iteration], that is, when more than
// SEGMENT_INTERMEDIATE_ALLOC_CAPACITY go into an intermediate-capacity
// intoSegment, equals 1 -
// CDF[BinomialDistribution[SEGMENT_MAX_ALLOC_CAPACITY(48), 0.5], 30] ~= 3%
// (in case of Continuous segments), or 1 -
// CDF[BinomialDistribution[SEGMENT_MAX_ALLOC_CAPACITY(48), 0.5], 32] ~= 0.7%
// (in case of Interleaved segments), except for the cases when the distribution
// is skewed so that the majority of segments at some order split with many more
// keys going to higher-index siblings than to lower-index ones. There is no
// doubling of probability like for HashCodeDistribution's
// HASH_TABLE_HALF__SLOTS_MINUS_MAX_KEYS__SPLIT_CUMULATIVE_PROBS values because
// if the distribution is skewed towards fromSegment the following branch is not
// taken.
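// (For reference: CDF[BinomialDistribution[n, 0.5], k] above denotes the probability that
// at most k of the n = SEGMENT_MAX_ALLOC_CAPACITY entries land in the higher-index sibling
// when each entry goes there independently with probability 0.5, so one minus it is the
// chance that intoSegment receives more entries than its intermediate capacity (k) can
// hold.)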
//
// Therefore the following branch is very unlikely on each individual iteration.
//
// It doesn't seem practical to use the [Positive likely branch] principle here
// because it would require adding a dummy loop and organizing the code in a
// very unnatural way.
if (intoSegment_currentSize == intoSegment_allocCapacity) {
// ### Swapping segments if intermediate-capacity intoSegment is overflown.
// An alternative to swapping the segments inline in the [fromSegment iteration]
// (swapping the fromSegment and intoSegment variables) is finishing the iteration in
// a separate method like "finishSplitAfterSwap()". The advantage of that
// approach is that the fromSegment and intoSegment variables would be
// effectively final, which may allow the compiler to generate more efficient
// machine code. The disadvantages are higher overall complexity, having a
// separate method that is called rarely (hence pretty cold and may trigger JIT
// (re-)compilation well into SmoothieMap's operation), more methods to
// compile, and poorer utilization of the instruction cache. The disadvantages seem
// to outweigh the advantages.
if (swappedSegments != 0) {
// Already swapped segments inside the splitting loop once. This might
// only happen if entries are inserted into fromSegment concurrently
// with the splitting loop.
throw new ConcurrentModificationException();
}
swappedSegments = siblingSegmentsQualificationBit;
// Write out the iterDataGroup as it may have been updated at
// [Empty the iteration slot].
FullCapacitySegment.writeDataGroup(
fromSegment, iterGroupIndex, iterDataGroup);
// Note: the code below until [Swap fromSegment and intoSegment variables]
// (inclusive) should be updated in parallel with the code in another
// [Swap segments] block after the [fromSegment iteration] loop.
// makeSpaceAndInsert[Second route] guarantees that only full-capacity
// segments are split:
int fromSegment_allocCapacity = SEGMENT_MAX_ALLOC_CAPACITY;
fromSegment_bitSetAndState =
InterleavedSegments.swapContentsDuringSplit(
fromSegment, fromSegment_bitSetAndState,
intoSegment, intoSegment_bitSetAndState);
// Swap fromSegment and intoSegment variables:
{
intoSegment_bitSetAndState = fromSegment_bitSetAndState;
// The updated fromSegment_bitSetAndState is written into bitSetAndState
// field of fromSegment in the call to
// InterleavedSegments.swapContentsDuringSplit() above.
// See the documentation to that method.
fromSegment_bitSetAndState = intoSegment.bitSetAndState;
Segment<K, V> tmpSegment = intoSegment;
intoSegment = fromSegment;
fromSegment = tmpSegment;
intoSegment_allocCapacity = fromSegment_allocCapacity;
fromSegment_isFullCapacity = 0;
intoSegment_isFullCapacity = 1;
}
// iterDataGroup needs to be re-read here because it might be updated during
// a swapContentsDuringSplit() call above: in
// compactEntriesDuringSegmentSwap() (if this method is called) in case of
// Continuous segments and always in case of Interleaved segments
// (see InterleavedSegments.swapContentsDuringSplit()).
iterDataGroupOffset = dataGroupOffset(iterGroupIndex
, (long) fromSegment_isFullCapacity);
iterDataGroup = readDataGroupAtOffset(fromSegment, iterDataGroupOffset);
iterBitMask = matchFull(iterDataGroup);
// Clear the already iterated bits to continue the loop correctly.
iterBitMask = clearLowestNBits(iterBitMask, iterTrailingZeros);
// To be cleared on the next iteration step
// `iterBitMask = clearLowestSetBit(iterBitMask)` of [groupIteration].
iterBitMask |= 1L << iterTrailingZeros;
}
// end of [Swap segments]
// ### Put the entry into intoSegment:
{
// [Find empty slot]. Even if iterGroupIndex equals baseGroupIndex,
// the [Find empty slot] loop can't be skipped, because intoSegment's
// iterGroupIndex group may already be full of entries moved from the
// previously visited groups in [fromSegment iteration], which is possible
// due to hash table wrap-around.
// TODO [Unbounded search loop]
internalPutLoop:
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ;) {
long dataGroupOffset = dataGroupOffset(groupIndex
, (long) intoSegment_isFullCapacity);
long dataGroup = readDataGroupAtOffset(intoSegment, dataGroupOffset);
long emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
int insertionSlotIndexWithinGroup =
lowestMatchingSlotIndex(emptyBitMask);
intoSegment_outboundOverflowCount_perGroupAdditions +=
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, groupIndex);
int intoSegment_allocIndex = freeAllocIndexClosestTo(
intoSegment_bitSetAndState,
allocIndexBoundaryForLocalAllocation((int) groupIndex
, intoSegment_isFullCapacity)
, intoSegment_allocCapacity);
intoSegment_bitSetAndState = setAllocBit(
intoSegment_bitSetAndState, intoSegment_allocIndex);
byte tag = (byte) tagBits(hash);
writeEntry(intoSegment,
(long) intoSegment_isFullCapacity,
key, tag, value, groupIndex, dataGroup,
insertionSlotIndexWithinGroup, intoSegment_allocIndex);
intoSegment_currentSize++;
break internalPutLoop;
}
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
// [No break condition in a loop searching for an empty slot]
}
}
continue groupIteration;
}
// Must break from or continue the groupIteration across all paths above.
// Uncommenting the following statement should make the compiler complain about
// "unreachable statement".
}
// Write out iterDataGroup updated at [Empty the iteration slot] places inside
// groupIteration loop.
// The value computed in the beginning of this loop cannot be reused because
// fromSegment_isFullCapacity may change during the execution of this loop in
// [Swap segments].
iterDataGroupOffset =
dataGroupOffset(iterGroupIndex, (long) fromSegment_isFullCapacity);
writeDataGroupAtOffset(fromSegment, iterDataGroupOffset, iterDataGroup);
}
// [Swap segments] if the distribution of entries between the sibling segments is
// significantly skewed towards the intermediate-capacity segment (intoSegment). The code
// within this `if` block should be updated in parallel with the code in [Swap segments]
// inside the [fromSegment iteration] loop above.
if (swappedSegments == 0 && intoSegment_isFullCapacity == 0 &&
intoSegment_currentSize >=
MIN_ENTRIES_IN_INTERMEDIATE_CAPACITY_SEGMENT_AFTER_SPLIT_FOR_SWAPPING) {
swappedSegments = siblingSegmentsQualificationBit;
// makeSpaceAndInsert[Second route] guarantees that only full-capacity segments are
// split:
int fromSegment_allocCapacity = SEGMENT_MAX_ALLOC_CAPACITY;
fromSegment_bitSetAndState =
InterleavedSegments.swapContentsDuringSplit(
fromSegment, fromSegment_bitSetAndState,
intoSegment, intoSegment_bitSetAndState);
// [Swap fromSegment and intoSegment variables]
{
intoSegment_bitSetAndState = fromSegment_bitSetAndState;
// The updated fromSegment_bitSetAndState is written into bitSetAndState field of
// fromSegment in the call to InterleavedSegments.swapContentsDuringSplit() above.
// See the documentation to that method.
fromSegment_bitSetAndState = intoSegment.bitSetAndState;
Segment<K, V> tmpSegment = intoSegment;
intoSegment = fromSegment;
fromSegment = tmpSegment;
intoSegment_allocCapacity = fromSegment_allocCapacity;
fromSegment_isFullCapacity = 0;
intoSegment_isFullCapacity = 1;
}
}
// ### Write out bitSetAndStates and outbound overflow counts.
if (swappedSegments == 0) {
fromSegment.setBitSetAndStateAfterBulkOperation(fromSegment_bitSetAndState);
intoSegment.bitSetAndState = intoSegment_bitSetAndState;
} else {
fromSegment.bitSetAndState = fromSegment_bitSetAndState;
intoSegment.setBitSetAndStateAfterBulkOperation(intoSegment_bitSetAndState);
}
subtractOutboundOverflowCountsPerGroupAndUpdateAllGroups(fromSegment,
fromSegment_isFullCapacity,
fromSegment_outboundOverflowCount_perGroupDeductions);
addOutboundOverflowCountsPerGroup(
intoSegment, intoSegment_outboundOverflowCount_perGroupAdditions);
U.storeFence(); // [Safe segment publication]
return swappedSegments;
}
/**
* Precondition for calling this method: {@link #tryEnsureSegmentsArrayCapacityForSplit} is
* called with {@code priorSegmentOrder} as the argument and returned a non-negative result.
*
* The code of this method is almost verbatim copy of {@link #splitInflated}. These methods must
* be changed in parallel.
*/
@AmortizedPerSegment
private void splitBetweenTwoNewSegments(int modCount, Segment<K, V> oldSegment,
long hash, long oldSegment_bitSetAndState, int priorSegmentOrder) {
// The oldSegment's bitSetAndState is never reset back to an operational value after this
// statement.
oldSegment.bitSetAndState =
makeBulkOperationPlaceholderBitSetAndState(oldSegment_bitSetAndState);
// ### Creating two result segments and replacing references in segmentsArray to the
// ### old segment with references to the the result segments.
int resultSegmentsOrder = priorSegmentOrder + 1;
int resultSegmentsAllocCapacity = getInitialSegmentAllocCapacity(resultSegmentsOrder);
// [Publishing result segments before population]
// [SegmentOne/SegmentTwo naming]
int firstIndexOfResultSegmentOne =
firstSegmentIndexByHashAndOrder(hash, resultSegmentsOrder);
Segment<K, V> resultSegmentOne =
createNewSegment(resultSegmentsAllocCapacity, resultSegmentsOrder);
Object[] segmentsArray = getNonNullSegmentsArrayOrThrowCme();
boolean oldSegment_isFullCapacitySegment = true;
replaceInSegmentsArray(
segmentsArray, firstIndexOfResultSegmentOne, resultSegmentsOrder, resultSegmentOne
, oldSegment_isFullCapacitySegment);
modCount++; // Matches the modCount field increment performed in replaceInSegmentsArray().
int firstIndexOfResultSegmentTwo =
siblingSegmentIndex(firstIndexOfResultSegmentOne, resultSegmentsOrder);
Segment<K, V> resultSegmentTwo =
createNewSegment(resultSegmentsAllocCapacity, resultSegmentsOrder);
replaceInSegmentsArray(
segmentsArray, firstIndexOfResultSegmentTwo, resultSegmentsOrder, resultSegmentTwo
, oldSegment_isFullCapacitySegment);
modCount++; // Matches the modCount field increment performed in replaceInSegmentsArray().
// [Checking modCount before actual split of entries]
checkModCountOrThrowCme(modCount);
doSplitBetweenTwoNewSegments(oldSegment);
}
@AmortizedPerSegment
private void doSplitBetweenTwoNewSegments(Segment<K, V> oldSegment) {
int numMovedEntries = 0;
// [Branchless hash table iteration]
// [Int-indexed loop to avoid a safepoint poll]
for (int iterGroupIndex = 0; iterGroupIndex < HASH_TABLE_GROUPS; iterGroupIndex++) {
long iterDataGroup = FullCapacitySegment.
readDataGroup(oldSegment, (long) iterGroupIndex);
// [groupIteration]
for (long iterBitMask = matchFull(iterDataGroup);
iterBitMask != 0L;
iterBitMask = clearLowestSetBit(iterBitMask)) {
long oldSegment_allocIndex;
// [Inlined lowestMatchingSlotIndex]
int iterTrailingZeros = Long.numberOfTrailingZeros(iterBitMask);
oldSegment_allocIndex = extractAllocIndex(iterDataGroup, iterTrailingZeros);
long oldSegment_allocOffset = FullCapacitySegment.
allocOffset(oldSegment_allocIndex);
K key = readKeyAtOffset(oldSegment, oldSegment_allocOffset);
long hash = keyHashCode(key);
V value = readValueAtOffset(oldSegment, oldSegment_allocOffset);
// [Publishing result segments before population] explains why we are using "public"
// internalPutIfAbsentDuringSplit() method here.
internalPutIfAbsentDuringSplit(key, hash, value);
numMovedEntries++;
}
}
// Restoring the correct size after calling putImpl() with entries that are already in the
// map in the loop above.
size = size - (long) numMovedEntries;
}
/** Invariant before calling this method: oldSegment's size is equal to the capacity. */
@RarelyCalledAmortizedPerSegment
private void inflateAndInsert(int modCount, int segmentOrder, Segment<K, V> oldSegment,
long bitSetAndState, K key, long hash, V value) {
// The old segment's bitSetAndState is never reset back to an operational value after this
// statement.
oldSegment.bitSetAndState = makeBulkOperationPlaceholderBitSetAndState(bitSetAndState);
InflatedSegment<K, V> inflatedSegment = new InflatedSegment<>(segmentOrder, size);
oldSegment.copyEntriesDuringInflate(this, inflatedSegment);
int firstSegmentIndex = firstSegmentIndexByHashAndOrder(hash, segmentOrder);
// The logic of makeSpaceAndInsert() guarantees that inflateAndInsert() is called only with
// full-capacity segments.
boolean oldSegment_isFullCapacity = true;
replaceInSegmentsArray(getNonNullSegmentsArrayOrThrowCme(),
firstSegmentIndex, segmentOrder, inflatedSegment
, oldSegment_isFullCapacity);
modCount++; // Matches the modCount field increment performed in replaceInSegmentsArray().
if (inflatedSegment.put(this, key, hash, value, true /* onlyIfAbsent */) != null) {
throw new ConcurrentModificationException();
}
// Matches the modCount field increment performed in InflatedSegment.put(). We don't expect
// deflateSmall() or splitInflated() to be triggered during this put (they would mean that
// some updates to the map have been happening concurrently with this inflateAndInsert()
// call), so if they are called and cause an extra modCount increment, the subsequent check
// will throw a ConcurrentModificationException, as expected.
modCount++;
checkModCountOrThrowCme(modCount);
}
//endregion
//region removeAtSlot() and shrinking methods called from it
/**
* Returns the updated bitSetAndState, but doesn't update {@link Segment#bitSetAndState} field.
* Callers should call {@link Segment#setBitSetAndState} themselves with the value returned from
* this method.
*
* @apiNote allocOffset is passed to this method along with allocIndex because the computation
* of the former is expensive in {@link InterleavedSegments}. However, when the computation is
* cheap (in {@link ContinuousSegments}), this likely makes more harm than good because of high
* register pressure and the necessity to push the values to and pull from the stack. See a
* similar trade-off in {@link #insert} and {@link #makeSpaceAndInsert}.
* TODO don't pass allocOffset for Continuous segments
* (and leave a to-do to compare perf in that case)
*/
@HotPath
final long removeAtSlotNoShrink(long bitSetAndState, Object segment,
int isFullCapacitySegment,
long outboundOverflowCount_perGroupDecrements, long dataGroupOffset,
long dataGroupWithEmptiedSlot, long allocIndex, long allocOffset) {
decrementSize();
if (outboundOverflowCount_perGroupDecrements != 0) { // Unlikely branch
decrementOutboundOverflowCountsPerGroup(segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupDecrements);
}
bitSetAndState = clearAllocBit(bitSetAndState, allocIndex);
writeDataGroupAtOffset(segment, dataGroupOffset, dataGroupWithEmptiedSlot);
eraseKeyAndValueAtOffset(segment, allocOffset);
return bitSetAndState;
}
@HotPath
private void removeAtSlot(long hash, Object segment,
int isFullCapacitySegment,
long outboundOverflowCount_perGroupDecrements, long dataGroupOffset,
long dataGroupWithEmptiedSlot, long allocIndex, long allocOffset) {
long bitSetAndState = getBitSetAndState(segment);
bitSetAndState = removeAtSlotNoShrink(bitSetAndState, segment,
isFullCapacitySegment,
outboundOverflowCount_perGroupDecrements, dataGroupOffset, dataGroupWithEmptiedSlot,
allocIndex, allocOffset);
setBitSetAndState(segment, bitSetAndState);
if (doShrink) {
tryShrink1(hash, segment, bitSetAndState);
}
}
/**
* tryShrink1() makes one guard check and calls {@link #tryShrink2}. This is not just a part of
* the if block in {@link #removeAtSlot}, because if {@link #doShrink} is false (that is the
* default), tryShrink1() is never called and thus never compiled, which makes
* {@link #removeAtSlot}'s bytecode and compiled code smaller. That is better for JIT
* compilation, makes it more likely that {@link #removeAtSlot} is inlined itself, and is
* better for the instruction cache.
*/
@HotPath
private void tryShrink1(long hash, Object segment, long bitSetAndState) {
int segmentSize = segmentSize(bitSetAndState);
// [Positive likely branch]
if (segmentSize >
// Warning: this formula is valid only as long as both SEGMENT_MAX_ALLOC_CAPACITY
// and MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING are even.
(SEGMENT_MAX_ALLOC_CAPACITY - MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING) / 2) {
//noinspection UnnecessaryReturnStatement
return;
} else {
// This branch is taken when segmentSize <=
// (SEGMENT_MAX_ALLOC_CAPACITY - MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING) / 2 = 22.
// When this condition is met symmetrically by both sibling segments, it's guaranteed that
// they can shrink into the one of them that has the capacity of SEGMENT_MAX_ALLOC_CAPACITY,
// with MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING = 4 extra leftover capacity after
// shrinking (see the tryShrink3[1] condition).
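// Worked numbers (using the constants mentioned in this file: SEGMENT_MAX_ALLOC_CAPACITY =
// 48, MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING = 4): the threshold is (48 - 4) / 2 = 22,
// so two siblings each holding at most 22 entries hold at most 44 = 48 - 4 entries in
// total, leaving at least 4 free slots after shrinking into a SEGMENT_MAX_ALLOC_CAPACITY
// segment.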
// Assumption that one of the siblings has the capacity of SEGMENT_MAX_ALLOC_CAPACITY:
// In a pair of two sibling segments at least one should have the capacity of
// SEGMENT_MAX_ALLOC_CAPACITY. There are two exceptions when this may not be the case:
// (1) splitInflated() may produce two intermediate-capacity segments. This should be
// very rare because inflated segments themselves should be rare.
// (2) After shrinkAndTrimToSize(), the sibling segments can be arbitrarily sized. This
// is not a target case for optimization because after shrinkAndTrimToSize()
// SmoothieMap is assumed to be used as an immutable map, which is the point of
// shrinkAndTrimToSize().
// Correctness is preserved in both of these cases by the tryShrink3[1] condition. It's just
// that tryShrink2() would be entered unnecessarily and relatively expensive
// computations (including reading from the sibling segment) are done up to the tryShrink3[1]
// condition. But that's OK because it either happens rarely (1) or only on unconventional
// use of SmoothieMap (2). The alternative to comparing segmentSize with a constant
// in tryShrink1() that could probably cover the cases (1) and (2) is extracting
// segment's alloc capacity from bitSetAndState and comparing segmentSize with
// (allocCapacity - MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING) / 2, or something like
// that. However, this approach has its own complications (we cannot simply apply the
// formula above to intermediate alloc capacity, that would lead to inadequately low
// threshold for entering tryShrink2()) and is more computationally expensive. So it's
// not worthwhile.
// When the current segment is the sole segment in the SmoothieMap, we may enter
// tryShrink2() needlessly, which is a deliberate choice: see
// [Not shrinking the sole segment].
@SuppressWarnings("unchecked") Segment seg = (Segment) segment;
// The probability of taking this branch is low, so tryShrink2() should not be inlined.
tryShrink2(seg, bitSetAndState, hash);
}
}
/**
* SegmentOne/SegmentTwo naming: in this method, "segmentOne" and "segmentTwo" are being shrunk.
* This unusual naming is chosen instead of calling segments "first" and "second" to avoid a
* confusing perception of first/second segments, as determined by which one is associated with
* lower ranges of hash codes, and the concept of "first segment index" which is the smallest
* index in {@link #segmentsArray} some segment is stored at.
*/
@AmortizedPerSegment
private void tryShrink2(Segment<K, V> segmentOne, long segmentOne_bitSetAndState, long hash) {
if (isBulkOperationPlaceholderBitSetAndState(segmentOne_bitSetAndState)) {
// This segment is already being shrunk from a racing thread.
throw new ConcurrentModificationException();
}
int segmentOneOrder = segmentOrder(segmentOne_bitSetAndState);
// Guard in a non-HotPath method: the following branch is unlikely, but it is made
// positive to reduce nesting in the rest of the method, contrary to the
// [Positive likely branch] principle, since performance is not that critically important in
// tryShrink2(), because it is only @AmortizedPerSegment, not @HotPath.
if (segmentOneOrder == 0) { // Unlikely branch
// Not shrinking the sole segment: if the segment has the order 0, it's the sole segment
// in the SmoothieMap, so it doesn't have a sibling to be merged with. Making this check
// in tryShrink2() is better for the case when it is false (that is, there are more than
// one segment in a SmoothieMap), because the hot tryShrink1() method therefore contains
// less code, and worse when this check is actually true, i. e. there is just one
// segment in a SmoothieMap, because tryShrink2() could then potentially be called
// frequently (unless inlined into tryShrink2()). It is chosen to favor the first case,
// because SmoothieMap's target optimization case is when it has more than one segment.
return;
}
// Segment index re-computation: it's possible to pass segmentIndex downstream from
// SmoothieMap's methods instead of recomputing it here, but that's probably not worth
// storing an extra value on the stack, because tryShrink2() is called rarely even if
// shrinking is enabled.
int firstSegmentOneIndex = firstSegmentIndexByHashAndOrder(hash, segmentOneOrder);
@SuppressWarnings("unused")
int modCountChange = tryShrink3(
segmentOne, segmentOne_bitSetAndState, segmentOneOrder, firstSegmentOneIndex);
}
/**
* Returns the total {@link #modCount} change during the operation. Value 0 means no shrinking
* was done.
*/
@AmortizedPerSegment
private int tryShrink3(Segment<K, V> segmentOne, long segmentOne_bitSetAndState,
int segmentOneOrder, int firstSegmentOneIndex) {
final int originalModCount = getModCountOpaque();
int modCount = originalModCount;
int firstSegmentTwoIndex = siblingSegmentIndex(firstSegmentOneIndex, segmentOneOrder);
Object[] segmentsArray = getNonNullSegmentsArrayOrThrowCme();
// Get segmentTwo and deflate if needed.
Segment<K, V> segmentTwo = segmentCheckedByIndex(segmentsArray, firstSegmentTwoIndex);
if (segmentTwo instanceof InflatedSegment) { // Unlikely branch
int segmentTwoSize = ((InflatedSegment) segmentTwo).delegate.size();
if (!InflatedSegment.shouldDeflateSmall(segmentTwoSize)) {
return 0;
}
deflateSmallWithSegmentIndex((InflatedSegment) segmentTwo, firstSegmentTwoIndex);
modCount++; // Matches the modCount field increment in deflateSmallWithSegmentIndex().
// Re-read the deflated segment from the segmentsArray.
segmentTwo = segmentCheckedByIndex(segmentsArray, firstSegmentTwoIndex);
}
long segmentTwo_bitSetAndState = segmentTwo.bitSetAndState;
int segmentTwoOrder = segmentOrder(segmentTwo_bitSetAndState);
if (segmentTwoOrder == segmentOneOrder) { // [Positive likely branch]
int segmentOneSize = segmentSize(segmentOne_bitSetAndState);
int segmentTwoSize = segmentSize(segmentTwo_bitSetAndState);
int sizeAfterShrinking = segmentOneSize + segmentTwoSize;
int segmentOne_allocCapacity = allocCapacity(segmentOne_bitSetAndState);
int segmentTwo_allocCapacity = allocCapacity(segmentTwo_bitSetAndState);
int maxAllocCapacity = max(segmentOne_allocCapacity, segmentTwo_allocCapacity);
// This branch is not guaranteed to be taken by the condition in tryShrink1() because
// tryShrink1() concerns only a single sibling segment. Upon entering tryShrink3() it
// may discover that the sibling is still too large for shrinking. Another reason why
// this condition might not be taken is that the
// [Assumption that one of the siblings has the capacity of SEGMENT_MAX_ALLOC_CAPACITY]
// is false.
if (sizeAfterShrinking + MIN_LEFTOVER_ALLOC_CAPACITY_AFTER_SHRINKING <=
maxAllocCapacity) { // (1)
int fromSegment_firstIndex;
Segment<K, V> fromSegment;
long fromSegment_bitSetAndState;
int fromSegmentSize;
Segment<K, V> intoSegment;
long intoSegment_bitSetAndState;
// Randomized choice of the segment to shrink into: although tryShrink3() should
// appear to be called from tryShrink2() while performing a removal from the
// lower-index or the higher-index (0 or 1 in (order - 1)th bit) segment randomly
// (unless entries are removed from the SmoothieMap in a specially crafted order),
// tryShrink3() is called from MutableSmoothieIterator always with the higher-index
// segment. Therefore always shrinking segments with equal capacity
// (segmentOne_allocCapacity == segmentTwo_allocCapacity) into segmentOne can cause
// a bias in how entries are arranged in the shrunk segments. `+ ((int) size & 1)`
// inserts pseudo-randomness in whether we are shrinking into the lower-index or the
// higher-index segment when they are of equal capacity, yet actually preserves
// determinism, which is valuable in tests. (Compare with iterGroupIndexStart
// variable in doSplit().)
if (segmentOne_allocCapacity + ((int) size & 1) <= segmentTwo_allocCapacity) {
fromSegment_firstIndex = firstSegmentOneIndex;
fromSegment = segmentOne;
fromSegment_bitSetAndState = segmentOne_bitSetAndState;
fromSegmentSize = segmentOneSize;
intoSegment = segmentTwo;
intoSegment_bitSetAndState = segmentTwo_bitSetAndState;
} else {
fromSegment_firstIndex = firstSegmentTwoIndex;
fromSegment = segmentTwo;
fromSegment_bitSetAndState = segmentTwo_bitSetAndState;
fromSegmentSize = segmentTwoSize;
intoSegment = segmentOne;
intoSegment_bitSetAndState = segmentOne_bitSetAndState;
}
// The bitSetAndState is reset back to an operational value in the epilogue of the
// doShrinkInto() method.
intoSegment.bitSetAndState =
makeBulkOperationPlaceholderBitSetAndState(intoSegment_bitSetAndState);
replaceInSegmentsArray(
segmentsArray, fromSegment_firstIndex, segmentOneOrder, intoSegment
, fromSegment instanceof FullCapacitySegment);
// Matches the modCount field increment performed in replaceInSegmentsArray().
modCount++;
doShrinkInto(fromSegment, fromSegment_bitSetAndState, intoSegment,
intoSegment_bitSetAndState);
// fromSegmentSize is the number of modCount increments that should have been done
// in doShrinkInto().
modCount += fromSegmentSize;
// Check the modCount after both bulky operations performed above:
// replaceInSegmentsArray() and doShrinkInto().
checkModCountOrThrowCme(modCount);
// Fall through to [Common return statement]
} else {
// sizeAfterShrinking is not small enough yet.
// Fall through to [Common return statement]
}
} else if (segmentTwoOrder > segmentOneOrder) {
// The ranges of hash codes sibling to the ranges associated with segmentOne are
// associated with multiple segments, i. e. they are "split deeper" than segmentOne.
// Those multiple segments should be shrunk themselves first before it's possible to
// merge them with segmentOne.
// Fall through to [Common return statement]
} else {
// If the order of segmentTwo is observed to be lower than the order of segmentOne,
// it should be already shrinking in a racing thread.
throw new ConcurrentModificationException();
}
// Common return statement:
return modCount - originalModCount;
}
/**
* intoSegment's bitSetAndState is updated to a fully valid value inside this method, including
* decrementing the segment order.
*/
@AmortizedPerSegment
private void doShrinkInto(Segment fromSegment, final long fromSegment_bitSetAndState,
Segment intoSegment, long intoSegment_bitSetAndState) {
int intoSegment_allocCapacity = allocCapacity(intoSegment_bitSetAndState);
// TODO check that Hotspot compiles this expression into branchless code.
long fromSegment_isFullCapacity =
isFullCapacity(fromSegment_bitSetAndState) ? 1L : 0L;
// TODO check that Hotspot compiles this expression into branchless code.
int intoSegment_isFullCapacity =
intoSegment_allocCapacity == SEGMENT_MAX_ALLOC_CAPACITY ? 1 : 0;
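// Note: these 0/1 flags are passed below into allocOffset(), dataGroupOffset(),
// allocIndexBoundaryForLocalAllocation(), and writeEntry() as numeric selectors, presumably so
// that the offset arithmetic can account for the segment layout without branching on the
// segment type (in line with the TODOs above about branchless compilation).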
long intoSegment_outboundOverflowCount_perGroupAdditions = 0;
long fromSegment_bitSet = extractBitSetForIteration(fromSegment_bitSetAndState);
// Branchless entries iteration: another option is checking every bit of the bitSet,
// avoiding a relatively expensive call to numberOfLeadingZeros() and extra arithmetic
// operations. But in doShrinkInto() many alloc indexes are expected to be empty
// (since we are able to shrink two segments into one), which would make a bit-checking
// branch unpredictable. This is a tradeoff similar to [Branchless hash table iteration].
// TODO for Continuous segments, compare the approaches described above.
// TODO for Interleaved segments, the cost of branchless iteration is exacerbated by an
// expensive allocOffset() call.
// Backward entries iteration: should be a little cheaper than forward iteration because
// the loop condition compares iterAllocIndex with 0 (which compiles into fewer machine μops
// when a flag is checked just after a subtraction) and because it uses numberOfLeadingZeros
// rather than numberOfTrailingZeros: on AMD chips at least up to Ryzen, LZCNT is cheaper
// than TZCNT, see https://www.agner.org/optimize/instruction_tables.pdf.
// [Int-indexed loop to avoid a safepoint poll]. A safepoint poll might be still inserted by
// some JVMs because this is not a conventional counted loop.
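// Illustrative walk-through of the loop below: if only bits 50 and 10 of fromSegment_bitSet
// are set, numberOfLeadingZeros() returns 13, so iterAllocIndexStep == 14 and the first visited
// iterAllocIndex == 64 - 14 == 50; after shifting the bitSet left by 14 the next step becomes
// 40, visiting iterAllocIndex == 50 - 40 == 10; the following step is 65 (the bitSet becomes 0
// and numberOfLeadingZeros() returns 64), which drives iterAllocIndex below zero and ends
// the loop.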
for (int iterAllocIndexStep = Long.numberOfLeadingZeros(fromSegment_bitSet) + 1,
iterAllocIndex = Long.SIZE;
(iterAllocIndex -= iterAllocIndexStep) >= 0;) {
long iterAllocOffset = allocOffset((long) iterAllocIndex
, fromSegment_isFullCapacity);
K key = readKeyAtOffset(fromSegment, iterAllocOffset);
V value = readValueAtOffset(fromSegment, iterAllocOffset);
// TODO check what is better - these two statements before or after
// the internal put operation, or one before and one after, or both after?
fromSegment_bitSet = fromSegment_bitSet << iterAllocIndexStep;
iterAllocIndexStep = Long.numberOfLeadingZeros(fromSegment_bitSet) + 1;
// ### [Put the entry into intoSegment]
long hash = keyHashCode(key);
final long baseGroupIndex = baseGroupIndex(hash);
// [Find empty slot]
// TODO [Unbounded search loop]
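// Probe sequence illustration: after the initial probe at baseGroupIndex, groupIndexStep takes
// the values 1, 2, 3, ..., so the visited groups lie at offsets 0, 1, 3, 6, 10, ... (triangular
// numbers) from baseGroupIndex. addGroupIndex() is assumed to wrap the index around the group
// count; with a power-of-two number of groups this triangular sequence visits every group, so
// the loop terminates whenever an empty slot exists.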
internalPutLoop:
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ; ) {
long dataGroupOffset = dataGroupOffset(groupIndex
, (long) intoSegment_isFullCapacity);
long dataGroup = readDataGroupAtOffset(intoSegment, dataGroupOffset);
long emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
int insertionSlotIndexWithinGroup =
lowestMatchingSlotIndex(emptyBitMask);
intoSegment_outboundOverflowCount_perGroupAdditions +=
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, groupIndex);
int intoSegment_allocIndex = freeAllocIndexClosestTo(
intoSegment_bitSetAndState,
allocIndexBoundaryForLocalAllocation((int) groupIndex
, intoSegment_isFullCapacity)
, intoSegment_allocCapacity
);
intoSegment_bitSetAndState = setAllocBit(
intoSegment_bitSetAndState, intoSegment_allocIndex);
if (intoSegment_allocIndex >= intoSegment_allocCapacity) {
// Hash table overflow is possible if there are put operations
// concurrent with this doShrinkInto() operation, or there is a racing
// doShrinkInto() operation.
throw new ConcurrentModificationException();
}
byte tag = (byte) tagBits(hash);
writeEntry(intoSegment,
(long) intoSegment_isFullCapacity,
key, tag, value, groupIndex, dataGroup, insertionSlotIndexWithinGroup,
intoSegment_allocIndex);
break internalPutLoop;
}
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
// [No break condition in a loop searching for an empty slot]
}
}
// ### Update intoSegment's bitSetAndState and outbound overflow counts.
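// Subtracting SEGMENT_ORDER_UNIT decrements the segment order encoded in the bitSetAndState:
// the two sibling segments of order N have been merged into a single segment of order N - 1
// that now covers both siblings' ranges of hash codes.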
intoSegment_bitSetAndState -= SEGMENT_ORDER_UNIT;
intoSegment.setBitSetAndStateAfterBulkOperation(intoSegment_bitSetAndState);
addOutboundOverflowCountsPerGroup(
intoSegment, intoSegment_outboundOverflowCount_perGroupAdditions);
}
//endregion
//region Methods that replace inflated segments: deflate or split them
/**
* This method is called to deflate an inflated segment that became small enough for an ordinary
* segment to hold all its entries.
* @param hash a hash code of some entry belonging to the given segment. It is used to determine
* the index(es) of the given segment in {@link #segmentsArray}.
*/
@RarelyCalledAmortizedPerSegment
private void deflateSmall(long hash, InflatedSegment inflatedSegment) {
int segmentOrder = segmentOrder(inflatedSegment.bitSetAndState);
int firstSegmentIndex = firstSegmentIndexByHashAndOrder(hash, segmentOrder);
deflateSmallWithSegmentIndex(inflatedSegment, firstSegmentIndex);
}
/** @see #deflateSmall */
private void deflateSmallWithSegmentIndex(
InflatedSegment inflatedSegment, int firstSegmentIndex) {
int modCount = getModCountOpaque();
// The inflated segment's bitSetAndState is never reset back to an operational value after
// this statement.
long inflatedSegment_bitSetAndState =
inflatedSegment.replaceBitSetAndStateWithBulkOperationPlaceholderOrThrowCme();
int segmentOrder = segmentOrder(inflatedSegment_bitSetAndState);
int deflatedSegment_allocCapacity = SEGMENT_MAX_ALLOC_CAPACITY;
Segment deflatedSegment =
allocateNewSegmentWithoutSettingBitSetAndSet(deflatedSegment_allocCapacity);
doDeflateSmall(
segmentOrder, inflatedSegment, deflatedSegment, deflatedSegment_allocCapacity);
// storeFence() is called inside doDeflateSmall() to make publishing of deflatedSegment
// safe.
boolean inflatedSegment_isFullCapacitySegment = false;
replaceInSegmentsArray(getNonNullSegmentsArrayOrThrowCme(), firstSegmentIndex, segmentOrder,
deflatedSegment
, inflatedSegment_isFullCapacitySegment);
modCount++; // Matches the modCount field increment performed in replaceInSegmentsArray().
checkModCountOrThrowCme(modCount);
}
/**
* Moves all entries from the given inflated segment into an ordinary (full-capacity) segment.
*
* intoSegment_allocCapacity could have been omitted and {@link #SEGMENT_MAX_ALLOC_CAPACITY} used
* directly instead, but it is passed as a parameter to make the choice of
* intoSegment_allocCapacity the sole responsibility of {@link #deflateSmall}.
*/
@RarelyCalledAmortizedPerSegment
private void doDeflateSmall(int segmentOrder, InflatedSegment inflatedSegment,
Segment intoSegment, int intoSegment_allocCapacity) {
int intoSegment_currentSize = 0;
long intoSegment_outboundOverflowCount_perGroupAdditions = 0;
long intoSegment_bitSetAndState =
makeNewBitSetAndState(intoSegment_allocCapacity, segmentOrder);
for (Node node : inflatedSegment.delegate.keySet()) {
K key = node.getKey();
V value = node.getValue();
// Possibly wrong hash from InflatedSegment's node: if there are concurrent
// modifications, this might be a hash not corresponding to the read key. We are
// tolerating that because it doesn't make sense to recompute keyHashCode(key) only to
// compare it with the stored hash (if it were recomputed anyway, it would be simpler to
// just use the recomputed value), so the stored hash is used as is. Using a wrong hash
// might make the entry undiscoverable during subsequent operations with the SmoothieMap
// (i. e. effectively could lead to a memory leak), but nothing worse than that.
long hash = node.hash;
// ### [Put the entry into intoSegment]
final long baseGroupIndex = baseGroupIndex(hash);
// [Find empty slot]
internalPutLoop:
for (long groupIndex = baseGroupIndex, groupIndexStep = 0; ; ) {
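// Unlike doShrinkInto(), which passes intoSegment_isFullCapacity into the offset computations,
// the FullCapacitySegment specializations are called directly here because intoSegment is
// guaranteed to be a full-capacity segment: its alloc capacity is chosen as
// SEGMENT_MAX_ALLOC_CAPACITY by the caller.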
long dataGroup = FullCapacitySegment.
readDataGroup(intoSegment, groupIndex);
long emptyBitMask = matchEmpty(dataGroup);
if (emptyBitMask != 0) { // [Positive likely branch]
int insertionSlotIndexWithinGroup =
lowestMatchingSlotIndex(emptyBitMask);
intoSegment_outboundOverflowCount_perGroupAdditions +=
computeOutboundOverflowCount_perGroupChanges(
baseGroupIndex, groupIndex);
int intoSegment_allocIndex = freeAllocIndexClosestTo(
intoSegment_bitSetAndState,
// Using FullCapacitySegment's allocIndexBoundaryForLocalAllocation()
// method here rather than InterleavedSegment's because intoSegment is
// guaranteed to be a full-capacity segment. See a check in the
// beginning of this method.
FullCapacitySegment.
allocIndexBoundaryForLocalAllocation((int) groupIndex)
, intoSegment_allocCapacity
);
intoSegment_bitSetAndState = setAllocBit(
intoSegment_bitSetAndState, intoSegment_allocIndex);
if (intoSegment_allocIndex >= intoSegment_allocCapacity) {
// This is possible if entries are added to the inflated segment
// concurrently with the deflation.
throw new ConcurrentModificationException();
}
byte tag = (byte) tagBits(hash);
FullCapacitySegment.writeEntry(
intoSegment, key, tag, value, groupIndex, dataGroup,
insertionSlotIndexWithinGroup, intoSegment_allocIndex);
break internalPutLoop;
}
groupIndexStep += 1; // [Quadratic probing]
groupIndex = addGroupIndex(groupIndex, groupIndexStep);
// [No break condition in a loop searching for an empty slot]
}
intoSegment_currentSize++;
// Unlike in other similar procedures, don't increment modCount here because
// intoSegment is not yet published to segmentsArray.
}
// Update intoSegment's bitSetAndState and outbound overflow counts.
intoSegment.bitSetAndState = intoSegment_bitSetAndState;
int intoSegment_isFullCapacity = 1;
// No point in specializing addOutboundOverflowCountsPerGroup() for a full-capacity segment
// because we are in a RarelyCalledAmortizedPerSegment method.
addOutboundOverflowCountsPerGroup(
intoSegment, intoSegment_outboundOverflowCount_perGroupAdditions);
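// The store fence orders the writes that initialized intoSegment above before the subsequent
// store that publishes intoSegment into segmentsArray (performed by the caller via
// replaceInSegmentsArray()), so that readers navigating to the new segment observe it fully
// initialized.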
U.storeFence(); // [Safe segment publication]
}
@RarelyCalledAmortizedPerSegment
private void replaceInflatedWithEmptyOrdinary(
int segmentIndex, InflatedSegment inflatedSegment) {
// The inflated segment's bitSetAndState is never reset back to an operational value after
// this statement.
long inflatedSegment_bitSetAndState =
inflatedSegment.replaceBitSetAndStateWithBulkOperationPlaceholderOrThrowCme();
int segmentOrder = segmentOrder(inflatedSegment_bitSetAndState);
int ordinarySegmentAllocCapacity = getInitialSegmentAllocCapacity(segmentOrder);
Segment