All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dimajix.shaded.guava.collect.CompactHashSet Maven / Gradle / Ivy

There is a newer version: 1.2.0-synapse3.3-spark3.3-hadoop3.3
Show newest version
/*
 * Copyright (C) 2012 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dimajix.shaded.guava.collect;

import static com.dimajix.shaded.guava.base.Preconditions.checkNotNull;
import static com.dimajix.shaded.guava.collect.CollectPreconditions.checkRemove;
import static com.dimajix.shaded.guava.collect.CompactHashing.UNSET;
import static com.dimajix.shaded.guava.collect.Hashing.smearedHash;
import static java.util.Objects.requireNonNull;

import com.dimajix.shaded.guava.annotations.GwtIncompatible;
import com.dimajix.shaded.guava.annotations.VisibleForTesting;
import com.dimajix.shaded.guava.base.Objects;
import com.dimajix.shaded.guava.base.Preconditions;
import com.dimajix.shaded.guava.primitives.Ints;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.io.IOException;
import java.io.InvalidObjectException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.AbstractSet;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Consumer;
import javax.annotation.CheckForNull;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * CompactHashSet is an implementation of a Set. All optional operations (adding and removing) are
 * supported. The elements can be any objects.
 *
 * 

{@code contains(x)}, {@code add(x)} and {@code remove(x)}, are all (expected and amortized) * constant time operations. Expected in the hashtable sense (depends on the hash function doing a * good job of distributing the elements to the buckets to a distribution not far from uniform), and * amortized since some operations can trigger a hash table resize. * *

Unlike {@code java.util.HashSet}, iteration is only proportional to the actual {@code size()}, * which is optimal, and not the size of the internal hashtable, which could be much larger * than {@code size()}. Furthermore, this structure only depends on a fixed number of arrays; {@code * add(x)} operations do not create objects for the garbage collector to deal with, and for * every element added, the garbage collector will have to traverse {@code 1.5} references on * average, in the marking phase, not {@code 5.0} as in {@code java.util.HashSet}. * *

If there are no removals, then {@link #iterator iteration} order is the same as insertion * order. Any removal invalidates any ordering guarantees. * *

This class should not be assumed to be universally superior to {@code java.util.HashSet}. * Generally speaking, this class reduces object allocation and memory consumption at the price of * moderately increased constant factors of CPU. Only use this class when there is a specific reason * to prioritize memory over CPU. * * @author Dimitris Andreou * @author Jon Noack */ @GwtIncompatible // not worth using in GWT for now @ElementTypesAreNonnullByDefault class CompactHashSet extends AbstractSet implements Serializable { // TODO(user): cache all field accesses in local vars /** Creates an empty {@code CompactHashSet} instance. */ public static CompactHashSet create() { return new CompactHashSet<>(); } /** * Creates a mutable {@code CompactHashSet} instance containing the elements of the given * collection in unspecified order. * * @param collection the elements that the set should contain * @return a new {@code CompactHashSet} containing those elements (minus duplicates) */ public static CompactHashSet create( Collection collection) { CompactHashSet set = createWithExpectedSize(collection.size()); set.addAll(collection); return set; } /** * Creates a mutable {@code CompactHashSet} instance containing the given elements in * unspecified order. * * @param elements the elements that the set should contain * @return a new {@code CompactHashSet} containing those elements (minus duplicates) */ @SafeVarargs public static CompactHashSet create(E... elements) { CompactHashSet set = createWithExpectedSize(elements.length); Collections.addAll(set, elements); return set; } /** * Creates a {@code CompactHashSet} instance, with a high enough "initial capacity" that it * should hold {@code expectedSize} elements without growth. * * @param expectedSize the number of elements you expect to add to the returned set * @return a new, empty {@code CompactHashSet} with enough capacity to hold {@code expectedSize} * elements without resizing * @throws IllegalArgumentException if {@code expectedSize} is negative */ public static CompactHashSet createWithExpectedSize( int expectedSize) { return new CompactHashSet<>(expectedSize); } /** * Maximum allowed false positive probability of detecting a hash flooding attack given random * input. */ @VisibleForTesting( ) static final double HASH_FLOODING_FPP = 0.001; /** * Maximum allowed length of a hash table bucket before falling back to a j.u.LinkedHashSet based * implementation. Experimentally determined. */ private static final int MAX_HASH_BUCKET_LENGTH = 9; // See CompactHashMap for a detailed description of how the following fields work. That // description talks about `keys`, `values`, and `entries`; here the `keys` and `values` arrays // are replaced by a single `elements` array but everything else works similarly. /** * The hashtable object. This can be either: * *

    *
  • a byte[], short[], or int[], with size a power of two, created by * CompactHashing.createTable, whose values are either *
      *
    • UNSET, meaning "null pointer" *
    • one plus an index into the entries and elements array *
    *
  • another java.util.Set delegate implementation. In most modern JDKs, normal java.util hash * collections intelligently fall back to a binary search tree if hash table collisions are * detected. Rather than going to all the trouble of reimplementing this ourselves, we * simply switch over to use the JDK implementation wholesale if probable hash flooding is * detected, sacrificing the compactness guarantee in very rare cases in exchange for much * more reliable worst-case behavior. *
  • null, if no entries have yet been added to the map *
*/ @CheckForNull private transient Object table; /** * Contains the logical entries, in the range of [0, size()). The high bits of each int are the * part of the smeared hash of the element not covered by the hashtable mask, whereas the low bits * are the "next" pointer (pointing to the next entry in the bucket chain), which will always be * less than or equal to the hashtable mask. * *
   * hash  = aaaaaaaa
   * mask  = 00000fff
   * next  = 00000bbb
   * entry = aaaaabbb
   * 
* *

The pointers in [size(), entries.length) are all "null" (UNSET). */ @CheckForNull private transient int[] entries; /** * The elements contained in the set, in the range of [0, size()). The elements in [size(), * elements.length) are all {@code null}. */ @VisibleForTesting @CheckForNull transient @Nullable Object[] elements; /** * Keeps track of metadata like the number of hash table bits and modifications of this data * structure (to make it possible to throw ConcurrentModificationException in the iterator). Note * that we choose not to make this volatile, so we do less of a "best effort" to track such * errors, for better performance. */ private transient int metadata; /** The number of elements contained in the set. */ private transient int size; /** Constructs a new empty instance of {@code CompactHashSet}. */ CompactHashSet() { init(CompactHashing.DEFAULT_SIZE); } /** * Constructs a new instance of {@code CompactHashSet} with the specified capacity. * * @param expectedSize the initial capacity of this {@code CompactHashSet}. */ CompactHashSet(int expectedSize) { init(expectedSize); } /** Pseudoconstructor for serialization support. */ void init(int expectedSize) { Preconditions.checkArgument(expectedSize >= 0, "Expected size must be >= 0"); // Save expectedSize for use in allocArrays() this.metadata = Ints.constrainToRange(expectedSize, 1, CompactHashing.MAX_SIZE); } /** Returns whether arrays need to be allocated. */ @VisibleForTesting boolean needsAllocArrays() { return table == null; } /** Handle lazy allocation of arrays. */ @CanIgnoreReturnValue int allocArrays() { Preconditions.checkState(needsAllocArrays(), "Arrays already allocated"); int expectedSize = metadata; int buckets = CompactHashing.tableSize(expectedSize); this.table = CompactHashing.createTable(buckets); setHashTableMask(buckets - 1); this.entries = new int[expectedSize]; this.elements = new Object[expectedSize]; return expectedSize; } @SuppressWarnings("unchecked") @VisibleForTesting @CheckForNull Set delegateOrNull() { if (table instanceof Set) { return (Set) table; } return null; } private Set createHashFloodingResistantDelegate(int tableSize) { return new LinkedHashSet<>(tableSize, 1.0f); } @VisibleForTesting @CanIgnoreReturnValue Set convertToHashFloodingResistantImplementation() { Set newDelegate = createHashFloodingResistantDelegate(hashTableMask() + 1); for (int i = firstEntryIndex(); i >= 0; i = getSuccessor(i)) { newDelegate.add(element(i)); } this.table = newDelegate; this.entries = null; this.elements = null; incrementModCount(); return newDelegate; } @VisibleForTesting boolean isUsingHashFloodingResistance() { return delegateOrNull() != null; } /** Stores the hash table mask as the number of bits needed to represent an index. */ private void setHashTableMask(int mask) { int hashTableBits = Integer.SIZE - Integer.numberOfLeadingZeros(mask); metadata = CompactHashing.maskCombine(metadata, hashTableBits, CompactHashing.HASH_TABLE_BITS_MASK); } /** Gets the hash table mask using the stored number of hash table bits. */ private int hashTableMask() { return (1 << (metadata & CompactHashing.HASH_TABLE_BITS_MASK)) - 1; } void incrementModCount() { metadata += CompactHashing.MODIFICATION_COUNT_INCREMENT; } @CanIgnoreReturnValue @Override public boolean add(@ParametricNullness E object) { if (needsAllocArrays()) { allocArrays(); } Set delegate = delegateOrNull(); if (delegate != null) { return delegate.add(object); } int[] entries = requireEntries(); @Nullable Object[] elements = requireElements(); int newEntryIndex = this.size; // current size, and pointer to the entry to be appended int newSize = newEntryIndex + 1; int hash = smearedHash(object); int mask = hashTableMask(); int tableIndex = hash & mask; int next = CompactHashing.tableGet(requireTable(), tableIndex); if (next == UNSET) { // uninitialized bucket if (newSize > mask) { // Resize and add new entry mask = resizeTable(mask, CompactHashing.newCapacity(mask), hash, newEntryIndex); } else { CompactHashing.tableSet(requireTable(), tableIndex, newEntryIndex + 1); } } else { int entryIndex; int entry; int hashPrefix = CompactHashing.getHashPrefix(hash, mask); int bucketLength = 0; do { entryIndex = next - 1; entry = entries[entryIndex]; if (CompactHashing.getHashPrefix(entry, mask) == hashPrefix && Objects.equal(object, elements[entryIndex])) { return false; } next = CompactHashing.getNext(entry, mask); bucketLength++; } while (next != UNSET); if (bucketLength >= MAX_HASH_BUCKET_LENGTH) { return convertToHashFloodingResistantImplementation().add(object); } if (newSize > mask) { // Resize and add new entry mask = resizeTable(mask, CompactHashing.newCapacity(mask), hash, newEntryIndex); } else { entries[entryIndex] = CompactHashing.maskCombine(entry, newEntryIndex + 1, mask); } } resizeMeMaybe(newSize); insertEntry(newEntryIndex, object, hash, mask); this.size = newSize; incrementModCount(); return true; } /** * Creates a fresh entry with the specified object at the specified position in the entry arrays. */ void insertEntry(int entryIndex, @ParametricNullness E object, int hash, int mask) { setEntry(entryIndex, CompactHashing.maskCombine(hash, UNSET, mask)); setElement(entryIndex, object); } /** Resizes the entries storage if necessary. */ private void resizeMeMaybe(int newSize) { int entriesSize = requireEntries().length; if (newSize > entriesSize) { // 1.5x but round up to nearest odd (this is optimal for memory consumption on Android) int newCapacity = Math.min(CompactHashing.MAX_SIZE, (entriesSize + Math.max(1, entriesSize >>> 1)) | 1); if (newCapacity != entriesSize) { resizeEntries(newCapacity); } } } /** * Resizes the internal entries array to the specified capacity, which may be greater or less than * the current capacity. */ void resizeEntries(int newCapacity) { this.entries = Arrays.copyOf(requireEntries(), newCapacity); this.elements = Arrays.copyOf(requireElements(), newCapacity); } @CanIgnoreReturnValue private int resizeTable(int oldMask, int newCapacity, int targetHash, int targetEntryIndex) { Object newTable = CompactHashing.createTable(newCapacity); int newMask = newCapacity - 1; if (targetEntryIndex != UNSET) { // Add target first; it must be last in the chain because its entry hasn't yet been created CompactHashing.tableSet(newTable, targetHash & newMask, targetEntryIndex + 1); } Object oldTable = requireTable(); int[] entries = requireEntries(); // Loop over current hashtable for (int oldTableIndex = 0; oldTableIndex <= oldMask; oldTableIndex++) { int oldNext = CompactHashing.tableGet(oldTable, oldTableIndex); while (oldNext != UNSET) { int entryIndex = oldNext - 1; int oldEntry = entries[entryIndex]; // Rebuild hash using entry hashPrefix and tableIndex ("hashSuffix") int hash = CompactHashing.getHashPrefix(oldEntry, oldMask) | oldTableIndex; int newTableIndex = hash & newMask; int newNext = CompactHashing.tableGet(newTable, newTableIndex); CompactHashing.tableSet(newTable, newTableIndex, oldNext); entries[entryIndex] = CompactHashing.maskCombine(hash, newNext, newMask); oldNext = CompactHashing.getNext(oldEntry, oldMask); } } this.table = newTable; setHashTableMask(newMask); return newMask; } @Override public boolean contains(@CheckForNull Object object) { if (needsAllocArrays()) { return false; } Set delegate = delegateOrNull(); if (delegate != null) { return delegate.contains(object); } int hash = smearedHash(object); int mask = hashTableMask(); int next = CompactHashing.tableGet(requireTable(), hash & mask); if (next == UNSET) { return false; } int hashPrefix = CompactHashing.getHashPrefix(hash, mask); do { int entryIndex = next - 1; int entry = entry(entryIndex); if (CompactHashing.getHashPrefix(entry, mask) == hashPrefix && Objects.equal(object, element(entryIndex))) { return true; } next = CompactHashing.getNext(entry, mask); } while (next != UNSET); return false; } @CanIgnoreReturnValue @Override public boolean remove(@CheckForNull Object object) { if (needsAllocArrays()) { return false; } Set delegate = delegateOrNull(); if (delegate != null) { return delegate.remove(object); } int mask = hashTableMask(); int index = CompactHashing.remove( object, /* value= */ null, mask, requireTable(), requireEntries(), requireElements(), /* values= */ null); if (index == -1) { return false; } moveLastEntry(index, mask); size--; incrementModCount(); return true; } /** * Moves the last entry in the entry array into {@code dstIndex}, and nulls out its old position. */ void moveLastEntry(int dstIndex, int mask) { Object table = requireTable(); int[] entries = requireEntries(); @Nullable Object[] elements = requireElements(); int srcIndex = size() - 1; if (dstIndex < srcIndex) { // move last entry to deleted spot Object object = elements[srcIndex]; elements[dstIndex] = object; elements[srcIndex] = null; // move the last entry to the removed spot, just like we moved the element entries[dstIndex] = entries[srcIndex]; entries[srcIndex] = 0; // also need to update whoever's "next" pointer was pointing to the last entry place int tableIndex = smearedHash(object) & mask; int next = CompactHashing.tableGet(table, tableIndex); int srcNext = srcIndex + 1; if (next == srcNext) { // we need to update the root pointer CompactHashing.tableSet(table, tableIndex, dstIndex + 1); } else { // we need to update a pointer in an entry int entryIndex; int entry; do { entryIndex = next - 1; entry = entries[entryIndex]; next = CompactHashing.getNext(entry, mask); } while (next != srcNext); // here, entries[entryIndex] points to the old entry location; update it entries[entryIndex] = CompactHashing.maskCombine(entry, dstIndex + 1, mask); } } else { elements[dstIndex] = null; entries[dstIndex] = 0; } } int firstEntryIndex() { return isEmpty() ? -1 : 0; } int getSuccessor(int entryIndex) { return (entryIndex + 1 < size) ? entryIndex + 1 : -1; } /** * Updates the index an iterator is pointing to after a call to remove: returns the index of the * entry that should be looked at after a removal on indexRemoved, with indexBeforeRemove as the * index that *was* the next entry that would be looked at. */ int adjustAfterRemove(int indexBeforeRemove, @SuppressWarnings("unused") int indexRemoved) { return indexBeforeRemove - 1; } @Override public Iterator iterator() { Set delegate = delegateOrNull(); if (delegate != null) { return delegate.iterator(); } return new Iterator() { int expectedMetadata = metadata; int currentIndex = firstEntryIndex(); int indexToRemove = -1; @Override public boolean hasNext() { return currentIndex >= 0; } @Override @ParametricNullness public E next() { checkForConcurrentModification(); if (!hasNext()) { throw new NoSuchElementException(); } indexToRemove = currentIndex; E result = element(currentIndex); currentIndex = getSuccessor(currentIndex); return result; } @Override public void remove() { checkForConcurrentModification(); checkRemove(indexToRemove >= 0); incrementExpectedModCount(); CompactHashSet.this.remove(element(indexToRemove)); currentIndex = adjustAfterRemove(currentIndex, indexToRemove); indexToRemove = -1; } void incrementExpectedModCount() { expectedMetadata += CompactHashing.MODIFICATION_COUNT_INCREMENT; } private void checkForConcurrentModification() { if (metadata != expectedMetadata) { throw new ConcurrentModificationException(); } } }; } @Override public Spliterator spliterator() { if (needsAllocArrays()) { return Spliterators.spliterator(new Object[0], Spliterator.DISTINCT | Spliterator.ORDERED); } Set delegate = delegateOrNull(); return (delegate != null) ? delegate.spliterator() : Spliterators.spliterator( requireElements(), 0, size, Spliterator.DISTINCT | Spliterator.ORDERED); } @Override public void forEach(Consumer action) { checkNotNull(action); Set delegate = delegateOrNull(); if (delegate != null) { delegate.forEach(action); } else { for (int i = firstEntryIndex(); i >= 0; i = getSuccessor(i)) { action.accept(element(i)); } } } @Override public int size() { Set delegate = delegateOrNull(); return (delegate != null) ? delegate.size() : size; } @Override public boolean isEmpty() { return size() == 0; } @Override public @Nullable Object[] toArray() { if (needsAllocArrays()) { return new Object[0]; } Set delegate = delegateOrNull(); return (delegate != null) ? delegate.toArray() : Arrays.copyOf(requireElements(), size); } @CanIgnoreReturnValue @Override @SuppressWarnings("nullness") // b/192354773 in our checker affects toArray declarations public T[] toArray(T[] a) { if (needsAllocArrays()) { if (a.length > 0) { a[0] = null; } return a; } Set delegate = delegateOrNull(); return (delegate != null) ? delegate.toArray(a) : ObjectArrays.toArrayImpl(requireElements(), 0, size, a); } /** * Ensures that this {@code CompactHashSet} has the smallest representation in memory, given its * current size. */ public void trimToSize() { if (needsAllocArrays()) { return; } Set delegate = delegateOrNull(); if (delegate != null) { Set newDelegate = createHashFloodingResistantDelegate(size()); newDelegate.addAll(delegate); this.table = newDelegate; return; } int size = this.size; if (size < requireEntries().length) { resizeEntries(size); } int minimumTableSize = CompactHashing.tableSize(size); int mask = hashTableMask(); if (minimumTableSize < mask) { // smaller table size will always be less than current mask resizeTable(mask, minimumTableSize, UNSET, UNSET); } } @Override public void clear() { if (needsAllocArrays()) { return; } incrementModCount(); Set delegate = delegateOrNull(); if (delegate != null) { metadata = Ints.constrainToRange(size(), CompactHashing.DEFAULT_SIZE, CompactHashing.MAX_SIZE); delegate.clear(); // invalidate any iterators left over! table = null; size = 0; } else { Arrays.fill(requireElements(), 0, size, null); CompactHashing.tableClear(requireTable()); Arrays.fill(requireEntries(), 0, size, 0); this.size = 0; } } private void writeObject(ObjectOutputStream stream) throws IOException { stream.defaultWriteObject(); stream.writeInt(size()); for (E e : this) { stream.writeObject(e); } } @SuppressWarnings("unchecked") private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { stream.defaultReadObject(); int elementCount = stream.readInt(); if (elementCount < 0) { throw new InvalidObjectException("Invalid size: " + elementCount); } init(elementCount); for (int i = 0; i < elementCount; i++) { E element = (E) stream.readObject(); add(element); } } /* * For discussion of the safety of the following methods, see the comments near the end of * CompactHashMap. */ private Object requireTable() { return requireNonNull(table); } private int[] requireEntries() { return requireNonNull(entries); } private @Nullable Object[] requireElements() { return requireNonNull(elements); } @SuppressWarnings("unchecked") private E element(int i) { return (E) requireElements()[i]; } private int entry(int i) { return requireEntries()[i]; } private void setElement(int i, E value) { requireElements()[i] = value; } private void setEntry(int i, int value) { requireEntries()[i] = value; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy