All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.alicep.collect.CompactSet Maven / Gradle / Ivy

/*
 * Copyright 2016 Chris Purcell. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.alicep.collect;

import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;

import java.io.IOException;
import java.io.Serializable;
import java.io.StreamCorruptedException;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;

/**
 * <p>Hash table and array implementation of the {@link Set} interface,
 * with predictable iteration order. This implementation is similar to
 * {@link LinkedHashSet}, but uses a more compact memory representation
 * originally pioneered by PyPy, and subsequently adopted in Python 3.6.
 *
 * <p>This class provides all of the optional {@code Set} operations, and
 * permits null elements. Like {@link HashSet}, it provides constant-time
 * performance for the basic operations ({@code add}, {@code contains} and
 * {@code remove}), assuming the hash function disperses elements
 * properly among the buckets. Performance is typically within 5% of
 * {@code HashSet}, with only around a third of the memory overhead
 * (a quarter of {@code LinkedHashSet}, and close to a plain {@link ArrayList}).
 *
 * <p>Unlike {@code HashSet} and {@code LinkedHashSet}, this class does not cache the
 * {@linkplain Object#hashCode() hash code value} of its elements, as this is typically
 * redundant: numeric types are a trivial transformation, while Strings already cache
 * their hash values. This may however result in a significant negative performance
 * impact if element hashCode/equality checks are expensive.
 *
 * <p><strong>Note that this implementation is not synchronized.</strong>
 * If multiple threads access a compact set concurrently, and at least
 * one of the threads modifies the set, it <em>must</em> be synchronized
 * externally. This is typically accomplished by synchronizing on some
 * object that naturally encapsulates the set.
 *
 * <p>If no such object exists, the set should be "wrapped" using the
 * {@link Collections#synchronizedSet Collections.synchronizedSet}
 * method. This is best done at creation time, to prevent accidental
 * unsynchronized access to the set:
 *
 * <pre>
 *   Set&lt;...&gt; s = Collections.synchronizedSet(new CompactSet&lt;&gt;(...));
 * </pre>
 *
 * <p>The iterators returned by this class's {@code iterator} method are
 * <em>fail-fast</em>: if the set is modified at any time after the iterator
 * is created, in any way except through the iterator's own {@code remove}
 * method, the iterator will throw a {@link ConcurrentModificationException}.
 * Thus, in the face of concurrent modification, the iterator fails quickly
 * and cleanly, rather than risking arbitrary, non-deterministic behavior at
 * an undetermined time in the future.
 *
 * <p>Note that the fail-fast behavior of an iterator cannot be guaranteed
 * as it is, generally speaking, impossible to make any hard guarantees in the
 * presence of unsynchronized concurrent modification. Fail-fast iterators
 * throw {@code ConcurrentModificationException} on a best-effort basis.
 * Therefore, it would be wrong to write a program that depended on this
 * exception for its correctness: <em>the fail-fast behavior of iterators
 * should be used only to detect bugs.</em>
 *
 * <h2>Implementation Details</h2>
 *
 * <p>This section covers the current implementation; future releases may change
 * some or all of the details.
 *
 * <p>This implementation stores all elements in an insertion-ordered
 * array. Lookup is done via a compressed hashtable of indices, using
 * double hashing to reduce collisions. Small sets achieve a higher compression
 * rate, as lookup indexes require fewer bits; a newly-allocated instance
 * needs only 4 bits per bucket to index the default 10-element array.
 * As the set grows, the element array grows in the same manner as an
 * {@code ArrayList}, and the index hashtable is regenerated.
 *
 * <p>Iteration performance is similar to {@code ArrayList}, though if a lot
 * of elements are deleted, the element array will not be shrunk, meaning
 * iteration performance does not recover once a set has been large.
 *
 * <p>Memory overhead is a pointer and a half plus a handful of bits per
 * element in the set. In contrast, {@code HashSet} allocates approximately
 * five pointers plus 16 bytes per element, while {@code LinkedHashSet}
 * allocates two more pointers on top of that; an {@code ArrayList} uses
 * around a pointer and a half.
 *
 * @param <E> the type of elements maintained by this set
 *
 * @see Set
 * @see HashSet
 * @see LinkedHashSet
 */
public class CompactSet<E> extends AbstractSet<E> implements Serializable {

  /**
   * Stand-in stored in {@code objects} for a {@code null} element. A literal {@code null}
   * in the element array means "unused or deleted cell", so null elements need a marker.
   */
  private enum Reserved { NULL }

  /** Value returned by {@link #getLookupAt} for a lookup cell that has never been assigned. */
  private static final int NO_INDEX = -1;
  private static final int DEFAULT_CAPACITY = 10;

  /** Number of live elements. */
  private int size = 0;
  /** Structural modification counter used by the fail-fast iterator. */
  private int modCount = 0;
  /** Elements in insertion order; {@code null} cells below {@code head} are tombstones. */
  private Object[] objects;
  /** Index of the next cell of {@code objects} to be written by {@code add}. */
  private int head = 0;
  /** Bit-packed open-addressing table mapping probe positions to indices into {@code objects}. */
  private long[] lookup;

  /**
   * Constructs an empty set with an initial capacity of ten.
   */
  public CompactSet() {
    this(DEFAULT_CAPACITY);
  }

  /**
   * Constructs an empty set with the specified initial capacity. Capacities below the
   * default of ten are rounded up to ten.
   *
   * @param initialCapacity the initial capacity of the set
   * @return an empty set
   * @throws IllegalArgumentException if the specified initial capacity
   *     is negative
   *
   * @param <T> the type of elements maintained by the set
   */
  public static <T> CompactSet<T> withInitialCapacity(int initialCapacity) {
    return new CompactSet<>(initialCapacity);
  }

  /**
   * Constructs a set containing the elements of the specified
   * collection, in the order they are returned by the collection's
   * iterator. (If an element is duplicated, only the first instance
   * will be stored.)
   *
   * @param elements the collection whose elements are to be placed into this set
   * @throws NullPointerException if the specified collection is null
   */
  public CompactSet(Collection<? extends E> elements) {
    this(elements.size());
    addAll(elements);
  }

  private CompactSet(int initialCapacity) {
    if (initialCapacity < 0) {
      throw new IllegalArgumentException("initialCapacity must be non-negative");
    }
    objects = new Object[Math.max(initialCapacity, DEFAULT_CAPACITY)];
    lookup = newLookupArray();
  }

  @Override
  public int size() {
    return size;
  }

  @Override
  public Iterator<E> iterator() {
    return new IteratorImpl();
  }

  @Override
  public Spliterator<E> spliterator() {
    return Spliterators.spliterator(this, Spliterator.DISTINCT | Spliterator.ORDERED);
  }

  @Override
  public boolean contains(Object o) {
    return lookup(mask(o)) >= 0;
  }

  @Override
  public boolean add(E e) {
    Object insertionObject = mask(e);
    // Ensure there is a free cell _before_ looking up index as rehashing invalidates the index.
    ensureFreeCell();
    long lookupIndex = lookup(insertionObject);
    if (lookupIndex >= 0) {
      return false;
    }
    int index = head++;
    objects[index] = insertionObject;
    // A negative lookup result encodes the free probe cell as -(cell) - 1.
    addLookup((int) -(lookupIndex + 1), index);
    size++;
    modCount++;
    return true;
  }

  @Override
  public boolean remove(Object o) {
    long index = lookup(mask(o));
    if (index < 0) {
      return false;
    }
    deleteObjectAtIndex((int) index);
    return true;
  }

  /* Null masking */

  /** Replaces {@code null} with the {@link Reserved#NULL} marker so it can be stored and hashed. */
  private static Object mask(Object o) {
    return (o == null) ? Reserved.NULL : o;
  }

  /** Reverses {@link #mask}: turns the stored marker back into {@code null}. */
  @SuppressWarnings("unchecked") // Only E instances (or the marker) are ever stored in objects.
  private E unmask(Object stored) {
    return (stored == Reserved.NULL) ? null : (E) stored;
  }

  /* Lookup methods */

  private static int log2ceil(int value) {
    return 32 - Integer.numberOfLeadingZeros(value - 1);
  }

  /**
   * Number of bits used by each lookup cell. The all-ones bit pattern is reserved for
   * {@link #NO_INDEX}, so a cell must be able to represent {@code objects.length + 1}
   * distinct values. Sizing for only {@code objects.length} values makes the highest
   * index collide with the sentinel whenever the capacity is an exact power of two.
   */
  private int lookupEntryBits() {
    return log2ceil(objects.length + 1);
  }

  private int lookupEntriesPerLong() {
    return Long.SIZE / lookupEntryBits();
  }

  /** Allocates a lookup table sized for the current {@code objects} array, with every cell empty. */
  private long[] newLookupArray() {
    // Aim for a power of two with 50% occupancy maximum
    int numCells = 1 << (log2ceil(objects.length) + 1);
    while (objects.length * 2 > numCells) {
      numCells = numCells * 2;
    }
    int cellsPerLong = lookupEntriesPerLong();
    long[] newLookup = new long[1 + (numCells - 1) / cellsPerLong];
    Arrays.fill(newLookup, -1);
    return newLookup;
  }

  /** Records that probe cell {@code lookupIndex} refers to {@code objects[index]}. */
  private void addLookup(int lookupIndex, int index) {
    assertState(index != NO_INDEX, "Invalid index");
    // Cells are always narrower than a long (at most 31 bits), so they are always bit-packed.
    addLookupNibble(lookupIndex, index);
  }

  /** Bit mask covering a single lookup cell; also the encoded form of {@link #NO_INDEX}. */
  private long lookupMask() {
    return (1L << lookupEntryBits()) - 1;
  }

  private void addLookupNibble(int lookupIndex, int index) {
    int wordIndex = lookupIndex / lookupEntriesPerLong();
    int shift = lookupEntryBits() * (lookupIndex % lookupEntriesPerLong());
    long word = lookup[wordIndex];
    word &= ~(lookupMask() << shift);
    word |= (index & lookupMask()) << shift;
    lookup[wordIndex] = word;
  }

  /**
   * If {@code obj} is in the {@code objects} array, returns its index; otherwise, returns
   * {@code (-(probe insertion point) - 1)}, where "probe insertion point" is
   * the index of first free cell in {@code lookup} along the probe sequence for {@code obj}.
   * A cell whose element has been deleted (a tombstone) counts as free for this purpose.
   */
  private long lookup(Object obj) {
    int mask = numLookupCells() - 1;
    int tombstoneIndex = -1;
    int lookupIndex = obj.hashCode();
    // Odd stride: coprime with the power-of-two table size, so the probe visits every cell.
    int stride = Integer.reverse(lookupIndex) * 2 + 1;
    lookupIndex &= mask;
    stride &= mask;
    int index;
    while ((index = getLookupAt(lookupIndex)) != NO_INDEX) {
      Object other = objects[index];
      if (other == null) {
        // Remember the first tombstone but keep probing: obj may still be further along.
        if (tombstoneIndex == -1) {
          tombstoneIndex = lookupIndex;
        }
      } else if (other.equals(obj)) {
        return index;
      }
      lookupIndex += stride;
      lookupIndex &= mask;
    }
    if (tombstoneIndex != -1) {
      return -tombstoneIndex - 1;
    } else {
      return -lookupIndex - 1;
    }
  }

  private int numLookupCells() {
    return Integer.highestOneBit(lookup.length * lookupEntriesPerLong());
  }

  /** Returns the index stored in probe cell {@code lookupIndex}, or {@link #NO_INDEX} if empty. */
  private int getLookupAt(int lookupIndex) {
    long word = lookup[lookupIndex / lookupEntriesPerLong()];
    int shift = lookupEntryBits() * (lookupIndex % lookupEntriesPerLong());
    int value = (int) ((word >> shift) & lookupMask());
    return (value == (NO_INDEX & lookupMask())) ? -1 : value;
  }

  private void clearLookupArray() {
    Arrays.fill(lookup, -1);
  }

  /* Other internal methods */

  /**
   * Guarantees {@code objects[head]} is writable, growing the element array by half when it is
   * mostly live elements, and in either case compacting tombstones and rebuilding the lookup.
   */
  private void ensureFreeCell() {
    if (objects.length == head) {
      if (size >= minGrowthThreshold()) {
        int newSize = objects.length + (objects.length >> 1);
        objects = Arrays.copyOf(objects, newSize);
        // Cell width and table size depend on objects.length, so the table must be reallocated.
        lookup = null;
      }
      compact();
    }
  }

  /** Turns the cell at {@code index} into a tombstone; the lookup table is left untouched. */
  private void deleteObjectAtIndex(int index) {
    assertState(objects[index] != null, "Cannot delete empty cell");
    assertState(size != 0, "Size is 0 but a cell is not empty");
    objects[index] = null;
    size--;
    modCount++;
  }

  /** Slides live elements to the front of {@code objects} and rebuilds the lookup table. */
  private void compact() {
    if (lookup == null) {
      lookup = newLookupArray();
    } else {
      clearLookupArray();
    }
    int target = 0;
    for (int source = 0; source < objects.length; source++) {
      Object e = objects[source];
      if (e == null) {
        continue;
      }
      if (source != target) {
        objects[target] = e;
      }
      long freeLookupCell = -(lookup(e) + 1);
      if (freeLookupCell < 0) {
        // lookup() found e already indexed, which would mean a duplicate element.
        throw new IllegalStateException();
      }
      addLookup((int) freeLookupCell, target);
      target++;
    }
    for (; target < objects.length; target++) {
      objects[target] = null;
    }
    head = size;
  }

  private int minGrowthThreshold() {
    // Grow the objects array if less than a quarter of it is DELETED tombstones when it fills up.
    return objects.length * 3 / 4;
  }

  private static void assertState(boolean condition, String message, Object... args) {
    if (!condition) {
      throw new AssertionError(String.format(message, args));
    }
  }

  /* Serialization */

  private static final long serialVersionUID = 0;

  /**
   * Writes the element count followed by each live element in iteration order
   * (a stored null marker is written as {@code null}).
   */
  private void writeObject(java.io.ObjectOutputStream s) throws IOException {
    s.writeInt(size);
    for (int i = 0; i < head; ++i) {
      Object o = objects[i];
      if (o != null) {
        s.writeObject(o == Reserved.NULL ? null : o);
      }
    }
  }

  /**
   * Rebuilds the set from the form produced by {@link #writeObject}.
   *
   * @throws StreamCorruptedException if the stream declares a negative size or contains
   *     duplicate elements
   */
  private void readObject(java.io.ObjectInputStream s) throws IOException, ClassNotFoundException {
    int serializedSize = s.readInt();
    if (serializedSize < 0) {
      throw new StreamCorruptedException("Negative size found in serialized set");
    }
    size = serializedSize;
    objects = new Object[Math.max(size, DEFAULT_CAPACITY)];
    lookup = newLookupArray();
    for (head = 0; head < size; head++) {
      Object e = mask(s.readObject());
      objects[head] = e;
      long freeLookupCell = -(lookup(e) + 1);
      if (freeLookupCell < 0) {
        throw new StreamCorruptedException("Duplicate data found in serialized set");
      }
      addLookup((int) freeLookupCell, head);
    }
  }

  /* Iteration */

  /** Fail-fast iterator walking {@code objects} in insertion order, skipping tombstones. */
  private class IteratorImpl implements Iterator<E> {
    private int expectedModCount;
    /** Index of the element last returned by {@code next}, or -1 if none is removable. */
    private int index;
    /** Index of the element the next call to {@code next} will return. */
    private int nextIndex;

    IteratorImpl() {
      expectedModCount = modCount;
      index = -1;
      nextIndex = 0;
      while (nextIndex < head && objects[nextIndex] == null) {
        nextIndex++;
      }
    }

    @Override
    public boolean hasNext() {
      if (modCount != expectedModCount) {
        throw new ConcurrentModificationException();
      }
      return nextIndex < head;
    }

    @Override
    public E next() {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      index = nextIndex;
      do {
        nextIndex++;
      } while (nextIndex < head && objects[nextIndex] == null);
      Object stored = objects[index];
      if (stored == null) {
        throw new ConcurrentModificationException();
      }
      return unmask(stored);
    }

    @Override
    public void remove() {
      if (index == -1) {
        throw new IllegalStateException();
      }
      if (modCount != expectedModCount) {
        throw new ConcurrentModificationException();
      }
      deleteObjectAtIndex(index);
      index = -1;
      expectedModCount = modCount;
    }
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy