/*
* Copyright (c) 2018 SnappyData, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You
* may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License. See accompanying
* LICENSE file.
*/
package io.snappydata.collection;
import java.util.AbstractSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.function.Consumer;
import com.gemstone.gemfire.internal.concurrent.MapCallback;
import com.gemstone.gemfire.internal.concurrent.MapResult;
import com.gemstone.gemfire.internal.shared.ClientResolverUtils;
/**
* An optimized HashSet using open addressing with quadratic probing.
* In micro-benchmarks it is faster for inserts, deletes, gets and mixed
* workloads than most other HashSet implementations generally available
* in Java (the JDK HashSet, fastutil's, or Trove's hash sets).
*
* It adds additional APIs like {@link #create}, {@link #getKey} and
* {@link #addKey}, which are the main reason for having this class.
*/
public class OpenHashSet<E> extends AbstractSet<E>
implements Set<E>, Cloneable, java.io.Serializable {
private static final long serialVersionUID = 2837689134511263091L;
public static final Object REMOVED = new Object();
// maximum power of 2 less than Integer.MAX_VALUE
protected static final int MAX_CAPACITY = 1 << 30;
protected final float loadFactor;
protected int size;
protected int occupied;
protected int growThreshold;
protected int mask;
protected Object[] data;
protected Consumer<OpenHashSet<E>> postRehashHook;
@SuppressWarnings("unused")
public OpenHashSet() {
this(16, 0.7f);
}
public OpenHashSet(int initialCapacity) {
this(initialCapacity, 0.7f);
}
public OpenHashSet(int initialCapacity, float loadFactor) {
final int capacity = nextPowerOf2(initialCapacity);
this.loadFactor = loadFactor;
this.growThreshold = (int)(loadFactor * capacity);
this.mask = capacity - 1;
this.data = new Object[capacity];
}
public OpenHashSet(Collection<? extends E> c) {
this(c.size());
addAll(c);
}
protected int keyHash(Object key) {
return ClientResolverUtils.fastHashInt(key != null ? key.hashCode() : 0);
}
protected boolean keyEquals(Object mapKey, Object key) {
return mapKey.equals(key);
}
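/**
* Find the position at which the given key can be inserted, preferring to
* reuse a REMOVED slot. If an equal key is already present, its position is
* returned encoded as {@code -(position) - 1}. Claims a previously empty
* slot by incrementing the "occupied" count. The hash is expected to be the
* value returned by {@link #keyHash(Object)}.
*/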
protected final int insertionIndex(final Object[] data, final Object key,
final int hash) {
final int mask = this.mask;
int pos = hash & mask;
// try to fill the REMOVED slot but only if it is a new insertion
// else we need to keep searching for possible existing value
int removedPos = -1;
int delta = 1;
while (true) {
final Object mapKey = data[pos];
if (mapKey != null) {
if (mapKey == REMOVED) {
removedPos = pos;
} else if (keyEquals(mapKey, key)) {
// return already present key position as negative
return -pos - 1;
}
// quadratic probing (increase delta)
pos = (pos + delta) & mask;
delta++;
} else {
final boolean slotIsNull = (removedPos == -1);
// if slot was a REMOVED token then skip incrementing the "occupied" count
if (slotIsNull) {
occupied++;
return pos;
} else {
return removedPos;
}
}
}
}
protected boolean doInsert(final Object[] data, final Object key,
final int pos) {
data[pos] = key;
return handleNewInsert();
}
protected boolean doRemove(final Object[] data, final int pos) {
// mark as deleted
data[pos] = REMOVED;
return handleRemove();
}
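/**
* Return the position of the given key in the table, or -1 if it is not
* present. The hash is expected to be the value returned by
* {@link #keyHash(Object)}.
*/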
protected final int index(final Object[] data, final Object key,
final int hash) {
final int mask = this.mask;
int pos = hash & mask;
int delta = 1;
while (true) {
final Object mapKey = data[pos];
if (mapKey != null) {
if (mapKey != REMOVED && keyEquals(mapKey, key)) {
return pos;
} else {
// quadratic probing with position increase by 1, 2, 3, ...
pos = (pos + delta) & mask;
delta++;
}
} else {
return -1;
}
}
}
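/**
* Lookup the given key and, if absent, insert a new entry obtained from the
* given {@link MapCallback#newValue} factory. Returns the existing key if one
* is found, the newly created key if the callback produced one, or null if
* the callback cleared {@link MapResult#isNewValueCreated()}.
*/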
protected final <K, C, P> Object create(final K key,
final MapCallback<K, ?, C, P> creator, final C context, final P params,
final MapResult result, final int hash) {
final Object[] data = this.data;
final int mask = this.mask;
int pos = hash & mask;
// try to fill the REMOVED slot but only if it is a new insertion
// else we need to keep searching for possible existing value
int removedPos = -1;
int delta = 1;
while (true) {
final Object mapKey = data[pos];
if (mapKey != null) {
if (mapKey == REMOVED) {
removedPos = pos;
} else if (keyEquals(mapKey, key)) {
// return old key
return mapKey;
}
// quadratic probing with position increase by 1, 2, 3, ...
pos = (pos + delta) & mask;
delta++;
} else {
// insert into the map and rehash if required
result.setNewValueCreated(true);
final Object newKey = creator.newValue(key, context, params, result);
if (result.isNewValueCreated()) {
// if slot was a REMOVED token then skip incrementing the "occupied" count
if (removedPos == -1) {
occupied++;
} else {
pos = removedPos;
}
doInsert(data, newKey, pos);
return newKey;
} else {
return null;
}
}
}
}
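/**
* Set a hook that is invoked with this set after every {@link #rehash}.
*/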
public final void setPostRehashHook(Consumer<OpenHashSet<E>> hook) {
this.postRehashHook = hook;
}
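/**
* Insert the given key if no equal key is present and return null, else
* return the existing key, replacing it with the argument when
* {@code replace} is true. The hash is expected to be the value returned by
* {@link #keyHash(Object)}.
*/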
public final Object addKey(final Object key, final boolean replace,
final int hash) {
final Object[] data = this.data;
final int pos = insertionIndex(data, key, hash);
if (pos >= 0) {
doInsert(data, key, pos);
return null;
} else {
final int currentPos = -pos - 1;
final Object mapKey = data[currentPos];
if (replace) {
data[currentPos] = key;
}
return mapKey;
}
}
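/**
* Return the stored key that is equal to the given key, or null if absent.
*/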
public final Object getKey(final Object key) {
final Object[] data = this.data;
final int pos = index(data, key, keyHash(key));
if (pos >= 0) return data[pos];
else return null;
}
public final Object removeKey(final Object key) {
return removeKey(key, keyHash(key));
}
private Object removeKey(final Object key, final int hash) {
final Object[] data = this.data;
final int pos = index(data, key, hash);
if (pos >= 0) {
final Object mapKey = data[pos];
doRemove(data, pos);
return mapKey;
} else {
// no matching key
return null;
}
}
public boolean contains(Object key) {
return index(data, key, keyHash(key)) >= 0;
}
@Override
public boolean add(E key) {
final Object[] data = this.data;
final int pos = insertionIndex(data, key, keyHash(key));
if (pos >= 0) {
doInsert(data, key, pos);
return true;
} else {
return false;
}
}
@Override
public boolean remove(Object key) {
return removeKey(key, keyHash(key)) != null;
}
@SuppressWarnings("NullableProblems")
@Override
public Itr<E> iterator() {
return new Itr<>(this);
}
@Override
public int size() {
return this.size;
}
public final int capacity() {
return this.data.length;
}
@Override
public void clear() {
final Object[] data = this.data;
final int size = data.length;
for (int i = 0; i < size; i++) {
data[i] = null;
}
this.size = 0;
this.occupied = 0;
}
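/**
* Increment the size after an insert and double the table capacity once the
* number of occupied slots exceeds the load-factor threshold. Returns true
* if a rehash was performed.
*/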
protected final boolean handleNewInsert() {
size++;
// check and trigger a rehash if load factor exceeded
if (occupied <= growThreshold) {
return false;
} else {
// double the capacity
rehash(checkCapacity(data.length << 1));
return true;
}
}
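/**
* Decrement the size after a removal and halve the table capacity when at
* least half of the occupied slots are REMOVED tombstones and the table has
* more than 128 slots. Returns true if a rehash was performed.
*/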
protected final boolean handleRemove() {
// reduce size but not the number of occupied cells
// if number of deleted entries is too large then rehash to shrink
if (--size > (occupied >>> 1) || data.length <= 128) {
return false;
} else {
// half the capacity
rehash(data.length >>> 1);
return true;
}
}
/**
* Re-hash the contents of the set into a table of the given capacity (used
* both to grow and to shrink), dropping all REMOVED tombstones in the
* process. Callers must check the load condition and validate the new
* capacity before triggering a rehash.
*/
protected final void rehash(final int newCapacity) {
final Object[] data = this.data;
final int capacity = data.length;
final Object[] newData = new Object[newCapacity];
final int newMask = newCapacity - 1;
int oldPos = 0;
while (oldPos < capacity) {
final Object d = data[oldPos];
if (d != null && d != REMOVED) {
final int newHash = keyHash(d);
int newPos = newHash & newMask;
int delta = 1;
// No need to check for equality here when we insert.
while (true) {
if (newData[newPos] == null) {
// Inserting the key at newPos
newData[newPos] = d;
break;
} else {
newPos = (newPos + delta) & newMask;
delta++;
}
}
}
oldPos++;
}
// all deleted entries marked REMOVED have been cleared
this.occupied = this.size;
this.data = newData;
this.mask = newMask;
this.growThreshold = (int)(loadFactor * newCapacity);
if (this.postRehashHook != null) {
this.postRehashHook.accept(this);
}
}
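/**
* Validate a requested capacity: returns it unchanged when in the range
* (0, MAX_CAPACITY], returns 2 for a zero capacity, and throws
* {@link IllegalStateException} for negative or too-large values.
*/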
public static int checkCapacity(int capacity) {
if (capacity > 0 && capacity <= MAX_CAPACITY) {
return capacity;
} else if (capacity == 0) {
return 2;
} else {
throw new IllegalStateException("Capacity (" + capacity +
") can't be more than " + MAX_CAPACITY + " elements or negative");
}
}
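/**
* Round a positive capacity up to the nearest power of two, validated
* via {@link #checkCapacity}.
*/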
public static int nextPowerOf2(int n) {
final int highBit = Integer.highestOneBit(n > 0 ? n : 2);
return checkCapacity(highBit == n ? n : highBit << 1);
}
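/**
* Iterator over the set that works against a snapshot of the backing array,
* so a concurrent rehash does not affect iteration; removals after a rehash
* fall back to {@link OpenHashSet#remove(Object)}.
*/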
public static final class Itr<E> implements Iterator<E> {
private final Object[] data;
private Object result;
private int pos;
private int prevPos;
private final OpenHashSet<?> set;
Itr(OpenHashSet<?> set) {
// take a snapshot of the array to avoid problems due to rehash
this.data = set.data;
this.result = null;
this.pos = -1;
this.prevPos = -1;
this.set = set;
advance(this.data);
}
private void advance(final Object[] data) {
final int size = data.length;
for (int pos = this.pos + 1; pos < size; pos++) {
final Object d = data[pos];
if (d != null && d != REMOVED) {
this.result = d;
this.pos = pos;
return;
}
}
// no next element
this.result = null;
this.pos = -1;
}
@Override
public boolean hasNext() {
return this.result != null;
}
@SuppressWarnings("unchecked")
@Override
public E next() {
final Object result = this.result;
if (result != null) {
final Object[] data = this.data;
this.prevPos = this.pos;
advance(data);
return (E)result;
} else {
throw new NoSuchElementException("invalid iterator position");
}
}
@Override
public void remove() {
final int pos = this.prevPos;
if (pos >= 0) {
final Object[] data = this.data;
final OpenHashSet<?> set = this.set;
// if no change in storage array (i.e. no rehash) then change in-place
if (data == set.data) {
set.doRemove(data, pos);
} else {
set.remove(data[pos]);
}
// disallow a repeated remove() without an intervening next()
this.prevPos = -1;
} else {
throw new NoSuchElementException("invalid iterator position");
}
}
}
}