org.carrot2.mahout.math.map.OpenIntDoubleHashMap Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of carrot2-mini Show documentation
Show all versions of carrot2-mini Show documentation
Carrot2 search results clustering framework. Minimal functional subset
(core algorithms and infrastructure, no document sources).
/* Imported from Mahout. */package org.carrot2.mahout.math.map;
import java.util.Arrays;
import org.carrot2.mahout.math.function.IntDoubleProcedure;
import org.carrot2.mahout.math.function.IntProcedure;
import org.carrot2.mahout.math.list.DoubleArrayList;
import org.carrot2.mahout.math.list.IntArrayList;
public class OpenIntDoubleHashMap extends AbstractIntDoubleMap {
protected static final byte FREE = 0;
protected static final byte FULL = 1;
protected static final byte REMOVED = 2;
protected static final int NO_KEY_VALUE = 0;
protected int[] table;
protected double[] values;
protected byte[] state;
protected int freeEntries;
public OpenIntDoubleHashMap() {
this(defaultCapacity);
}
public OpenIntDoubleHashMap(int initialCapacity) {
this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
}
public OpenIntDoubleHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
setUp(initialCapacity, minLoadFactor, maxLoadFactor);
}
@Override
public void clear() {
Arrays.fill(this.state, FREE);
distinct = 0;
freeEntries = table.length; // delta
trimToSize();
}
@Override
public Object clone() {
OpenIntDoubleHashMap copy = (OpenIntDoubleHashMap) super.clone();
copy.table = copy.table.clone();
copy.values = copy.values.clone();
copy.state = copy.state.clone();
return copy;
}
@Override
public boolean containsKey(int key) {
return indexOfKey(key) >= 0;
}
@Override
public boolean containsValue(double value) {
return indexOfValue(value) >= 0;
}
@Override
public void ensureCapacity(int minCapacity) {
if (table.length < minCapacity) {
int newCapacity = nextPrime(minCapacity);
rehash(newCapacity);
}
}
@Override
public boolean forEachKey(IntProcedure procedure) {
for (int i = table.length; i-- > 0;) {
if (state[i] == FULL) {
if (!procedure.apply(table[i])) {
return false;
}
}
}
return true;
}
@Override
public boolean forEachPair(IntDoubleProcedure procedure) {
for (int i = table.length; i-- > 0;) {
if (state[i] == FULL) {
if (!procedure.apply(table[i], values[i])) {
return false;
}
}
}
return true;
}
@Override
public double get(int key) {
final int i = indexOfKey(key);
if (i < 0) {
return 0;
} //not contained
return values[i];
}
protected int indexOfInsertion(int key) {
final int length = table.length;
final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
int i = hash % length;
int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
//int decrement = (hash / length) % length;
if (decrement == 0) {
decrement = 1;
}
// stop if we find a removed or free slot, or if we find the key itself
// do NOT skip over removed slots (yes, open addressing is like that...)
while (state[i] == FULL && table[i] != key) {
i -= decrement;
//hashCollisions++;
if (i < 0) {
i += length;
}
}
if (state[i] == REMOVED) {
// stop if we find a free slot, or if we find the key itself.
// do skip over removed slots (yes, open addressing is like that...)
// assertion: there is at least one FREE slot.
final int j = i;
while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) {
i -= decrement;
//hashCollisions++;
if (i < 0) {
i += length;
}
}
if (state[i] == FREE) {
i = j;
}
}
if (state[i] == FULL) {
// key already contained at slot i.
// return a negative number identifying the slot.
return -i - 1;
}
// not already contained, should be inserted at slot i.
// return a number >= 0 identifying the slot.
return i;
}
protected int indexOfKey(int key) {
final int length = table.length;
final int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
int i = hash % length;
int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
//int decrement = (hash / length) % length;
if (decrement == 0) {
decrement = 1;
}
// stop if we find a free slot, or if we find the key itself.
// do skip over removed slots (yes, open addressing is like that...)
while (state[i] != FREE && (state[i] == REMOVED || table[i] != key)) {
i -= decrement;
//hashCollisions++;
if (i < 0) {
i += length;
}
}
if (state[i] == FREE) {
return -1;
} // not found
return i; //found, return index where key is contained
}
protected int indexOfValue(double value) {
double[] val = values;
byte[] stat = state;
for (int i = stat.length; --i >= 0;) {
if (stat[i] == FULL && val[i] == value) {
return i;
}
}
return -1; // not found
}
@Override
public void keys(IntArrayList list) {
list.setSize(distinct);
int [] elements = list.elements();
int j = 0;
for (int i = table.length; i-- > 0;) {
if (state[i] == FULL) {
elements[j++] = table[i];
}
}
}
@Override
public void pairsMatching(IntDoubleProcedure condition,
IntArrayList keyList,
DoubleArrayList valueList) {
keyList.clear();
valueList.clear();
for (int i = table.length; i-- > 0;) {
if (state[i] == FULL && condition.apply(table[i], values[i])) {
keyList.add(table[i]);
valueList.add(values[i]);
}
}
}
@Override
public boolean put(int key, double value) {
int i = indexOfInsertion(key);
if (i < 0) { //already contained
i = -i - 1;
this.values[i] = value;
return false;
}
if (this.distinct > this.highWaterMark) {
int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
rehash(newCapacity);
return put(key, value);
}
this.table[i] = key;
this.values[i] = value;
if (this.state[i] == FREE) {
this.freeEntries--;
}
this.state[i] = FULL;
this.distinct++;
if (this.freeEntries < 1) { //delta
int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
rehash(newCapacity);
}
return true;
}
@Override
public double adjustOrPutValue(int key, double newValue, double incrValue) {
int i = indexOfInsertion(key);
if (i < 0) { //already contained
i = -i - 1;
this.values[i] += incrValue;
return this.values[i];
} else {
put(key, newValue);
return newValue;
}
}
protected void rehash(int newCapacity) {
int oldCapacity = table.length;
//if (oldCapacity == newCapacity) return;
int[] oldTable = table;
double[] oldValues = values;
byte[] oldState = state;
this.table = new int[newCapacity];
this.values = new double[newCapacity];
this.state = new byte[newCapacity];
this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor);
this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor);
this.freeEntries = newCapacity - this.distinct; // delta
for (int i = oldCapacity; i-- > 0;) {
if (oldState[i] == FULL) {
int element = oldTable[i];
int index = indexOfInsertion(element);
this.table[index] = element;
this.values[index] = oldValues[i];
this.state[index] = FULL;
}
}
}
@Override
public boolean removeKey(int key) {
int i = indexOfKey(key);
if (i < 0) {
return false;
} // key not contained
this.state[i] = REMOVED;
//this.values[i]=0; // delta
this.distinct--;
if (this.distinct < this.lowWaterMark) {
int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor);
rehash(newCapacity);
}
return true;
}
@Override
protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
int capacity = initialCapacity;
super.setUp(capacity, minLoadFactor, maxLoadFactor);
capacity = nextPrime(capacity);
if (capacity == 0) {
capacity = 1;
} // open addressing needs at least one FREE slot at any time.
this.table = new int[capacity];
this.values = new double[capacity];
this.state = new byte[capacity];
// memory will be exhausted long before this pathological case happens, anyway.
this.minLoadFactor = minLoadFactor;
if (capacity == PrimeFinder.largestPrime) {
this.maxLoadFactor = 1.0;
} else {
this.maxLoadFactor = maxLoadFactor;
}
this.distinct = 0;
this.freeEntries = capacity; // delta
// lowWaterMark will be established upon first expansion.
// establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
// After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
// See ensureCapacity(...)
this.lowWaterMark = 0;
this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
}
@Override
public void trimToSize() {
// * 1.2 because open addressing's performance exponentially degrades beyond that point
// so that even rehashing the table can take very long
int newCapacity = nextPrime((int) (1 + 1.2 * size()));
if (table.length > newCapacity) {
rehash(newCapacity);
}
}
@Override
public void values(DoubleArrayList list) {
list.setSize(distinct);
double[] elements = list.elements();
int j = 0;
for (int i = state.length; i-- > 0;) {
if (state[i] == FULL) {
elements[j++] = values[i];
}
}
}
protected void getInternalFactors(int[] capacity,
double[] minLoadFactor,
double[] maxLoadFactor) {
capacity[0] = table.length;
minLoadFactor[0] = this.minLoadFactor;
maxLoadFactor[0] = this.maxLoadFactor;
}
}