org.apache.datasketches.hllmap.CouponTraverseMap Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datasketches-java Show documentation
Show all versions of datasketches-java Show documentation
Core sketch algorithms used alone and by other Java repositories in the DataSketches library.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.hllmap;
import java.util.Arrays;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.hash.MurmurHash3;
/**
* Implements a key-value map where the value is a simple array of coupons. Search operations are a
* simple traverse of the consecutive coupons. Because of this, the maximum practical size of the
* coupon array is about 8 coupons.
*
* The map is implemented as a prime-sized, Open Address, Double Hash, with deletes and a 1-bit
* state array. The size of this map can grow or shrink.
*
* @author Lee Rhodes
* @author Alexander Saydakov
* @author Kevin Lang
*/
final class CouponTraverseMap extends Map {
private static final double RSE = 0.408 / Math.sqrt(1024);
private final int maxCouponsPerKey_;
private int tableEntries_;
private int capacityEntries_;
private int numActiveKeys_;
private int numDeletedKeys_;
private double entrySizeBytes_;
//Arrays
private byte[] keysArr_;
private short[] couponsArr_;
/**
*
- State: 0: Empty always, don't need to look at 1st coupon. Coupons could be dirty.
* - State: 1: Valid entry or dirty. During rebuild, look at the first coupon to determine.
* If first coupon != 0 means valid entry; first coupon == 0: dirty (we set to 0 when deleted)
*
*/
private byte[] stateArr_;
private CouponTraverseMap(final int keySizeBytes, final int maxCouponsPerKey) {
super(keySizeBytes);
maxCouponsPerKey_ = maxCouponsPerKey;
}
static CouponTraverseMap getInstance(final int keySizeBytes, final int maxCouponsPerKey) {
final CouponTraverseMap map = new CouponTraverseMap(keySizeBytes, maxCouponsPerKey);
map.tableEntries_ = COUPON_MAP_MIN_NUM_ENTRIES;
map.capacityEntries_ = (int)(map.tableEntries_ * COUPON_MAP_GROW_TRIGGER_FACTOR);
map.numActiveKeys_ = 0;
map.numDeletedKeys_ = 0;
map.entrySizeBytes_ = updateEntrySizeBytes(map.tableEntries_, keySizeBytes, maxCouponsPerKey);
map.keysArr_ = new byte[COUPON_MAP_MIN_NUM_ENTRIES * keySizeBytes];
map.couponsArr_ = new short[COUPON_MAP_MIN_NUM_ENTRIES * maxCouponsPerKey];
map.stateArr_ = new byte[(int) Math.ceil(COUPON_MAP_MIN_NUM_ENTRIES / 8.0)];
return map;
}
@Override //used for test
double update(final byte[] key, final short coupon) {
final int entryIndex = findOrInsertKey(key);
return update(entryIndex, coupon);
}
@Override
double update(final int entryIndex, final short value) {
final int offset = entryIndex * maxCouponsPerKey_;
boolean wasFound = false;
for (int i = 0; i < maxCouponsPerKey_; i++) {
if (couponsArr_[offset + i] == 0) {
if (wasFound) { return i; }
couponsArr_[offset + i] = value;
return i + 1;
}
if (couponsArr_[offset + i] == value) {
wasFound = true;
}
}
if (wasFound) { return maxCouponsPerKey_; }
return -maxCouponsPerKey_; //signal to promote
}
@Override
double getEstimate(final byte[] key) {
final int entryIndex = findKey(key);
if (entryIndex < 0) { return 0; }
return getCouponCount(entryIndex);
}
@Override
double getUpperBound(final byte[] key) {
return getEstimate(key) * (1 + RSE);
}
@Override
double getLowerBound(final byte[] key) {
return getEstimate(key) * (1 - RSE);
}
/**
* Returns entryIndex if the given key is found. If not found, returns one's complement entryIndex
* of an empty slot for insertion, which may be over a deleted key.
* @param key the given key
* @return the entryIndex
*/
@Override
int findKey(final byte[] key) {
final long[] hash = MurmurHash3.hash(key, SEED);
int entryIndex = getIndex(hash[0], tableEntries_);
int firstDeletedIndex = -1;
final int loopIndex = entryIndex;
do {
if (isBitClear(stateArr_, entryIndex)) {
return firstDeletedIndex == -1 ? ~entryIndex : ~firstDeletedIndex; // found empty or deleted
}
if (couponsArr_[entryIndex * maxCouponsPerKey_] == 0) { //found deleted
if (firstDeletedIndex == -1) { firstDeletedIndex = entryIndex; }
} else if (Map.arraysEqual(keysArr_, entryIndex * keySizeBytes_, key, 0, keySizeBytes_)) {
return entryIndex; // found key
}
entryIndex = (entryIndex + getStride(hash[1], tableEntries_)) % tableEntries_;
} while (entryIndex != loopIndex);
throw new SketchesArgumentException("Key not found and no empty slots!");
}
@Override
int findOrInsertKey(final byte[] key) {
int entryIndex = findKey(key);
if (entryIndex < 0) {
entryIndex = ~entryIndex;
if (isBitSet(stateArr_, entryIndex)) { // reusing slot from a deleted key
clearCouponArea(entryIndex);
numDeletedKeys_--;
}
if ((numActiveKeys_ + numDeletedKeys_ + 1) > capacityEntries_) {
resize();
entryIndex = ~findKey(key);
assert entryIndex >= 0;
}
System.arraycopy(key, 0, keysArr_, entryIndex * keySizeBytes_, keySizeBytes_);
setBit(stateArr_, entryIndex);
numActiveKeys_++;
}
return entryIndex;
}
@Override
void deleteKey(final int entryIndex) {
couponsArr_[entryIndex * maxCouponsPerKey_] = 0;
numActiveKeys_--;
numDeletedKeys_++;
if ((numActiveKeys_ > COUPON_MAP_MIN_NUM_ENTRIES)
&& (numActiveKeys_ < (tableEntries_ * COUPON_MAP_SHRINK_TRIGGER_FACTOR))) {
resize();
}
}
private int getCouponCount(final int entryIndex) {
final int offset = entryIndex * maxCouponsPerKey_;
for (int i = 0; i < maxCouponsPerKey_; i++) {
if (couponsArr_[offset + i] == 0) {
return i;
}
}
return maxCouponsPerKey_;
}
@Override
CouponsIterator getCouponsIterator(final int entryIndex) {
return new CouponsIterator(couponsArr_, entryIndex * maxCouponsPerKey_, maxCouponsPerKey_);
}
@Override
double getEntrySizeBytes() {
return entrySizeBytes_;
}
@Override
int getTableEntries() {
return tableEntries_;
}
@Override
int getCapacityEntries() {
return capacityEntries_;
}
@Override
int getCurrentCountEntries() {
return numActiveKeys_ + numDeletedKeys_;
}
@Override
long getMemoryUsageBytes() {
return keysArr_.length
+ ((long)couponsArr_.length * Short.BYTES)
+ stateArr_.length + (4L * Integer.BYTES);
}
@Override
int getActiveEntries() {
return numActiveKeys_;
}
@Override
int getDeletedEntries() {
return numDeletedKeys_;
}
@Override
int getMaxCouponsPerEntry() {
return maxCouponsPerKey_;
}
@Override
int getCapacityCouponsPerEntry() {
return maxCouponsPerKey_;
}
private void resize() { //can grow or shrink
final byte[] oldKeysArr = keysArr_;
final short[] oldCouponsArr = couponsArr_;
final byte[] oldStateArr = stateArr_;
final int oldSizeKeys = tableEntries_;
tableEntries_ = Math.max(
nextPrime((int) (numActiveKeys_ / COUPON_MAP_TARGET_FILL_FACTOR)),
COUPON_MAP_MIN_NUM_ENTRIES
);
capacityEntries_ = (int)(tableEntries_ * COUPON_MAP_GROW_TRIGGER_FACTOR);
numActiveKeys_ = 0;
numDeletedKeys_ = 0;
entrySizeBytes_ = updateEntrySizeBytes(tableEntries_, keySizeBytes_, maxCouponsPerKey_);
keysArr_ = new byte[tableEntries_ * keySizeBytes_];
couponsArr_ = new short[tableEntries_ * maxCouponsPerKey_];
stateArr_ = new byte[(int) Math.ceil(tableEntries_ / 8.0)];
//move data
for (int i = 0; i < oldSizeKeys; i++) {
if (isBitSet(oldStateArr, i) && (oldCouponsArr[i * maxCouponsPerKey_] != 0)) {
final byte[] key =
Arrays.copyOfRange(oldKeysArr, i * keySizeBytes_, (i * keySizeBytes_) + keySizeBytes_);
final int index = insertKey(key);
System.arraycopy(oldCouponsArr, i * maxCouponsPerKey_, couponsArr_,
index * maxCouponsPerKey_, maxCouponsPerKey_);
}
}
}
// for internal use during resize, so no resize check here
private int insertKey(final byte[] key) {
final long[] hash = MurmurHash3.hash(key, SEED);
int entryIndex = getIndex(hash[0], tableEntries_);
final int loopIndex = entryIndex;
do {
if (isBitClear(stateArr_, entryIndex)) {
System.arraycopy(key, 0, keysArr_, entryIndex * keySizeBytes_, keySizeBytes_);
setBit(stateArr_, entryIndex);
numActiveKeys_++;
return entryIndex;
}
entryIndex = (entryIndex + getStride(hash[1], tableEntries_)) % tableEntries_;
} while (entryIndex != loopIndex);
throw new SketchesArgumentException("Key not found and no empty slots!");
}
private void clearCouponArea(final int entryIndex) {
final int couponAreaIndex = entryIndex * maxCouponsPerKey_;
for (int i = 0; i < maxCouponsPerKey_; i++) {
couponsArr_[couponAreaIndex + i] = 0;
}
}
private static final double updateEntrySizeBytes(final int tableEntries, final int keySizeBytes,
final int maxCouponsPerKey) {
final double byteFraction = Math.ceil(tableEntries / 8.0) / tableEntries;
return keySizeBytes + ((double) maxCouponsPerKey * Short.BYTES) + byteFraction;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy