org.apache.datasketches.hll.DirectCouponHashSet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datasketches-java Show documentation
Show all versions of datasketches-java Show documentation
Core sketch algorithms used alone and by other Java repositories in the DataSketches library.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.hll;
import static org.apache.datasketches.hll.HllUtil.EMPTY;
import static org.apache.datasketches.hll.HllUtil.KEY_MASK_26;
import static org.apache.datasketches.hll.HllUtil.RESIZE_DENOM;
import static org.apache.datasketches.hll.HllUtil.RESIZE_NUMER;
import static org.apache.datasketches.hll.HllUtil.noWriteAccess;
import static org.apache.datasketches.hll.PreambleUtil.HASH_SET_INT_ARR_START;
import static org.apache.datasketches.hll.PreambleUtil.HASH_SET_PREINTS;
import static org.apache.datasketches.hll.PreambleUtil.LG_K_BYTE;
import static org.apache.datasketches.hll.PreambleUtil.extractHashSetCount;
import static org.apache.datasketches.hll.PreambleUtil.extractInt;
import static org.apache.datasketches.hll.PreambleUtil.extractLgArr;
import static org.apache.datasketches.hll.PreambleUtil.insertHashSetCount;
import static org.apache.datasketches.hll.PreambleUtil.insertInt;
import static org.apache.datasketches.hll.PreambleUtil.insertLgArr;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.SketchesStateException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
* @author Lee Rhodes
*/
class DirectCouponHashSet extends DirectCouponList {
//Constructs this sketch with data.
DirectCouponHashSet(final int lgConfigK, final TgtHllType tgtHllType,
final WritableMemory wmem) {
super(lgConfigK, tgtHllType, CurMode.SET, wmem);
assert wmem.getByte(LG_K_BYTE) > 7;
}
//Constructs this sketch with read-only data, may be compact.
DirectCouponHashSet(final int lgConfigK, final TgtHllType tgtHllType,
final Memory mem) {
super(lgConfigK, tgtHllType, CurMode.SET, mem);
assert mem.getByte(LG_K_BYTE) > 7;
}
@Override //returns on-heap Set
CouponHashSet copy() {
return CouponHashSet.heapifySet(mem);
}
@Override //returns on-heap Set
CouponHashSet copyAs(final TgtHllType tgtHllType) {
final CouponHashSet clist = CouponHashSet.heapifySet(mem);
return new CouponHashSet(clist, tgtHllType);
}
@Override
HllSketchImpl couponUpdate(final int coupon) {
if (wmem == null) { noWriteAccess(); }
//avoid array copy
final int index = find(mem, getLgCouponArrInts(), coupon);
if (index >= 0) {
return this; //found duplicate, ignore
}
insertInt(wmem, HASH_SET_INT_ARR_START + (~index << 2), coupon);
insertHashSetCount(wmem, getCouponCount() + 1);
final boolean promote = checkGrowOrPromote();
if (!promote) { return this; }
return promoteListOrSetToHll(this);
}
@Override
int getCouponCount() {
return extractHashSetCount(mem);
}
@Override
int getMemDataStart() {
return HASH_SET_INT_ARR_START;
}
@Override
int getPreInts() {
return HASH_SET_PREINTS;
}
private boolean checkGrowOrPromote() {
int lgCouponArrInts = getLgCouponArrInts();
if ((RESIZE_DENOM * getCouponCount()) > (RESIZE_NUMER * (1 << lgCouponArrInts))) {
if (lgCouponArrInts == (getLgConfigK() - 3)) {
return true; // promote
}
//TODO if direct, ask for more memory
insertLgArr(wmem, ++lgCouponArrInts);
growHashSet(wmem, lgCouponArrInts);
}
return false;
}
private static final void growHashSet(final WritableMemory wmem, final int tgtLgCouponArrSize) {
final int tgtArrSize = 1 << tgtLgCouponArrSize;
final int[] tgtCouponIntArr = new int[tgtArrSize];
final int oldLen = 1 << extractLgArr(wmem);
for (int i = 0; i < oldLen; i++) {
final int fetched = extractInt(wmem, HASH_SET_INT_ARR_START + (i << 2));
if (fetched != EMPTY) {
final int idx = find(tgtCouponIntArr, tgtLgCouponArrSize, fetched);
if (idx < 0) { //found EMPTY
tgtCouponIntArr[~idx] = fetched; //insert
continue;
}
throw new SketchesStateException("Error: found duplicate.");
}
}
wmem.clear(HASH_SET_INT_ARR_START, tgtArrSize << 2);
wmem.putIntArray(HASH_SET_INT_ARR_START, tgtCouponIntArr, 0, tgtArrSize);
}
//Searches the Coupon hash table (embedded in Memory) for an empty slot
// or a duplicate depending on the context.
//If entire entry is empty, returns one's complement of index = found empty.
//If entry equals given coupon, returns its index = found duplicate coupon
//Continues searching
//If the probe comes back to original index, throws an exception.
private static final int find(final Memory mem, final int lgArr,
final int coupon) {
final int arrMask = (1 << lgArr) - 1;
int probe = coupon & arrMask;
final int loopIndex = probe;
do {
final int couponAtIndex = extractInt(mem, HASH_SET_INT_ARR_START + (probe << 2));
if (couponAtIndex == EMPTY) { return ~probe; } //empty
else if (coupon == couponAtIndex) { return probe; } //duplicate
final int stride = ((coupon & KEY_MASK_26) >>> lgArr) | 1;
probe = (probe + stride) & arrMask;
} while (probe != loopIndex);
throw new SketchesArgumentException("Key not found and no empty slots!");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy