All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.datasketches.hllmap.CouponHashMap Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.hllmap;

import static org.apache.datasketches.Util.checkIfPowerOf2;
import static org.apache.datasketches.Util.invPow2;

import java.util.Arrays;

import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.hash.MurmurHash3;

/**
 * Implements a key-value map where the value is a hash map of coupons.
 *
 * 

The outer map is implemented as a prime-sized, Open Address, Double Hash, with deletes, so * this table can grow and shrink. Each entry row has a 1-byte count where 255 is a marker for * "dirty" and zero is empty. * *

The inner hash tables are implemented with linear probing or OASH and a load factor of 0.75. * * @author Lee Rhodes * @author Alexander Saydakov * @author Kevin Lang */ final class CouponHashMap extends Map { private static final double INNER_LOAD_FACTOR = 0.75; private static final byte DELETED_KEY_MARKER = (byte) 255; private static final int BYTE_MASK = 0XFF; private static final int COUPON_K = 1024; private static final double RSE = 0.408 / Math.sqrt(1024); private final int maxCouponsPerKey_; private final int capacityCouponsPerKey_; private final int entrySizeBytes_; private int tableEntries_; private int capacityEntries_; private int numActiveKeys_; private int numDeletedKeys_; //Arrays private byte[] keysArr_; private short[] couponsArr_; private byte[] curCountsArr_; //also acts as a stateArr: 0 empty, 255 deleted private float[] invPow2SumArr_; private float[] hipEstAccumArr_; private CouponHashMap(final int keySizeBytes, final int maxCouponsPerKey) { super(keySizeBytes); maxCouponsPerKey_ = maxCouponsPerKey; capacityCouponsPerKey_ = (int)(maxCouponsPerKey * INNER_LOAD_FACTOR); entrySizeBytes_ = keySizeBytes + (maxCouponsPerKey * Short.BYTES) + 1 + 4 + 4; } static CouponHashMap getInstance(final int keySizeBytes, final int maxCouponsPerKey) { checkMaxCouponsPerKey(maxCouponsPerKey); final int tableEntries = COUPON_MAP_MIN_NUM_ENTRIES; final CouponHashMap map = new CouponHashMap(keySizeBytes, maxCouponsPerKey); map.tableEntries_ = tableEntries; map.capacityEntries_ = (int)(tableEntries * COUPON_MAP_GROW_TRIGGER_FACTOR); map.numActiveKeys_ = 0; map.numDeletedKeys_ = 0; map.keysArr_ = new byte[tableEntries * keySizeBytes]; map.couponsArr_ = new short[tableEntries * maxCouponsPerKey]; map.curCountsArr_ = new byte[tableEntries]; map.invPow2SumArr_ = new float[tableEntries]; map.hipEstAccumArr_ = new float[tableEntries]; return map; } @Override double update(final byte[] key, final short coupon) { final int entryIndex = findOrInsertKey(key); return 
update(entryIndex, coupon); //negative when time to promote } @Override double update(final int entryIndex, final short coupon) { final int couponMapArrEntryIndex = entryIndex * maxCouponsPerKey_; int innerCouponIndex = (coupon & 0xFFFF) % maxCouponsPerKey_; while (couponsArr_[couponMapArrEntryIndex + innerCouponIndex] != 0) { if (couponsArr_[couponMapArrEntryIndex + innerCouponIndex] == coupon) { return hipEstAccumArr_[entryIndex]; //duplicate, returns the estimate } innerCouponIndex = (innerCouponIndex + 1) % maxCouponsPerKey_; //linear search } if (((curCountsArr_[entryIndex] + 1) & BYTE_MASK) > capacityCouponsPerKey_) { //returns the negative estimate, as signal to promote return -hipEstAccumArr_[entryIndex]; } couponsArr_[couponMapArrEntryIndex + innerCouponIndex] = coupon; //insert curCountsArr_[entryIndex]++; //hip += k/qt; qt -= 1/2^(val); hipEstAccumArr_[entryIndex] += COUPON_K / invPow2SumArr_[entryIndex]; invPow2SumArr_[entryIndex] -= invPow2(coupon16Value(coupon)); return hipEstAccumArr_[entryIndex]; //returns the estimate } @Override double getEstimate(final byte[] key) { final int index = findKey(key); if (index < 0) { return 0; } return hipEstAccumArr_[index]; } @Override double getUpperBound(final byte[] key) { return getEstimate(key) * (1 + RSE); } @Override double getLowerBound(final byte[] key) { return getEstimate(key) * (1 - RSE); } @Override void updateEstimate(final int entryIndex, final double estimate) { if (entryIndex < 0) { throw new SketchesArgumentException("Key not found."); } hipEstAccumArr_[entryIndex] = (float) estimate; } /** * Returns entryIndex if the given key is found. If not found, returns one's complement index * of an empty slot for insertion, which may be over a deleted key. 
* @param key the given key * @return the entryIndex */ @Override int findKey(final byte[] key) { final long[] hash = MurmurHash3.hash(key, SEED); int entryIndex = getIndex(hash[0], tableEntries_); int firstDeletedIndex = -1; final int loopIndex = entryIndex; do { if (curCountsArr_[entryIndex] == 0) { return firstDeletedIndex == -1 ? ~entryIndex : ~firstDeletedIndex; // found empty or deleted } if (curCountsArr_[entryIndex] == DELETED_KEY_MARKER) { if (firstDeletedIndex == -1) { firstDeletedIndex = entryIndex; } } else if (Map.arraysEqual(keysArr_, entryIndex * keySizeBytes_, key, 0, keySizeBytes_)) { return entryIndex; // found key } entryIndex = (entryIndex + getStride(hash[1], tableEntries_)) % tableEntries_; } while (entryIndex != loopIndex); throw new SketchesArgumentException("Key not found and no empty slots!"); } @Override int findOrInsertKey(final byte[] key) { int entryIndex = findKey(key); if (entryIndex < 0) { //key not found entryIndex = ~entryIndex; if (curCountsArr_[entryIndex] == DELETED_KEY_MARKER) { // reusing slot from a deleted key Arrays.fill(couponsArr_, entryIndex * maxCouponsPerKey_, (entryIndex + 1) * maxCouponsPerKey_, (short) 0); curCountsArr_[entryIndex] = 0; numDeletedKeys_--; } if ((numActiveKeys_ + numDeletedKeys_) >= capacityEntries_) { resize(); entryIndex = ~findKey(key); assert entryIndex >= 0; } //insert new key System.arraycopy(key, 0, keysArr_, entryIndex * keySizeBytes_, keySizeBytes_); //initialize HIP: qt <- k; hip <- 0; invPow2SumArr_[entryIndex] = COUPON_K; hipEstAccumArr_[entryIndex] = 0; numActiveKeys_++; } return entryIndex; } @Override void deleteKey(final int entryIndex) { curCountsArr_[entryIndex] = DELETED_KEY_MARKER; numActiveKeys_--; numDeletedKeys_++; if ((numActiveKeys_ > COUPON_MAP_MIN_NUM_ENTRIES) && (numActiveKeys_ < (tableEntries_ * COUPON_MAP_SHRINK_TRIGGER_FACTOR))) { resize(); } } @Override CouponsIterator getCouponsIterator(final int entryIndex) { return new CouponsIterator(couponsArr_, entryIndex * 
maxCouponsPerKey_, maxCouponsPerKey_); } @Override double getEntrySizeBytes() { return entrySizeBytes_; } @Override int getTableEntries() { return tableEntries_; } @Override int getCapacityEntries() { return capacityEntries_; } @Override int getCurrentCountEntries() { return numActiveKeys_ + numDeletedKeys_; } @Override long getMemoryUsageBytes() { final long arrays = keysArr_.length + ((long) couponsArr_.length * Short.BYTES) + curCountsArr_.length + ((long) invPow2SumArr_.length * Float.BYTES) + ((long) hipEstAccumArr_.length * Float.BYTES); final long other = 4 * 5; return arrays + other; } @Override int getActiveEntries() { return numActiveKeys_; } @Override int getDeletedEntries() { return numDeletedKeys_; } @Override int getMaxCouponsPerEntry() { return maxCouponsPerKey_; } @Override int getCapacityCouponsPerEntry() { return capacityCouponsPerKey_; } private static final void checkMaxCouponsPerKey(final int maxCouponsPerKey) { checkIfPowerOf2(maxCouponsPerKey, "maxCouponsPerKey"); final int cpk = maxCouponsPerKey; if ((cpk < 16) || (cpk > 256)) { throw new SketchesArgumentException( "Required: 16 <= maxCouponsPerKey <= 256 : " + maxCouponsPerKey); } } private void resize() { final byte[] oldKeysArr = keysArr_; final short[] oldCouponMapArr = couponsArr_; final byte[] oldCurCountsArr = curCountsArr_; final float[] oldInvPow2SumArr = invPow2SumArr_; final float[] oldHipEstAccumArr = hipEstAccumArr_; final int oldNumEntries = tableEntries_; tableEntries_ = Math.max( nextPrime((int) (numActiveKeys_ / COUPON_MAP_TARGET_FILL_FACTOR)), COUPON_MAP_MIN_NUM_ENTRIES ); capacityEntries_ = (int)(tableEntries_ * COUPON_MAP_GROW_TRIGGER_FACTOR); keysArr_ = new byte[tableEntries_ * keySizeBytes_]; couponsArr_ = new short[tableEntries_ * maxCouponsPerKey_]; curCountsArr_ = new byte[tableEntries_]; invPow2SumArr_ = new float[tableEntries_]; hipEstAccumArr_ = new float[tableEntries_]; numActiveKeys_ = 0; numDeletedKeys_ = 0; for (int i = 0; i < oldNumEntries; i++) { if 
((oldCurCountsArr[i] != 0) && (oldCurCountsArr[i] != DELETED_KEY_MARKER)) { //extract an old valid key final byte[] key = Arrays.copyOfRange(oldKeysArr, i * keySizeBytes_, (i * keySizeBytes_) + keySizeBytes_); //insert the key and get its index final int index = insertKey(key); //copy the coupons array into that index System.arraycopy(oldCouponMapArr, i * maxCouponsPerKey_, couponsArr_, index * maxCouponsPerKey_, maxCouponsPerKey_); //transfer the count curCountsArr_[index] = oldCurCountsArr[i]; //transfer the HIP registers invPow2SumArr_[index] = oldInvPow2SumArr[i]; hipEstAccumArr_[index] = oldHipEstAccumArr[i]; } } } // for internal use by resize, no resize check and no deleted key check here // no changes to HIP private int insertKey(final byte[] key) { final long[] hash = MurmurHash3.hash(key, SEED); int entryIndex = getIndex(hash[0], tableEntries_); final int loopIndex = entryIndex; do { if (curCountsArr_[entryIndex] == 0) { System.arraycopy(key, 0, keysArr_, entryIndex * keySizeBytes_, keySizeBytes_); numActiveKeys_++; return entryIndex; } entryIndex = (entryIndex + getStride(hash[1], tableEntries_)) % tableEntries_; } while (entryIndex != loopIndex); throw new SketchesArgumentException("Key not found and no empty slots!"); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy