All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.hllmap.Map Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 7.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.hllmap;

import java.math.BigInteger;

import org.apache.datasketches.hash.MurmurHash3;

/**
 * Base class and API for all the maps.
 *
 * @author Lee Rhodes
 * @author Alexander Saydakov
 * @author Kevin Lang
 */
abstract class Map {

  private static final String LS = System.getProperty("line.separator");

  static final long SEED = 1234567890L;
  static final int SIX_BIT_MASK = 0X3F; // 6 bits
  static final int TEN_BIT_MASK = 0X3FF; //10 bits

  // These parameters are tuned as a set to avoid pathological resizing.
  // Consider modeling the behavior before changing any of them
  static final int COUPON_MAP_MIN_NUM_ENTRIES = 157;
  static final double COUPON_MAP_SHRINK_TRIGGER_FACTOR = 0.5;
  static final double COUPON_MAP_GROW_TRIGGER_FACTOR = 15.0 / 16.0;
  static final double COUPON_MAP_TARGET_FILL_FACTOR = 2.0 / 3.0;

  final int keySizeBytes_;

  Map(final int keySizeBytes) {
    keySizeBytes_ = keySizeBytes;
  }

  /**
   * Update this map with a key and a coupon.
   * Return the cardinality estimate of all identifiers that have been associated with this key,
   * including this update.
   * @param key the dimensional criteria for measuring cardinality
   * @param coupon the property associated with the key for which cardinality is to be measured.
   * @return the cardinality estimate of all identifiers that have been associated with this key,
   * including this update.
   */
  abstract double update(byte[] key, short coupon);

  /**
   * Updates this map with an index and a coupon
   * @param index the given index
   * @param coupon the given coupon
   * @return the cardinality estimate of all identifiers that have been associated with this key,
   * including this update.
   */
  abstract double update(int index, short coupon);

  /**
   * Returns the estimate of the cardinality of identifiers associated with the given key.
   * @param key the given key
   * @return the estimate of the cardinality of identifiers associated with the given key.
   */
  abstract double getEstimate(byte[] key);

  /**
   * Update the internal estimate at the given index
   * @param index the given index
   * @param estimate the given estimate
   */
  void updateEstimate(final int index, final double estimate) {}

  /**
   * Returns the upper bound cardinality with respect to {@link #getEstimate(byte[])} associated
   * with the given key. This approximates the RSE with 68% confidence.
   * @param key the given key
   * @return the upper bound cardinality with respect to {@link #getEstimate(byte[])} associated
   * with the given key.
   */
  abstract double getUpperBound(byte[] key);

  /**
   * Returns the lower bound cardinality with respect to {@link #getEstimate(byte[])} associated
   * with the given key. This approximates the RSE with 68% confidence.
   * @param key the given key
   * @return the lower bound cardinality with respect to {@link #getEstimate(byte[])} associated
   * with the given key.
   */
  abstract double getLowerBound(byte[] key);

  abstract int findKey(byte[] key);

  abstract int findOrInsertKey(byte[] key);

  abstract CouponsIterator getCouponsIterator(int index);

  abstract int getMaxCouponsPerEntry();

  abstract int getCapacityCouponsPerEntry();

  abstract int getActiveEntries();

  abstract int getDeletedEntries();

  abstract double getEntrySizeBytes();

  abstract int getTableEntries();

  abstract int getCapacityEntries();

  abstract int getCurrentCountEntries();

  abstract long getMemoryUsageBytes();

  int getKeySizeBytes() {
    return keySizeBytes_;
  }

  /**
   * Delete the key at the given index
   * @param index the given index
   */
  void deleteKey(final int index) {}

  /**
   * Returns true if the two specified sub-arrays of bytes are equal to one another.
   * Two arrays are considered equal if all corresponding pairs of elements in the two arrays are
   * equal. In other words, two arrays are equal if and only if they contain the same elements
   * in the same order.
   *
   * @param a one sub-array to be tested for equality
   * @param offsetA the offset in bytes of the start of sub-array a.
   * @param b the other sub-array to be tested for equality
   * @param offsetB the offset in bytes of the start of sub-array b.
   * @param length the length in bytes of the two sub-arrays.
   * @return true if the two sub-arrays are equal
   */
  static final boolean arraysEqual(final byte[] a, final int offsetA, final byte[] b,
      final int offsetB, final int length) {
    for (int i = 0; i < length; i++) {
      if (a[i + offsetA] != b[i + offsetB]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the HLL array index and value as a 16-bit coupon given the identifier to be hashed
   * and k.
   * @param identifier the given identifier
   * @return the HLL array index and value
   */
  static final int coupon16(final byte[] identifier) {
    final long[] hash = MurmurHash3.hash(identifier, SEED);
    final int hllIdx = (int) (((hash[0] >>> 1) % 1024) & TEN_BIT_MASK); //hash[0] for 10-bit address
    final int lz = Long.numberOfLeadingZeros(hash[1]);
    final int value = (lz > 62 ? 62 : lz) + 1;
    return (value << 10) | hllIdx;
  }

  static final int coupon16Value(final int coupon) {
    return (coupon >>> 10) & SIX_BIT_MASK;
  }

  static final int getIndex(final long hash, final int tableEntries) {
    return (int) ((hash >>> 1) % tableEntries);
  }

  static final int getStride(final long hash, final int tableEntries) {
    return (int) ((hash >>> 1) % (tableEntries - 2L) + 1L);
  }

  static boolean isBitSet(final byte[] byteArr, final int bitIndex) {
    final int byteIndex = bitIndex / 8;
    final int mask = 1 << (bitIndex % 8);
    return (byteArr[byteIndex] & mask) > 0;
  }

  static boolean isBitClear(final byte[] byteArr, final int bitIndex) {
    final int byteIndex = bitIndex / 8;
    final int mask = 1 << (bitIndex % 8);
    return (byteArr[byteIndex] & mask) == 0;
  }

  static void clearBit(final byte[] byteArr, final int index) {
    final int byteIndex = index / 8;
    final int mask = 1 << (index % 8);
    byteArr[byteIndex] &= ~mask;
  }

  static void setBit(final byte[] bits, final int index) {
    final int byteIndex = index / 8;
    final int mask = 1 << (index % 8);
    bits[byteIndex] |= mask;
  }

  /**
   * Returns the next prime number that is greater than the given target. There will be
   * no prime numbers less than the returned prime number that are greater than the given target.
   * @param target the starting value to begin the search for the next prime
   * @return the next prime number that is greater than the given target.
   */
  static int nextPrime(final int target) {
    return BigInteger.valueOf(target).nextProbablePrime().intValueExact();
  }

  static String fmtLong(final long value) {
    return String.format("%,d", value);
  }

  static String fmtDouble(final double value) {
    return String.format("%,.3f", value);
  }

  @Override
  public String toString() {
    final String mcpe = Map.fmtLong(getMaxCouponsPerEntry());
    final String ccpe = Map.fmtLong(getCapacityCouponsPerEntry());
    final String te = Map.fmtLong(getTableEntries());
    final String ce = Map.fmtLong(getCapacityEntries());
    final String cce = Map.fmtLong(getCurrentCountEntries());
    final String ae = Map.fmtLong(getActiveEntries());
    final String de = Map.fmtLong(getDeletedEntries());
    final String esb = Map.fmtDouble(getEntrySizeBytes());
    final String mub = Map.fmtLong(getMemoryUsageBytes());

    final StringBuilder sb = new StringBuilder();
    final String thisSimpleName = this.getClass().getSimpleName();
    sb.append("### ").append(thisSimpleName).append(" SUMMARY: ").append(LS);
    sb.append("    Max Coupons Per Entry     : ").append(mcpe).append(LS);
    sb.append("    Capacity Coupons Per Entry: ").append(ccpe).append(LS);
    sb.append("    Table Entries             : ").append(te).append(LS);
    sb.append("    Capacity Entries          : ").append(ce).append(LS);
    sb.append("    Current Count Entries     : ").append(cce).append(LS);
    sb.append("      Active Entries          : ").append(ae).append(LS);
    sb.append("      Deleted Entries         : ").append(de).append(LS);
    sb.append("    Entry Size Bytes          : ").append(esb).append(LS);
    sb.append("    Memory Usage Bytes        : ").append(mub).append(LS);
    sb.append("### END SKETCH SUMMARY").append(LS);
    return sb.toString();
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy