All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.sketches.theta.Rebuilder Maven / Gradle / Ivy

/*
 * Copyright 2015-16, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.theta;

import static com.yahoo.sketches.QuickSelect.selectExcludingZeros;
import static com.yahoo.sketches.theta.PreambleUtil.LG_ARR_LONGS_BYTE;
import static com.yahoo.sketches.theta.PreambleUtil.RETAINED_ENTRIES_INT;
import static com.yahoo.sketches.theta.PreambleUtil.THETA_LONG;

import com.yahoo.memory.Memory;
import com.yahoo.memory.NativeMemory;
import com.yahoo.sketches.HashOperations;
import com.yahoo.sketches.Util;

/**
 * Handles common resize, rebuild and move operations.
 * The Memory based operations assume a specific data structure that is unique to the theta sketches:
 * a preamble of {@code preambleLongs} 8-byte longs followed immediately by the hash table of
 * {@code 1 << lgArrLongs} longs.
 *
 * <p>All methods are static; this class cannot be instantiated.</p>
 *
 * @author Lee Rhodes
 */
final class Rebuilder {

  /** Static utility class; not instantiable. */
  private Rebuilder() {}

  /**
   * Rebuild the hashTable in the given Memory at its current size. Changes theta and thus count.
   * This assumes a Memory preamble of standard form with correct values of curCount and thetaLong.
   * ThetaLong and curCount will change.
   * Afterwards, caller must update local class members curCount and thetaLong from Memory.
   *
   * <p>The work is done on on-heap copies: the table is read out of Memory, a new theta is chosen
   * by QuickSelect, the surviving entries are re-inserted into a fresh array, and that array is
   * written back over the original table.</p>
   *
   * @param mem the given Memory holding the preamble followed by the hash table
   * @param preambleLongs size of preamble in longs
   * @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch
   * @param lgArrLongs the log_base2 of the current size of the hash table
   * @param curCount the number of valid entries currently in the hash table
   */
  static final void quickSelectAndRebuild(final Memory mem, final int preambleLongs,
      final int lgNomLongs, final int lgArrLongs, int curCount) {
    //Pull data into tmp arr for QS algo
    final int arrLongs = 1 << lgArrLongs;
    final long[] tmpArr = new long[arrLongs];
    final int preBytes = preambleLongs << 3; //longs to bytes
    mem.getLongArray(preBytes, tmpArr, 0, arrLongs); //copy mem data to tmpArr

    //Do the QuickSelect on a tmp arr to create new thetaLong
    final int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS
    final long newThetaLong = selectExcludingZeros(tmpArr, curCount, pivot);
    mem.putLong(THETA_LONG, newThetaLong); //UPDATE thetalong

    //Rebuild to clean up dirty data, update count.
    //The value returned by hashArrayInsert is what gets stored as the new retained-entries count.
    final long[] tgtArr = new long[arrLongs];
    final int newCurCount =
        HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong);
    mem.putInt(RETAINED_ENTRIES_INT, newCurCount); //UPDATE curCount

    //put the rebuilt array back into memory
    mem.putLongArray(preBytes, tgtArr, 0, arrLongs);
  }

  /**
   * Moves me (the entire sketch) to a new larger Memory location and rebuilds the hash table.
   * This assumes a Memory preamble of standard form with the correct value of thetaLong.
   * Afterwards, the caller must update the local Memory reference, lgArrLongs
   * and hashTableThreshold from the dstMemory and free the source Memory.
   *
   * <p>The preamble is copied verbatim, and then the lgArrLongs byte in the destination preamble
   * is overwritten with {@code dstLgArrLongs}. The source Memory is only read.</p>
   *
   * @param srcMem the source Memory
   * @param preambleLongs size of preamble in longs
   * @param srcLgArrLongs size (log_base2) of source hash table
   * @param dstMem the destination Memory, which may be garbage
   * @param dstLgArrLongs the destination hash table target size (log_base2)
   * @param thetaLong theta as a long
   */
  static final void moveAndResize(final Memory srcMem, final int preambleLongs,
      final int srcLgArrLongs, final Memory dstMem, final int dstLgArrLongs, final long thetaLong) {
    //Move Preamble to destination memory
    final int preBytes = preambleLongs << 3; //longs to bytes
    NativeMemory.copy(srcMem, 0, dstMem, 0, preBytes); //copy the preamble
    //Bulk copy source to on-heap buffer
    final int srcHTLen = 1 << srcLgArrLongs;
    final long[] srcHTArr = new long[srcHTLen];
    srcMem.getLongArray(preBytes, srcHTArr, 0, srcHTLen);
    //Create destination buffer
    final int dstHTLen = 1 << dstLgArrLongs;
    final long[] dstHTArr = new long[dstHTLen];
    //Rebuild hash table in destination buffer
    HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong);
    //Bulk copy to destination memory; the whole table region is overwritten
    dstMem.putLongArray(preBytes, dstHTArr, 0, dstHTLen);
    dstMem.putByte(LG_ARR_LONGS_BYTE, (byte) dstLgArrLongs); //update in dstMem
  }

  /**
   * Resizes existing hash array into a larger one within a single Memory assuming enough space.
   * This assumes a Memory preamble of standard form with the correct value of thetaLong.
   * The Memory lgArrLongs will change.
   * Afterwards, the caller must update local copies of lgArrLongs and hashTableThreshold from
   * Memory.
   *
   * <p>Theta is read from the preamble in Memory rather than passed in.</p>
   *
   * @param mem the Memory
   * @param preambleLongs the size of the preamble in longs
   * @param srcLgArrLongs the size (log_base2) of the source hash table
   * @param dstLgArrLongs the LgArrLongs value for the new hash table
   */
  static final void resize(final Memory mem, final int preambleLongs,
      final int srcLgArrLongs, final int dstLgArrLongs) {
    //Preamble stays in place
    final int preBytes = preambleLongs << 3; //longs to bytes
    //Bulk copy source to on-heap buffer
    final int srcHTLen = 1 << srcLgArrLongs; //current value
    final long[] srcHTArr = new long[srcHTLen]; //on-heap src buffer
    mem.getLongArray(preBytes, srcHTArr, 0, srcHTLen);
    //Create destination on-heap buffer
    final int dstHTLen = 1 << dstLgArrLongs;
    final long[] dstHTArr = new long[dstHTLen]; //on-heap dst buffer
    //Rebuild hash table in destination buffer
    final long thetaLong = mem.getLong(THETA_LONG);
    HashOperations.hashArrayInsert(srcHTArr, dstHTArr, dstLgArrLongs, thetaLong);
    //Bulk copy to destination memory
    mem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); //put it back, no need to clear
    mem.putByte(LG_ARR_LONGS_BYTE, (byte) dstLgArrLongs); //update in mem
  }

  /**
   * Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be
   * an integer value between zero and the given lgRF, inclusive.
   *
   * <p>The number of longs available for the hash table is the capacity in longs minus the
   * preamble. That difference is computed in 64-bit arithmetic and clamped to the {@code int}
   * range, so capacities of 16 GiB or more no longer wrap around when narrowed to {@code int}.</p>
   *
   * @param capBytes the current memory capacity in bytes
   * @param lgArrLongs the current lg hash table size in longs
   * @param preLongs the current preamble size in longs
   * @param lgRF the configured lg Resize Factor
   * @return the actual log2 Resize Factor that can be used to grow the hash table
   */
  static final int actLgResizeFactor(long capBytes, int lgArrLongs, int preLongs, int lgRF) {
    //Subtract as long first: narrowing (capBytes >> 3) to int before the subtraction silently
    // discards the high bits for very large capacities.
    final long availLongs = (capBytes >> 3) - preLongs;
    final int availLongsInt =
        (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, availLongs));
    final int maxHTLongs = Util.floorPowerOf2(availLongsInt);
    //maxHTLongs is expected to be a power of 2, so its trailing-zero count is its log_base2
    final int lgFactor = Math.max(Integer.numberOfTrailingZeros(maxHTLongs) - lgArrLongs, 0);
    return Math.min(lgFactor, lgRF);
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy