All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.datasketches.kll.KllHelper Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 6.1.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.kll;

import static java.lang.Math.abs;
import static java.lang.Math.ceil;
import static java.lang.Math.exp;
import static java.lang.Math.log;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.lang.Math.pow;
import static java.lang.Math.round;
import static org.apache.datasketches.common.Family.KLL;
import static org.apache.datasketches.common.Util.floorPowerOf2;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.LEVEL_ZERO_SORTED_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_EMPTY;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_FULL;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_SINGLE;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH;
import static org.apache.datasketches.quantilescommon.QuantilesAPI.UNSUPPORTED_MSG;

import java.nio.ByteOrder;
import java.util.Arrays;

import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.kll.KllSketch.SketchStructure;
import org.apache.datasketches.kll.KllSketch.SketchType;
import org.apache.datasketches.memory.WritableBuffer;
import org.apache.datasketches.memory.WritableMemory;

/**
 * This class provides some useful sketch analysis tools that are used internally.
 *
 * @author Lee Rhodes
 */
final class KllHelper {
  /** Platform line separator, appended after each line of the text built by this class. */
  public static final String LS = System.getProperty("line.separator");
  /**
   * Used by getKFromEpsilon: when the computed (fractional) k is within this distance of its
   * rounded value, the rounded value is used; otherwise the computed k is rounded up.
   */
  static final double EPS_DELTA_THRESHOLD = 1E-6;
  /** Lower clamp applied to the epsilon passed to getKFromEpsilon. */
  static final double MIN_EPS = 4.7634E-5;
  // The four constants below parameterize the rank-error model used in this class:
  //   error = COEF / k^EXP   (see getNormalizedRankError and its inverse, getKFromEpsilon).
  /** Coefficient of the error model used when pmf is true. */
  static final double PMF_COEF = 2.446;
  /** Exponent of the error model used when pmf is true. */
  static final double PMF_EXP = 0.9433;
  /** Coefficient of the error model used when pmf is false. */
  static final double CDF_COEF = 2.296;
  /** Exponent of the error model used when pmf is false. */
  static final double CDF_EXP = 0.9723;

  /**
   * Mutable result holder filled in by getGrowthSchemeForGivenN. After that method returns,
   * the fields describe the last growth step that was evaluated.
   */
  static class GrowthStats {
    /** The sketch type passed to getGrowthSchemeForGivenN. */
    SketchType sketchType;
    /** The k passed to getGrowthSchemeForGivenN. */
    int k;
    /** The m passed to getGrowthSchemeForGivenN. */
    int m;
    /** The requested stream length n passed to getGrowthSchemeForGivenN. */
    long givenN;
    /** Cumulative N reported by getFinalSketchStatsAtNumLevels for numLevels. */
    long maxN;
    /** Number of levels at the last evaluated growth step. */
    int numLevels;
    /** Cumulative item count reported by getFinalSketchStatsAtNumLevels for numLevels. */
    int maxItems;
    /** maxItems * typeBytes + numLevels * Integer.BYTES + 2 * typeBytes + DATA_START_ADR. */
    int compactBytes;
    /** compactBytes + Integer.BYTES. */
    int updatableBytes;
  }

  /**
   * Simple value holder pairing a stream length, a level count and an item count.
   * It is returned by getLevelCapacityItems (for one level) and by
   * getFinalSketchStatsAtNumLevels (cumulative over all levels).
   */
  static class LevelStats {
    /** Stream length (N) represented. */
    long n;
    /** Number of levels of the sketch this statistic was computed for. */
    public int numLevels;
    /** Number of items. */
    int numItems;

    /**
     * Stores the three given values as-is.
     * @param n the stream length
     * @param numLevels the number of levels
     * @param numItems the number of items
     */
    LevelStats(final long n, final int numLevels, final int numItems) {
      this.numItems = numItems;
      this.numLevels = numLevels;
      this.n = n;
    }
  }

  /**
   * The exact powers of 3 from 3^0 to 3^30, where the array index is the exponent.
   * The reference is final because this is a fixed lookup table that must never be reassigned.
   */
  private static final long[] powersOfThree = {
      1L, 3L, 9L, 27L, 81L, 243L, 729L, 2187L, 6561L, 19683L, 59049L, 177147L, 531441L,
      1594323L, 4782969L, 14348907L, 43046721L, 129140163L, 387420489L, 1162261467L,
      3486784401L, 10460353203L, 31381059609L, 94143178827L, 282429536481L,
      847288609443L, 2541865828329L, 7625597484987L, 22876792454961L, 68630377364883L,
      205891132094649L};

  /**
   * Validates the given k against the given m and the sketch's maximum k.
   * @param k the value to check; accepted when m &le; k &le; KllSketch.MAX_K.
   * @param m the lower bound that k must not fall below.
   * @throws SketchesArgumentException if k is outside that range.
   */
  static void checkK(final int k, final int m) {
    final boolean inRange = (k >= m) && (k <= KllSketch.MAX_K);
    if (inRange) { return; }
    throw new SketchesArgumentException(
        "K must be >= " + m + " and <= " + KllSketch.MAX_K + ": " + k);
  }

  /**
   * Validates the given m: it must lie between KllSketch.MIN_M and KllSketch.MAX_M (inclusive)
   * and must be even.
   * @param m the value to check
   * @throws SketchesArgumentException if m is out of range or odd.
   */
  static void checkM(final int m) {
    final boolean tooSmall = m < KllSketch.MIN_M;
    final boolean tooLarge = m > KllSketch.MAX_M;
    final boolean isOdd = (m & 1) != 0;
    if (tooSmall || tooLarge || isOdd) {
      throw new SketchesArgumentException(
          "M must be >= 2, <= 8 and even: " + m);
    }
  }

  /**
   * Sums the capacities of all levels, i.e. the total number of items a sketch with the given
   * configuration can hold.
   * Note: this method actually works for k items up to k = 2^29 and 61 levels,
   * however only k items up to (2^16 - 1) are currently used by the sketch.
   * @param k The sizing / accuracy parameter of the sketch in items.
   * @param m the size of the smallest level in items. Default is 8.
   * @param numLevels the upper bound number of levels based on n items.
   * @return the total item capacity of the sketch (the long sum narrowed to int).
   */
  static int computeTotalItemCapacity(final int k, final int m, final int numLevels) {
    long capacitySum = 0;
    int lvl = 0;
    while (lvl < numLevels) {
      capacitySum += levelCapacity(k, numLevels, lvl, m);
      lvl++;
    }
    return (int) capacitySum;
  }

  /**
   * Replaces, in place, each weight with the running sum of all weights up to and including it.
   * For example, {1,1,1,1} becomes {1,2,3,4}.
   * @param array of actual weights from the sketch, where first element is not zero.
   * It is modified by this call.
   * @return total weight, which is the last cumulative value (0 for an empty array).
   */
  public static long convertToCumulative(final long[] array) {
    long runningTotal = 0;
    int idx = 0;
    while (idx < array.length) {
      runningTotal += array[idx];
      array[idx] = runningTotal;
      idx++;
    }
    return runningTotal;
  }

  /**
   * Returns the number of items currently held in the given level, computed as the difference
   * between adjacent entries of the levels array.
   * @param level the zero-based level index
   * @param numLevels the current number of levels
   * @param levels the levels array; entry i is the start offset of level i
   * @return the item count of the level, or 0 when level &ge; numLevels.
   */
  static int currentLevelSizeItems(final int level, final int numLevels, final int[] levels) {
    return (level < numLevels) ? levels[level + 1] - levels[level] : 0;
  }

  /**
   * Computes the cumulative item count and cumulative N of a sketch whose given number of
   * levels are all completely filled, optionally printing one row per level.
   * @param k the given user configured sketch parameter
   * @param m the given user configured sketch parameter
   * @param numLevels the given number of levels of the sketch
   * @param printSketchStructure if true will print the details of the sketch structure at the given numLevels.
   * @return LevelStats with the final summary of the sketch's cumulative N,
   * and cumulative items at the given numLevels.
   */
  static LevelStats getFinalSketchStatsAtNumLevels(
      final int k,
      final int m,
      final int numLevels,
      final boolean printSketchStructure) {
    if (printSketchStructure) {
      println("SKETCH STRUCTURE:");
      println("Given K        : " + k);
      println("Given M        : " + m);
      println("Given NumLevels: " + numLevels);
      printf("%6s %8s %12s %18s %18s\n", "Level", "Items", "CumItems", "N at Level", "CumN");
    }
    int itemsSoFar = 0;
    long nSoFar = 0;
    for (int lvl = 0; lvl < numLevels; lvl++) {
      final LevelStats stats = getLevelCapacityItems(k, m, numLevels, lvl);
      itemsSoFar += stats.numItems;
      nSoFar += stats.n;
      if (!printSketchStructure) { continue; }
      printf("%6d %,8d %,12d %,18d %,18d\n", lvl, stats.numItems, itemsSoFar, stats.n, nSoFar);
    }
    return new LevelStats(nSoFar, numLevels, itemsSoFar);
  }

  /**
   * This method is for direct Double and Float sketches only.
   * Starting from one level, it adds levels one at a time until the cumulative N of a completely
   * filled sketch reaches the requested n, recording (and optionally printing) the sizes at each step.
   * @param k the given user configured sketch parameter
   * @param m the given user configured sketch parameter
   * @param n the desired stream length
   * @param sketchType the given sketch type: either DOUBLES_SKETCH or FLOATS_SKETCH.
   * @param printGrowthScheme if true the entire growth scheme of the sketch will be printed.
   * @return GrowthStats describing the last growth step evaluated
   * @throws SketchesArgumentException if sketchType is ITEMS_SKETCH.
   */
  static GrowthStats getGrowthSchemeForGivenN(
      final int k,
      final int m,
      final long n,
      final SketchType sketchType,
      final boolean printGrowthScheme) {
    if (sketchType == ITEMS_SKETCH) { throw new SketchesArgumentException(UNSUPPORTED_MSG); }
    final GrowthStats growth = new GrowthStats();
    growth.sketchType = sketchType;
    growth.k = k;
    growth.m = m;
    growth.givenN = n;
    growth.numLevels = 0;
    if (printGrowthScheme) {
      println("GROWTH SCHEME:");
      println("Given SketchType: " + growth.sketchType.toString());
      println("Given K         : " + growth.k);
      println("Given M         : " + growth.m);
      println("Given N         : " + growth.givenN);
      printf("%10s %10s %20s %13s %15s\n", "NumLevels", "MaxItems", "MaxN", "CompactBytes", "UpdatableBytes");
    }
    final int itemBytes = sketchType.getBytes();
    while (true) {
      growth.numLevels++;
      final LevelStats stats = getFinalSketchStatsAtNumLevels(growth.k, growth.m, growth.numLevels, false);
      growth.maxItems = stats.numItems;
      growth.maxN = stats.n;
      final int perLevelIntBytes = growth.numLevels * Integer.BYTES;
      final int twoItemBytes = 2 * itemBytes;
      growth.compactBytes = growth.maxItems * itemBytes + perLevelIntBytes + twoItemBytes + DATA_START_ADR;
      growth.updatableBytes = growth.compactBytes + Integer.BYTES;
      if (printGrowthScheme) {
        printf("%10d %,10d %,20d %,13d %,15d\n",
            growth.numLevels, growth.maxItems, growth.maxN, growth.compactBytes, growth.updatableBytes);
      }
      // Stop at the first level count whose full-sketch N covers the requested n.
      if (stats.n >= n) { break; }
    }
    return growth;
  }

  /**
   * Inverts the rank-error model (eps = COEF / k^EXP) to find a k for the requested epsilon.
   * The constants were derived as the best fit to 99 percentile empirically measured max error
   * in thousands of trials.
   * @param epsilon the requested normalized rank error; values below MIN_EPS are raised to MIN_EPS.
   * @param pmf if true the PMF constants are used, otherwise the CDF constants.
   * @return the computed k, clamped to the range [KllSketch.MIN_M, KllSketch.MAX_K].
   */
  static int getKFromEpsilon(final double epsilon, final boolean pmf) {
    //Ensure that eps is >= than the lowest possible eps given MAX_K and pmf=false.
    final double eps = max(epsilon, MIN_EPS);
    final double coef = pmf ? PMF_COEF : CDF_COEF;
    final double exponent = pmf ? PMF_EXP : CDF_EXP;
    final double kdbl = exp(log(coef / eps) / exponent);
    final double krnd = round(kdbl);
    // If kdbl is (numerically) a whole number keep it, otherwise round up.
    final boolean nearWhole = abs(krnd - kdbl) < EPS_DELTA_THRESHOLD;
    final int k = (int) (nearWhole ? krnd : ceil(kdbl));
    return max(KllSketch.MIN_M, min(KllSketch.MAX_K, k));
  }

  /**
   * Computes the item capacity of a single level and the stream length (N) those items
   * represent, where each item at a level counts for 2^level.
   * @param k the given user sketch configuration parameter
   * @param m the given user sketch configuration parameter
   * @param numLevels the given number of levels of the sketch
   * @param level the specific level to compute its item capacity
   * @return LevelStats with the computed N and items for the given level.
   */
  static LevelStats getLevelCapacityItems(
      final int k,
      final int m,
      final int numLevels,
      final int level) {
    final int capacity = levelCapacity(k, numLevels, level, m);
    // widen before shifting so the product capacity * 2^level is computed as a long
    return new LevelStats((long) capacity << level, numLevels, capacity);
  }

  /**
   * Gets the normalized rank error given k and pmf, computed as COEF / k^EXP.
   * Static method version of the getNormalizedRankError(boolean).
   * The constants were derived as the best fit to 99 percentile empirically measured max error
   * in thousands of trials.
   * @param k the configuration parameter
   * @param pmf if true, returns the "double-sided" normalized rank error for the getPMF() function.
   * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
   * @return if pmf is true, the normalized rank error for the getPMF() function.
   * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
   * @see KllHeapDoublesSketch
   */
  static double getNormalizedRankError(final int k, final boolean pmf) {
    if (pmf) {
      return PMF_COEF / pow(k, PMF_EXP);
    }
    return CDF_COEF / pow(k, CDF_EXP);
  }

  /**
   * Returns the number of items held in levels 1 and above, computed as the distance between
   * the start of level 1 and the end of the top level in the levels array.
   * @param numLevels the current number of levels
   * @param levels the levels array; entry i is the start offset of level i
   * @return levels[numLevels] - levels[1]
   */
  static int getNumRetainedAboveLevelZero(final int numLevels, final int[] levels) {
    final int endOfTopLevel = levels[numLevels];
    final int startOfLevelOne = levels[1];
    return endOfTopLevel - startOfLevelOne;
  }

  /**
   * Returns the item capacity of a specific level: the larger of m and the capacity computed
   * by intCapAux from k and the level's depth below the top level.
   * @param k the accuracy parameter of the sketch. Because of the Java limits on array sizes,
   * the theoretical maximum k is 2^29. However, this implementation of the KLL sketch
   * limits k to 2^16 -1.
   * @param numLevels the number of current levels in the sketch. Maximum is 61.
   * @param level the zero-based index of a level. This varies from 0 to 60.
   * @param m the minimum level width. Default is 8.
   * @return the capacity of a specific level
   */
  static int levelCapacity(final int k, final int numLevels, final int level, final int m) {
    assert (k <= (1 << 29));
    assert (numLevels >= 1) && (numLevels <= 61);
    assert (level >= 0) && (level < numLevels);
    //depth is # levels from the top level (= 0)
    final int depth = numLevels - level - 1;
    final long computedCap = intCapAux(k, depth);
    // never let a level shrink below the minimum width m
    return (int) (computedCap < m ? m : computedCap);
  }

  /**
   * This method is for direct Double and Float sketches only and does the following:
   * 
    *
  • Determines if the required sketch bytes will fit in the current Memory. * If so, it will stretch the positioning of the arrays to fit. Otherwise: *
  • Allocates a new WritableMemory of the required size
  • *
  • Copies over the preamble as is (20 bytes)
  • *
  • The caller is responsible for filling the remainder and updating the preamble.
  • *
* * @param sketch The current sketch that needs to be expanded. * @param newLevelsArrLen the element length of the new Levels array. * @param newItemsArrLen the element length of the new Items array. * @return the new expanded memory with preamble. */ static WritableMemory memorySpaceMgmt( final KllSketch sketch, final int newLevelsArrLen, final int newItemsArrLen) { final KllSketch.SketchType sketchType = sketch.sketchType; if (sketchType == ITEMS_SKETCH) { throw new SketchesArgumentException(UNSUPPORTED_MSG); } final WritableMemory wmem = sketch.getWritableMemory(); if (wmem == null) { return null; } final WritableMemory oldWmem = wmem; final int typeBytes = sketchType.getBytes(); final int requiredSketchBytes = DATA_START_ADR + newLevelsArrLen * Integer.BYTES + 2 * typeBytes + newItemsArrLen * typeBytes; final WritableMemory newWmem; if (requiredSketchBytes > oldWmem.getCapacity()) { //Acquire new WritableMemory newWmem = sketch.getMemoryRequestServer().request(oldWmem, requiredSketchBytes); oldWmem.copyTo(0, newWmem, 0, DATA_START_ADR); //copy preamble (first 20 bytes) } else { //Expand or contract in current memory newWmem = oldWmem; } assert requiredSketchBytes <= newWmem.getCapacity(); return newWmem; } private static String outputData(final KllSketch sketch) { final int[] levelsArr = sketch.getLevelsArray(SketchStructure.UPDATABLE); final int numLevels = sketch.getNumLevels(); final int k = sketch.getK(); final int m = sketch.getM(); final StringBuilder sb = new StringBuilder(); sb.append("### KllSketch itemsArray & levelsArray data:").append(LS); sb.append("Index, Value").append(LS); if (levelsArr[0] > 0) { final String gbg = " Empty or Garbage, size = " + levelsArr[0]; for (int i = 0; i < levelsArr[0]; i++) { sb.append(" ").append(i + ", ").append(sketch.getItemAsString(i)); if (i == 0) { sb.append(gbg); } sb.append(LS); } } int level = 0; while (level < numLevels) { final int fromIndex = levelsArr[level]; final int toIndex = levelsArr[level + 1]; // 
exclusive String lvlData = ""; if (fromIndex < toIndex) { lvlData = " level[" + level + "]=" + levelsArr[level] + ", cap=" + KllHelper.levelCapacity(k, numLevels, level, m) + ", size=" + KllHelper.currentLevelSizeItems(level, numLevels, levelsArr) + ", wt=" + (1 << level) + LS; } for (int i = fromIndex; i < toIndex; i++) { sb.append(" ").append(i + ", ").append(sketch.getItemAsString(i)); if (i == fromIndex) { sb.append(lvlData); } else { sb.append(LS); } } level++; } sb.append(" ----------level[" + level + "]=" + levelsArr[level] + ": itemsArray[].length"); sb.append(LS); sb.append("### End data").append(LS); return sb.toString(); } static long sumTheSampleWeights(final int num_levels, final int[] levels) { long total = 0; long weight = 1; for (int i = 0; i < num_levels; i++) { total += weight * (levels[i + 1] - levels[i]); weight *= 2; } return total; } static byte[] toByteArray(final KllSketch srcSk, final boolean updatable) { //ITEMS_SKETCH byte array is never updatable final boolean myUpdatable = srcSk.sketchType == ITEMS_SKETCH ? false : updatable; final long srcN = srcSk.getN(); final SketchStructure tgtStructure; if (myUpdatable) { tgtStructure = UPDATABLE; } else if (srcN == 0) { tgtStructure = COMPACT_EMPTY; } else if (srcN == 1) { tgtStructure = COMPACT_SINGLE; } else { tgtStructure = COMPACT_FULL; } final int totalBytes = srcSk.currentSerializedSizeBytes(myUpdatable); final byte[] bytesOut = new byte[totalBytes]; final WritableBuffer wbuf = WritableMemory.writableWrap(bytesOut).asWritableBuffer(ByteOrder.LITTLE_ENDIAN); //ints 0,1 final byte preInts = (byte)tgtStructure.getPreInts(); final byte serVer = (byte)tgtStructure.getSerVer(); final byte famId = (byte)(KLL.getID()); final byte flags = (byte) ((srcSk.isEmpty() ? EMPTY_BIT_MASK : 0) | (srcSk.isLevelZeroSorted() ? LEVEL_ZERO_SORTED_BIT_MASK : 0) | (srcSk.getN() == 1 ? 
SINGLE_ITEM_BIT_MASK : 0)); final short k = (short) srcSk.getK(); final byte m = (byte) srcSk.getM(); //load first 8 bytes wbuf.putByte(preInts); //byte 0 wbuf.putByte(serVer); wbuf.putByte(famId); wbuf.putByte(flags); wbuf.putShort(k); wbuf.putByte(m); wbuf.incrementPosition(1); //byte 7 is unused if (tgtStructure == COMPACT_EMPTY) { return bytesOut; } if (tgtStructure == COMPACT_SINGLE) { final byte[] siByteArr = srcSk.getSingleItemByteArr(); final int len = siByteArr.length; wbuf.putByteArray(siByteArr, 0, len); wbuf.incrementPosition(-len); return bytesOut; } // Tgt is either COMPACT_FULL or UPDATABLE //ints 2,3 final long n = srcSk.getN(); //ints 4 final short minK = (short) srcSk.getMinK(); final byte numLevels = (byte) srcSk.getNumLevels(); //end of full preamble final int[] lvlsArr = srcSk.getLevelsArray(tgtStructure); final byte[] minMaxByteArr = srcSk.getMinMaxByteArr(); final byte[] itemsByteArr = tgtStructure == COMPACT_FULL ? srcSk.getRetainedItemsByteArr() : srcSk.getTotalItemsByteArr(); wbuf.putLong(n); wbuf.putShort(minK); wbuf.putByte(numLevels); wbuf.incrementPosition(1); wbuf.putIntArray(lvlsArr, 0, lvlsArr.length); wbuf.putByteArray(minMaxByteArr, 0, minMaxByteArr.length); wbuf.putByteArray(itemsByteArr, 0, itemsByteArr.length); return bytesOut; } static String toStringImpl(final KllSketch sketch, final boolean withSummary, final boolean withData, final ArrayOfItemsSerDe serDe) { final SketchType sketchType = sketch.sketchType; final boolean hasMemory = sketch.hasMemory(); final int k = sketch.getK(); final int m = sketch.getM(); final long n = sketch.getN(); final int numLevels = sketch.getNumLevels(); final int[] fullLevelsArr = sketch.getLevelsArray(UPDATABLE); //final int[] levelsArr = sketch.getLevelsArray(sketch.sketchStructure); final String epsPct = String.format("%.3f%%", sketch.getNormalizedRankError(false) * 100); final String epsPMFPct = String.format("%.3f%%", sketch.getNormalizedRankError(true) * 100); final boolean compact = 
sketch.isCompactMemoryFormat(); final StringBuilder sb = new StringBuilder(); final String directStr = hasMemory ? "Direct" : ""; final String compactStr = compact ? "Compact" : ""; final String readOnlyStr = sketch.isReadOnly() ? "true" + ("(" + (compact ? "Format" : "Memory") + ")") : "false"; final String skTypeStr = sketchType.getName(); final String className = "Kll" + directStr + compactStr + skTypeStr; sb.append(LS).append("### ").append(className).append(" Summary:").append(LS); sb.append(" K : ").append(k).append(LS); sb.append(" Dynamic min K : ").append(sketch.getMinK()).append(LS); sb.append(" M : ").append(m).append(LS); sb.append(" N : ").append(n).append(LS); sb.append(" Epsilon : ").append(epsPct).append(LS); sb.append(" Epsilon PMF : ").append(epsPMFPct).append(LS); sb.append(" Empty : ").append(sketch.isEmpty()).append(LS); sb.append(" Estimation Mode : ").append(sketch.isEstimationMode()).append(LS); sb.append(" Levels : ").append(numLevels).append(LS); sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(LS); sb.append(" Capacity Items : ").append(fullLevelsArr[numLevels]).append(LS); sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(LS); sb.append(" Empty/Garbage Items : ").append(sketch.levelsArr[0]).append(LS); sb.append(" ReadOnly : ").append(readOnlyStr).append(LS); if (sketchType != ITEMS_SKETCH) { sb.append(" Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true)).append(LS); } sb.append(" Compact Storage Bytes : ").append(sketch.currentSerializedSizeBytes(false)).append(LS); final String emptyStr = (sketchType == ITEMS_SKETCH) ? "Null" : "NaN"; sb.append(" Min Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMinItemAsString()) .append(LS); sb.append(" Max Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMaxItemAsString()) .append(LS); sb.append("### End sketch summary").append(LS); if (! 
withSummary) { sb.setLength(0); } if (withData) { sb.append(outputData(sketch)); } return sb.toString(); } /** * Returns very conservative upper bound of the number of levels based on n. * @param n the length of the stream * @return floor( log_2(n) ) */ static int ubOnNumLevels(final long n) { return 1 + Long.numberOfTrailingZeros(floorPowerOf2(n)); } /** * This grows the levels arr by 1 (if needed) and increases the capacity of the items array * at the bottom. Only numLevels, the levels array and the items array are affected. * This assumes sketch is writable and UPDATABLE. * @param sketch the current sketch */ static void addEmptyTopLevelToCompletelyFullSketch(final KllSketch sketch) { final SketchType sketchType = sketch.sketchType; final int[] myCurLevelsArr = sketch.getLevelsArray(sketch.sketchStructure); final int myCurNumLevels = sketch.getNumLevels(); final int myCurTotalItemsCapacity = myCurLevelsArr[myCurNumLevels]; final int myNewNumLevels; final int[] myNewLevelsArr; final int myNewTotalItemsCapacity; double[] myCurDoubleItemsArr = null; double[] myNewDoubleItemsArr = null; double minDouble = Double.NaN; double maxDouble = Double.NaN; float[] myCurFloatItemsArr = null; float[] myNewFloatItemsArr = null; float minFloat = Float.NaN; float maxFloat = Float.NaN; Object[] myCurItemsArr = null; Object[] myNewItemsArr = null; Object minItem = null; Object maxItem = null; if (sketchType == DOUBLES_SKETCH) { final KllDoublesSketch dblSk = (KllDoublesSketch) sketch; myCurDoubleItemsArr = dblSk.getDoubleItemsArray(); minDouble = dblSk.getMinItem(); maxDouble = dblSk.getMaxItem(); //assert we are following a certain growth scheme assert myCurDoubleItemsArr.length == myCurTotalItemsCapacity; } else if (sketchType == FLOATS_SKETCH) { final KllFloatsSketch fltSk = (KllFloatsSketch) sketch; myCurFloatItemsArr = fltSk.getFloatItemsArray(); minFloat = fltSk.getMinItem(); maxFloat = fltSk.getMaxItem(); //assert we are following a certain growth scheme assert 
myCurFloatItemsArr.length == myCurTotalItemsCapacity; } else { //sketchType == ITEMS_SKETCH final KllItemsSketch itmSk = (KllItemsSketch) sketch; myCurItemsArr = itmSk.getTotalItemsArray(); minItem = itmSk.getMinItem(); maxItem = itmSk.getMaxItem(); } assert myCurLevelsArr[0] == 0; //definition of full is part of the growth scheme final int deltaItemsCap = levelCapacity(sketch.getK(), myCurNumLevels + 1, 0, sketch.getM()); myNewTotalItemsCapacity = myCurTotalItemsCapacity + deltaItemsCap; // Check if growing the levels arr if required. // Note that merging MIGHT over-grow levels_, in which case we might not have to grow it final boolean growLevelsArr = myCurLevelsArr.length < myCurNumLevels + 2; // GROW LEVELS ARRAY if (growLevelsArr) { //grow levels arr by one and copy the old data to the new array, extra space at the top. myNewLevelsArr = Arrays.copyOf(myCurLevelsArr, myCurNumLevels + 2); assert myNewLevelsArr.length == myCurLevelsArr.length + 1; myNewNumLevels = myCurNumLevels + 1; sketch.incNumLevels(); //increment for off-heap } else { myNewLevelsArr = myCurLevelsArr; myNewNumLevels = myCurNumLevels; } // This loop updates all level indices EXCLUDING the "extra" index at the top for (int level = 0; level <= myNewNumLevels - 1; level++) { myNewLevelsArr[level] += deltaItemsCap; } myNewLevelsArr[myNewNumLevels] = myNewTotalItemsCapacity; // initialize the new "extra" index at the top // GROW items ARRAY if (sketchType == DOUBLES_SKETCH) { myNewDoubleItemsArr = new double[myNewTotalItemsCapacity]; // copy and shift the current data into the new array System.arraycopy(myCurDoubleItemsArr, 0, myNewDoubleItemsArr, deltaItemsCap, myCurTotalItemsCapacity); } else if (sketchType == FLOATS_SKETCH) { myNewFloatItemsArr = new float[myNewTotalItemsCapacity]; // copy and shift the current items data into the new array System.arraycopy(myCurFloatItemsArr, 0, myNewFloatItemsArr, deltaItemsCap, myCurTotalItemsCapacity); } else { //sketchType == ITEMS_SKETCH myNewItemsArr = new 
Object[myNewTotalItemsCapacity]; // copy and shift the current items data into the new array System.arraycopy(myCurItemsArr, 0, myNewItemsArr, deltaItemsCap, myCurTotalItemsCapacity); } //MEMORY SPACE MANAGEMENT if (sketch.getWritableMemory() != null) { final WritableMemory wmem = memorySpaceMgmt(sketch, myNewLevelsArr.length, myNewTotalItemsCapacity); sketch.setWritableMemory(wmem); } //update our sketch with new expanded spaces sketch.setNumLevels(myNewNumLevels); //for off-heap only sketch.setLevelsArray(myNewLevelsArr); //the KllSketch copy if (sketchType == DOUBLES_SKETCH) { final KllDoublesSketch dblSk = (KllDoublesSketch) sketch; dblSk.setMinItem(minDouble); dblSk.setMaxItem(maxDouble); dblSk.setDoubleItemsArray(myNewDoubleItemsArr); } else if (sketchType == FLOATS_SKETCH) { final KllFloatsSketch fltSk = (KllFloatsSketch) sketch; fltSk.setMinItem(minFloat); fltSk.setMaxItem(maxFloat); fltSk.setFloatItemsArray(myNewFloatItemsArr); } else { //sketchType == ITEMS_SKETCH final KllItemsSketch itmSk = (KllItemsSketch) sketch; itmSk.setMinItem(minItem); itmSk.setMaxItem(maxItem); itmSk.setItemsArray(myNewItemsArr); } } /** * Finds the first level starting with level 0 that exceeds its nominal capacity * @param k configured size of sketch. Range [m, 2^16] * @param m minimum level size. Default is 8. * @param numLevels one-based number of current levels * @return level to compact */ static int findLevelToCompact(final int k, final int m, final int numLevels, final int[] levels) { int level = 0; while (true) { assert level < numLevels; final int pop = levels[level + 1] - levels[level]; final int cap = KllHelper.levelCapacity(k, numLevels, level, m); if (pop >= cap) { return level; } level++; } } /** * Computes the actual item capacity of a given level given its depth index. * If the depth of levels exceeds 30, this uses a folding technique to accurately compute the * actual level capacity up to a depth of 60 (or 61 levels). 
* Without folding, the internal calculations would exceed the capacity of a long. * This method just decides whether folding is required or not. * @param k the configured k of the sketch * @param depth the zero-based index of the level being computed. * @return the actual capacity of a given level given its depth index. */ private static long intCapAux(final int k, final int depth) { if (depth <= 30) { return intCapAuxAux(k, depth); } final int half = depth / 2; final int rest = depth - half; final long tmp = intCapAuxAux(k, half); return intCapAuxAux(tmp, rest); } /** * Performs the integer based calculation of an individual level (or folded level). * @param k the configured k of the sketch * @param depth the zero-based index of the level being computed. The max depth is 30! * @return the actual capacity of a given level given its depth index. */ private static long intCapAuxAux(final long k, final int depth) { final long twok = k << 1; // for rounding at the end, pre-multiply by 2 here, divide by 2 during rounding. final long tmp = ((twok << depth) / powersOfThree[depth]); //2k* (2/3)^depth. 2k also keeps the fraction larger. final long result = ((tmp + 1L) >>> 1); // (tmp + 1)/2. If odd, round up. This guarantees an integer. assert (result <= k); return result; } private final static boolean enablePrinting = false; /** * @param format the format * @param args the args */ private static final void printf(final String format, final Object ... args) { if (enablePrinting) { System.out.printf(format, args); } } /** * @param o the Object to println */ private static final void println(final Object o) { if (enablePrinting) { System.out.println(o.toString()); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy