All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.datasketches.kll.KllFloatsQuantileCalculator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.kll;

import java.util.Arrays;

import org.apache.datasketches.QuantilesHelper;

/**
 * Data structure for answering quantile queries based on the samples from KllSketch.
 *
 * <p>On construction the retained items of all levels are copied, each item is tagged with a
 * weight that doubles with every source level (1, 2, 4, ...), the per-level sorted runs are merged
 * into one sorted sequence (items and weights are moved in tandem), and the weights are then
 * handed to {@code QuantilesHelper.convertToPrecedingCummulative} so that positional queries can
 * be answered by a lookup into the weights array.
 *
 * <p>Instances do not modify their state after construction.
 *
 * @author Kevin Lang
 * @author Alexander Saydakov
 */
final class KllFloatsQuantileCalculator {

  /** Value of n supplied by the caller; used to translate phi into a position. */
  private final long n_;
  /** Copy of the retained items; globally sorted once construction completes. */
  private final float[] items_;
  /** Weight per item, plus one extra trailing slot (so its length is items_.length + 1). */
  private final long[] weights_;
  /** Boundaries of the non-empty levels inside items_; entries 0..numLevels_ are meaningful. */
  private final int[] levels_;
  /** Number of non-empty levels found in the source sketch. */
  private final int numLevels_;

  /**
   * Builds the calculator from the raw arrays of a sketch.
   * Assumes that all levels are sorted, including level 0.
   *
   * @param items the items array of the sketch; only the range
   *     {@code [levels[0], levels[numLevels])} is read
   * @param levels the level boundaries into {@code items}; must hold at least
   *     {@code numLevels + 1} entries
   * @param numLevels the number of levels described by {@code levels}
   * @param n the value passed to {@code QuantilesHelper.posOfPhi} when answering queries
   *     (presumably the total number of items seen by the sketch - confirm against the caller)
   */
  KllFloatsQuantileCalculator(final float[] items, final int[] levels, final int numLevels,
      final long n) {
    n_ = n;
    final int numItems = levels[numLevels] - levels[0];
    items_ = new float[numItems];
    weights_ = new long[numItems + 1]; // one more is intentional
    levels_ = new int[numLevels + 1];
    numLevels_ = populateFromSketch(items, levels, numLevels, numItems);
    blockyTandemMergeSort(items_, weights_, levels_, numLevels_);
    QuantilesHelper.convertToPrecedingCummulative(weights_);
  }

  /**
   * Returns the retained item selected for the given phi.
   *
   * @param phi the query value that {@code QuantilesHelper.posOfPhi} converts into a position
   *     (presumably a normalized rank in [0, 1] - confirm against QuantilesHelper)
   * @return the selected item, or {@code Float.NaN} if this calculator holds no items
   */
  float getQuantile(final double phi) {
    // Nothing to select from; avoid indexing into an empty items array.
    if (items_.length == 0) { return Float.NaN; }
    final long pos = QuantilesHelper.posOfPhi(phi, n_);
    return approximatelyAnswerPositionalQuery(pos);
  }

  /**
   * Looks up the item whose weight chunk contains the given position.
   *
   * @param pos the position to look up; expected to be in [0, n_), which is checked
   *     by assertions only
   * @return the item at the index reported by {@code QuantilesHelper.chunkContainingPos}
   */
  private float approximatelyAnswerPositionalQuery(final long pos) {
    assert pos >= 0;
    assert pos < n_;
    final int index = QuantilesHelper.chunkContainingPos(weights_, pos);
    return items_[index];
  }

  /**
   * Copies the retained items into {@code items_}, assigns each item the weight of its source
   * level, and records the boundaries of the non-empty levels in {@code levels_}.
   *
   * @param srcItems the items array of the sketch
   * @param srcLevels the level boundaries of the sketch
   * @param numLevels the number of levels in the sketch
   * @param numItems the number of retained items, i.e. {@code srcLevels[numLevels] - srcLevels[0]}
   * @return the number of non-empty levels that were recorded in {@code levels_}
   */
  private int populateFromSketch(final float[] srcItems, final int[] srcLevels,
      final int numLevels, final int numItems) {
    final int offset = srcLevels[0];
    System.arraycopy(srcItems, offset, items_, 0, numItems);
    int dstLevel = 0;
    long weight = 1; // doubles with every source level, empty or not
    for (int srcLevel = 0; srcLevel < numLevels; srcLevel++) {
      final int fromIndex = srcLevels[srcLevel] - offset;
      final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive
      if (fromIndex < toIndex) { // skip empty levels
        Arrays.fill(weights_, fromIndex, toIndex, weight);
        // An empty level has fromIndex == toIndex, so the recorded boundaries stay contiguous.
        levels_[dstLevel] = fromIndex;
        levels_[dstLevel + 1] = toIndex;
        dstLevel++;
      }
      weight *= 2;
    }
    weights_[numItems] = 0; // the extra trailing slot carries no weight
    return dstLevel;
  }

  /**
   * Merges the sorted per-level runs of {@code items} into one sorted sequence, moving the
   * entries of {@code weights} in tandem. The result is left in {@code items} and
   * {@code weights}.
   *
   * @param items the items, sorted within each level
   * @param weights the weights; may be longer than {@code items}, extra entries are not touched
   * @param levels the level boundaries into {@code items}
   * @param numLevels the number of levels; zero or one level requires no work
   */
  private static void blockyTandemMergeSort(final float[] items, final long[] weights,
      final int[] levels, final int numLevels) {
    // Zero levels (no items at all) or a single, already sorted level: nothing to merge.
    // The zero case must be caught here, otherwise the recursion below never terminates.
    if (numLevels <= 1) { return; }

    // duplicate the input in preparation for the "ping-pong" copy reduction strategy.
    final float[] itemsTmp = Arrays.copyOf(items, items.length);
    final long[] weightsTmp = Arrays.copyOf(weights, items.length); // don't need the extra one here

    blockyTandemMergeSortRecursion(itemsTmp, weightsTmp, items, weights, levels, 0, numLevels);
  }

  /**
   * Recursive step of the merge sort. The levels are split into two halves, each half is sorted
   * with the roles of source and destination swapped, and the two halves are then merged from
   * the source arrays into the destination arrays.
   *
   * <p>A single level is not copied at all: this relies on the source and destination arrays
   * having started out with identical content.
   *
   * @param itemsSrc the items to merge from
   * @param weightsSrc the weights to merge from
   * @param itemsDst the items to merge into
   * @param weightsDst the weights to merge into
   * @param levels the level boundaries
   * @param startingLevel the index of the first level to process
   * @param numLevels the number of levels to process
   */
  private static void blockyTandemMergeSortRecursion(final float[] itemsSrc, final long[] weightsSrc,
      final float[] itemsDst, final long[] weightsDst, final int[] levels, final int startingLevel,
      final int numLevels) {
    if (numLevels <= 1) { return; }
    final int numLevels1 = numLevels / 2;
    final int numLevels2 = numLevels - numLevels1;
    assert numLevels1 >= 1;
    assert numLevels2 >= numLevels1;
    final int startingLevel1 = startingLevel;
    final int startingLevel2 = startingLevel + numLevels1;
    // swap roles of src and dst
    blockyTandemMergeSortRecursion(itemsDst, weightsDst, itemsSrc, weightsSrc, levels,
        startingLevel1, numLevels1);
    blockyTandemMergeSortRecursion(itemsDst, weightsDst, itemsSrc, weightsSrc, levels,
        startingLevel2, numLevels2);
    tandemMerge(itemsSrc, weightsSrc, itemsDst, weightsDst, levels, startingLevel1, numLevels1,
        startingLevel2, numLevels2);
  }

  /**
   * Merges two adjacent sorted runs from the source arrays into the destination arrays,
   * moving each weight together with its item. When two items compare equal (or the comparison
   * is false for any other reason), the item of the second run is taken first.
   *
   * @param itemsSrc the items to read
   * @param weightsSrc the weights to read
   * @param itemsDst the items to write
   * @param weightsDst the weights to write
   * @param levelStarts the level boundaries
   * @param startingLevel1 the first level of the first run
   * @param numLevels1 the number of levels in the first run
   * @param startingLevel2 the first level of the second run
   * @param numLevels2 the number of levels in the second run
   */
  private static void tandemMerge(final float[] itemsSrc, final long[] weightsSrc,
      final float[] itemsDst, final long[] weightsDst,
      final int[] levelStarts, final int startingLevel1, final int numLevels1,
      final int startingLevel2, final int numLevels2) {
    final int fromIndex1 = levelStarts[startingLevel1];
    final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive
    final int fromIndex2 = levelStarts[startingLevel2];
    final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive
    int iSrc1 = fromIndex1;
    int iSrc2 = fromIndex2;
    int iDst = fromIndex1; // output starts where the first run starts

    while ((iSrc1 < toIndex1) && (iSrc2 < toIndex2)) {
      if (itemsSrc[iSrc1] < itemsSrc[iSrc2]) {
        itemsDst[iDst] = itemsSrc[iSrc1];
        weightsDst[iDst] = weightsSrc[iSrc1];
        iSrc1++;
      } else {
        itemsDst[iDst] = itemsSrc[iSrc2];
        weightsDst[iDst] = weightsSrc[iSrc2];
        iSrc2++;
      }
      iDst++;
    }
    // At most one of the two runs still has elements; copy its tail in bulk.
    if (iSrc1 < toIndex1) {
      System.arraycopy(itemsSrc, iSrc1, itemsDst, iDst, toIndex1 - iSrc1);
      System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1);
    } else if (iSrc2 < toIndex2) {
      System.arraycopy(itemsSrc, iSrc2, itemsDst, iDst, toIndex2 - iSrc2);
      System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2);
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy