All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.math4.stat.descriptive.rank.Percentile Maven / Gradle / Ivy

Go to download

Statistical sampling library for use in virtdata libraries, based on apache commons math 4

There is a newer version: 5.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math4.stat.descriptive.rank;

import java.io.Serializable;
import java.util.Arrays;
import java.util.BitSet;

import org.apache.commons.math4.exception.MathIllegalArgumentException;
import org.apache.commons.math4.exception.NullArgumentException;
import org.apache.commons.math4.exception.OutOfRangeException;
import org.apache.commons.math4.exception.util.LocalizedFormats;
import org.apache.commons.math4.stat.descriptive.AbstractUnivariateStatistic;
import org.apache.commons.math4.stat.ranking.NaNStrategy;
import org.apache.commons.math4.util.FastMath;
import org.apache.commons.math4.util.KthSelector;
import org.apache.commons.math4.util.MathArrays;
import org.apache.commons.math4.util.MathUtils;
import org.apache.commons.math4.util.MedianOf3PivotingStrategy;
import org.apache.commons.math4.util.PivotingStrategyInterface;
import org.apache.commons.numbers.core.Precision;

/**
 * Provides percentile computation.
 * 

* There are several commonly used methods for estimating percentiles (a.k.a. * quantiles) based on sample data. For large samples, the different methods * agree closely, but when sample sizes are small, different methods will give * significantly different results. The algorithm implemented here works as follows: *

    *
  1. Let n be the length of the (sorted) array and * 0 < p <= 100 be the desired percentile.
  2. *
  3. If n = 1 return the unique array element (regardless of * the value of p); otherwise
  4. *
  5. Compute the estimated percentile position * pos = p * (n + 1) / 100 and the difference, d * between pos and floor(pos) (i.e. the fractional * part of pos).
  6. *
  7. If pos < 1 return the smallest element in the array.
  8. *
  9. Else if pos >= n return the largest element in the array.
  10. *
  11. Else let lower be the element in position * floor(pos) in the array and let upper be the * next element in the array. Return lower + d * (upper - lower) *
  12. *
*

* To compute percentiles, the data must be at least partially ordered. Input * arrays are copied and recursively partitioned using an ordering definition. * The ordering used by Arrays.sort(double[]) is the one determined * by {@link Double#compareTo(Double)}. This ordering makes * Double.NaN larger than any other value (including * Double.POSITIVE_INFINITY). Therefore, for example, the median * (50th percentile) of * {0, 1, 2, 3, 4, Double.NaN} evaluates to 2.5.

*

* Since percentile estimation usually involves interpolation between array * elements, arrays containing NaN or infinite values will often * result in NaN or infinite values returned.

*

* Further, to include different estimation types such as R1, R2 as mentioned in * Quantile page(wikipedia), * a type specific NaN handling strategy is used to closely match with the * typically observed results from popular tools like R(R1-R9), Excel(R7).

*

* Since 2.2, Percentile uses only selection instead of complete sorting * and caches selection algorithm state between calls to the various * {@code evaluate} methods. This greatly improves efficiency, both for a single * percentile and multiple percentile computations. To maximize performance when * multiple percentiles are computed based on the same data, users should set the * data array once using either one of the {@link #evaluate(double[], double)} or * {@link #setData(double[])} methods and thereafter {@link #evaluate(double)} * with just the percentile provided. *

*

* Note that this implementation is not synchronized. If * multiple threads access an instance of this class concurrently, and at least * one of the threads invokes the increment() or * clear() method, it must be synchronized externally.

*/ public class Percentile extends AbstractUnivariateStatistic implements Serializable { /** Serializable version identifier */ private static final long serialVersionUID = 20150412L; /** Maximum number of partitioning pivots cached (each level double the number of pivots). */ private static final int MAX_CACHED_LEVELS = 10; /** Maximum number of cached pivots in the pivots cached array */ private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1; /** Default KthSelector used with default pivoting strategy */ private final KthSelector kthSelector; /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */ private final EstimationType estimationType; /** NaN Handling of the input as defined by {@link NaNStrategy} */ private final NaNStrategy nanStrategy; /** * Determines what percentile is computed when evaluate() is activated * with no quantile argument. */ private double quantile; /** Cached pivots. */ private int[] cachedPivots; /** * Constructs a Percentile with the following defaults. *
    *
  • default quantile: 50.0, can be reset with {@link #setQuantile(double)}
  • *
  • default estimation type: {@link EstimationType#LEGACY}, * can be reset with {@link #withEstimationType(EstimationType)}
  • *
  • default NaN strategy: {@link NaNStrategy#REMOVED}, * can be reset with {@link #withNaNStrategy(NaNStrategy)}
  • *
  • a KthSelector that makes use of {@link MedianOf3PivotingStrategy}, * can be reset with {@link #withKthSelector(KthSelector)}
  • *
*/ public Percentile() { // No try-catch or advertised exception here - arg is valid this(50.0); } /** * Constructs a Percentile with the specific quantile value and the following *
    *
  • default method type: {@link EstimationType#LEGACY}
  • *
  • default NaN strategy: {@link NaNStrategy#REMOVED}
  • *
  • a Kth Selector : {@link KthSelector}
  • *
* @param quantile the quantile * @throws MathIllegalArgumentException if p is not greater than 0 and less * than or equal to 100 */ public Percentile(final double quantile) throws MathIllegalArgumentException { this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED, new KthSelector(new MedianOf3PivotingStrategy())); } /** * Copy constructor, creates a new {@code Percentile} identical * to the {@code original} * * @param original the {@code Percentile} instance to copy * @throws NullArgumentException if original is null */ public Percentile(final Percentile original) throws NullArgumentException { MathUtils.checkNotNull(original); estimationType = original.getEstimationType(); nanStrategy = original.getNaNStrategy(); kthSelector = original.getKthSelector(); setData(original.getDataRef()); if (original.cachedPivots != null) { System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length); } setQuantile(original.quantile); } /** * Constructs a Percentile with the specific quantile value, * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}. * * @param quantile the quantile to be computed * @param estimationType one of the percentile {@link EstimationType estimation types} * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs * @param kthSelector a {@link KthSelector} to use for pivoting during search * @throws MathIllegalArgumentException if p is not within (0,100] * @throws NullArgumentException if type or NaNStrategy passed is null */ protected Percentile(final double quantile, final EstimationType estimationType, final NaNStrategy nanStrategy, final KthSelector kthSelector) throws MathIllegalArgumentException { setQuantile(quantile); cachedPivots = null; MathUtils.checkNotNull(estimationType); MathUtils.checkNotNull(nanStrategy); MathUtils.checkNotNull(kthSelector); this.estimationType = estimationType; this.nanStrategy = nanStrategy; this.kthSelector = kthSelector; } /** {@inheritDoc} */ @Override public void setData(final double[] values) { if (values == null) { cachedPivots = null; } else { cachedPivots = new int[PIVOTS_HEAP_LENGTH]; Arrays.fill(cachedPivots, -1); } super.setData(values); } /** {@inheritDoc} */ @Override public void setData(final double[] values, final int begin, final int length) throws MathIllegalArgumentException { if (values == null) { cachedPivots = null; } else { cachedPivots = new int[PIVOTS_HEAP_LENGTH]; Arrays.fill(cachedPivots, -1); } super.setData(values, begin, length); } /** * Returns the result of evaluating the statistic over the stored data. *

* The stored array is the one which was set by previous calls to * {@link #setData(double[])} *

* @param p the percentile value to compute * @return the value of the statistic applied to the stored data * @throws MathIllegalArgumentException if p is not a valid quantile value * (p must be greater than 0 and less than or equal to 100) */ public double evaluate(final double p) throws MathIllegalArgumentException { return evaluate(getDataRef(), p); } /** * Returns an estimate of the pth percentile of the values * in the values array. *

* Calls to this method do not modify the internal quantile * state of this statistic.

*
    *
  • Returns Double.NaN if values has length * 0
  • *
  • Returns (for any value of p) values[0] * if values has length 1
  • *
  • Throws MathIllegalArgumentException if values * is null or p is not a valid quantile value (p must be greater than 0 * and less than or equal to 100)
  • *
*

* See {@link Percentile} for a description of the percentile estimation * algorithm used.

* * @param values input array of values * @param p the percentile value to compute * @return the percentile value or Double.NaN if the array is empty * @throws MathIllegalArgumentException if values is null or p is invalid */ public double evaluate(final double[] values, final double p) throws MathIllegalArgumentException { MathArrays.verifyValues(values, 0, 0); return evaluate(values, 0, values.length, p); } /** * Returns an estimate of the quantileth percentile of the * designated values in the values array. The quantile * estimated is determined by the quantile property. *
    *
  • Returns Double.NaN if length = 0
  • *
  • Returns (for any value of quantile) * values[begin] if length = 1
  • *
  • Throws MathIllegalArgumentException if values * is null, or start or length is invalid
  • *
*

* See {@link Percentile} for a description of the percentile estimation * algorithm used.

* * @param values the input array * @param start index of the first array element to include * @param length the number of elements to include * @return the percentile value * @throws MathIllegalArgumentException if the parameters are not valid * */ @Override public double evaluate(final double[] values, final int start, final int length) throws MathIllegalArgumentException { return evaluate(values, start, length, quantile); } /** * Returns an estimate of the pth percentile of the values * in the values array, starting with the element in (0-based) * position begin in the array and including length * values. *

* Calls to this method do not modify the internal quantile * state of this statistic.

*
    *
  • Returns Double.NaN if length = 0
  • *
  • Returns (for any value of p) values[begin] * if length = 1
  • *
  • Throws MathIllegalArgumentException if values * is null , begin or length is invalid, or * p is not a valid quantile value (p must be greater than 0 * and less than or equal to 100)
  • *
*

* See {@link Percentile} for a description of the percentile estimation * algorithm used.

* * @param values array of input values * @param p the percentile to compute * @param begin the first (0-based) element to include in the computation * @param length the number of array elements to include * @return the percentile value * @throws MathIllegalArgumentException if the parameters are not valid or the * input array is null */ public double evaluate(final double[] values, final int begin, final int length, final double p) throws MathIllegalArgumentException { MathArrays.verifyValues(values, begin, length); if (p > 100 || p <= 0) { throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100); } if (length == 0) { return Double.NaN; } if (length == 1) { return values[begin]; // always return single value for n = 1 } final double[] work = getWorkArray(values, begin, length); final int[] pivotsHeap = getPivots(values); return work.length == 0 ? Double.NaN : estimationType.evaluate(work, pivotsHeap, p, kthSelector); } /** * Returns the value of the quantile field (determines what percentile is * computed when evaluate() is called with no quantile argument). * * @return quantile set while construction or {@link #setQuantile(double)} */ public double getQuantile() { return quantile; } /** * Sets the value of the quantile field (determines what percentile is * computed when evaluate() is called with no quantile argument). * * @param p a value between 0 < p <= 100 * @throws MathIllegalArgumentException if p is not greater than 0 and less * than or equal to 100 */ public void setQuantile(final double p) throws MathIllegalArgumentException { if (p <= 0 || p > 100) { throw new OutOfRangeException( LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100); } quantile = p; } /** * {@inheritDoc} */ @Override public Percentile copy() { return new Percentile(this); } /** * Get the work array to operate. Makes use of prior {@code storedData} if * it exists or else do a check on NaNs and copy a subset of the array * defined by begin and length parameters. The set {@link #nanStrategy} will * be used to either retain/remove/replace any NaNs present before returning * the resultant array. * * @param values the array of numbers * @param begin index to start reading the array * @param length the length of array to be read from the begin index * @return work array sliced from values in the range [begin,begin+length) * @throws MathIllegalArgumentException if values or indices are invalid */ protected double[] getWorkArray(final double[] values, final int begin, final int length) { final double[] work; if (values == getDataRef()) { work = getDataRef(); } else { switch (nanStrategy) { case MAXIMAL:// Replace NaNs with +INFs work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY); break; case MINIMAL:// Replace NaNs with -INFs work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY); break; case REMOVED:// Drop NaNs from data work = removeAndSlice(values, begin, length, Double.NaN); break; case FAILED:// just throw exception as NaN is un-acceptable work = copyOf(values, begin, length); MathArrays.checkNotNaN(work); break; default: //FIXED work = copyOf(values,begin,length); break; } } return work; } /** * Make a copy of the array for the slice defined by array part from * [begin, begin+length) * @param values the input array * @param begin start index of the array to include * @param length number of elements to include from begin * @return copy of a slice of the original array */ private static double[] copyOf(final double[] values, final int begin, final int length) { MathArrays.verifyValues(values, begin, length); return MathArrays.copyOfRange(values, begin, begin + length); } /** * Replace every occurrence of a given value with a replacement value in a * copied slice of array defined by array part from [begin, begin+length). * @param values the input array * @param begin start index of the array to include * @param length number of elements to include from begin * @param original the value to be replaced with * @param replacement the value to be used for replacement * @return the copy of sliced array with replaced values */ private static double[] replaceAndSlice(final double[] values, final int begin, final int length, final double original, final double replacement) { final double[] temp = copyOf(values, begin, length); for(int i = 0; i < length; i++) { temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ? replacement : temp[i]; } return temp; } /** * Remove the occurrence of a given value in a copied slice of array * defined by the array part from [begin, begin+length). * @param values the input array * @param begin start index of the array to include * @param length number of elements to include from begin * @param removedValue the value to be removed from the sliced array * @return the copy of the sliced array after removing the removedValue */ private static double[] removeAndSlice(final double[] values, final int begin, final int length, final double removedValue) { MathArrays.verifyValues(values, begin, length); final double[] temp; //BitSet(length) to indicate where the removedValue is located final BitSet bits = new BitSet(length); for (int i = begin; i < begin+length; i++) { if (Precision.equalsIncludingNaN(removedValue, values[i])) { bits.set(i - begin); } } //Check if empty then create a new copy if (bits.isEmpty()) { temp = copyOf(values, begin, length); // Nothing removed, just copy } else if(bits.cardinality() == length) { temp = new double[0]; // All removed, just empty }else { // Some removable, so new temp = new double[length - bits.cardinality()]; int start = begin; //start index from source array (i.e values) int dest = 0; //dest index in destination array(i.e temp) int nextOne = -1; //nextOne is the index of bit set of next one int bitSetPtr = 0; //bitSetPtr is start index pointer of bitset while ((nextOne = bits.nextSetBit(bitSetPtr)) != -1) { final int lengthToCopy = nextOne - bitSetPtr; System.arraycopy(values, start, temp, dest, lengthToCopy); dest += lengthToCopy; start = begin + (bitSetPtr = bits.nextClearBit(nextOne)); } //Copy any residue past start index till begin+length if (start < begin + length) { System.arraycopy(values,start,temp,dest,begin + length - start); } } return temp; } /** * Get pivots which is either cached or a newly created one * * @param values array containing the input numbers * @return cached pivots or a newly created one */ private int[] getPivots(final double[] values) { final int[] pivotsHeap; if (values == getDataRef()) { pivotsHeap = cachedPivots; } else { pivotsHeap = new int[PIVOTS_HEAP_LENGTH]; Arrays.fill(pivotsHeap, -1); } return pivotsHeap; } /** * Get the estimation {@link EstimationType type} used for computation. * * @return the {@code estimationType} set */ public EstimationType getEstimationType() { return estimationType; } /** * Build a new instance similar to the current one except for the * {@link EstimationType estimation type}. *

* This method is intended to be used as part of a fluent-type builder * pattern. Building finely tune instances should be done as follows: *

*
     *   Percentile customized = new Percentile(quantile).
     *                           withEstimationType(estimationType).
     *                           withNaNStrategy(nanStrategy).
     *                           withKthSelector(kthSelector);
     * 
*

* If any of the {@code withXxx} method is omitted, the default value for * the corresponding customization parameter will be used. *

* @param newEstimationType estimation type for the new instance * @return a new instance, with changed estimation type * @throws NullArgumentException when newEstimationType is null */ public Percentile withEstimationType(final EstimationType newEstimationType) { return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector); } /** * Get the {@link NaNStrategy NaN Handling} strategy used for computation. * @return {@code NaN Handling} strategy set during construction */ public NaNStrategy getNaNStrategy() { return nanStrategy; } /** * Build a new instance similar to the current one except for the * {@link NaNStrategy NaN handling} strategy. *

* This method is intended to be used as part of a fluent-type builder * pattern. Building finely tune instances should be done as follows: *

*
     *   Percentile customized = new Percentile(quantile).
     *                           withEstimationType(estimationType).
     *                           withNaNStrategy(nanStrategy).
     *                           withKthSelector(kthSelector);
     * 
*

* If any of the {@code withXxx} method is omitted, the default value for * the corresponding customization parameter will be used. *

* @param newNaNStrategy NaN strategy for the new instance * @return a new instance, with changed NaN handling strategy * @throws NullArgumentException when newNaNStrategy is null */ public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) { return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector); } /** * Get the {@link KthSelector kthSelector} used for computation. * @return the {@code kthSelector} set */ public KthSelector getKthSelector() { return kthSelector; } /** * Get the {@link PivotingStrategyInterface} used in KthSelector for computation. * @return the pivoting strategy set */ public PivotingStrategyInterface getPivotingStrategy() { return kthSelector.getPivotingStrategy(); } /** * Build a new instance similar to the current one except for the * {@link KthSelector kthSelector} instance specifically set. *

* This method is intended to be used as part of a fluent-type builder * pattern. Building finely tune instances should be done as follows: *

*
     *   Percentile customized = new Percentile(quantile).
     *                           withEstimationType(estimationType).
     *                           withNaNStrategy(nanStrategy).
     *                           withKthSelector(newKthSelector);
     * 
*

* If any of the {@code withXxx} method is omitted, the default value for * the corresponding customization parameter will be used. *

* @param newKthSelector KthSelector for the new instance * @return a new instance, with changed KthSelector * @throws NullArgumentException when newKthSelector is null */ public Percentile withKthSelector(final KthSelector newKthSelector) { return new Percentile(quantile, estimationType, nanStrategy, newKthSelector); } /** * An enum for various estimation strategies of a percentile referred in * wikipedia on quantile * with the names of enum matching those of types mentioned in * wikipedia. *

* Each enum corresponding to the specific type of estimation in wikipedia * implements the respective formulae that specializes in the below aspects *

    *
  • An index method to calculate approximate index of the * estimate
  • *
  • An estimate method to estimate a value found at the earlier * computed index
  • *
  • A minLimit on the quantile for which first element of sorted * input is returned as an estimate
  • *
  • A maxLimit on the quantile for which last element of sorted * input is returned as an estimate
  • *
*

* Users can now create {@link Percentile} by explicitly passing this enum; * such as by invoking {@link Percentile#withEstimationType(EstimationType)} *

* References: *

    *
  1. * Wikipedia on quantile *
  2. *
  3. * * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical * packages, American Statistician 50, 361–365
  4. *
  5. * * R-Manual
  6. *
*/ public enum EstimationType { /** * This is the default type used in the {@link Percentile}.This method * has the following formulae for index and estimates
* \( \begin{align} * &index = (N+1)p\ \\ * &estimate = x_{\lceil h\,-\,1/2 \rceil} \\ * &minLimit = 0 \\ * &maxLimit = 1 \\ * \end{align}\) */ LEGACY("Legacy Apache Commons Math") { /** * {@inheritDoc}.This method in particular makes use of existing * Apache Commons Math style of picking up the index. */ @Override protected double index(final double p, final int length) { final double minLimit = 0d; final double maxLimit = 1d; return Double.compare(p, minLimit) == 0 ? 0 : Double.compare(p, maxLimit) == 0 ? length : p * (length + 1); } }, /** * The method R_1 has the following formulae for index and estimates
* \( \begin{align} * &index= Np + 1/2\, \\ * &estimate= x_{\lceil h\,-\,1/2 \rceil} \\ * &minLimit = 0 \\ * \end{align}\) */ R_1("R-1") { @Override protected double index(final double p, final int length) { final double minLimit = 0d; return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; } /** * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5) */ @Override protected double estimate(final double[] values, final int[] pivotsHeap, final double pos, final int length, final KthSelector selector) { return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector); } }, /** * The method R_2 has the following formulae for index and estimates
* \( \begin{align} * &index= Np + 1/2\, \\ * &estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} + * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\ * &minLimit = 0 \\ * &maxLimit = 1 \\ * \end{align}\) */ R_2("R-2") { @Override protected double index(final double p, final int length) { final double minLimit = 0d; final double maxLimit = 1d; return Double.compare(p, maxLimit) == 0 ? length : Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5; } /** * {@inheritDoc}This method in particular for R_2 averages the * values at ceil(p+0.5) and floor(p-0.5). */ @Override protected double estimate(final double[] values, final int[] pivotsHeap, final double pos, final int length, final KthSelector selector) { final double low = super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector); final double high = super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, selector); return (low + high) / 2; } }, /** * The method R_3 has the following formulae for index and estimates
* \( \begin{align} * &index= Np \\ * &estimate= x_{\lfloor h \rceil}\, \\ * &minLimit = 0.5/N \\ * \end{align}\) */ R_3("R-3") { @Override protected double index(final double p, final int length) { final double minLimit = 1d/2 / length; return Double.compare(p, minLimit) <= 0 ? 0 : FastMath.rint(length * p); } }, /** * The method R_4 has the following formulae for index and estimates
* \( \begin{align} * &index= Np\, \\ * &estimate= x_{\lfloor h \rfloor} + (h - * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h * \rfloor}) \\ * &minLimit = 1/N \\ * &maxLimit = 1 \\ * \end{align}\) */ R_4("R-4") { @Override protected double index(final double p, final int length) { final double minLimit = 1d / length; final double maxLimit = 1d; return Double.compare(p, minLimit) < 0 ? 0 : Double.compare(p, maxLimit) == 0 ? length : length * p; } }, /** * The method R_5 has the following formulae for index and estimates
* \( \begin{align} * &index= Np + 1/2\\ * &estimate= x_{\lfloor h \rfloor} + (h - * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h * \rfloor}) \\ * &minLimit = 0.5/N \\ * &maxLimit = (N-0.5)/N * \end{align}\) */ R_5("R-5") { @Override protected double index(final double p, final int length) { final double minLimit = 1d/2 / length; final double maxLimit = (length - 0.5) / length; return Double.compare(p, minLimit) < 0 ? 0 : Double.compare(p, maxLimit) >= 0 ? length : length * p + 0.5; } }, /** * The method R_6 has the following formulae for index and estimates
* \( \begin{align} * &index= (N + 1)p \\ * &estimate= x_{\lfloor h \rfloor} + (h - * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h * \rfloor}) \\ * &minLimit = 1/(N+1) \\ * &maxLimit = N/(N+1) \\ * \end{align}\) *

* Note: This method computes the index in a manner very close to * the default Commons Math Percentile existing implementation. However * the difference to be noted is in picking up the limits with which * first element (p<1(N+1)) and last elements (p>N/(N+1))are done. * While in default case; these are done with p=0 and p=1 respectively. */ R_6("R-6") { @Override protected double index(final double p, final int length) { final double minLimit = 1d / (length + 1); final double maxLimit = 1d * length / (length + 1); return Double.compare(p, minLimit) < 0 ? 0 : Double.compare(p, maxLimit) >= 0 ? length : (length + 1) * p; } }, /** * The method R_7 implements Microsoft Excel style computation has the * following formulae for index and estimates.
* \( \begin{align} * &index = (N-1)p + 1 \\ * &estimate = x_{\lfloor h \rfloor} + (h - * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h * \rfloor}) \\ * &minLimit = 0 \\ * &maxLimit = 1 \\ * \end{align}\) */ R_7("R-7") { @Override protected double index(final double p, final int length) { final double minLimit = 0d; final double maxLimit = 1d; return Double.compare(p, minLimit) == 0 ? 0 : Double.compare(p, maxLimit) == 0 ? length : 1 + (length - 1) * p; } }, /** * The method R_8 has the following formulae for index and estimates
* \( \begin{align} * &index = (N + 1/3)p + 1/3 \\ * &estimate = x_{\lfloor h \rfloor} + (h - \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h * \rfloor}) \\ * &minLimit = (2/3)/(N+1/3) \\ * &maxLimit = (N-1/3)/(N+1/3) \\ * \end{align}\) *

* As per Ref [2,3] this approach is most recommended as it provides * an approximate median-unbiased estimate regardless of distribution. */ R_8("R-8") { @Override protected double index(final double p, final int length) { final double minLimit = 2 * (1d / 3) / (length + 1d / 3); final double maxLimit = (length - 1d / 3) / (length + 1d / 3); return Double.compare(p, minLimit) < 0 ? 0 : Double.compare(p, maxLimit) >= 0 ? length : (length + 1d / 3) * p + 1d / 3; } }, /** * The method R_9 has the following formulae for index and estimates
* \( \begin{align} * &index = (N + 1/4)p + 3/8\\ * &estimate = x_{\lfloor h \rfloor} + (h - \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h * \rfloor}) \\ * &minLimit = (5/8)/(N+1/4) \\ * &maxLimit = (N-3/8)/(N+1/4) \\ * \end{align}\) */ R_9("R-9") { @Override protected double index(final double p, final int length) { final double minLimit = 5d/8 / (length + 0.25); final double maxLimit = (length - 3d/8) / (length + 0.25); return Double.compare(p, minLimit) < 0 ? 0 : Double.compare(p, maxLimit) >= 0 ? length : (length + 0.25) * p + 3d/8; } }, ; /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */ private final String name; /** * Constructor * * @param type name of estimation type as per wikipedia */ EstimationType(final String type) { this.name = type; } /** * Finds the index of array that can be used as starting index to * {@link #estimate(double[], int[], double, int, KthSelector) estimate} * percentile. The calculation of index calculation is specific to each * {@link EstimationType}. * * @param p the pth quantile * @param length the total number of array elements in the work array * @return a computed real valued index as explained in the wikipedia */ protected abstract double index(final double p, final int length); /** * Estimation based on Kth selection. This may be overridden * in specific enums to compute slightly different estimations. * * @param work array of numbers to be used for finding the percentile * @param pos indicated positional index prior computed from calling * {@link #index(double, int)} * @param pivotsHeap an earlier populated cache if exists; will be used * @param length size of array considered * @param selector a {@link KthSelector} used for pivoting during search * @return estimated percentile */ protected double estimate(final double[] work, final int[] pivotsHeap, final double pos, final int length, final KthSelector selector) { final double fpos = FastMath.floor(pos); final int intPos = (int) fpos; final double dif = pos - fpos; if (pos < 1) { return selector.select(work, pivotsHeap, 0); } if (pos >= length) { return selector.select(work, pivotsHeap, length - 1); } final double lower = selector.select(work, pivotsHeap, intPos - 1); final double upper = selector.select(work, pivotsHeap, intPos); return lower + dif * (upper - lower); } /** * Evaluate method to compute the percentile for a given bounded array * using earlier computed pivots heap.
* This basically calls the {@link #index(double, int) index} and then * {@link #estimate(double[], int[], double, int, KthSelector) estimate} * functions to return the estimated percentile value. * * @param work array of numbers to be used for finding the percentile * @param pivotsHeap a prior cached heap which can speed up estimation * @param p the pth quantile to be computed * @param selector a {@link KthSelector} used for pivoting during search * @return estimated percentile * @throws OutOfRangeException if p is out of range * @throws NullArgumentException if work array is null */ protected double evaluate(final double[] work, final int[] pivotsHeap, final double p, final KthSelector selector) { MathUtils.checkNotNull(work); if (p > 100 || p <= 0) { throw new OutOfRangeException(LocalizedFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100); } return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector); } /** * Evaluate method to compute the percentile for a given bounded array. * This basically calls the {@link #index(double, int) index} and then * {@link #estimate(double[], int[], double, int, KthSelector) estimate} * functions to return the estimated percentile value. Please * note that this method does not make use of cached pivots. * * @param work array of numbers to be used for finding the percentile * @param p the pth quantile to be computed * @return estimated percentile * @param selector a {@link KthSelector} used for pivoting during search * @throws OutOfRangeException if length or p is out of range * @throws NullArgumentException if work array is null */ public double evaluate(final double[] work, final double p, final KthSelector selector) { return this.evaluate(work, null, p, selector); } /** * Gets the name of the enum * * @return the name */ String getName() { return name; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy