All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.isi.nlp.math.PercentileComputer Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.math;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.Beta;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Doubles;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * Computes percentiles. There are multiple ways of computing them, so you will need to choose a
 * {@link #nistPercentileComputer()} or {@link #excelPercentileComputer()}.
 *
 * 

This may be at least partially superseded by Guava's {@code Quantiles} when we update to Guava * 21. */ @Beta public final class PercentileComputer { private final Algorithm algorithm; private PercentileComputer(Algorithm algorithm) { this.algorithm = checkNotNull(algorithm); } /** * Creates a {@code PercentileComputer} which uses the algorithm given in NIST's Engineering * Statistics Handbook. The relevant section is copied below: * *

* * Percentiles can be estimated from {@code N} measurements as follows: for the pth percentile, * set p(N+1) equal to k + d for k an integer, and d, a fraction greater than or equal to 0 and * less than 1. * *
    *
  • For 0 < k < N, Y(p) = Y[k] + d(Y[k+1] - Y[k]) *
  • For k = 0, Y(p) = Y[1] *
  • For k = N, Y(p) = Y[N] *
* *
* * Note NIST is using 1-based indexing. */ public static PercentileComputer nistPercentileComputer() { return new PercentileComputer(Algorithm.NIST); } /** * Creates a {@code PercentileComputer} which uses the "Excel" alternative algorithm given in * NIST's Engineering Statistics Handbook. It is the same as the base NIST algorithm, except it * assigns (k,d) to the integral and fractional parts of p(N-1)+1 instead of p(N+1). */ public static PercentileComputer excelPercentileComputer() { return new PercentileComputer(Algorithm.EXCEL); } /** Computes percentiles for {@code data}, assuming it will not be externally modified. */ public Percentiles calculatePercentilesAdoptingData(double[] data) { return new Percentiles(algorithm, data); } /** Computes percentiles for {@code data}, making a copy in case it is modified externally. */ public Percentiles calculatePercentilesCopyingData(double[] data) { return new Percentiles(algorithm, data.clone()); } // these may assume percentile is valid and data non-empty private enum Algorithm { NIST { @Override public double computePercentile(double percentile, double[] data) { final int N = data.length; final double rank = percentile * (N + 1); final int k = (int) rank; final double d = rank - k; if (k == 0) { return data[0]; } else if (k == N) { return data[N - 1]; } else { // we subtract 1 when looking up because NIST uses 1-based indexing final double yK = data[k - 1]; final double yKPlusOne = data[k]; return yK + d * (yKPlusOne - yK); } } }, EXCEL { @Override public double computePercentile(double percentile, double[] data) { final int N = data.length; final double rank = percentile * (N - 1) + 1; final int k = (int) rank; final double d = rank - k; if (k == 0) { return data[0]; } else if (k == N) { return data[N - 1]; } else { // we subtract 1 when looking up because NIST uses 1-based indexing final double yK = data[k - 1]; final double yKPlusOne = data[k]; return yK + d * (yKPlusOne - yK); } } }; public abstract double 
computePercentile(double percentile, double[] data); } /** * This represents the computation of percentiles on a data set. It can be queried for various * percentile-related information. * *

Most things returned are {@link Optional} to force the user to deal with the case of empty * data. */ public static final class Percentiles { @JsonProperty("algorithm") private final Algorithm algorithm; @JsonProperty("data") final double[] data; @JsonCreator Percentiles( @JsonProperty("algorithm") Algorithm algorithm, @JsonProperty("data") double[] data) { this.algorithm = checkNotNull(algorithm); this.data = data; Arrays.sort(data); } public int numObservedValues() { return data.length; } public Optional median() { if (data.length == 0) { return Optional.absent(); } if (data.length % 2 == 0) { // if we have an event number of elements, return the mean of the two // middle element return Optional.of(0.5 * ((data[data.length / 2] + data[data.length / 2 - 1]))); } else { // if we have an odd number of elements, return the unique middle element return Optional.of(data[data.length / 2]); } } public Optional min() { if (data.length == 0) { return Optional.absent(); } return Optional.of(data[0]); } public Optional max() { if (data.length == 0) { return Optional.absent(); } return Optional.of(data[data.length - 1]); } /** * Calculates the p-th percentile of the observed data. The algorithm used varies depending on * what {@link PercentileComputer} generated this data. If no data was observed, this will throw * a {@link java.util.NoSuchElementException}. This method takes time linear in the number of * observed values. * * @param p Must be in [0.0, 1.0) */ public Optional percentile(double p) { checkArgument(p >= 0.0 && p < 1.0, "Percentiles must be in [0.0, 1.0)"); if (data.length == 0) { return Optional.absent(); } return Optional.of(algorithm.computePercentile(p, data)); } public List> percentiles(Iterable percentilesToGet) { final ImmutableList.Builder> ret = ImmutableList.builder(); for (final double percentile : percentilesToGet) { ret.add(percentile(percentile)); } return ret.build(); } public List rawData() { return Doubles.asList(data); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy