All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.math4.stat.ranking.NaturalRanking Maven / Gradle / Ivy

Go to download

Statistical sampling library for use in virtdata libraries, based on apache commons math 4

There is a newer version: 5.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math4.stat.ranking;

import org.apache.commons.math4.exception.MathInternalError;
import org.apache.commons.math4.exception.NotANumberException;
import org.apache.commons.math4.random.RandomUtils;
import org.apache.commons.math4.util.FastMath;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.simple.RandomSource;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;


/**
 * Ranking based on the natural ordering on doubles.
 *
 * 

NaNs are treated according to the configured {@link NaNStrategy} and ties * are handled using the selected {@link TiesStrategy}. * Configuration settings are supplied in optional constructor arguments. * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE}, * respectively. When using {@link TiesStrategy#RANDOM}, a * {@link UniformRandomProvider random generator} may be supplied as a * constructor argument.

*

Examples: *

* * * * * * * * * * * * * * * * * * * * * * * * *
rankings
* Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17) *
NaNStrategyTiesStrategyrank(data)
default (NaNs maximal)default (ties averaged)(5, 3, 6, 7, 3, 8, 9, 1, 3)
default (NaNs maximal)MINIMUM(5, 2, 6, 7, 2, 8, 9, 1, 2)
MINIMALdefault (ties averaged)(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)
REMOVEDSEQUENTIAL(5, 2, 6, 7, 3, 8, 1, 4)
MINIMALMAXIMUM(6, 5, 7, 8, 5, 9, 2, 2, 5)
* * @since 2.0 */ public class NaturalRanking implements RankingAlgorithm { /** default NaN strategy */ public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED; /** default ties strategy */ public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE; /** NaN strategy - defaults to NaNs maximal */ private final NaNStrategy nanStrategy; /** Ties strategy - defaults to ties averaged */ private final TiesStrategy tiesStrategy; /** Source of random data - used only when ties strategy is RANDOM */ private final RandomUtils.DataGenerator randomData; /** * Create a NaturalRanking with default strategies for handling ties and NaNs. */ public NaturalRanking() { this(DEFAULT_NAN_STRATEGY, DEFAULT_TIES_STRATEGY, null); } /** * Create a NaturalRanking with the given TiesStrategy. * * @param tiesStrategy the TiesStrategy to use */ public NaturalRanking(TiesStrategy tiesStrategy) { this(DEFAULT_NAN_STRATEGY, tiesStrategy, RandomSource.create(RandomSource.WELL_19937_C)); } /** * Create a NaturalRanking with the given NaNStrategy. * * @param nanStrategy the NaNStrategy to use */ public NaturalRanking(NaNStrategy nanStrategy) { this(nanStrategy, DEFAULT_TIES_STRATEGY, null); } /** * Create a NaturalRanking with the given NaNStrategy and TiesStrategy. * * @param nanStrategy NaNStrategy to use * @param tiesStrategy TiesStrategy to use */ public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) { this(nanStrategy, tiesStrategy, RandomSource.create(RandomSource.WELL_19937_C)); } /** * Create a NaturalRanking with TiesStrategy.RANDOM and the given * random generator as the source of random data. * * @param randomGenerator source of random data */ public NaturalRanking(UniformRandomProvider randomGenerator) { this(DEFAULT_NAN_STRATEGY, TiesStrategy.RANDOM, randomGenerator); } /** * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM * and the given source of random data. * * @param nanStrategy NaNStrategy to use * @param randomGenerator source of random data */ public NaturalRanking(NaNStrategy nanStrategy, UniformRandomProvider randomGenerator) { this(nanStrategy, TiesStrategy.RANDOM, randomGenerator); } /** * @param nanStrategy NaN strategy. * @param tiesStrategy Tie strategy. * @param randomGenerator RNG. */ private NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy, UniformRandomProvider randomGenerator) { this.nanStrategy = nanStrategy; this.tiesStrategy = tiesStrategy; randomData = RandomUtils.createDataGenerator(randomGenerator); } /** * Return the NaNStrategy * * @return returns the NaNStrategy */ public NaNStrategy getNanStrategy() { return nanStrategy; } /** * Return the TiesStrategy * * @return the TiesStrategy */ public TiesStrategy getTiesStrategy() { return tiesStrategy; } /** * Rank data using the natural ordering on Doubles, with * NaN values handled according to nanStrategy and ties * resolved using tiesStrategy. * * @param data array to be ranked * @return array of ranks * @throws NotANumberException if the selected {@link NaNStrategy} is {@code FAILED} * and a {@link Double#NaN} is encountered in the input data */ @Override public double[] rank(double[] data) { // Array recording initial positions of data to be ranked IntDoublePair[] ranks = new IntDoublePair[data.length]; for (int i = 0; i < data.length; i++) { ranks[i] = new IntDoublePair(data[i], i); } // Recode, remove or record positions of NaNs List nanPositions = null; switch (nanStrategy) { case MAXIMAL: // Replace NaNs with +INFs recodeNaNs(ranks, Double.POSITIVE_INFINITY); break; case MINIMAL: // Replace NaNs with -INFs recodeNaNs(ranks, Double.NEGATIVE_INFINITY); break; case REMOVED: // Drop NaNs from data ranks = removeNaNs(ranks); break; case FIXED: // Record positions of NaNs nanPositions = getNanPositions(ranks); break; case FAILED: nanPositions = getNanPositions(ranks); if (nanPositions.size() > 0) { throw new NotANumberException(); } break; default: // this should not happen unless NaNStrategy enum is changed throw new MathInternalError(); } // Sort the IntDoublePairs Arrays.sort(ranks); // Walk the sorted array, filling output array using sorted positions, // resolving ties as we go double[] out = new double[ranks.length]; int pos = 1; // position in sorted array out[ranks[0].getPosition()] = pos; List tiesTrace = new ArrayList<>(); tiesTrace.add(ranks[0].getPosition()); for (int i = 1; i < ranks.length; i++) { if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) { // tie sequence has ended (or had length 1) pos = i + 1; if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve resolveTie(out, tiesTrace); } tiesTrace = new ArrayList<>(); tiesTrace.add(ranks[i].getPosition()); } else { // tie sequence continues tiesTrace.add(ranks[i].getPosition()); } out[ranks[i].getPosition()] = pos; } if (tiesTrace.size() > 1) { // handle tie sequence at end resolveTie(out, tiesTrace); } if (nanStrategy == NaNStrategy.FIXED) { restoreNaNs(out, nanPositions); } return out; } /** * Returns an array that is a copy of the input array with IntDoublePairs * having NaN values removed. * * @param ranks input array * @return array with NaN-valued entries removed */ private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) { if (!containsNaNs(ranks)) { return ranks; } IntDoublePair[] outRanks = new IntDoublePair[ranks.length]; int j = 0; for (int i = 0; i < ranks.length; i++) { if (Double.isNaN(ranks[i].getValue())) { // drop, but adjust original ranks of later elements for (int k = i + 1; k < ranks.length; k++) { ranks[k] = new IntDoublePair( ranks[k].getValue(), ranks[k].getPosition() - 1); } } else { outRanks[j] = new IntDoublePair( ranks[i].getValue(), ranks[i].getPosition()); j++; } } IntDoublePair[] returnRanks = new IntDoublePair[j]; System.arraycopy(outRanks, 0, returnRanks, 0, j); return returnRanks; } /** * Recodes NaN values to the given value. * * @param ranks array to recode * @param value the value to replace NaNs with */ private void recodeNaNs(IntDoublePair[] ranks, double value) { for (int i = 0; i < ranks.length; i++) { if (Double.isNaN(ranks[i].getValue())) { ranks[i] = new IntDoublePair( value, ranks[i].getPosition()); } } } /** * Checks for presence of NaNs in ranks. * * @param ranks array to be searched for NaNs * @return true iff ranks contains one or more NaNs */ private boolean containsNaNs(IntDoublePair[] ranks) { for (int i = 0; i < ranks.length; i++) { if (Double.isNaN(ranks[i].getValue())) { return true; } } return false; } /** * Resolve a sequence of ties, using the configured {@link TiesStrategy}. * The input ranks array is expected to take the same value * for all indices in tiesTrace. The common value is recoded * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>, * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged. * The same array and trace with tiesStrategy AVERAGE will come out * <5,8,3,6,3,7,1,3>. * * @param ranks array of ranks * @param tiesTrace list of indices where ranks is constant * -- that is, for any i and j in TiesTrace, ranks[i] == ranks[j] * */ private void resolveTie(double[] ranks, List tiesTrace) { // constant value of ranks over tiesTrace final double c = ranks[tiesTrace.get(0)]; // length of sequence of tied ranks final int length = tiesTrace.size(); switch (tiesStrategy) { case AVERAGE: // Replace ranks with average fill(ranks, tiesTrace, (2 * c + length - 1) / 2d); break; case MAXIMUM: // Replace ranks with maximum values fill(ranks, tiesTrace, c + length - 1); break; case MINIMUM: // Replace ties with minimum fill(ranks, tiesTrace, c); break; case RANDOM: // Fill with random integral values in [c, c + length - 1] Iterator iterator = tiesTrace.iterator(); long f = FastMath.round(c); while (iterator.hasNext()) { // No advertised exception because args are guaranteed valid ranks[iterator.next()] = randomData.nextLong(f, f + length - 1); } break; case SEQUENTIAL: // Fill sequentially from c to c + length - 1 // walk and fill iterator = tiesTrace.iterator(); f = FastMath.round(c); int i = 0; while (iterator.hasNext()) { ranks[iterator.next()] = f + i++; } break; default: // this should not happen unless TiesStrategy enum is changed throw new MathInternalError(); } } /** * Setsdata[i] = value for each i in tiesTrace. * * @param data array to modify * @param tiesTrace list of index values to set * @param value value to set */ private void fill(double[] data, List tiesTrace, double value) { Iterator iterator = tiesTrace.iterator(); while (iterator.hasNext()) { data[iterator.next()] = value; } } /** * Set ranks[i] = Double.NaN for each i in nanPositions. * * @param ranks array to modify * @param nanPositions list of index values to set to Double.NaN */ private void restoreNaNs(double[] ranks, List nanPositions) { if (nanPositions.size() == 0) { return; } Iterator iterator = nanPositions.iterator(); while (iterator.hasNext()) { ranks[iterator.next().intValue()] = Double.NaN; } } /** * Returns a list of indexes where ranks is NaN. * * @param ranks array to search for NaNs * @return list of indexes i such that ranks[i] = NaN */ private List getNanPositions(IntDoublePair[] ranks) { ArrayList out = new ArrayList<>(); for (int i = 0; i < ranks.length; i++) { if (Double.isNaN(ranks[i].getValue())) { out.add(Integer.valueOf(i)); } } return out; } /** * Represents the position of a double value in an ordering. * Comparable interface is implemented so Arrays.sort can be used * to sort an array of IntDoublePairs by value. Note that the * implicitly defined natural ordering is NOT consistent with equals. */ private static class IntDoublePair implements Comparable { /** Value of the pair */ private final double value; /** Original position of the pair */ private final int position; /** * Construct an IntDoublePair with the given value and position. * @param value the value of the pair * @param position the original position */ IntDoublePair(double value, int position) { this.value = value; this.position = position; } /** * Compare this IntDoublePair to another pair. * Only the values are compared. * * @param other the other pair to compare this to * @return result of Double.compare(value, other.value) */ @Override public int compareTo(IntDoublePair other) { return Double.compare(value, other.value); } // N.B. equals() and hashCode() are not implemented; see MATH-610 for discussion. /** * Returns the value of the pair. * @return value */ public double getValue() { return value; } /** * Returns the original position of the pair. * @return position */ public int getPosition() { return position; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy