All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jaitools.numeric.SampleStats Maven / Gradle / Ivy

Go to download

Provides a single jar containing all JAI-tools modules which you can use instead of including individual modules in your project. Note: It does not include the Jiffle scripting language or Jiffle image operator.

The newest version!
/*
 * Copyright 2009-2011 Michael Bedward
 * 
 * This file is part of jai-tools.
 *
 * jai-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the 
 * License, or (at your option) any later version.
 *
 * jai-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public 
 * License along with jai-tools.  If not, see <http://www.gnu.org/licenses/>.
 * 
 */

package jaitools.numeric;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.SortedSet;

import jaitools.CollectionFactory;

/**
 * A collection of static methods to calculate summary statistics for
 * a sample of double-valued data. This class is used by both Jiffle
 * and the KernelStats operator.
 * <p>
 * All methods take the sample as a {@code Double[]}. Array elements must
 * not be {@code null}; a {@code null} element results in a
 * {@code NullPointerException}. A {@code null} array is treated as an
 * empty sample and yields {@code Double.NaN}.
 *
 * @author Michael Bedward
 * @author Daniele Romagnoli, GeoSolutions S.A.S.
 * @since 1.0
 * @version $Id: SampleStats.java 1610 2011-03-31 04:44:28Z michael.bedward $
 */
public class SampleStats {

    /**
     * Return the maximum of the given values.
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} and the sample contains a NaN the result is NaN
     * @return max value or Double.NaN if the sample is empty (or contains
     *         nothing but ignored NaN values)
     */
    public static double max(Double[] values, boolean ignoreNaN) {
        return extremum(values, ignoreNaN, true);
    }

    /**
     * Return the mean of the given values.
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} and the sample contains a NaN the result is NaN
     * @return mean value or Double.NaN if the sample is empty (or contains
     *         nothing but ignored NaN values)
     */
    public static double mean(Double[] values, boolean ignoreNaN) {
        if (values == null || values.length == 0) {
            return Double.NaN;
        } else if (values.length == 1) {
            return values[0];
        }

        double sum = 0.0d;
        int n = 0;
        for (Double val : values) {
            if (val.isNaN()) {
                if (!ignoreNaN) {
                    return Double.NaN;
                }
            } else {
                sum += val;
                n++;
            }
        }

        // When every value was an ignored NaN this is 0.0 / 0, i.e. NaN
        return sum / n;
    }

    /**
     * Calculates the minimum of the given values.
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} and the sample contains a NaN the result is NaN
     * @return min value or Double.NaN if the sample is empty (or contains
     *         nothing but ignored NaN values)
     */
    public static double min(Double[] values, boolean ignoreNaN) {
        return extremum(values, ignoreNaN, false);
    }

    /**
     * Calculates the median of the given values. For a sample with an odd
     * number of elements the median is the mid-point value of the
     * sorted sample. For an even number of elements it is the mean of
     * the two values on either side of the mid-point.
     *
     * @param values sample values (need not be pre-sorted)
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} and the sample contains a NaN the result is NaN
     * @return median value or Double.NaN if the sample is empty (or
     *         contains nothing but ignored NaN values)
     */
    public static double median(Double[] values, boolean ignoreNaN) {
        if (values == null) {
            return Double.NaN;
        }

        // Copy the non-NaN values into a primitive array in a single pass
        double[] sample = new double[values.length];
        int n = 0;
        for (Double value : values) {
            if (value.isNaN()) {
                if (!ignoreNaN) {
                    return Double.NaN;
                }
            } else {
                sample[n++] = value;
            }
        }

        if (n == 0) {
            return Double.NaN;
        }

        Arrays.sort(sample, 0, n);

        int midHi = n / 2;
        if (n % 2 != 0) {
            return sample[midHi];
        }
        return (sample[midHi - 1] + sample[midHi]) / 2;
    }

    /**
     * Calculates the empirical mode (highest frequency value) of the given values.
     * If more than one data value occurs with maximum frequency the following
     * tie-break rules are used:
     * <ul>
     * <li> for an odd number of tied values, return their median
     * <li> for an even number of tied values, return the value below
     *      the mid-point of the sorted list of tied values
     * </ul>
     * This ensures that the calculated mode occurs in the sample data.
     * Whether or not the mode is meaningful for the sample is up to the user !
     * <p>
     * Values are grouped using {@code CompareOp.aequal}, which is defined
     * outside this class (presumably a tolerance-based equality test).
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} any NaN values are left in the sample and take
     *        part in the frequency count
     * @return calculated mode or Double.NaN if the sample is empty
     */
    public static double mode(Double[] values, boolean ignoreNaN) {
        if (values == null) {
            return Double.NaN;
        }

        // Single-pass filter instead of repeated List.remove calls
        List<Double> list = CollectionFactory.list();
        for (Double value : values) {
            if (!(ignoreNaN && value.isNaN())) {
                list.add(value);
            }
        }

        if (list.isEmpty()) {
            return Double.NaN;
        } else if (list.size() == 1) {
            return list.get(0);
        }

        Collections.sort(list);

        // Run-length encode the sorted sample: one entry per distinct value
        List<Double> uniqueValues = CollectionFactory.list();
        List<Integer> freq = CollectionFactory.list();

        Double curVal = list.get(0);
        int curFreq = 1;
        int maxFreq = 1;
        for (int i = 1; i < list.size(); i++) {
            if (CompareOp.aequal(curVal, list.get(i))) {
                curFreq++;
            } else {
                uniqueValues.add(curVal);
                freq.add(curFreq);
                curVal = list.get(i);
                if (curFreq > maxFreq) {
                    maxFreq = curFreq;
                }
                curFreq = 1;
            }
        }
        // Flush the final run
        uniqueValues.add(curVal);
        freq.add(curFreq);
        if (curFreq > maxFreq) {
            maxFreq = curFreq;
        }

        // Indices (into uniqueValues) of all values tied for maximum frequency
        List<Integer> maxFreqIndices = CollectionFactory.list();
        int k = 0;
        for (Integer f : freq) {
            if (f.intValue() == maxFreq) {
                maxFreqIndices.add(k);
            }
            k++;
        }

        if (maxFreqIndices.size() == 1) {
            return uniqueValues.get(maxFreqIndices.get(0));
        }

        // Tie-break: middle tied value, or the one below the mid-point
        // when the number of tied values is even
        boolean even = maxFreqIndices.size() % 2 == 0;
        int i = maxFreqIndices.size() / 2;
        if (even) {
            i--;
        }
        return uniqueValues.get(maxFreqIndices.get(i));
    }

    /**
     * Calculates the range (max - min) of a set of values.
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} and the sample contains a NaN the result is NaN
     * @return the range or Double.NaN if the set is empty (or contains
     *         nothing but ignored NaN values)
     */
    public static double range(Double[] values, boolean ignoreNaN) {
        if (values == null || values.length == 0) {
            return Double.NaN;
        } else if (values.length == 1) {
            // A lone NaN has no meaningful range
            return values[0].isNaN() ? Double.NaN : 0d;
        }

        return max(values, ignoreNaN) - min(values, ignoreNaN);
    }

    /**
     * Calculates sample variance using the running sample algorithm
     * of Welford (1962) described by Knuth in <i>The Art of Computer
     * Programming (3rd ed)</i> Vol.2 p.232
     * <p>
     * Note: an input array with fewer than two elements gives NaN, whereas
     * a longer array from which exactly one non-NaN value remains after
     * ignoring NaN values gives 0.
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} and the sample contains a NaN the result is NaN
     * @return sample variance, or Double.NaN if {@code values} is
     *         {@code null} or it cannot be calculated
     */
    public static double variance(Double[] values, boolean ignoreNaN) {
        if (values == null || values.length < 2) {
            return Double.NaN;
        }

        double mNew, mOld = 0.0d, s = 0.0d;
        int n = 0;
        for (int i = 0; i < values.length; i++) {
            if (Double.isNaN(values[i])) {
                if (!ignoreNaN) {
                    return Double.NaN;
                }
            } else {
                n++;
                if (n == 1) {
                    mNew = mOld = values[i];
                } else {
                    // Welford update of the running mean and sum of squares
                    mNew = mOld + (values[i] - mOld) / n;
                    s = s + (values[i] - mOld) * (values[i] - mNew);
                    mOld = mNew;
                }
            }
        }

        if (n > 1) {
            return s / (n - 1);
        } else if (n == 1) {
            return 0.0d;
        } else {
            return Double.NaN;
        }
    }

    /**
     * Calculates sample standard deviation. This is a convenience
     * method that calls {@linkplain #variance(java.lang.Double[], boolean) }
     * and returns the square-root of the result
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values
     * @return sample standard deviation as a double, or Double.NaN if the
     *         variance is NaN
     */
    public static double sdev(Double[] values, boolean ignoreNaN) {
        double var = variance(values, ignoreNaN);
        return (Double.isNaN(var) ? Double.NaN : Math.sqrt(var));
    }

    /**
     * Calculates the sum of the values.
     *
     * @param values sample values
     * @param ignoreNaN specifies whether to ignore NaN values; if
     *        {@code false} and the sample contains a NaN the result is NaN
     * @return sum of the values (0 for an empty array), or Double.NaN if
     *         {@code values} is {@code null}
     */
    public static double sum(Double[] values, boolean ignoreNaN) {
        if (values == null) {
            return Double.NaN;
        }

        double sum = 0.0d;
        for (int i = 0; i < values.length; i++) {
            if (Double.isNaN(values[i])) {
                if (!ignoreNaN) {
                    return Double.NaN;
                }
            } else {
                sum = sum + values[i];
            }
        }
        return sum;
    }

    /**
     * Single-pass search for the largest or smallest value of a sample,
     * shared by {@link #max} and {@link #min}.
     *
     * @param values sample values (may be {@code null})
     * @param ignoreNaN if {@code true} NaN values are skipped, otherwise the
     *        first NaN encountered makes the result NaN
     * @param wantMax {@code true} to find the maximum, {@code false} for
     *        the minimum
     * @return the requested extremum, or Double.NaN if no value qualifies
     */
    private static double extremum(Double[] values, boolean ignoreNaN, boolean wantMax) {
        if (values == null) {
            return Double.NaN;
        }

        boolean found = false;
        double best = Double.NaN;
        for (Double value : values) {
            double v = value;
            if (Double.isNaN(v)) {
                if (!ignoreNaN) {
                    return Double.NaN;
                }
                continue;
            }

            if (!found) {
                best = v;
                found = true;
            } else {
                // Double.compare keeps the ordering of the natural Double
                // sort (e.g. -0.0 is smaller than 0.0)
                int c = Double.compare(v, best);
                if (wantMax ? c > 0 : c < 0) {
                    best = v;
                }
            }
        }
        return best;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy