picard.util.MathUtil Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of picard Show documentation
A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF.
There is a newer version: 3.2.0
Show newest version
/*
 * The MIT License
 *
 * Copyright (c) 2011 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package picard.util;

import org.apache.commons.math3.random.RandomDataGenerator;

import java.math.BigDecimal;
import java.util.Arrays;

import static java.lang.Math.log1p;
import static java.lang.Math.pow;

/**
 * General math utilities
 *
 * @author Tim Fennell
 */
final public class MathUtil {
    private MathUtil(){};

    /** The double value closest to 1 while still being less than 1. */
    public static final double MAX_PROB_BELOW_ONE = 0.9999999999999999d;

    /** Constant to convert between the natural base e and 4.0.  Useful for
     * entropy calculations on DNA. */
    public static final double LOG_4_BASE_E = Math.log(4.0);

    /**
     *  this function mimics the behavior of log_1p but resulting in log _base 10_ of (1+x) instead of natural log of 1+x
     */
    public static double log10_1p(final double x){
        return log1p(x)/LOG_10_MATH.log_of_base;
    }

    /** Calculated the mean of an array of doubles. */
    public static double mean(final double[] in, final int start, final int stop) {
        double total = 0;
        for (int i = start; i < stop; ++i) {
            total += in[i];
        }

        return total / (stop - start);
    }

    public static double mean(final double[] in) {
        return mean(in, 0, in.length);
    }

    /**
     * Calculated the standard deviation of an array of doubles.
     */
    public static double stddev(final double[] in, final int start, final int length) {
        return stddev(in, start, length, mean(in, start, length));
    }

    /**
     * Calculated the standard deviation of an array of doubles.
     */
    public static double stddev(final double[] in, final int start, final int stop, final double mean) {
        double total = 0;
        for (int i = start; i < stop; ++i) {
            total += (in[i] * in[i]);
        }

        return Math.sqrt((total / (stop - start)) - (mean * mean));
    }

    /**
     * Calculated the standard deviation of an array of doubles.
     */
    public static double stddev(final double[] in, final double mean) {
        return stddev(in, 0, in.length, mean);
    }

    public static int compare(final int v1, final int v2) {
        return (v1 < v2 ? -1 : (v1 == v2 ? 0 : 1));
    }

    /**
     * Calculate the median of an array of doubles. Assumes that the input is sorted
     */
    public static double median(final double... in) {
        if (in.length == 0) {
            throw new IllegalArgumentException("Attempting to find the median of an empty array");
        }

        final double[] data = Arrays.copyOf(in, in.length);
        Arrays.sort(data);
        final int middle = data.length / 2;
        return data.length % 2 == 1 ? data[middle] : (data[middle - 1] + data[middle]) / 2.0;
    }

    /**
     * Obtains percentage of two Longs
     * @param numerator   dividend
     * @param denominator divisor
     * @return numerator/(double)denominator if both are non-null and denominator != 0, else returns null.
     */
    public static Double percentageOrNull(final Long numerator, final Long denominator) {
        if (numerator != null && denominator != null && denominator != 0) {
            return numerator.doubleValue() / denominator.doubleValue();
        } else {
            return null;
        }
    }

    /** 
     * Round off the value to the specified precision. 
     */
    public static double round(final double num, final int precision) {
        BigDecimal bd = new BigDecimal(num);
        bd = bd.setScale(precision, BigDecimal.ROUND_HALF_UP);
        return bd.doubleValue();
    }

    /**
     * Divide two Doubles but return 0.0 if the denominator is 0
     * @param numerator dividend
     * @param denominator divisor
     * @return numerator/denominator unless denominator is 0, in which case returns 0
     */
    public static double divide(final double numerator, final double denominator) {
        return Math.abs(0.0 - denominator) >  0.000001
                ? numerator / denominator
                : 0.0;
    }

    /** Returns the largest value stored in the array. */
    public static double max(final double[] nums) {
        return nums[indexOfMax(nums)];
    }

    /**
     * Returns the index of the largest element in the array.  If there are multiple equal maxima then
     * the earliest one in the array is returned.
     */
    public static int indexOfMax(final double[] nums) {
        double max = nums[0];
        int index  = 0;
        for (int i = 1; i < nums.length; ++i) {
            if (nums[i] > max) {
                max = nums[i];
                index = i;
            }
        }

        return index;
    }

    /** Returns the largest value stored in the array. */
    public static long max(final long[] nums) {
        return nums[indexOfMax(nums)];
    }

    /**
     * Returns the index of the largest element in the array.  If there are multiple equal maxima then
     * the earliest one in the array is returned.
     */
    public static int indexOfMax(final long[] nums) {
        long max = nums[0];
        int index  = 0;
        for (int i = 1; i < nums.length; ++i) {
            if (nums[i] > max) {
                max = nums[i];
                index = i;
            }
        }

        return index;
    }

    /** Returns the smallest value stored in the array. */
    public static double min(final double[] nums) {
        return nums[indexOfMin(nums)];
    }

    /** Returns the smallest value stored in the array. */
    public static int min(final int[] nums) {
        return nums[indexOfMin(nums)];
    }
    
    /** Returns the smallest value stored in the array. */
    public static short min(final short[] nums) {
        short min = nums[0];
        for (int i = 1; i < nums.length; ++i) {
            if (nums[i] < min) min = nums[i];
        }

        return min;
    }

    /** Returns the smallest value stored in the array. */
    public static byte min(final byte[] nums) {
        byte min = nums[0];
        for (int i = 1; i < nums.length; ++i) {
            if (nums[i] < min) min = nums[i];
        }

        return min;
    }


    /**
     * Returns the index of the smallest element in the array.  If there are multiple equal minima then
     * the earliest one in the array is returned.
     */
    public static int indexOfMin(final int[] nums) {
        long min = nums[0];
        int index  = 0;
        for (int i = 1; i < nums.length; ++i) {
            if (nums[i] < min) {
                min = nums[i];
                index = i;
            }
        }

        return index;
    }

    /**
     * Returns the index of the smallest element in the array.  If there are multiple equal minima then
     * the earliest one in the array is returned.
     */
    public static int indexOfMin(final double[] nums) {
        double min = nums[0];
        int index  = 0;
        for (int i = 1; i < nums.length; ++i) {
            if (nums[i] < min) {
                min = nums[i];
                index = i;
            }
        }

        return index;
    }
    /** Mimic's R's seq() function to produce a sequence of equally spaced numbers. */
    public static double[] seq(final double from, final double to, final double by) {
        if (from < to && by <= 0) return new double[0];
        if (from > to && by >= 0) return new double[0];
        final int values = 1 + (int) Math.floor((to - from) / by);
        final double[] results = new double[values];

        BigDecimal value = new BigDecimal(from);
        BigDecimal increment = new BigDecimal(by);

        for (int i=0; i= p >= (1-0.9999999999999999)/(lPosteriors.length-1)
     */
    public static double[] pNormalizeLogProbability(final double[] lPosterior) {
        // Note: bumping all the LLs so that the biggest is 300 ensures that we have the
        // widest range possible when unlogging them before one of them underflows. 10^300 is
        // near the maximum before you hit positive infinity.

        final double maxLikelihood = max(lPosterior);
        final double bump = 300 - maxLikelihood;

        final double[] tmp = new double[lPosterior.length];
        double total = 0;
        for (int i = 0; i < lPosterior.length; ++i) {
            tmp[i] = pow(10, lPosterior[i] + bump);
            total += tmp[i];
        }

        final double maxP = MAX_PROB_BELOW_ONE;
        final double minP = (1 - MAX_PROB_BELOW_ONE) / (tmp.length - 1);

        for (int i = 0; i < lPosterior.length; ++i) {
            tmp[i] /= total;
            if (tmp[i] > maxP) tmp[i] = maxP;
            else if (tmp[i] < minP) tmp[i] = minP;
        }

        return tmp;
    }

    /** Calculates the ratio of two arrays of the same length. */
    public static double[] divide(final double[] numerators, final double[] denominators) {
        if (numerators.length != denominators.length) throw new IllegalArgumentException("Arrays must be of same length.");

        final int len = numerators.length;
        final double[] result = new double[len];
        for (int i = 0; i < len; ++i) result[i] = numerators[i] / denominators[i];
        return result;
    }

    /**
     * Takes a vector of numbers and converts it to a vector of probabilities
     * that sum to 1 with as much fidelity as possible.  Limits probabilities to be in the space:
     * 0.9999999999999999 >= p >= (1-0.9999999999999999)/(likelihoods.length-1)
     */
    public static double[] pNormalizeVector(final double[] pPosterior) {

        final double[] tmp = new double[pPosterior.length];
        final double total = sum(pPosterior);

        final double maxP = MAX_PROB_BELOW_ONE;
        final double minP = (1 - MAX_PROB_BELOW_ONE) / (tmp.length - 1);

        for (int i = 0; i < pPosterior.length; ++i) {
            tmp[i] = pPosterior[i] / total;
            if (tmp[i] > maxP) tmp[i] = maxP;
            else if (tmp[i] < minP) tmp[i] = minP;
        }

        return tmp;
    }

    /** Calculates the product of two arrays of the same length. */
    public static double[] multiply(final double[] lhs, final double[] rhs) {
        if (lhs.length != rhs.length) throw new IllegalArgumentException("Arrays must be of same length.");

        final int len = lhs.length;
        final double[] result = new double[len];
        for (int i = 0; i < len; ++i) result[i] = lhs[i] * rhs[i];
        return result;
    }

    /** calculates the sum of the arrays as a third array. */
    public static double[] sum(final double[] lhs, final double[] rhs) {
        if (lhs.length != rhs.length) throw new IllegalArgumentException("Arrays must be of same length.");

        final int len = lhs.length;
        final double [] result = new double[len];
        for (int i = 0; i < len; ++i) result[i] = lhs[i] + rhs[i];
        return result;
    }

    /** Returns the sum of the elements in the array. */
    public static double sum(final double[] arr) {
        double result = 0;
        for (final double next : arr) result += next;
        return result;
    }

    /** Returns the sum of the elements in the array starting with start and ending before stop. */
    public static long sum(final long[] arr, final int start, final int stop) {
        long result = 0;
        for (int i=start; i