htsjdk.variant.utils.GeneralUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of htsjdk Show documentation
Show all versions of htsjdk Show documentation
A Java API for high-throughput sequencing data (HTS) formats
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package htsjdk.variant.utils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
* Constants and utility methods used throughout the VCF/BCF/VariantContext classes
*/
public class GeneralUtils {

    /**
     * Setting this to true causes the VCF/BCF/VariantContext classes to emit debugging information
     * to standard error
     */
    public static final boolean DEBUG_MODE_ENABLED = false;

    /**
     * The smallest log10 value we'll emit from normalizeFromLog10 and other functions
     * where the real-space value is 0.0.
     */
    public static final double LOG10_P_OF_ZERO = -1000000.0;

    /**
     * Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
     * elti objects (note there's no actual space between sep and the elti elements). Returns
     * "" if collection is empty. If collection contains just elt, then returns elt.toString()
     *
     * @param separator the string to use to separate objects
     * @param objects a non-null collection of objects. the element order is defined by the iterator over objects.
     *                Elements must be non-null, since toString() is invoked on each of them
     * @param <T> the type of the objects
     * @return a non-null string
     */
    public static <T> String join(final String separator, final Collection<T> objects) {
        if (objects.isEmpty()) { // fast path for empty collection
            return "";
        }

        final Iterator<T> iter = objects.iterator();
        final T first = iter.next();
        if (!iter.hasNext()) { // fast path for singleton collections
            return first.toString();
        }

        // full path for 2+ element collections that actually need a join
        final StringBuilder ret = new StringBuilder(first.toString());
        while (iter.hasNext()) {
            ret.append(separator);
            ret.append(iter.next().toString());
        }
        return ret.toString();
    }

    /**
     * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
     *
     * @param array the array to be normalized
     * @return a newly allocated array corresponding the normalized values in array
     */
    public static double[] normalizeFromLog10(double[] array) {
        return normalizeFromLog10(array, false);
    }

    /**
     * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
     *
     * @param array the array to be normalized
     * @param takeLog10OfOutput if true, the output will be transformed back into log10 units
     * @return a newly allocated array corresponding the normalized values in array, maybe log10 transformed
     */
    public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput) {
        return normalizeFromLog10(array, takeLog10OfOutput, false);
    }

    /**
     * See #normalizeFromLog10 but with the additional option to use an approximation that keeps the calculation
     * always in log-space.
     *
     * <p>NOTE: when {@code keepInLogSpace} is true the maximum value is subtracted from every entry of
     * {@code array} IN PLACE and the very same array instance is returned; no new array is allocated and
     * {@code takeLog10OfOutput} is ignored. When it is false the input array is left untouched.
     *
     * @param array the log10-based array to be normalized; must be non-null and non-empty
     * @param takeLog10OfOutput if true, the normalized real-space values are transformed back into log10 units;
     *                          values whose log10 is infinite or below {@link #LOG10_P_OF_ZERO} are replaced by
     *                          the entry's log10 value relative to the maximum
     * @param keepInLogSpace if true, only subtract the maximum from each entry (modifying {@code array} itself)
     *                       and skip the conversion to real-space
     *
     * @return the normalized values: a newly allocated array, or {@code array} itself if keepInLogSpace is true
     */
    public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput, boolean keepInLogSpace) {
        // for precision purposes, we need to add (or really subtract, since they're
        // all negative) the largest value; also, we need to convert to normal-space.
        final double maxValue = arrayMax(array);

        // we may decide to just normalize in log space without converting to linear space
        if (keepInLogSpace) {
            for (int i = 0; i < array.length; i++) {
                array[i] -= maxValue;
            }
            return array;
        }

        // default case: go to linear space, accumulating the total as we go
        final double[] normalized = new double[array.length];
        double sum = 0.0;
        for (int i = 0; i < array.length; i++) {
            normalized[i] = Math.pow(10, array[i] - maxValue);
            sum += normalized[i];
        }

        // normalize
        for (int i = 0; i < array.length; i++) {
            double x = normalized[i] / sum;
            if (takeLog10OfOutput) {
                x = Math.log10(x);
                // underflow guard: fall back to the un-normalized log10 value relative to the max
                if (x < LOG10_P_OF_ZERO || Double.isInfinite(x)) {
                    x = array[i] - maxValue;
                }
            }
            normalized[i] = x;
        }
        return normalized;
    }

    /**
     * Returns the largest value in the array.
     *
     * @param array the array to scan; must be non-null and non-empty
     * @return the maximum element of array
     * @throws IllegalArgumentException if array is null or empty
     */
    public static double arrayMax(final double[] array) {
        return array[maxElementIndex(array)];
    }

    /**
     * Returns the index of the largest value in the array (the first such index in case of ties).
     *
     * @param array the array to scan; must be non-null and non-empty
     * @return the index of the maximum element of array
     * @throws IllegalArgumentException if array is null or empty
     */
    public static int maxElementIndex(final double[] array) {
        // validate before touching array.length so that null yields IllegalArgumentException, not an NPE
        requireNonEmpty(array);
        return maxElementIndex(array, array.length);
    }

    /**
     * Returns the index of the largest value among the first endIndex elements of the array
     * (the first such index in case of ties). Index 0 is always considered, so the result is 0
     * whenever endIndex is less than 2.
     *
     * @param array the array to scan; must be non-null and non-empty
     * @param endIndex the exclusive upper bound of the indices to scan; must not exceed array.length
     * @return the index of the maximum element within the scanned range
     * @throws IllegalArgumentException if array is null or empty
     */
    public static int maxElementIndex(final double[] array, final int endIndex) {
        requireNonEmpty(array);

        int maxI = 0;
        for (int i = 1; i < endIndex; i++) {
            if (array[i] > array[maxI]) {
                maxI = i;
            }
        }
        return maxI;
    }

    /**
     * Rejects null and zero-length arrays on behalf of the max-finding methods.
     */
    private static void requireNonEmpty(final double[] array) {
        if (array == null || array.length == 0) {
            throw new IllegalArgumentException("Array cannot be null or empty!");
        }
    }

    /**
     * Creates a new list consisting of elt followed by all of the elements of l.
     *
     * @param elt the element to put at the head of the new list
     * @param l the elements to append after elt; may be null, in which case only elt is present
     * @param <T> the type of the elements
     * @return a newly allocated, modifiable list; l itself is not modified
     */
    public static <T> List<T> cons(final T elt, final List<T> l) {
        final List<T> l2 = new ArrayList<T>();
        l2.add(elt);
        if (l != null) {
            l2.addAll(l);
        }
        return l2;
    }

    /**
     * Make all combinations of N size of objects
     *
     * if objects = [A, B, C]
     * if N = 1 => [[A], [B], [C]]
     * if N = 2 => [[A, A], [B, A], [C, A], [A, B], [B, B], [C, B], [A, C], [B, C], [C, C]]
     *
     * @param objects the objects to draw from
     * @param n the size of each permutation; values <= 0 yield an empty result
     * @param <T> the type of the objects
     * @param withReplacement if false, the resulting permutations will only contain unique objects from objects
     * @return a newly allocated list of permutations, each of size n
     */
    public static <T> List<List<T>> makePermutations(final List<T> objects, final int n, final boolean withReplacement) {
        final List<List<T>> combinations = new ArrayList<List<T>>();

        if (n <= 0) {
            // nothing to build
            return combinations;
        }

        if (n == 1) {
            for (final T o : objects) {
                combinations.add(Collections.singletonList(o));
            }
            return combinations;
        }

        // extend every permutation of size n - 1 by prepending each candidate object
        final List<List<T>> sub = makePermutations(objects, n - 1, withReplacement);
        for (final List<T> subI : sub) {
            for (final T a : objects) {
                if (withReplacement || !subI.contains(a)) {
                    combinations.add(cons(a, subI));
                }
            }
        }
        return combinations;
    }

    /**
     * Compares double values for equality (within 1e-6), or inequality.
     *
     * @param a the first double value
     * @param b the second double value
     * @return -1 if a is greater than b, 0 if a is equal to b within 1e-6, 1 if b is greater than a.
     */
    public static byte compareDoubles(double a, double b) {
        return compareDoubles(a, b, 1e-6);
    }

    /**
     * Compares double values for equality (within epsilon), or inequality.
     *
     * Note the sign convention is the opposite of {@link Double#compare(double, double)}.
     *
     * @param a the first double value
     * @param b the second double value
     * @param epsilon the precision within which two double values will be considered equal
     * @return -1 if a is greater than b, 0 if a is equal to b within epsilon, 1 if b is greater than a.
     */
    public static byte compareDoubles(double a, double b, double epsilon) {
        if (Math.abs(a - b) < epsilon) {
            return 0;
        }
        if (a > b) {
            return -1;
        }
        return 1;
    }

    /**
     * Returns a reversed copy of the given list.
     *
     * @param l the list to reverse; it is not modified
     * @param <T> the type of the elements
     * @return a newly allocated list holding the elements of l in reverse order
     */
    public static final <T> List<T> reverse(final List<T> l) {
        final List<T> newL = new ArrayList<T>(l);
        Collections.reverse(newL);
        return newL;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy