All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.Utils Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils;

import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterators;
import com.google.common.primitives.Ints;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.tribble.util.ParsingUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.random.RandomDataGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.param.ParamUtils;

import javax.annotation.Nullable;
import java.io.*;
import java.lang.reflect.Array;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.FormatStyle;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

public final class Utils {

    /**
     * Comparator for strings that sorts {@code null} before any non-null value and orders
     * non-null strings by their natural ({@link String#compareTo(String)}) order.
     */
    public static final Comparator<? super String> COMPARE_STRINGS_NULLS_FIRST = Comparator.nullsFirst(Comparator.naturalOrder());

    /** Private constructor: prevents instantiation from outside this class. */
    private Utils(){}

    /** Formatter producing locale-specific date-time text in the {@link FormatStyle#LONG} style. */
    private final static DateTimeFormatter longDateTimeFormatter = DateTimeFormatter.ofLocalizedDateTime(FormatStyle.LONG);

    /**
     * Seed used both to initialise the two static random generators below and to reset them
     * in {@link #resetRandomGenerator()}.
     */
    private static final long GATK_RANDOM_SEED = 47382911L;
    /** Static {@link Random} instance, created with {@link #GATK_RANDOM_SEED}. */
    private static final Random randomGenerator = new Random(GATK_RANDOM_SEED);
    /** Static {@link RandomDataGenerator} backed by a {@link Well19937c} source created with {@link #GATK_RANDOM_SEED}. */
    private static final RandomDataGenerator randomDataGenerator = new RandomDataGenerator(new Well19937c(GATK_RANDOM_SEED));

    /**
     * Gives access to the static {@link Random} instance held by this class.
     *
     * @return the same generator instance on every call.
     */
    public static Random getRandomGenerator() {
        return randomGenerator;
    }
    /**
     * Gives access to the static {@link RandomDataGenerator} instance held by this class.
     *
     * @return the same generator instance on every call.
     */
    public static RandomDataGenerator getRandomDataGenerator() {
        return randomDataGenerator;
    }

    /**
     * Re-seeds both static random generators with the fixed seed they were created with.
     */
    public static void resetRandomGenerator() {
        // The two generators are independent objects, so the order of re-seeding is irrelevant.
        randomDataGenerator.reSeed(GATK_RANDOM_SEED);
        randomGenerator.setSeed(GATK_RANDOM_SEED);
    }

    /** Maximum number of message characters put on one line of a warning box (the prefix is not counted). */
    private static final int TEXT_WARNING_WIDTH = 68;
    /** Text placed in front of every line inside a warning box. */
    private static final String TEXT_WARNING_PREFIX = "* ";
    /** Top and bottom border of a warning box: asterisks spanning the prefix length plus the text width. */
    private static final String TEXT_WARNING_BORDER = StringUtils.repeat('*', TEXT_WARNING_PREFIX.length() + TEXT_WARNING_WIDTH);
    /** The ASCII escape character (code 0x1B); the line-breaking code treats it as the start of an escape sequence. */
    private static final char ESCAPE_CHAR = '\u001B';

    /** Load factor assumed by {@link #optimumHashSize(int)}; 0.75 is the documented default of {@link java.util.HashMap}. */
    public static final float JAVA_DEFAULT_HASH_LOAD_FACTOR = 0.75f;

    /** our log, which we want to capture anything from this class */
    private static final Logger logger = LogManager.getLogger(Utils.class);

    /**
     * Builds a new list made of {@code elt} followed by all the elements of {@code l}.
     *
     * @param elt the element to put first; may be {@code null}.
     * @param l the elements to put after {@code elt}; {@code null} is treated as an empty list.
     * @param <T> the element type.
     * @return a newly allocated, modifiable list; the input list is never modified.
     */
    public static <T> List<T> cons(final T elt, final List<T> l) {
        final List<T> result = new ArrayList<>(l == null ? 1 : l.size() + 1);
        result.add(elt);
        if (l != null) {
            result.addAll(l);
        }
        return result;
    }

    /**
     * Logs {@code msg} as a boxed warning through this class's own logger.
     *
     * @param msg the warning text.
     */
    public static void warnUser(final String msg) {
        warnUser(logger, msg);
    }

    /**
     * Logs {@code msg} as a boxed warning through the given logger, issuing one WARN call
     * per line returned by {@link #warnUserLines(String)}.
     *
     * @param logger the logger to write to.
     * @param msg the warning text.
     */
    public static void warnUser(final Logger logger, final String msg) {
        final List<String> boxLines = warnUserLines(msg);
        boxLines.forEach(boxLine -> logger.warn(boxLine));
    }

    /**
     * Formats a warning message as the lines of a text box: a border line, a {@code WARNING:}
     * header, an empty prefixed line, the wrapped message lines, and a closing border line.
     *
     * @param msg the warning text; line breaks in it are honoured and long lines are wrapped.
     * @return a newly allocated, modifiable list with the lines of the box, top to bottom.
     */
    public static List<String> warnUserLines(final String msg) {
        final List<String> results = new ArrayList<>();
        results.add(TEXT_WARNING_BORDER);
        results.add(TEXT_WARNING_PREFIX + "WARNING:");
        results.add(TEXT_WARNING_PREFIX);
        prettyPrintWarningMessage(results, msg);
        results.add(TEXT_WARNING_BORDER);
        return results;
    }

    /**
     * Wraps the supplied warning message to {@link #TEXT_WARNING_WIDTH} characters per line and
     * appends each resulting line, preceded by {@link #TEXT_WARNING_PREFIX}, to {@code results}.
     *
     * <p>
     *     The message is first split at line breaks. Each piece that is too long is broken at the
     *     last whitespace found by {@link #getLastSpace(CharSequence, int)} (that whitespace is
     *     dropped); when no usable whitespace exists, the piece is broken hard at the width and no
     *     character is dropped.
     * </p>
     *
     * @param results the list the pretty printed lines are appended to
     * @param message the message
     */
    private static void prettyPrintWarningMessage(final List<String> results, final String message) {
        for (final String line : message.split("\\r?\\n")) {
            final StringBuilder builder = new StringBuilder(line);
            while (builder.length() > TEXT_WARNING_WIDTH) {
                final int lastSpace = getLastSpace(builder, TEXT_WARNING_WIDTH);
                // A whitespace at position 0 (or none at all) cannot be used as a break point.
                final boolean breakAtWhitespace = lastSpace > 0;
                final int breakPos = breakAtWhitespace ? lastSpace : TEXT_WARNING_WIDTH;
                results.add(TEXT_WARNING_PREFIX + builder.substring(0, breakPos));
                // Remove the emitted text. The whitespace we broke at is removed as well, but on a
                // hard break there is no separator, so nothing past the emitted text may be removed
                // (removing breakPos + 1 characters there would silently lose a message character).
                builder.delete(0, breakAtWhitespace ? breakPos + 1 : breakPos);
            }
            results.add(TEXT_WARNING_PREFIX + builder);
        }
    }

    /**
     * Finds the position of the last whitespace character within the first {@code width}
     * visible characters of {@code message}.
     *
     * <p>
     *     Characters belonging to an escape sequence (from {@link #ESCAPE_CHAR} up to and including
     *     the next letter) do not count towards {@code width}: the scan limit is pushed one position
     *     further for each of them, and they are never reported as whitespace.
     * </p>
     *
     * @param message The message to break.
     * @param width The width of the line.
     * @return The last whitespace location, or {@code -1} if none was found.
     */
    private static int getLastSpace(final CharSequence message, final int width) {
        final int messageLength = message.length();
        int limit = width;
        int lastWhitespace = -1;
        boolean insideEscape = false;
        for (int pos = 0; pos < limit && pos < messageLength; pos++) {
            final char current = message.charAt(pos);
            if (current == ESCAPE_CHAR) {
                insideEscape = true;
                limit++;
            } else if (insideEscape) {
                limit++;
                // the first letter terminates the escape sequence
                insideEscape = !Character.isLetter(current);
            } else if (Character.isWhitespace(current)) {
                lastWhitespace = pos;
            }
        }
        return lastWhitespace;
    }

    /**
     * Joins the string forms of the values of an {@link Object} array, putting {@code separator}
     * between consecutive values. {@code null} elements are rendered as {@code "null"}.
     *
     * @param separator the text placed between values
     * @param objects  the array with values
     *
     * @throws IllegalArgumentException if {@code separator} or {@code objects} is {@code null}.
     * @return the joined string; the empty string if {@code objects} has no elements
     */
    public static String join(final CharSequence separator, final Object ... objects) {
        Utils.nonNull(separator, "the separator cannot be null");
        Utils.nonNull(objects, "the value array cannot be null");

        final StringJoiner joined = new StringJoiner(separator);
        for (final Object value : objects) {
            joined.add(String.valueOf(value));
        }
        return joined.toString();
    }

    /**
     * Joins the values of an int array with a separator, e.g. {@code 1,2,3}.
     *
     * @param separator the text placed between values
     * @param ints   the array with values
     * @throws IllegalArgumentException if {@code separator} or {@code ints} is {@code null}.
     * @return the joined string; the empty string if {@code ints} has no elements
     */
    public static String join(final String separator, final int[] ints) {
        Utils.nonNull(separator, "the separator cannot be null");
        Utils.nonNull(ints, "the ints cannot be null");
        final StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (final int value : ints) {
            if (!first) {
                joined.append(separator);
            }
            joined.append(value);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Joins the values of a double array with a separator, e.g. {@code 1.0,2.5,3.0}.
     *
     * @param separator the text placed between values
     * @param doubles   the array with values
     * @throws IllegalArgumentException if {@code separator} or {@code doubles} is {@code null}.
     * @return the joined string; the empty string if {@code doubles} has no elements
     */
    public static String join(final String separator, final double[] doubles) {
        Utils.nonNull(separator, "the separator cannot be null");
        Utils.nonNull(doubles, "the doubles cannot be null");
        final StringBuilder joined = new StringBuilder();
        for (int idx = 0; idx < doubles.length; idx++) {
            if (idx > 0) {
                joined.append(separator);
            }
            joined.append(doubles[idx]);
        }
        return joined.toString();
    }

    /**
     * Returns a string of the form elt1.toString() [sep elt2.toString() ... sep eltN.toString()] for a collection of
     * elti objects (note there's no actual space between sep and the elti elements).  Returns
     * "" if collection is empty.  If collection contains just elt, then returns elt.toString()
     *
     * @param separator the string to use to separate objects
     * @param objects a collection of non-null objects.  the element order is defined by the iterator over objects
     * @param <T> the type of the objects
     * @return a non-null string
     * @throws NullPointerException if {@code objects} is {@code null} or contains a {@code null} element
     */
    public static <T> String join(final String separator, final Collection<T> objects) {
        if (objects.isEmpty()) { // fast path for empty collection
            return "";
        }
        final Iterator<T> iter = objects.iterator();
        final T first = iter.next();

        if (!iter.hasNext()) { // fast path for singleton collections
            return first.toString();
        }
        // full path for 2+ collection that actually need a join
        final StringBuilder ret = new StringBuilder(first.toString());
        while (iter.hasNext()) {
            ret.append(separator);
            ret.append(iter.next().toString());
        }
        return ret.toString();
    }

    /**
     * Concatenates any number of byte arrays, in argument order.
     *
     * @param allBytes the arrays to concatenate; neither the varargs array nor its elements may be {@code null}.
     * @return the concatenation. With a single non-empty input this is a copy of it; a single empty
     *         input is returned as is; otherwise, when the total length is zero, a shared empty array.
     */
    public static byte[] concat(final byte[] ... allBytes) {
        if (allBytes.length == 0) {
            return ArrayUtils.EMPTY_BYTE_ARRAY;
        }
        if (allBytes.length == 1) {
            final byte[] only = allBytes[0];
            return only.length == 0 ? only : only.clone();
        }

        int total = 0;
        for (final byte[] chunk : allBytes) {
            total += chunk.length;
        }
        if (total == 0) {
            return ArrayUtils.EMPTY_BYTE_ARRAY;
        }

        final byte[] result = new byte[total];
        int position = 0;
        for (final byte[] chunk : allBytes) {
            System.arraycopy(chunk, 0, result, position, chunk.length);
            position += chunk.length;
        }
        return result;
    }

    /**
     * Concatenates two object arrays into a newly allocated array.
     *
     * @param a left array; cannot be {@code null}.
     * @param b right array; cannot be {@code null}.
     * @param constructor creates the result array given its length; only invoked when both inputs
     *                    are non-empty (otherwise the result is a clone of one of the inputs).
     * @param <T> the component type of the arrays.
     * @return never {@code null}; always a different array instance from {@code a} and {@code b}.
     * @throws IllegalArgumentException if {@code a} or {@code b} is {@code null}.
     */
    public static <T> T[] concat(final T[] a, final T[] b, final IntFunction<T[]> constructor) {
        Utils.nonNull(a);
        Utils.nonNull(b);
        if (a.length != 0) {
            if (b.length != 0) {
                final T[] c = constructor.apply(a.length + b.length);
                System.arraycopy(a, 0, c, 0, a.length);
                System.arraycopy(b, 0, c, a.length, b.length);
                return c;
            } else {
                return a.clone();
            }
        } else if (b.length != 0) {
            return b.clone();
        } else {
            return a.clone();
        }
    }

    /**
     * Concats two byte arrays.
     * 

* A bit more efficient than calling the more general {@link #concat(byte[]...)}. *

* @param a left array to concat. * @param b right array to concat. * @return never {@code null}; */ public static byte[] concat(final byte[] a, final byte[] b) { final int length = a.length + b.length; if (length == 0) { return ArrayUtils.EMPTY_BYTE_ARRAY; } else if (length == a.length) { return a.clone(); } else if (length == b.length) { return b.clone(); } else { final byte[] c = new byte[length]; int i = 0; for (final byte aa : a) { c[i++] = aa; } for (final byte bb : b) { c[i++] = bb; } return c; } } /** * Returns a {@link List List<Integer>} representation of an primitive int array. * @param values the primitive int array to represent. * @return never code {@code null}. The returned list will be unmodifiable yet it will reflect changes in values in the original array yet * you cannot change the values */ public static List asList(final int ... values) { Utils.nonNull(values, "the input array cannot be null"); return new AbstractList() { @Override public Integer get(final int index) { return values[index]; } @Override public int size() { return values.length; } }; } /** * Returns a {@link List List<Double>} representation of an primitive double array. * @param values the primitive int array to represent. * @return never code {@code null}. The returned list will be unmodifiable yet it will reflect changes in values in the original array yet * you cannot change the values. */ public static List asList(final double ... 
values) { Utils.nonNull(values, "the input array cannot be null"); return new AbstractList() { @Override public Double get(final int index) { return values[index]; } @Override public int size() { return values.length; } }; } /** * Create a new list that contains the elements of left along with elements elts * @param left a non-null list of elements * @param elts a varargs vector for elts to append in order to left * @return A newly allocated linked list containing left followed by elts */ @SafeVarargs public static List append(final List left, final T ... elts) { Utils.nonNull(left, "left is null"); Utils.nonNull(elts, "the input array cannot be null"); final List l = new LinkedList<>(left); for (final T t : elts){ Utils.nonNull(t, "t is null"); l.add(t); } return l; } /** * Create a new string that's n copies of c * @param c the char to duplicate * @param nCopies how many copies? * @return a string */ public static String dupChar(final char c, final int nCopies) { final char[] chars = new char[nCopies]; Arrays.fill(chars, c); return new String(chars); } /** * Create a new string thats a n duplicate copies of s * @param s the string to duplicate * @param nCopies how many copies? * @return a string */ public static String dupString(final String s, int nCopies) { if ( s == null || s.equals("") ) { throw new IllegalArgumentException("Bad s " + s); } if ( nCopies < 0 ) { throw new IllegalArgumentException("nCopies must be >= 0 but got " + nCopies); } final StringBuilder b = new StringBuilder(); for ( int i = 0; i < nCopies; i++ ) { b.append(s); } return b.toString(); } /** * Create a new byte array that's n copies of b * @param b the byte to duplicate * @param nCopies how many copies? * @return a byte array */ public static byte[] dupBytes(final byte b, final int nCopies) { final byte[] bytes = new byte[nCopies]; Arrays.fill(bytes, b); return bytes; } /** * Returns the number of occurrences of a boolean element in a boolean array. 
* @param element * @param array cannot be null * @return */ public static int countBooleanOccurrences(final boolean element, final boolean[] array) { Utils.nonNull(array); int count = 0; for (final boolean b : array) { if (element == b) { count++; } } return count; } /** * Splits expressions in command args by spaces and returns the array of expressions. * Expressions may use single or double quotes to group any individual expression, but not both. * @param args Arguments to parse. * @return Parsed expressions. */ public static String[] escapeExpressions(final String args) { Utils.nonNull(args); // special case for ' and " so we can allow expressions if (args.indexOf('\'') != -1) { return escapeExpressions(args, "'"); } else if (args.indexOf('\"') != -1) { return escapeExpressions(args, "\""); } else { return args.trim().split(" +"); } } /** * Splits expressions in command args by spaces and the supplied delimiter and returns the array of expressions. * @param args Arguments to parse. * @param delimiter Delimiter for grouping expressions. * @return Parsed expressions. */ private static String[] escapeExpressions(final String args, final String delimiter) { String[] command = {}; final String[] split = args.split(delimiter); for (int i = 0; i < split.length - 1; i += 2) { final String arg = split[i].trim(); if (!arg.isEmpty()) { // if the unescaped arg has a size command = ArrayUtils.addAll(command, arg.split(" +")); } command = ArrayUtils.addAll(command, split[i + 1]); } final String arg = split[split.length - 1].trim(); if (split.length % 2 == 1 && !arg.isEmpty()) { // if the last unescaped arg has a size command = ArrayUtils.addAll(command, arg.split(" +")); } return command; } /** * makes an array filled with n copies of the given char. */ public static byte[] repeatChars(final char c, final int n) { return repeatBytes((byte)c, n); } /** * makes an array filled with n copies of the given byte. 
*/ public static byte[] repeatBytes(final byte b, final int n) { if (n < 0){ throw new IllegalArgumentException("negative length"); } final byte[] bytes = new byte[n]; Arrays.fill(bytes, b); return bytes; } /** * Make all combinations of N size of objects * * if objects = [A, B, C] * if N = 1 => [[A], [B], [C]] * if N = 2 => [[A, A], [B, A], [C, A], [A, B], [B, B], [C, B], [A, C], [B, C], [C, C]] * * @param objects list of objects * @param n size of each combination * @param withReplacement if false, the resulting permutations will only contain unique objects from objects * @return a list with all combinations with size n of objects. */ public static List> makePermutations(final List objects, final int n, final boolean withReplacement) { final List> combinations = new ArrayList<>(); if ( n == 1 ) { for ( final T o : objects ) { combinations.add(Collections.singletonList(o)); } } else if (n > 1) { final List> sub = makePermutations(objects, n - 1, withReplacement); for ( final List subI : sub ) { for ( final T a : objects ) { if ( withReplacement || ! 
subI.contains(a) ) { combinations.add(Utils.cons(a, subI)); } } } } return combinations; } /** * @see #calcMD5(byte[]) */ public static String calcMD5(final String s) { Utils.nonNull(s, "s is null"); return calcMD5(s.getBytes()); } /** * Calculate the md5 for bytes, and return the result as a 32 character string * * @param bytes the bytes to calculate the md5 of * @return the md5 of bytes, as a 32-character long string */ public static String calcMD5(final byte[] bytes) { Utils.nonNull(bytes, "the input array cannot be null"); try { return Utils.MD5ToString(MessageDigest.getInstance("MD5").digest(bytes)); } catch ( final NoSuchAlgorithmException e ) { throw new IllegalStateException("MD5 digest algorithm not present", e); } } /** * Calculates the MD5 for the specified file and returns it as a String * * @param file file whose MD5 to calculate * @return file's MD5 in String form * @throws IOException if the file could not be read */ public static String calculateFileMD5( final File file ) throws IOException{ return calculatePathMD5(file.toPath()); } /** * Calculates the MD5 for the specified file and returns it as a String * * @param path file whose MD5 to calculate * @return file's MD5 in String form * @throws IOException if the file could not be read */ public static String calculatePathMD5(final Path path) throws IOException { // This doesn't have as nice error messages as FileUtils, but it's close. 
String fname = path.toUri().toString(); if (!Files.exists(path)) { throw new FileNotFoundException("File '" + fname + "' does not exist"); } if (Files.isDirectory(path)) { throw new IOException("File '" + fname + "' exists but is a directory"); } if (!Files.isRegularFile(path)) { throw new IOException("File '" + fname + "' exists but is not a regular file"); } try { final MessageDigest md = MessageDigest.getInstance("MD5"); final byte[] buff = new byte[8192]; final InputStream is = Files.newInputStream(path); int bytesRead; while ((bytesRead = is.read(buff)) > 0) { md.update(buff, 0, bytesRead); } return Utils.MD5ToString(md.digest()); } catch ( final NoSuchAlgorithmException e ) { throw new IllegalStateException("MD5 digest algorithm not present", e); } } private static String MD5ToString(final byte[] bytes) { final BigInteger bigInt = new BigInteger(1, bytes); final String md5String = bigInt.toString(16); return StringUtils.repeat("0", 32 - md5String.length()) + md5String; } /** * Checks that an Object {@code object} is not null and returns the same object or throws an {@link IllegalArgumentException} * @param object any Object * @return the same object * @throws IllegalArgumentException if a {@code o == null} */ public static T nonNull(final T object) { return Utils.nonNull(object, "Null object is not allowed here."); } /** * Checks that an {@link Object} is not {@code null} and returns the same object or throws an {@link IllegalArgumentException} * @param object any Object * @param message the text message that would be passed to the exception thrown when {@code o == null}. 
* @return the same object * @throws IllegalArgumentException if a {@code o == null} */ public static T nonNull(final T object, final String message) { if (object == null) { throw new IllegalArgumentException(message); } return object; } /** * Checks that an {@link Object} is not {@code null} and returns the same object or throws an {@link IllegalArgumentException} * @param object any Object * @param message the text message that would be passed to the exception thrown when {@code o == null}. * @return the same object * @throws IllegalArgumentException if a {@code o == null} */ public static T nonNull(final T object, final Supplier message) { if (object == null) { throw new IllegalArgumentException(message.get()); } return object; } /** * Checks that a {@link Collection} is not {@code null} and that it is not empty. * If it's non-null and non-empty it returns the input, otherwise it throws an {@link IllegalArgumentException} * @param collection any Collection * @param message a message to include in the output * @return the original collection * @throws IllegalArgumentException if collection is null or empty */ public static > T nonEmpty(T collection, String message){ nonNull(collection, "The collection is null: " + message); if(collection.isEmpty()){ throw new IllegalArgumentException("The collection is empty: " + message); } else { return collection; } } /** * Checks that a {@link Collection} is not {@code null} and that it is not empty. * If it's non-null and non-empty it returns the true * @param collection any Collection * @return true if the collection exists and has elements */ public static boolean isNonEmpty(Collection collection){ return collection != null && !collection.isEmpty(); } /** * Checks that a {@link String} is not {@code null} and that it is not empty. 
* If it's non-null and non-empty it returns the input, otherwise it throws an {@link IllegalArgumentException} * @param string any String * @param message a message to include in the output * @return the original string * @throws IllegalArgumentException if string is null or empty */ public static String nonEmpty(String string, String message){ nonNull(string, "The string is null: " + message); if(string.isEmpty()){ throw new IllegalArgumentException("The string is empty: " + message); } else { return string; } } /** * Checks that a {@link String} is not {@code null} and that it is not empty. * If it's non-null and non-empty it returns the input, otherwise it throws an {@link IllegalArgumentException} * @param string any String * @return the original string * @throws IllegalArgumentException if string is null or empty */ public static String nonEmpty(final String string){ return nonEmpty(string, "string must not be null or empty"); } /** * Checks that a {@link Collection} is not {@code null} and that it is not empty. * If it's non-null and non-empty it returns the input, otherwise it throws an {@link IllegalArgumentException} * @param collection any Collection * @return the original collection * @throws IllegalArgumentException if collection is null or empty */ public static > T nonEmpty(T collection){ return nonEmpty(collection, "collection must not be null or empty."); } /** * Checks that the collection does not contain a {@code null} value (throws an {@link IllegalArgumentException} if it does). * @param collection collection * @param message the text message that would be pass to the exception thrown when c contains a null. 
* @throws IllegalArgumentException if collection is null or contains any null elements */ public static void containsNoNull(final Collection collection, final String message) { Utils.nonNull(collection, message); //cannot use Collection.contains(null) here because this throws a NullPointerException when used with many Sets if (collection.stream().anyMatch(v -> v == null)){ throw new IllegalArgumentException(message); } } /** * Checks that the collection does not contain a duplicate value (throws an {@link IllegalArgumentException} if it does). * The implementation creates a {@link Set} as an intermediate step or detecting duplicates and returns this Set because * it is sometimes useful to do so. * * @param c collection * @param message A message to emit in case of error, in addition to reporting the first duplicate value found. * @throws IllegalArgumentException if a {@code o == null} */ public static Set checkForDuplicatesAndReturnSet(final Collection c, final String message) { final Set set = new LinkedHashSet<>(); for (final E element : c) { if (!set.add(element)) { throw new IllegalArgumentException(String.format(message + " Value %s appears more than once.", element.toString())); } } return set; } /** * Checks whether an index is within bounds considering a collection or array of a particular size * whose first position index is 0 * @param index the query index. * @param length the collection or array size. * @return same value as the input {@code index}. 
*/ public static int validIndex(final int index, final int length) { if (index < 0) { throw new IllegalArgumentException("the index cannot be negative: " + index); } else if (index >= length) { throw new IllegalArgumentException("the index points past the last element of the collection or array: " + index + " > " + (length -1)); } return index; } /** * Checks whether an index is within bounds considering a collection or array of a particular size * whose first position index is 0 * @param index the query index. * @param length the collection or array size. * @param errorMessage the error message to use in case of an exception is thrown. * @return same value as the input {@code index}. */ public static int validIndex(final int index, final int length, final String errorMessage) { if (index < 0) { throw new IllegalArgumentException(errorMessage); } else if (index >= length) { throw new IllegalArgumentException(errorMessage); } return index; } public static void validateArg(final boolean condition, final String msg){ if (!condition){ throw new IllegalArgumentException(msg); } } public static void validateArg(final boolean condition, final Supplier msg){ if (!condition){ throw new IllegalArgumentException(msg.get()); } } /** * Check a condition that should always be true and throw an {@link IllegalStateException} if false. If msg is not a * String literal i.e. if it requires computation, use the Supplier version, below. */ public static void validate(final boolean condition, final String msg){ if (!condition){ throw new IllegalStateException(msg); } } /** * Check a condition that should always be true and throw an {@link IllegalStateException} if false. 
*/ public static void validate(final boolean condition, final Supplier msg){ if (!condition){ throw new IllegalStateException(msg.get()); } } public static void printIf(final boolean condition, final Supplier msg){ if (condition){ System.out.println(msg.get()); } } /** * Calculates the optimum initial size for a hash table given the maximum number * of elements it will need to hold. The optimum size is the smallest size that * is guaranteed not to result in any rehash/table-resize operations. * * @param maxElements The maximum number of elements you expect the hash table * will need to hold * @return The optimum initial size for the table, given maxElements */ public static int optimumHashSize ( final int maxElements ) { return (int) (maxElements / JAVA_DEFAULT_HASH_LOAD_FACTOR) + 2; } /** * Compares sections from to byte arrays to verify whether they contain the same values. * * @param left first array to compare. * @param leftOffset first position of the first array to compare. * @param right second array to compare. * @param rightOffset first position of the second array to compare. * @param length number of positions to compare. * * @throws IllegalArgumentException if
    *
  • either {@code left} or {@code right} is {@code null} or
  • *
  • any off the offset or length combine point outside any of the two arrays
  • *
* @return {@code true} iff {@code length} is 0 or all the bytes in both ranges are the same two-by-two. */ public static boolean equalRange(final byte[] left, final int leftOffset, final byte[] right, final int rightOffset, final int length) { Utils.nonNull(left, "left cannot be null"); Utils.nonNull(right, "right cannot be null"); validRange(length, leftOffset, left.length, "left"); validRange(length, rightOffset, right.length, "right"); for (int i = 0; i < length; i++) { if (left[leftOffset + i] != right[rightOffset + i]) { return false; } } return true; } private static void validRange(final int length, final int offset, final int size, final String msg){ if (length < 0) { throw new IllegalArgumentException(msg + " length cannot be negative"); } if (offset < 0) { throw new IllegalArgumentException(msg + " offset cannot be negative"); } if (offset + length > size) { throw new IllegalArgumentException(msg + " length goes beyond end of left array"); } } /** * Skims out positions of an array returning a shorter one with the remaning positions in the same order. * @param original the original array to splice. * @param remove for each position in {@code original} indicates whether it should be spliced away ({@code true}), * or retained ({@code false}) * * @param the array type. * * @throws IllegalArgumentException if either {@code original} or {@code remove} is {@code null}, * or {@code remove length is different to {@code original}'s}, or {@code original} is not in * fact an array. * * @return never {@code null}. */ public static T skimArray(final T original, final boolean[] remove) { return skimArray(original,0,null,0,remove,0); } /** * Skims out positions of an array returning a shorter one with the remaining positions in the same order. * *

* If the {@code dest} array provide is not long enough a new one will be created and returned with the * same component type. All elements before {@code destOffset} will be copied from the input to the * result array. If {@code dest} is {@code null}, a brand-new array large enough will be created where * the position preceding {@code destOffset} will be left with the default value. The component type * Will match the one of the {@code source} array. *

* * @param source the original array to splice. * @param sourceOffset the first position to skim. * @param dest the destination array. * @param destOffset the first position where to copy the skimmed array values. * @param remove for each position in {@code original} indicates whether it should be spliced away ({@code true}), * or retained ({@code false}) * @param removeOffset the first position in the remove index array to consider. * * @param the array type. * * @throws IllegalArgumentException if either {@code original} or {@code remove} is {@code null}, * or {@code remove length is different to {@code original}'s}, or {@code original} is not in * fact an array. * * @return never {@code null}. */ public static T skimArray(final T source, final int sourceOffset, final T dest, final int destOffset, final boolean[] remove, final int removeOffset) { Utils.nonNull(source, "the source array cannot be null"); @SuppressWarnings("unchecked") final Class sourceClazz = (Class) source.getClass(); if (!sourceClazz.isArray()) { throw new IllegalArgumentException("the source array is not in fact an array instance"); } final int length = Array.getLength(source) - sourceOffset; if (length < 0) { throw new IllegalArgumentException("the source offset goes beyond the source array length"); } return skimArray(source,sourceOffset,dest,destOffset,remove,removeOffset,length); } /** * Skims out positions of an array returning a shorter one with the remaning positions in the same order. * *

* If the {@code dest} array provided is not long enough, a new one will be created and returned with the
     * same component type. All elements before {@code destOffset} will be copied from {@code dest} to the
     * result array. If {@code dest} is {@code null}, a brand-new array large enough will be created where
     * the positions preceding {@code destOffset} will be left with the default value. The component type
     * will match the one of the {@code source} array.
     *

* * @param source the original array to splice. * @param sourceOffset the first position to skim. * @param dest the destination array. * @param destOffset the first position where to copy the skimed array values. * @param remove for each position in {@code original} indicates whether it should be spliced away ({@code true}), * or retained ({@code false}) * @param removeOffset the first position in the remove index array to consider. * @param length the total number of position in {@code source} to consider. Thus only the {@code sourceOffset} to * {@code sourceOffset + length - 1} region will be skimmed. * * @param the array type. * * @throws IllegalArgumentException if either {@code original} or {@code remove} is {@code null}, * or {@code remove length is different to {@code original}'s}, or {@code original} is not in * fact an array. * * @return never {@code null}. */ public static T skimArray(final T source, final int sourceOffset, final T dest, final int destOffset, final boolean[] remove, final int removeOffset, final int length) { Utils.nonNull(source, "the source array cannot be null"); Utils.nonNull(remove, "the remove array cannot be null"); if (sourceOffset < 0) { throw new IllegalArgumentException("the source array offset cannot be negative"); } if (destOffset < 0) { throw new IllegalArgumentException("the destination array offset cannot be negative"); } if (removeOffset < 0) { throw new IllegalArgumentException("the remove array offset cannot be negative"); } if (length < 0) { throw new IllegalArgumentException("the length provided cannot be negative"); } final int removeLength = Math.min(remove.length - removeOffset,length); if (removeLength < 0) { throw new IllegalArgumentException("the remove offset provided falls beyond the remove array end"); } @SuppressWarnings("unchecked") final Class sourceClazz = (Class) source.getClass(); if (!sourceClazz.isArray()) { throw new IllegalArgumentException("the source array is not in fact an array instance"); } 
final Class destClazz = skimArrayDetermineDestArrayClass(dest, sourceClazz); final int sourceLength = Array.getLength(source); if (sourceLength < length + sourceOffset) { throw new IllegalArgumentException("the source array is too small considering length and offset"); } // count how many positions are to be removed. int removeCount = 0; final int removeEnd = removeLength + removeOffset; for (int i = removeOffset; i < removeEnd; i++) { if (remove[i]) { removeCount++; } } final int newLength = length - removeCount; @SuppressWarnings("unchecked") final T result = skimArrayBuildResultArray(dest, destOffset, destClazz, newLength); // No removals, just copy the whole thing. if (removeCount == 0) { System.arraycopy(source, sourceOffset, result, destOffset, length); } else if (length > 0) { // if length == 0 nothing to do. int nextOriginalIndex = 0; int nextNewIndex = 0; int nextRemoveIndex = removeOffset; while (nextOriginalIndex < length && nextNewIndex < newLength) { while (nextRemoveIndex < removeEnd && remove[nextRemoveIndex++]) { nextOriginalIndex++; } // skip positions to be spliced. // Since we make the nextNewIndex < newLength check in the while condition // there is no need to include the following break, as is guaranteed not to be true: // if (nextOriginalIndex >= length) break; // we reach the final (last positions are to be spliced. 
final int copyStart = nextOriginalIndex; while (++nextOriginalIndex < length && (nextRemoveIndex >= removeEnd || !remove[nextRemoveIndex])) { nextRemoveIndex++; } final int copyEnd = nextOriginalIndex; final int copyLength = copyEnd - copyStart; System.arraycopy(source, sourceOffset + copyStart, result, destOffset + nextNewIndex, copyLength); nextNewIndex += copyLength; } } return result; } @SuppressWarnings("unchecked") private static T skimArrayBuildResultArray(final T dest, final int destOffset, final Class destClazz, final int newLength) { final T result; if (dest == null) { result = (T) Array.newInstance(destClazz.getComponentType(), newLength + destOffset); } else if (Array.getLength(dest) < newLength + destOffset) { result = (T) Array.newInstance(destClazz.getComponentType(),newLength + destOffset); if (destOffset > 0) { System.arraycopy(dest, 0, result, 0, destOffset); } } else { result = dest; } return result; } @SuppressWarnings("unchecked") private static Class skimArrayDetermineDestArrayClass(final T dest, final Class sourceClazz) { final Class destClazz; if (dest == null) { destClazz = sourceClazz; } else { destClazz = (Class) dest.getClass(); if (destClazz != sourceClazz) { if (!destClazz.isArray()) { throw new IllegalArgumentException("the destination array class must be an array"); } if (sourceClazz.getComponentType().isAssignableFrom(destClazz.getComponentType())) { throw new IllegalArgumentException("the provided destination array class cannot contain values from the source due to type incompatibility"); } } } return destClazz; } /** * Checks if the read header contains any reads groups from non-Illumina and issue a warning of that's the case. 
*/ public static void warnOnNonIlluminaReadGroups(final SAMFileHeader readsHeader, final Logger logger) { Utils.nonNull(readsHeader, "header"); Utils.nonNull(logger, "logger"); if (readsHeader.getReadGroups().stream().anyMatch(rg -> NGSPlatform.fromReadGroupPL(rg.getPlatform()) != NGSPlatform.ILLUMINA)){ logger.warn("This tool has only been well tested on ILLUMINA-based sequencing data. For other data use at your own risk."); } } /** * Boolean xor operation. Only true if x != y. * * @param x a boolean * @param y a boolean * @return true if x != y */ public static boolean xor(final boolean x, final boolean y) { return x != y; } /** * Find the last occurrence of the query sequence in the reference sequence * * Returns the index of the last occurrence or -1 if the query sequence is not found * * @param reference the reference sequence * @param query the query sequence */ public static int lastIndexOf(final byte[] reference, final byte[] query) { int queryLength = query.length; // start search from the last possible matching position and search to the left for (int r = reference.length - queryLength; r >= 0; r--) { int q = 0; while (q < queryLength && reference[r+q] == query[q]) { q++; } if (q == queryLength) { return r; } } return -1; } /** * Simple wrapper for sticking elements of a int[] array into a List * @param ar - the array whose elements should be listified * @return - a List where each element has the same value as the corresponding index in @ar */ public static List listFromPrimitives(final int[] ar) { Utils.nonNull(ar); return Ints.asList(ar); } /** * Concatenates a series of {@link Iterator}s (all of the same type) into a single {@link Iterator}. 
* @param iterator an {@link Iterator} of {@link Iterator}s * @param the type of the iterator * @return an {@link Iterator} over the underlying {@link Iterator}s */ public static Iterator concatIterators(final Iterator> iterator) { Utils.nonNull(iterator, "iterator"); return new AbstractIterator() { Iterator subIterator; @Override protected T computeNext() { if (subIterator != null && subIterator.hasNext()) { return subIterator.next(); } while (iterator.hasNext()) { subIterator = iterator.next().iterator(); if (subIterator.hasNext()) { return subIterator.next(); } } return endOfData(); } }; } public static Stream stream(final Enumeration enumeration) { return Utils.stream(Iterators.forEnumeration(enumeration)); } public static Stream stream(final Iterable iterable) { return StreamSupport.stream(iterable.spliterator(), false); } public static Stream stream(final Iterator iterator) { return stream(() -> iterator); } /** * Returns a function that always returns its input argument. Unlike {@link Function#identity()} the returned * function is also serializable. * * @param the type of the input and output objects to the function * @return a function that always returns its input argument */ @SuppressWarnings("unchecked") public static Function identityFunction() { return (Function & Serializable) t -> t; } /** * Like Guava's {@link Iterators#transform(Iterator, com.google.common.base.Function)}, but runs a fixed number * ({@code numThreads}) of transformations in parallel, while maintaining ordering of the output iterator. * This is useful if the transformations are CPU intensive. 
*/ public static Iterator transformParallel(final Iterator fromIterator, final Function function, final int numThreads) { Utils.nonNull(fromIterator, "fromIterator"); Utils.nonNull(function, "function"); Utils.validateArg(numThreads >= 1, "numThreads must be at least 1"); if (numThreads == 1) { // defer to Guava for single-threaded case return Iterators.transform(fromIterator, new com.google.common.base.Function() { @Nullable @Override public T apply(@Nullable final F input) { return function.apply(input); } }); } // use an executor service for the multi-threaded case final ExecutorService executorService = Executors.newFixedThreadPool(numThreads); final Queue> futures = new LinkedList<>(); return new AbstractIterator() { @Override protected T computeNext() { try { while (fromIterator.hasNext()) { if (futures.size() == numThreads) { return futures.remove().get(); } final F next = fromIterator.next(); final Future future = executorService.submit(() -> function.apply(next)); futures.add(future); } if (!futures.isEmpty()) { return futures.remove().get(); } executorService.shutdown(); return endOfData(); } catch (InterruptedException | ExecutionException e) { throw new GATKException("Problem running task", e); } } }; } /** Gets duplicated items in the collection. */ public static Set getDuplicatedItems(final Collection objects) { final Set unique = new HashSet<>(); return objects.stream() .filter(name -> !unique.add(name)) .collect(Collectors.toSet()); } /** * Return the given {@code dateTime} formatted as string for display. * @param dateTime the date/time to be formatted * @return String representing the {@code dateTime}. */ public static String getDateTimeForDisplay(final ZonedDateTime dateTime) { return dateTime.format(longDateTimeFormatter); } /** * Set the Locale to US English so that numbers will always be formatted in the US style. 
*/ public static void forceJVMLocaleToUSEnglish() { Locale.setDefault(Locale.US); } /** * Streams and sorts a collection of objects and returns the integer median entry of the sorted list * @param values List of sortable entries from which to select the median */ public static > T getMedianValue(List values) { final List sorted = values.stream().sorted().collect(Collectors.toList()); return sorted.get(sorted.size() / 2); } /** * Splits a String using indexOf instead of regex to speed things up. * This method produces the same results as {@link String#split(String)} and {@code String.split(String, 0)}, * but has been measured to be ~2x faster (see {@code StringSplitSpeedUnitTest} for details). * * @param str the string to split. * @param delimiter the delimiter used to split the string. * @return A {@link List} of {@link String} tokens. */ public static List split(final String str, final char delimiter) { final List tokens; if ( str.isEmpty() ) { tokens = new ArrayList<>(1); tokens.add(""); } else { tokens = ParsingUtils.split(str, delimiter); removeTrailingEmptyStringsFromEnd(tokens); } return tokens; } /** * Splits a String using indexOf instead of regex to speed things up. * If given an empty delimiter, will return each character in the string as a token. * This method produces the same results as {@link String#split(String)} and {@code String.split(String, 0)}, * but has been measured to be ~2x faster (see {@code StringSplitSpeedUnitTest} for details). * * @param str the string to split. * @param delimiter the delimiter used to split the string. * @return A {@link List} of {@link String} tokens. */ public static List split(final String str, final String delimiter) { // This is 10 because the ArrayList default capacity is 10 (but private). return split(str, delimiter, 10); } /** * Splits a given {@link String} using {@link String#indexOf(String)} instead of regex to speed things up. 
* If given an empty delimiter, will return each character in the string as a token. * This method produces the same results as {@link String#split(String)} and {@code String.split(String, 0)}, * but has been measured to be ~2x faster (see {@code StringSplitSpeedUnitTest} for details). * * @param str The {@link String} to split. * @param delimiter The delimiter used to split the {@link String}. * @param expectedNumTokens The number of tokens expected (used to initialize the capacity of the {@link ArrayList}). * @return A {@link List} of {@link String} tokens. */ private static List split(final String str, final String delimiter, final int expectedNumTokens) { final List result; if ( str.isEmpty() ) { result = new ArrayList<>(1); result.add(""); } else if ( delimiter.isEmpty() ) { result = new ArrayList<>(str.length()); for ( int i = 0; i < str.length(); ++i ) { result.add(str.substring(i, i + 1)); } } else if ( delimiter.length() == 1 ) { result = split(str, delimiter.charAt(0)); } else { result = new ArrayList<>(expectedNumTokens); int delimiterIdx = -1; int tokenStartIdx = delimiterIdx + 1; do { delimiterIdx = str.indexOf(delimiter, tokenStartIdx); final String token = (delimiterIdx != -1 ? str.substring(tokenStartIdx, delimiterIdx) : str.substring(tokenStartIdx)); result.add(token); tokenStartIdx = delimiterIdx + delimiter.length(); } while ( delimiterIdx != -1 ); removeTrailingEmptyStringsFromEnd(result); } return result; } private static void removeTrailingEmptyStringsFromEnd(final List result) { // Remove all trailing empty strings to emulate the behavior of String.split: // We remove items from the end of the list to our index // so that we can take advantage of better performance of removing items from the end // of certain concrete lists: while ( (!result.isEmpty()) && (result.get(result.size() - 1).isEmpty()) ) { result.remove(result.size() - 1); } } /** * Take a map of a value to a list and reverse it. 
Note that no assumptions of uniqueness are made, so the returned
     * values are collections too (sets of keys).
     *
     *

For example:

* * Input:
* k -> {a,b}
* j -> {a}
* * Output:
* a -> {k,j}
* b -> {k}
* * Any sorting in the input map will be lost in the output. * * @param somethingToListMap a map from a value to a list of values. Never {@code null} * @param class of the key of the input * @param class of the values in the list of the input * @return A new mapping from class of values to set of keys. Never {@code null} */ public static Map> getReverseValueToListMap(final Map> somethingToListMap) { final Map> result = new HashMap<>(); for (final Map.Entry> entry : somethingToListMap.entrySet()) { entry.getValue().forEach(v -> result.computeIfAbsent(v, k -> new HashSet<>()).add(entry.getKey())); } return result; } /** * Convenience function that formats a percentage as a %.2f string * * @param x number of objects part of total that meet some criteria * @param total count of all objects, including x * @return a String percent rate, or NA if total == 0 */ public static String formattedPercent(final long x, final long total) { return total == 0 ? "NA" : String.format("%.2f", (100.0*x) / total); } /** * Convenience function that formats a ratio as a %.2f string * * @param num number of observations in the numerator * @param denom number of observations in the denumerator * @return a String formatted ratio, or NA if all == 0 */ public static String formattedRatio(final long num, final long denom) { return denom == 0 ? 
"NA" : String.format("%.2f", num / (1.0 * denom)); } /** * Given a collection of strings and a collection of regular expressions, generates the set of strings that match * any expression * @param sourceValues collection of strings from which to to select * @param filterExpressions list of expressions to use for matching * @param exactMatch If true match filters exactly, otherwise use as both exact and regular expressions * @return A new set strings from sourceValues that satisfy at least one of the expressions in sampleExpressions */ public static Set filterCollectionByExpressions(final Collection sourceValues, final Collection filterExpressions, final boolean exactMatch) { Utils.nonNull(filterExpressions); Utils.nonNull(sourceValues); final Set filteredValues = new LinkedHashSet<>(); Collection patterns = null; if (!exactMatch) { patterns = compilePatterns(filterExpressions); } for (final String value : sourceValues) { if (filterExpressions.contains(value)) { filteredValues.add(value); } else if (!exactMatch) { for (final Pattern pattern : patterns) { if (pattern.matcher(value).find()) { filteredValues.add(value); break; } } } } return filteredValues; } private static Collection compilePatterns(final Collection filters) { final Collection patterns = new ArrayList(); for (final String filter: filters) { patterns.add(Pattern.compile(filter)); } return patterns; } /** * Runs a task in parallel returning it returned result. *

This call will wait until such task is completed.

* @param threads number of threads requested. 0 would result in using a system default, * usually the host number of CPU cores. * @param supplier the task to run. * @param the type of the return. * @return whatever the input task returns in the end, it can be {@code null}. * @throws GATKException if the run was interrupted or resulted in a checked exception. Unchecked exceptions and Error * progragate as they are. */ public static T runInParallel(final int threads, final Supplier supplier) { final ForkJoinPool threadPool = threads == 0 ? new ForkJoinPool() : new ForkJoinPool(threads); try { return threadPool.submit(supplier::get).get(); } catch (final InterruptedException e) { throw new GATKException("task interrupted", e); } catch (final ExecutionException e) { final Throwable cause = e.getCause(); if (cause instanceof RuntimeException) { throw (RuntimeException) cause; } else if (cause instanceof Error) { throw (Error) cause; } else { throw new GATKException("exception when executing parallel task ", cause); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy