All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.testutils.ReadClipperTestUtils Maven / Gradle / Ivy

package org.broadinstitute.hellbender.testutils;

import com.google.common.collect.Lists;
import htsjdk.samtools.*;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
import org.broadinstitute.hellbender.utils.read.CigarBuilder;
import org.broadinstitute.hellbender.utils.read.CigarUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.*;
import java.util.stream.Collectors;

public final class ReadClipperTestUtils {
    //Should contain all the utils needed for tests to mass produce
    //reads, cigars, and other needed classes

    static final byte[] BASES = {'A', 'C', 'T', 'G'};
    static final byte[] QUALS = {2, 15, 25, 30};

    static final List> LEADING_CLIPS = Arrays.asList(
            Collections.emptyList(),
            Collections.singletonList(new CigarElement(1, CigarOperator.HARD_CLIP)),
            Collections.singletonList(new CigarElement(1, CigarOperator.SOFT_CLIP)),
            Arrays.asList(new CigarElement(1, CigarOperator.HARD_CLIP), new CigarElement(1, CigarOperator.SOFT_CLIP)));

    static final List> TRAILING_CLIPS = LEADING_CLIPS.stream().map(Lists::reverse).collect(Collectors.toList());

    static final CigarElement[] CORE_CIGAR_ELEMENTS = {
            new CigarElement(1, CigarOperator.INSERTION),
            new CigarElement(1, CigarOperator.DELETION),
            new CigarElement(1, CigarOperator.MATCH_OR_MISMATCH)};

    static final CigarElement[] CORE_CIGAR_ELEMENTS_INCLUDING_SKIPS = {
            new CigarElement(1, CigarOperator.INSERTION),
            new CigarElement(1, CigarOperator.DELETION),
            new CigarElement(1, CigarOperator.MATCH_OR_MISMATCH),
            new CigarElement(1, CigarOperator.SKIPPED_REGION)};

    /**
     * Make a read from the CIGAR string
     *
     * @param cigarString string used to create a CIGAR
     * @return artificial read
     */
    public static GATKRead makeReadFromCigar(String cigarString) {
        return makeReadFromCigar(TextCigarCodec.decode(cigarString));
    }

    /**
     * Make a read from the CIGAR.
     *
     * @param cigar
     * @return artificial read
     */
    public static GATKRead makeReadFromCigar(Cigar cigar) {
        return makeReadFromCigar(cigar, 0);
    }

    private static GATKRead makeReadFromCigar(Cigar cigar, int lengthChange) {
        int readLength = cigar.getReadLength();
        if (readLength >= -lengthChange) {
            readLength += lengthChange;
        }
        return ArtificialReadUtils.createArtificialRead(arrayFromArrayWithLength(BASES, readLength), arrayFromArrayWithLength(QUALS, readLength), cigar.toString());
    }

    private static byte [] arrayFromArrayWithLength(final byte[] array, final int length) {
        final byte [] output = new byte[length];
        for (int j = 0; j < length; j++) {
            output[j] = array[(j % array.length)];
        }
        return output;
    }

    /**
     * Make a read from the CIGAR
     *
     * @param cigarString  string used to create a CIGAR
     * @param lengthChange change in read length relative the CIGAR length
     * @return artificial read
     */
    public static GATKRead makeReadFromCigar(String cigarString, int lengthChange) {
        return makeReadFromCigar(TextCigarCodec.decode(cigarString), lengthChange);
    }

    /**
     * This function generates every valid permutation of cigar strings (with a given set of cigarElement) with a given length.
     * 

* A valid cigar object obeys the following rules: * - No Hard/Soft clips in the middle of the read * - No deletions in the beginning / end of the read * - No repeated adjacent element (e.g. 1M2M, this should be 3M) * - No consecutive I/D elements * * @param maximumCigarElements the maximum number of elements in the cigar * @return a list with all valid Cigar objects */ public static List generateCigarList(int maximumCigarElements, final boolean includeSkips) { final CigarElement[] coreElements = includeSkips ? CORE_CIGAR_ELEMENTS_INCLUDING_SKIPS : CORE_CIGAR_ELEMENTS; List cigarList = new ArrayList<>(); for (final List leadingClipElements : LEADING_CLIPS) { for (final List trailingClipElements : TRAILING_CLIPS) { final int maximumCoreElements = maximumCigarElements - leadingClipElements.size() - trailingClipElements.size(); if (maximumCoreElements < 1) { continue; } int numCoreElements = coreElements.length; int[] cigarCombination = new int[maximumCoreElements]; Arrays.fill(cigarCombination, 0); int currentIndex = 0; while (true) { try { final Cigar coreCigar = createCigarFromCombination(cigarCombination, coreElements); // create the cigar if (CigarUtils.isGood(coreCigar)) {// check if it's valid final CigarBuilder builder = new CigarBuilder(); builder.addAll(leadingClipElements); builder.addAll(coreCigar.getCigarElements()); builder.addAll(trailingClipElements); cigarList.add(builder.make()); // add it } } catch (final Exception ex) { } // we are creating random cigars, many of which are invalid. boolean currentIndexChanged = false; while (currentIndex < maximumCoreElements && cigarCombination[currentIndex] == numCoreElements - 1) { currentIndex++; // find the next index to increment currentIndexChanged = true; // keep track of the fact that we have changed indices! } if (currentIndex == maximumCoreElements) // if we hit the end of the array, we're done. break; cigarCombination[currentIndex]++; // otherwise advance the current index if (currentIndexChanged) { // if we have changed index, then... for (int i = 0; i < currentIndex; i++) cigarCombination[i] = 0; // reset everything from 0->currentIndex currentIndex = 0; // go back to the first index } } } } return cigarList; } private static Cigar createCigarFromCombination(int[] cigarCombination, CigarElement[] cigarElements) { return new Cigar(Arrays.stream(cigarCombination).mapToObj(n -> cigarElements[n]).collect(Collectors.toList())); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy