All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.walkers.consensus.MoleculeID Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.walkers.consensus;

import htsjdk.samtools.SAMTag;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.List;
import java.util.Objects;

/**
 * A container class for the molecule ID, which consists of an integer molecule number and a
 * binary strand label.
 * For example, reads with the tags 12/A and 12/B originated from the same DNA fragment before PCR
 * (i.e. from the same library), but they originated from different strands in that library.
 * In other words, one read is F1R2 and the other F2R1.
 *
 * The word "molecule" here refers to the original DNA fragment with barcode before undergoing
 * PCR and sequencing. We amplify this molecule through PCR and end up with many duplicate _fragments_.
 *
 * Instances are immutable: both fields are assigned once in the constructor.
 */
public class MoleculeID {
    /** Strand labels recognised by {@link #countStrands(List)}. */
    private static final String STRAND_A = "A";
    private static final String STRAND_B = "B";

    private final int moleculeNumber;
    private final String strand;

    /**
     * Builds the molecule ID by parsing the {@link SAMTag#MI} attribute of the given read.
     *
     * @param read read carrying an MI attribute of the form {@code <number><delimiter><strand>}
     * @throws NullPointerException if the read has no MI attribute
     * @throws NumberFormatException if the molecule-number portion is not an integer
     * @throws ArrayIndexOutOfBoundsException if the MI attribute has no strand portion
     */
    public MoleculeID(final GATKRead read){
        this(getMoleculeNumberOfRead(read), getStrandOfRead(read));
    }

    /**
     * @param moleculeNumber the integer portion of the molecule ID
     * @param strand the strand portion of the molecule ID
     */
    public MoleculeID(final int moleculeNumber, final String strand){
        this.moleculeNumber = moleculeNumber;
        this.strand = strand;
    }

    /** @return the integer portion of the molecule ID */
    public int getMoleculeNumber() {
        return moleculeNumber;
    }

    /** @return the strand portion of the molecule ID */
    public String getStrand() {
        return strand;
    }

    /** Format the molecule ID as stored in the sam/bam/cram file under the {@link SAMTag#MI} tag **/
    public String getSAMField(){
        return moleculeNumber + ReadsWithSameUMI.FGBIO_MI_TAG_DELIMITER + strand;
    }

    /**
     * Extracts the molecule number portion of the {@link SAMTag#MI} field of the read.
     *
     * @param read read carrying an MI attribute
     * @return the text before the first delimiter, parsed as an integer
     * @throws NullPointerException if the read has no MI attribute
     * @throws NumberFormatException if that text is not an integer
     **/
    public static int getMoleculeNumberOfRead(final GATKRead read){
        return Integer.parseInt(splitMITag(read)[0]);
    }

    /**
     * Extracts the strand portion of the {@link SAMTag#MI} field of the read.
     *
     * @param read read carrying an MI attribute
     * @return the second delimiter-separated field of the MI attribute
     * @throws NullPointerException if the read has no MI attribute
     * @throws ArrayIndexOutOfBoundsException if the MI attribute has no second field
     **/
    public static String getStrandOfRead(final GATKRead read){
        return splitMITag(read)[1];
    }

    /**
     * Counts the reads belonging to each strand.
     * Assumes that the input reads have the same molecule number in the {@link SAMTag#MI} tag.
     *
     * @param reads reads carrying an MI attribute
     * @return a pair whose left element is the number of reads on strand "A" and whose right
     *         element is the number of reads on strand "B". The pair is NOT ordered by size,
     *         and reads with any other strand label are not counted.
     **/
    public static Pair<Integer, Integer> countStrands(final List<GATKRead> reads){
        int strandACount = 0;
        int strandBCount = 0;
        // Single pass so that each read's MI attribute is parsed only once.
        for (final GATKRead read : reads) {
            final String readStrand = getStrandOfRead(read);
            if (STRAND_A.equals(readStrand)) {
                strandACount++;
            } else if (STRAND_B.equals(readStrand)) {
                strandBCount++;
            }
        }
        return new ImmutablePair<>(strandACount, strandBCount);
    }

    /** Reads the MI attribute of the read and splits it on the fgbio MI tag delimiter. */
    private static String[] splitMITag(final GATKRead read){
        final String miTag = read.getAttributeAsString(SAMTag.MI.name());
        // NOTE(review): assumes getAttributeAsString yields null for an absent attribute — confirm
        // against GATKRead. The exception type stays NullPointerException, as before; only the
        // message is new.
        Objects.requireNonNull(miTag,
                () -> "Read " + read.getName() + " has no " + SAMTag.MI.name() + " tag");
        return miTag.split(ReadsWithSameUMI.FGBIO_MI_TAG_DELIMITER);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy