All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.read.CigarBuilder Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.utils.read;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import org.apache.commons.lang3.tuple.Triple;
import org.broadinstitute.hellbender.utils.Utils;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * This class allows code that manipulates cigars to do so naively by handling complications such as merging consecutive
 * identical operators within the builder.  A CigarBuilder takes care of the following:
 *
 * 1)  Merging consecutive identical operators, eg 10M5M -> 15M
 * 2)  Eliminating leading and trailing deletions, eg 10D10M -> 10M and 10M10D -> 10M
 * 3)  Shifting deletions to the left of adjacent insertions, eg 10M10I10D -> 10M10D10I
 * 4)  Validating the overall structure of [hard clip] [soft clip] non-clip [soft clip] [hard clip]
 *
 * Edge cases, such as removing a deletion that immediately follows a leading insertion, *are* handled correctly.
 *
 * Leading and trailing deletions may be kept by using the non-default CigarBuilder(false) constructor.
 *
 * All of this is achieved simply by invoking add() repeatedly, followed by make().  Calling make() does not modify the
 * elements accumulated so far, so a builder may be extended with further add() calls and made again.
 */
public class CigarBuilder {

    // the elements accumulated so far, already merged / reordered / stripped of leading deletions
    private final List<CigarElement> cigarElements = new ArrayList<>();

    // track the last operator so we can merge consecutive elements with the same operator
    // for example, adding 3M and 4M is equivalent to adding 7M
    // also we ignore leading deletions so for example 10S + 5D = 10S
    // invariant: this is null if and only if cigarElements is empty
    private CigarOperator lastOperator = null;

    // which part of the [hard clip] [soft clip] non-clip [soft clip] [hard clip] layout we are currently in
    private Section section = Section.LEFT_HARD_CLIP;

    private final boolean removeDeletionsAtEnds;

    // deletion bases dropped by add() at the left end
    private int leadingDeletionBasesRemoved = 0;
    // deletion bases dropped by add() when a right clip revealed that a deletion was trailing
    private int trailingDeletionBasesRemoved = 0;
    // deletion bases dropped by the most recent make(); reset on every make()
    private int trailingDeletionBasesRemovedInMake = 0;

    /**
     * @param removeDeletionsAtEnds if true, deletions at either end of the unclipped portion of the cigar are discarded
     */
    public CigarBuilder(final boolean removeDeletionsAtEnds) {
        this.removeDeletionsAtEnds = removeDeletionsAtEnds;
    }

    /**
     * Create a builder that removes leading and trailing deletions.
     */
    public CigarBuilder() {
        this(true);
    }

    /**
     * Append one element, merging, reordering or discarding it as described in the class documentation.
     * Zero-length elements are ignored.
     *
     * @param element the element to append
     * @return this builder, to allow chaining
     */
    public CigarBuilder add(final CigarElement element) {
        if (element.getLength() == 0) {
            return this;
        }

        final CigarOperator operator = element.getOperator();

        // skip a deletion after clipping ie at the beginning of the read
        // note the edge case of a deletion following a leading insertion, which we also skip
        if (removeDeletionsAtEnds && operator == CigarOperator.DELETION && nextElementWouldBeLeading()) {
            leadingDeletionBasesRemoved += element.getLength();
            return this;
        }

        advanceSectionAndValidateCigarOrder(operator);

        final int lastIndex = cigarElements.size() - 1;

        if (operator == lastOperator) {
            // merge consecutive elements with the same operator
            final int mergedLength = cigarElements.get(lastIndex).getLength() + element.getLength();
            cigarElements.set(lastIndex, new CigarElement(mergedLength, operator));
        } else if (lastOperator == null) {
            append(element);
        } else if (operator.isClipping()) {
            if (removeDeletionsAtEnds && !lastOperator.consumesReadBases() && !lastOperator.isClipping()) {
                // we have just started clipping on the right and realize the last element was a deletion
                // (or another operator that consumes no read bases): the clip takes its place
                trailingDeletionBasesRemoved += cigarElements.get(lastIndex).getLength();
                cigarElements.set(lastIndex, element);
                lastOperator = operator;
            } else if (removeDeletionsAtEnds && lastTwoElementsWereDeletionAndInsertion()) {
                // we have just started clipping on the right and the last two elements were a deletion and an
                // insertion: drop the deletion by sliding the insertion into its slot and putting the clip last
                trailingDeletionBasesRemoved += cigarElements.get(lastIndex - 1).getLength();
                cigarElements.set(lastIndex - 1, cigarElements.get(lastIndex));
                cigarElements.set(lastIndex, element);
                // the clip is now the final element; without this update a subsequent clip with the same
                // operator would be appended as a separate element instead of being merged
                lastOperator = operator;
            } else {
                append(element);
            }
        } else if (operator == CigarOperator.DELETION && lastOperator == CigarOperator.INSERTION) {
            // The order of deletion and insertion elements is arbitrary, so to standardize we shift deletions to the left
            // that is, we place the deletion before the insertion and shift the insertion right
            // if the element before the insertion is another deletion, we merge in the new deletion
            // note that the last operator remains an insertion
            if (lastIndex > 0 && cigarElements.get(lastIndex - 1).getOperator() == CigarOperator.DELETION) {
                final int mergedLength = cigarElements.get(lastIndex - 1).getLength() + element.getLength();
                cigarElements.set(lastIndex - 1, new CigarElement(mergedLength, CigarOperator.DELETION));
            } else {
                cigarElements.add(lastIndex, element);
            }
        } else {
            append(element);
        }

        return this;
    }

    // true if nothing but clips, optionally followed by a single insertion, has been added so far
    private boolean nextElementWouldBeLeading() {
        if (lastOperator == null || lastOperator.isClipping()) {
            return true;
        }
        final int size = cigarElements.size();
        return lastOperator == CigarOperator.INSERTION
                && (size == 1 || cigarElements.get(size - 2).getOperator().isClipping());
    }

    // add the element at the end and remember its operator for future merging
    private void append(final CigarElement element) {
        cigarElements.add(element);
        lastOperator = element.getOperator();
    }

    // true if the builder currently ends with a deletion followed by an insertion
    private boolean lastTwoElementsWereDeletionAndInsertion() {
        return lastOperator == CigarOperator.INSERTION && cigarElements.size() > 1 && cigarElements.get(cigarElements.size() - 2).getOperator() == CigarOperator.DELETION;
    }

    /**
     * Add every element of the given sequence in iteration order, exactly as if add() were called on each.
     *
     * @param elements the elements to append
     * @return this builder, to allow chaining
     */
    public CigarBuilder addAll(final Iterable<CigarElement> elements) {
        for (final CigarElement element : elements) {
            add(element);
        }
        return this;
    }

    /**
     * Build a cigar from the elements added so far.  If this builder removes deletions at the ends, a final deletion
     * (or a deletion immediately preceding a final insertion) is left out of the result and its length is recorded for
     * {@link #getTrailingDeletionBasesRemoved()}.  The builder's accumulated elements are not modified, so it is safe
     * to call this method repeatedly or to keep adding elements afterwards.
     *
     * Validation failures are reported through Utils.validate: a cigar whose first element is a soft clip and which
     * contains nothing after its left soft clip is rejected, as is an empty result unless allowEmpty is true.
     *
     * @param allowEmpty whether a cigar with no elements is an acceptable result
     * @return the resulting cigar, which is empty only if allowEmpty is true and no elements remain
     */
    public Cigar make(final boolean allowEmpty) {
        // section can only be LEFT_SOFT_CLIP after a soft clip has been stored, so element 0 exists here
        Utils.validate(!(section == Section.LEFT_SOFT_CLIP && cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP), "cigar is completely soft-clipped");

        // trim trailing deletions on a copy: removing them from cigarElements would leave lastOperator describing an
        // element that no longer exists and corrupt any later add() or make()
        final List<CigarElement> result = new ArrayList<>(cigarElements);
        trailingDeletionBasesRemovedInMake = 0;
        if (removeDeletionsAtEnds && lastOperator == CigarOperator.DELETION) {
            trailingDeletionBasesRemovedInMake = result.remove(result.size() - 1).getLength();
        } else if (removeDeletionsAtEnds && lastTwoElementsWereDeletionAndInsertion()) {
            trailingDeletionBasesRemovedInMake = result.remove(result.size() - 2).getLength();
        }

        Utils.validate(allowEmpty || !result.isEmpty(), "No cigar elements left after removing leading and trailing deletions.");
        return new Cigar(result);
    }

    /**
     * Build a cigar from the elements added so far, rejecting an empty result.  Equivalent to make(false).
     */
    public Cigar make() {
        return make(false);
    }

    private enum Section {LEFT_HARD_CLIP, LEFT_SOFT_CLIP, MIDDLE, RIGHT_SOFT_CLIP, RIGHT_HARD_CLIP}

    // validate that cigar structure is hard clip, soft clip, unclipped, soft clip, hard clip
    // and move to the section that the given operator belongs to
    private void advanceSectionAndValidateCigarOrder(final CigarOperator operator) {
        if (operator == CigarOperator.HARD_CLIP) {
            // a hard clip after anything other than the left hard clip must be the right hard clip
            if (section == Section.LEFT_SOFT_CLIP || section == Section.MIDDLE || section == Section.RIGHT_SOFT_CLIP) {
                section = Section.RIGHT_HARD_CLIP;
            }
        } else if (operator == CigarOperator.SOFT_CLIP) {
            Utils.validate(section != Section.RIGHT_HARD_CLIP, "cigar has already reached its right hard clip");
            if (section == Section.LEFT_HARD_CLIP) {
                section = Section.LEFT_SOFT_CLIP;
            } else if (section == Section.MIDDLE) {
                section = Section.RIGHT_SOFT_CLIP;
            }
        } else {
            Utils.validate(section != Section.RIGHT_SOFT_CLIP && section != Section.RIGHT_HARD_CLIP, "cigar has already reached right clip");
            if (section == Section.LEFT_HARD_CLIP || section == Section.LEFT_SOFT_CLIP) {
                section = Section.MIDDLE;
            }
        }
    }

    /**
     * Count the number of leading deletion bases that have been removed by this builder and that will not show up in any call to make().
     * Note that all leading deletions are removed prior to calling make().  For example, successively adding 3S2D10I7D10M would result in
     * the 2D and 7D elements being discarded, for a total of 9 removed deletion bases.
     */
    public int getLeadingDeletionBasesRemoved() {
        return leadingDeletionBasesRemoved;
    }

    /**
     * Counts the number of trailing deletion bases that were removed as of the last call to make().  These may be removed
     * before or during make().  For example, adding 3M and 3D does not remove the 3D because the builder does not know that 3D
     * is a terminal element.  If make() is then called, the builder will record the discarded 3D and this method will return 3.
     * Subsequently adding 3M, calling make(), and then calling this method will result in 0.
     */
    public int getTrailingDeletionBasesRemoved() {
        return trailingDeletionBasesRemoved + trailingDeletionBasesRemovedInMake;
    }

    /**
     * Return a Result object containing the output of make() as well as the number of leading and trailing deletion bases
     * removed relative to the cigar elements that were add()ed.  This is very useful when in addition to transforming a cigar we must also
     * keep track of an alignment start or end.
     */
    public Result makeAndRecordDeletionsRemovedResult() {
        final Cigar cigar = make();
        return new Result(cigar, getLeadingDeletionBasesRemoved(), getTrailingDeletionBasesRemoved());
    }

    /**
     * Immutable holder for a built cigar together with the deletion bases removed from each end while building it.
     */
    public static final class Result {
        private final Cigar cigar;
        private final int leadingDeletionBasesRemoved;
        private final int trailingDeletionBasesRemoved;

        public Result(final Cigar cigar, final int leadingDeletionBasesRemoved, final int trailingDeletionBasesRemoved) {
            this.cigar = cigar;
            this.leadingDeletionBasesRemoved = leadingDeletionBasesRemoved;
            this.trailingDeletionBasesRemoved = trailingDeletionBasesRemoved;
        }

        public Cigar getCigar() {
            return cigar;
        }

        public int getLeadingDeletionBasesRemoved() {
            return leadingDeletionBasesRemoved;
        }

        public int getTrailingDeletionBasesRemoved() {
            return trailingDeletionBasesRemoved;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy