// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// org.broadinstitute.hellbender.utils.read.GATKRead Maven / Gradle / Ivy
//
// The newest version!
package org.broadinstitute.hellbender.utils.read;

import htsjdk.samtools.*;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.StringUtil;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;

import java.util.Collections;
import java.util.List;
import java.util.Optional;

/**
 * Unified read interface for use throughout the GATK.
 *
 * Adapter classes implementing this interface exist for htsjdk's {@link SAMRecord} ({@link SAMRecordToGATKReadAdapter})
 *
 * Since the adapter classes wrap the raw reads without making a copy, care must be taken to avoid
 * exposing the encapsulated reads, particularly if modifying reads in-place. As a result, this interface
 * should probably only be implemented by core engine-level classes.
 *
 * All GATKRead methods that return mutable reference types make defensive copies, with the exception
 * of the conversion method {@link #convertToSAMRecord}.
 *
 * Note that {@link #getContig} and {@link #getStart} will not expose nominal positions assigned to unmapped
 * reads for sorting purposes -- for unmapped reads, these methods will always return {@code null} or 0,
 * respectively. To access positions assigned to unmapped reads for sorting purposes, use {@link #getAssignedContig}
 * and {@link #getAssignedStart}.
 */
public interface GATKRead extends Locatable {

    /**
     * @return The name of the read (equivalent to QNAME in SAM), or {@code null} if the read has no name.
     */
    String getName();


    /**
     * NOTE(review): this accessor was undocumented; presumably it exposes the read's bitwise flags
     * (equivalent to FLAG in SAM) -- confirm against the implementing adapter.
     *
     * @return the flags value of the read
     */
    int getFlags();

    /**
     * Set the name of the read (equivalent to QNAME in SAM), or set to {@code null} if the read has no name.
     *
     * @param name new name for the read
     */
    void setName( final String name );

    /**
     * @return The number of bases in the read
     *
     * Note: This is not necessarily the same as the number of reference bases the read is aligned to.
     */
    int getLength();


    /**
     * Reports whether the read carries no bases at all.
     *
     * @return {@code true} when {@link #getLength} is 0, otherwise {@code false}
     */
    default boolean isEmpty() {
        final int baseCount = getLength();
        return baseCount == 0;
    }

    /**
     * Set the position of the read (the contig and the start position). Cannot be used to
     * set the read to an unmapped position; use {@link #setIsUnmapped} for that purpose.
     *
     * @param contig Contig the read is mapped to
     * @param start Start position of the read (1-based, inclusive)
     * @throws IllegalArgumentException if contig is null or "*", or start is less than 1
     */
    void setPosition( final String contig, final int start );

    /**
     * Set the position of the read using the position of an existing {@link Locatable}. Cannot be used to
     * set the read to an unmapped position; use {@link #setIsUnmapped} for that purpose.
     *
     * @param locatable {@link Locatable} representing the 1-based, inclusive position to assign to the read
     * @throws IllegalArgumentException if locatable is null, or its contig is null or "*", or its start position is less than 1
     */
    void setPosition( final Locatable locatable );

    /**
     * @return The actual contig assigned to the read, regardless of unmapped status. Unlike {@link #getContig},
     *         which does not expose positions assigned to unmapped reads, this method will gladly return a contig
     *         assigned to an unmapped read (typically, this will be the contig of its mapped mate). Will return either
     *         {@link ReadConstants#UNSET_CONTIG} or {@code null} for reads with no contig, depending on
     *         the underlying read implementation.
     *
     *         Useful for sorting reads in standard BAM/SAM file order, with unmapped reads interleaved with their mapped
     *         mates -- for other uses, clients should use {@link #getContig}
     */
    String getAssignedContig();

    /**
     * @return The actual start position assigned to the read, regardless of unmapped status. Unlike {@link #getStart},
     *         which does not expose positions assigned to unmapped reads, this method will gladly return a start position
     *         assigned to an unmapped read (typically, this will be the start position of its mapped mate). Will return
     *         {@link ReadConstants#UNSET_POSITION} for reads with no start position.
     *
     *         Useful for sorting reads in standard BAM/SAM file order, with unmapped reads interleaved with their mapped
     *         mates -- for other uses, clients should use {@link #getStart}
     */
    int getAssignedStart();

    /**
     * Returns the alignment start (1-based, inclusive) adjusted for clipped bases.
     * For example, if the read has an alignment start of 100 but the first 4 bases
     * were clipped (hard or soft clipped) then this method will return 96.
     *
     * For unmapped reads, always returns {@link ReadConstants#UNSET_POSITION}
     *
     * @return The alignment start (1-based, inclusive) adjusted for clipped bases,
     *         or {@link ReadConstants#UNSET_POSITION} if the read is unmapped.
     */
    int getUnclippedStart();

    /**
     * Returns the alignment end (1-based, inclusive) adjusted for clipped bases.
     * For example, if the read has an alignment end of 100 but the last 7 bases
     * were clipped (hard or soft clipped) then this method will return 107.
     *
     * For unmapped reads, always returns {@link ReadConstants#UNSET_POSITION}
     *
     * @return The alignment end (1-based, inclusive) adjusted for clipped bases,
     *         or {@link ReadConstants#UNSET_POSITION} if the read is unmapped.
     */
    int getUnclippedEnd();

    /**
     * Computes the reference coordinate at which the read begins once soft-clipped bases are counted
     * (hard-clipped bases are not counted).
     *
     * Note: {@link #getUnclippedStart} accounts for both soft and hard clips; this method accounts for soft clips only.
     *
     * @return the start of the read with soft clips (but not hard clips) taken into account,
     *         as computed by {@code ReadUtils.getSoftStart}
     */
    default int getSoftStart() {
        final int softStart = ReadUtils.getSoftStart(this);
        return softStart;
    }

    /**
     * Computes the reference coordinate at which the read ends once soft-clipped bases are counted
     * (hard-clipped bases are not counted).
     *
     * Note: {@link #getUnclippedEnd} accounts for both soft and hard clips; this method accounts for soft clips only.
     *
     * @return the end of the read with soft clips (but not hard clips) taken into account,
     *         as computed by {@code ReadUtils.getSoftEnd}
     */
    default int getSoftEnd() {
        final int softEnd = ReadUtils.getSoftEnd(this);
        return softEnd;
    }

    /**
     * Locates the adaptor boundary next to the read: the first base inside the adaptor that lies closest to
     * the read boundary. For a read on the positive strand this is the first base after the end of the
     * fragment (Picard calls it 'insert'); for a read on the negative strand it is the first base before the
     * beginning of the fragment.
     *
     * Two cases are distinguished:
     *
     * 1) The read is on the reverse strand :
     *
     *     <----------------------| *
     *   |--------------------->
     *
     *   here the adaptor boundary is at the mate start (minus one)
     *
     * 2) The read is on the forward strand :
     *
     *   |---------------------->   *
     *     <----------------------|
     *
     *   here the adaptor boundary is at the start of the read plus the inferred insert size (plus one)
     *
     * The computation itself is delegated to {@code ReadUtils.getAdaptorBoundary}.
     *
     * @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read).
     * CANNOT_COMPUTE_ADAPTOR_BOUNDARY if the read is unmapped or the mate is mapped to another contig.
     */
    default int getAdaptorBoundary() {
        final int adaptorBoundary = ReadUtils.getAdaptorBoundary(this);
        return adaptorBoundary;
    }

    /**
     * @return The contig that this read's mate is mapped to, or {@code null} if the mate is unmapped
     * @throws IllegalStateException if the read is not paired (has no mate)
     */
    String getMateContig();

    /**
     * @return The alignment start (1-based, inclusive) of this read's mate, or {@link ReadConstants#UNSET_POSITION}
     *         if the mate is unmapped.
     * @throws IllegalStateException if the read is not paired (has no mate)
     */
    int getMateStart();

    /**
     * Set the position of the read's mate (the contig and the start position). Cannot be used to
     * set the read's mate to an unmapped position; use {@link #setMateIsUnmapped} for that purpose.
     *
     * Calling this method has the additional effect of marking the read as paired, as if {@link #setIsPaired}
     * were invoked with true.
     *
     * @param contig Contig the read's mate is mapped to
     * @param start Start position of the read's mate (1-based, inclusive)
     * @throws IllegalArgumentException if contig is null or "*", or start is less than 1
     */
    void setMatePosition( final String contig, final int start );

    /**
     * Set the position of the read's mate using the position of an existing {@link Locatable}. Cannot be used to
     * set the read's mate to an unmapped position; use {@link #setMateIsUnmapped} for that purpose.
     *
     * Calling this method has the additional effect of marking the read as paired, as if {@link #setIsPaired}
     * were invoked with true.
     *
     * @param locatable {@link Locatable} representing the 1-based, inclusive position to assign to the read's mate
     * @throws IllegalArgumentException if locatable is null, or its contig is null or "*", or its start position is less than 1
     */
    void setMatePosition( final Locatable locatable );

    /**
     * Returns the observed length of the read's fragment (equivalent to TLEN in SAM).
     *
     * Warning: the precise meaning of this field is implementation/technology dependent.
     *
     * @return The observed length of the fragment (equivalent to TLEN in SAM), or 0 if unknown.
     *         Negative if the mate maps to a lower position than the read.
     */
    int getFragmentLength();

    /**
     * Set the observed length of the read's fragment (equivalent to TLEN in SAM).
     *
     * Warning: the precise meaning of this field is implementation/technology dependent.
     *
     * @param fragmentLength Observed length of the read's fragment; may be negative
     *                       if the mate maps to a lower position than the read,
     *                       or 0 if unknown.
     */
    void setFragmentLength( final int fragmentLength );

    /**
     * @return The mapping quality of this alignment, representing the phred-scaled likelihood that the read maps
     *         to this position as opposed to other locations. Returns {@link ReadConstants#NO_MAPPING_QUALITY}
     *         if there is none.
     */
    int getMappingQuality();

    /**
     * Set the mapping quality of this alignment, representing how likely the read maps to this position as
     * opposed to other locations. Set to {@link ReadConstants#NO_MAPPING_QUALITY} if there is none.
     *
     * @param mappingQuality mapping quality of this alignment; must be between 0 and 255, inclusive
     * @throws IllegalArgumentException if the mapping quality is less than 0 or greater than 255
     */
    void setMappingQuality( final int mappingQuality );

    /**
     * @return The read sequence as ASCII bytes ACGTN=, or an empty byte[] if no sequence is present.
     *
     * This method makes a defensive copy of the bases array before returning it, so modifying the
     * returned array will not alter the bases in the read.
     */
    byte[] getBases();

    /**
     * Variant of {@link #getBases} that implementations may satisfy without making a defensive copy,
     * whenever a copy can be avoided.
     *
     * WARNING: Because the returned array MAY be the read's own backing array, modifying it MAY alter
     * the bases in the actual read. CALLER BEWARE!
     *
     * @return The read sequence as ASCII bytes ACGTN=, or an empty byte[] if no sequence is present.
     */
    default byte[] getBasesNoCopy() {
        // Nothing cheaper is available at the interface level, so fall back on the copying accessor.
        // Implementations that can hand out their array without copying should override this method.
        final byte[] copiedBases = getBases();
        return copiedBases;
    }

    /**
     * Copy the base into an existent byte array.
     *
     * @param offset the first base in the read to copy
     * @param destination the destination array.
     * @param destinationOffset the first base in the array to copy to.
     * @param maxLength the maximum number of bases to copy.
     *
     * @throws IllegalArgumentException if any of:
     * 
    *
  • {@code destination} is {@code null},
  • *
  • any of the offsets is negative or goes beyond the maximum respective valid * index ({@code offset} for the read bases and {@code destinationOffset} for {@code destination}).
  • *
  • if there is not enough space in {@code destination} to hold to all the bases copied * (NOTE: a large {@code maxLength} value won't result in an exception if the read does not have enough bases to overflow {@code destination})
  • *
* @return the number of bases copied, always 0 or greater. */ default int copyBases(final int offset, final byte[] destination, final int destinationOffset, final int maxLength) { Utils.nonNull(destination); ParamUtils.isPositiveOrZero(offset, "read base offset must be 0 or greater"); ParamUtils.isPositiveOrZero(destinationOffset, "destination array offset must be 0 or greater"); ParamUtils.isPositiveOrZero(maxLength, "the requested max-length cannot be negative"); if (maxLength == 0 || !hasBases()) { // short-cut for trival non-copy cases: return 0; } else { final byte[] bases = getBasesNoCopy(); final int basesLength = bases.length; Utils.validIndex(offset, basesLength); final int copyLength = basesLength - offset < maxLength ? basesLength - offset : maxLength; System.arraycopy(bases, offset, destination, destinationOffset, copyLength); return copyLength; } } /** * Copy base qualities into an existent byte array. * * @param offset the first base-quality in the read to copy * @param destination the destination array. * @param destinationOffset the first base-quality in the array to copy to. * @param maxLength the maximum number of base-qualities to copy. * * @throws IllegalArgumentException if any of: *
    *
  • {@code destination} is {@code null},
  • *
  • any of the offsets is negative or goes beyond the maximum respective valid * index ({@code offset} for the read base-qualities and {@code destinationOffset} for {@code destination}).
  • *
  • if there is not enough space in {@code destination} to hold to all the base-qualities copied * (NOTE: a large {@code maxLength} value won't result in an exception if the read does not have enough base-qualities to overflow {@code destination})
  • *
* @return the number of base-qualities copied, always 0 or greater. */ default int copyBaseQualities(final int offset, final byte[] destination, final int destinationOffset, final int maxLength) { Utils.nonNull(destination); ParamUtils.isPositiveOrZero(offset, "read base offset must be 0 or greater"); ParamUtils.isPositiveOrZero(destinationOffset, "destination array offset must be 0 or greater"); ParamUtils.isPositiveOrZero(maxLength, "the requested max-length cannot be negative"); if (maxLength == 0 || !hasBaseQualities()) { // short-cut for trival non-copy cases: return 0; } else { final byte[] quals = getBaseQualitiesNoCopy(); final int qualsLength = quals.length; Utils.validIndex(offset, qualsLength); final int copyLength = qualsLength - offset < maxLength ? qualsLength - offset : maxLength; System.arraycopy(quals, offset, destination, destinationOffset, copyLength); return copyLength; } } /** * Indicates whether there are any bases. */ default boolean hasBases() { final byte[] bases = getBasesNoCopy(); return bases != null && bases.length > 0; } /** * Indicates whether there is any base-qualities. * @return {@code true} iff there is at least one base-quality. */ default boolean hasBaseQualities() { final byte[] quals = getBaseQualitiesNoCopy(); return quals != null && quals.length > 0; } /** * @return The base at index i. * The default implementation returns getBases()[i]. * Subclasses may override to provide a more efficient implementations but must preserve the * semantics equal to getBases()[i] * * @throws IllegalArgumentException if i is negative or of i is not smaller than the number * of bases (as reported by {@link #getLength()}. In particular, if no sequence is present. */ default byte getBase(final int i){ return getBases()[i]; } /** * @return All bases in the read as a single String, or {@link ReadConstants#NULL_SEQUENCE_STRING} * if the read is empty. */ default String getBasesString() { return isEmpty() ? 
ReadConstants.NULL_SEQUENCE_STRING : StringUtil.bytesToString(getBases()); } /** * Set the read's sequence. * * @param bases The read sequence as ASCII bytes ACGTN=. May be empty or null if no sequence is present. */ void setBases( final byte[] bases ); /** * @return Base qualities as binary phred scores (not ASCII), or an empty byte[] if base qualities are not present. * * This method makes a defensive copy of the base qualities array before returning it, so modifying the * returned array will not alter the base qualities in the read. */ byte[] getBaseQualities(); /** * @return Base qualities as binary phred scores (not ASCII), or an empty byte[] if base qualities are not present. * * This method differs from {@link #getBaseQualities} in that implementations are free to avoid making a * defensive copy, if it's possible to avoid a copy. * * WARNING: This method MAY NOT make a defensive copy of the base qualities array before returning it, so modifying * the returned array MAY alter the base qualities in the read. CALLER BEWARE! */ default byte[] getBaseQualitiesNoCopy() { // By default we delegate to the copying version. If implementations are able to avoid a copy, // they can override with a no-copy implementation. return getBaseQualities(); } /** * @return The number of base qualities in the read sequence. * This default implementation calls getBaseQualities().length * Subclasses may override to provide a more efficient implementation. */ default int getBaseQualityCount(){ return getBaseQualities().length; } /** * @return The base quality at index i. * This default implementation returns getBaseQualities()[i]. * Subclasses may override to provide a more efficient implementations * but must preserve the semantics equal to getBaseQualities()[i] * * @throws IllegalArgumentException if i is negative or of i is not smaller than the number * of base qualities (as reported by {@link #getBaseQualityCount()}. 
*/ default byte getBaseQuality(final int i){ return getBaseQualities()[i]; } /** * Set the read's base qualities. * * @param baseQualities Base qualities as binary phred scores (not ASCII); negative values not allowed. * May be empty or null if no base qualities are present. * @throws IllegalArgumentException if an invalid (negative) base quality is provided */ void setBaseQualities( final byte[] baseQualities ); /** * @return Cigar object describing how the read aligns to the reference, or an empty Cigar object if no cigar is present. * * This method makes a defensive copy of the Cigar within the read if necessary, so modifying the return value of * this method will not modify the read's Cigar. * * Callers of this method that only want to iterate over the elements of the Cigar should call getCigarElements() * instead which may give better performance by avoiding object creation. */ Cigar getCigar(); /** * @return Unmodifiable list of the CigarElements from this read. * * Note: The default implementation returns a unmodifiable view of * the protective copy made by calling getCigar().getCigarElements() * Subclasses may override. */ default List getCigarElements(){ return Collections.unmodifiableList(getCigar().getCigarElements()); } /** * Return the cigar element at a given index. * * Note: the default implementation return getCigarElements().get(i). * Subclasses may override, for example to reduce the memory allocation or improve speed. * @throws IndexOutOfBoundsException if the index is out of range (index < 0 || index >= numCigarElements()) */ default CigarElement getCigarElement(final int i){ return getCigarElements().get(i); } /** * The number of cigar elements in this read. The default implementation returns getCigar().numCigarElements(). * Subclasses may override to provide more efficient implementations. 
*/
    default int numCigarElements(){
        return getCigar().numCigarElements();
    }

    /**
     * Set the read's Cigar using an existing {@link Cigar} object describing how the read aligns to the reference.
     *
     * @param cigar {@link Cigar} object describing how the read aligns to the reference; May be null or empty
     *              if the read has none.
     */
    void setCigar( final Cigar cigar );

    /**
     * Set the read's Cigar using a textual cigar String describing how the read aligns to the reference.
     *
     * @param cigarString Cigar String describing how the read aligns to the reference; May be null or empty
     *                    if the read has none.
     */
    void setCigar( final String cigarString );

    /**
     * @return The ID of the read group this read belongs to, or {@code null} for none.
     */
    String getReadGroup();

    /**
     * Set the ID of the read group this read belongs to, or {@code null} for none.
     *
     * @param readGroupID ID of the read group this read belongs to, or {@code null} for none.
     */
    void setReadGroup( final String readGroupID );

    /**
     * @return True if this read is paired (i.e., has a mate), otherwise false.
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean isPaired();

    /**
     * Mark the read as paired (having a mate) or not paired.
     *
     * Setting this to false has the additional effect of marking the read as not
     * properly paired, as if {@link #setIsProperlyPaired} were invoked with false.
     *
     * @param isPaired True if this read is paired (i.e., has a mate), otherwise false.
     */
    void setIsPaired( final boolean isPaired );

    /**
     * @return True if this read is paired and the orientation and the distance between reads from the fragment are
     *         consistent with the sequencing protocol, otherwise false.
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean isProperlyPaired();

    /**
     * Mark the read as properly paired (or not properly paired).
     *
     * Setting this to true has the additional effect of marking the read as paired,
     * as if {@link #setIsPaired} were invoked with true.
     *
     * @param isProperlyPaired True if this read is paired and the orientation and the distance between reads from
     *                         the fragment are consistent with the sequencing protocol, otherwise false.
     */
    void setIsProperlyPaired( final boolean isProperlyPaired );

    /**
     * @return True if this read is unmapped (this includes reads that have a position but are explicitly marked as unmapped,
     *         as well as reads that lack a fully-defined position but are not explicitly marked as unmapped). Otherwise false.
     */
    boolean isUnmapped();

    /**
     * Mark the read as unmapped (lacking a defined position on the genome).
     *
     * To mark a read as mapped, use {@link #setPosition}
     */
    void setIsUnmapped();

    /**
     * Does the read have a position assigned to it for sorting purposes.
     * @return {@code true} iff this read has no assigned position or contig.
     */
    boolean isUnplaced();

    /**
     * Mark the read as unmapped, and also removes mapping information from the read (i.e. sets contig to
     * {@link ReadConstants#UNSET_CONTIG} and position to {@link ReadConstants#UNSET_POSITION}).
     *
     * NOTE: this does not remove the cigar string from the read, use {@link #setCigar(Cigar)}
     *
     * To mark a read as mapped, use {@link #setPosition}
     */
    void setIsUnplaced();

    /**
     * @return True if this read's mate is unmapped (this includes mates that have a position but are explicitly marked as unmapped,
     *         as well as mates that lack a fully-defined position but are not explicitly marked as unmapped). Otherwise false.
     * @throws IllegalStateException if the read is not paired (has no mate)
     */
    boolean mateIsUnmapped();

    /**
     * Mark the read's mate as unmapped (lacking a defined position on the genome). (i.e. sets mate contig to
     * {@link ReadConstants#UNSET_CONTIG} and position to {@link ReadConstants#UNSET_POSITION}).
     *
     * NOTE(review): the parenthetical above says the mate's contig and position are reset, whereas the
     * documentation of {@link #setMateIsUnplaced} presents resetting them as what distinguishes that method
     * from this one -- confirm which description is accurate.
     *
     * To mark the read's mate as mapped, use {@link #setMatePosition}
     *
     * Calling this method has the additional effect of marking the read as paired, as if {@link #setIsPaired}
     * were invoked with true.
     */
    void setMateIsUnmapped();

    /**
     * Does the read's mate have a position assigned to it for sorting purposes.
     * @return {@code true} iff this read's mate has no assigned position or contig.
     */
    boolean mateIsUnplaced();

    /**
     * Mark the read's mate as unmapped (lacking a defined position on the genome). In contrast with {@link #setMateIsUnmapped},
     * this method will revert the mapping information for the mate (i.e. sets the mate's contig to "*" and position to 0).
     *
     * To mark the read's mate as mapped, use {@link #setMatePosition}
     *
     * Calling this method has the additional effect of marking the read as paired, as if {@link #setIsPaired}
     * were invoked with true.
     */
    void setMateIsUnplaced();

    /**
     * @return True if this read is on the reverse strand as opposed to the forward strand, otherwise false.
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean isReverseStrand();

    /**
     * Mark the read as being on the reverse (or forward) strand.
     *
     * @param isReverseStrand True if this read is on the reverse strand as opposed to the forward strand, otherwise false.
     */
    void setIsReverseStrand( final boolean isReverseStrand );

    /**
     * @return True if this read's mate is on the reverse strand as opposed to the forward strand, otherwise false.
     * @throws IllegalStateException if the read is not paired (has no mate)
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean mateIsReverseStrand();

    /**
     * Mark the read's mate as being on the reverse (or forward) strand.
     *
     * Calling this method has the additional effect of marking the read as paired, as if {@link #setIsPaired}
     * were invoked with true.
     *
     * @param mateIsReverseStrand True if this read's mate is on the reverse strand as opposed to the forward strand,
     *                            otherwise false.
     */
    void setMateIsReverseStrand( final boolean mateIsReverseStrand );

    /**
     * @return True if this read is paired and is the first read in the pair, otherwise false.
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean isFirstOfPair();

    /**
     * Mark the read as the first read of a pair.
     *
     * Calling this method has the additional effects of marking the read as paired, as if {@link #setIsPaired}
     * were invoked with true, and also marks the read as NOT being the second of a pair.
     */
    void setIsFirstOfPair();

    /**
     * @return True if this read is paired and is the second read in the pair, otherwise false.
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean isSecondOfPair();

    /**
     * Mark the read as the second read of a pair.
     *
     * Calling this method has the additional effects of marking the read as paired, as if {@link #setIsPaired}
     * were invoked with true, and also marks the read as NOT being the first of a pair.
     */
    void setIsSecondOfPair();

    /**
     * @return True if this is a secondary alignment (an alternative to the primary alignment), otherwise false.
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean isSecondaryAlignment();

    /**
     * Mark the read as a secondary alignment (an alternative to the primary alignment)
     *
     * @param isSecondaryAlignment True if this is a secondary alignment, otherwise false.
     */
    void setIsSecondaryAlignment( final boolean isSecondaryAlignment );

    /**
     * @return True if this is a supplementary alignment (used in the representation of a chimeric alignment), otherwise false.
     * @throws GATKException.MissingReadField if this information is not available
     */
    boolean isSupplementaryAlignment();

    /**
     * Mark the read as a supplementary alignment (used in the representation of a chimeric alignment)
     *
     * @param isSupplementaryAlignment True if this is a supplementary alignment, otherwise false.
*/ void setIsSupplementaryAlignment( final boolean isSupplementaryAlignment ); /** * Computes the pair orientation * @throws IllegalArgumentException If the read is not paired, or if either read or mate is unmapped */ default Optional getPairOrientation() { Utils.validateArg(isPaired(), () -> "Invalid read: " + getName() + " is not paired."); // while we throw an error for unpaired reads -- because in that case this method is irrelevant to the sequencing protocol -- // we return Optional.empty() for paired reads whose orientation can't be determined if (isUnmapped() || mateIsUnmapped() || !getContig().equals(getMateContig())) { return Optional.empty(); } final boolean readIsOnReverseStrand = isReverseStrand(); if(readIsOnReverseStrand == mateIsReverseStrand() ) { return Optional.of(SamPairUtil.PairOrientation.TANDEM); } final long positiveStrandFivePrimePos = readIsOnReverseStrand ? getMateStart() : getStart(); final long negativeStrandFivePrimePos = readIsOnReverseStrand ? getEnd() : getStart() + getFragmentLength(); return Optional.of(positiveStrandFivePrimePos < negativeStrandFivePrimePos ? SamPairUtil.PairOrientation.FR : SamPairUtil.PairOrientation.RF); } /** * @return True if this read fails platform/vendor quality checks, otherwise false * @throws GATKException.MissingReadField if this information is not available */ boolean failsVendorQualityCheck(); /** * Mark the read as failing platform/vendor quality checks * * @param failsVendorQualityCheck True if this read fails platform/vendor quality checks, otherwise false */ void setFailsVendorQualityCheck( final boolean failsVendorQualityCheck ); /** * @return True if this read is a PCR or optical duplicate, otherwise false. * @throws GATKException.MissingReadField if this information is not available */ boolean isDuplicate(); /** * Mark the read as a PCR or optical duplicate. * * @param isDuplicate True if this read is a PCR or optical duplicate, otherwise false. 
*/
    void setIsDuplicate( final boolean isDuplicate );

    /**
     * Check whether this read has a particular attribute
     *
     * @param attributeName name of the attribute to search for
     * @return true if the read has an attribute with the given name, otherwise false
     */
    boolean hasAttribute( final String attributeName );

    /**
     * Retrieve the value of a particular attribute typed as an integer.
     *
     * @param attributeName name of the attribute to retrieve
     * @return integer value of the requested attribute, or {@code null} if the attribute is not present
     * @throws GATKException.ReadAttributeTypeMismatch if the attribute
     *         value cannot be typed as an integer
     */
    Integer getAttributeAsInteger( final String attributeName );

    /**
     * Retrieve the value of a particular attribute typed as a floating point value.
     *
     * @param attributeName name of the attribute to retrieve
     * @return float value of the requested attribute, or {@code null} if the attribute is not present
     * @throws GATKException.ReadAttributeTypeMismatch if the attribute
     *         value cannot be typed as a float
     */
    Float getAttributeAsFloat( final String attributeName );

    /**
     * Retrieve the value of a particular attribute typed as a String.
     *
     * @param attributeName name of the attribute to retrieve
     * @return String value of the requested attribute, or {@code null} if the attribute is not present
     * @throws GATKException.ReadAttributeTypeMismatch if the attribute
     *         value cannot be typed as a single String value.
     */
    String getAttributeAsString( final String attributeName );

    /**
     * Retrieve the value of a particular attribute typed as a byte array.
     *
     * Makes a defensive copy of an existing byte array within the read if necessary, so modifying
     * the return value will not modify the attribute value within the read.
* * @param attributeName name of the attribute to retrieve * @return byte array value of the requested attribute, or {@code null} if the attribute is not present * @throws GATKException.ReadAttributeTypeMismatch if the attribute * value cannot be typed as a byte array. */ byte[] getAttributeAsByteArray( final String attributeName ); /** * This is used to access a transient attribute store provided by the underlying implementation. Transient attributes will not be serialized or written out with a record. * * NOTE: This is an advanced use case for GATKRead and you should probably use getAttribute() instead * @param key key whose value is to be stored */ Object getTransientAttribute(final Object key); /** * Returns a transient attribute value as an {@link Optional}. * * @param key the key to the attribute. * @param clazz the expected clazz of the attribute value. * @param the parametric type of the attribute value. * @return never {@code null} but a not-present optional instead. */ default Optional getOptionalTransientAttribute(final Object key, final Class clazz) { final Object value = getTransientAttribute(key); if (value != null) { if (clazz.isAssignableFrom(value.getClass())) { return Optional.of(clazz.cast(value)); } else { throw new IllegalArgumentException("transient attribute value type (" + value.getClass().getName() + ") is not assignable to does not match the input class (" + clazz.getName() + ")"); } } else { return Optional.empty(); } } /** * Set an integer-valued attribute on the read. * * @param attributeName Name of the attribute to set. Must be legal according to {@link ReadUtils#assertAttributeNameIsLegal} * @param attributeValue Integer value of the attribute (may be {@code null}) * @throws IllegalArgumentException if the attribute name is illegal according to {@link ReadUtils#assertAttributeNameIsLegal} */ void setAttribute( final String attributeName, final Integer attributeValue ); /** * Set a String-valued attribute on the read. 
* * @param attributeName Name of the attribute to set. Must be legal according to {@link ReadUtils#assertAttributeNameIsLegal} * @param attributeValue String value of the attribute (may be {@code null}) * @throws IllegalArgumentException if the attribute name is illegal according to {@link ReadUtils#assertAttributeNameIsLegal} */ void setAttribute( final String attributeName, final String attributeValue ); /** * Set a byte array attribute on the read. * * @param attributeName Name of the attribute to set. Must be legal according to {@link ReadUtils#assertAttributeNameIsLegal} * @param attributeValue byte array value of the attribute (may be {@code null} or empty) * @throws IllegalArgumentException if the attribute name is illegal according to {@link ReadUtils#assertAttributeNameIsLegal} */ void setAttribute( final String attributeName, final byte[] attributeValue ); /** * This is used to access the transient attribute store in the underlying data type. This is used to store temporary * attributes and cached data that will not be serialized or written out as a record. * * NOTE: This is an advanced use case for GATKRead and you should probably use setAttribute() instead * @param key key under which the value will be stored * @param value value to store */ public void setTransientAttribute(final Object key, final Object value); /** * Clear an individual attribute on the read. * * @param attributeName Name of the attribute to clear. Must be legal according to {@link ReadUtils#assertAttributeNameIsLegal} * @throws IllegalArgumentException if the attribute name is illegal according to {@link ReadUtils#assertAttributeNameIsLegal} */ void clearAttribute( final String attributeName ); /** * Clear all attributes on the read. */ void clearAttributes(); /** * Clear an individual transient attribute on the read. * * @param attributeName Name of the attribute to clear. 
Must be legal according to {@link ReadUtils#assertAttributeNameIsLegal} * @throws IllegalArgumentException if the attribute name is illegal according to {@link ReadUtils#assertAttributeNameIsLegal} */ void clearTransientAttribute( final String attributeName ); /** * Return a copy of this read. * * @return A copy of this read. The copy will not necessarily be a true deep copy (the fields * encapsulated by the read itself may be shallow copied), but should be safe to use freely in general * given that all GATKRead methods that return mutable reference types make defensive copies * (with the exception of the conversion method {@link #convertToSAMRecord, * but these are safe to call on copies since the encapsulated reads do get shallow copied at a minimum by * this method, so modifications to the fields within a copied read will not alter the original). */ GATKRead copy(); /** * Return a deep copy of this read. * * @return A true deep copy of this read. */ GATKRead deepCopy(); /** * Convert this read into a SAMRecord. * * Warning: the return value is not guaranteed to be independent from this read (eg., if the read * is already in SAMRecord format, no copy will be made). * * @param header required header for the SAMRecord * @return This read as a SAMRecord */ SAMRecord convertToSAMRecord( final SAMFileHeader header ); /** * Get a string representation of this read in SAM string format, terminated with '\n'. Fields are separated by '\t', * * @return SAM string representation of this read. */ String getSAMString(); /** * Modify this read by reverse complementing its bases and reversing its quality scores. Implementations may * also update tags that are known to need updating after the reverse complement operation. */ public void reverseComplement(); /** * A human-digestable representation of the read. * NOTE: java will not let us have a default method override toString so we need this dance. 
Subclasses should override toString * and call commonToString to get the same toString representation regardless of the underlying adaptee object. */ default String commonToString() { //Note: SAMRecord blows up on getAlignmentEnd when cigar is null. // That would result in a blow up here so we work around this bug // by checking for empty cigar (nulls get converted to empty cigars in SAMRecordToGATKReadAdapter) if (isUnmapped() || getCigar().isEmpty()){ return String.format("%s UNMAPPED", getName()); } else { return String.format("%s %s:%d-%d", getName(), getContig(), getStart(), getEnd()); } } /** * Clip key attributes that may change after a hard clipping operation * @param newStart - zero based offset of the new start of the read bases (in relationship to the original read) * @param newLength - new length of read, after hard clipping * @param originalLength - length of read before clipping */ void hardClipAttributes(final int newStart, final int newLength, int originalLength); }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy