All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.samtools.SAMSequenceRecord Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package htsjdk.samtools;


import java.math.BigInteger;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Header information about a reference sequence.  Corresponds to @SQ header record in SAM text header.
 */

public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Cloneable
{
    public static final long serialVersionUID = 1L; // AbstractSAMHeaderRecord implements Serializable
    private final String mSequenceName; // Value must be interned() if it's ever set/modified
    private int mSequenceIndex = -1;
    private int mSequenceLength = 0;
    public static final String SEQUENCE_NAME_TAG = "SN";
    public static final String SEQUENCE_LENGTH_TAG = "LN";
    public static final String MD5_TAG = "M5";
    public static final String ASSEMBLY_TAG = "AS";
    public static final String URI_TAG = "UR";
    public static final String SPECIES_TAG = "SP";
    public static final String DESCRIPTION_TAG = "DS";

    /** If one sequence has this length, and another sequence had a different length, isSameSequence will
     * not complain that they are different sequences. */
    public static final int UNKNOWN_SEQUENCE_LENGTH = 0;


    /**
     * This is not a valid sequence name, because it is reserved in the MRNM field of SAM text format
     * to mean "same reference as RNAME field."
     */
    public static final String RESERVED_MRNM_SEQUENCE_NAME = "=";

    /**
     * The standard tags are stored in text header without type information, because the type of these tags is known.
     */
    public static final Set STANDARD_TAGS =
            new HashSet(Arrays.asList(SEQUENCE_NAME_TAG, SEQUENCE_LENGTH_TAG, ASSEMBLY_TAG, MD5_TAG, URI_TAG,
                                                SPECIES_TAG));

    // Split on any whitespace
    private static final Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("\\s");
    // These are the chars matched by \\s.
    private static final char[] WHITESPACE_CHARS = {' ', '\t', '\n', '\013', '\f', '\r'}; // \013 is vertical tab

    /**
     * @deprecated Use {@link #SAMSequenceRecord(String, int)} instead.
     * sequenceLength is required for the object to be considered valid.
     */
    @Deprecated
    public SAMSequenceRecord(final String name) {
        this(name, UNKNOWN_SEQUENCE_LENGTH);
    }

    public SAMSequenceRecord(final String name, final int sequenceLength) {
        if (name != null) {
            if (SEQUENCE_NAME_SPLITTER.matcher(name).find()) {
                throw new SAMException("Sequence name contains invalid character: " + name);
            }
            validateSequenceName(name);
            mSequenceName = name.intern();
        } else {
            mSequenceName = null;
        }
        mSequenceLength = sequenceLength;
    }

    public String getSequenceName() { return mSequenceName; }

    public int getSequenceLength() { return mSequenceLength; }
    public void setSequenceLength(final int value) { mSequenceLength = value; }

    public String getAssembly() { return (String) getAttribute(ASSEMBLY_TAG); }
    public void setAssembly(final String value) { setAttribute(ASSEMBLY_TAG, value); }

    public String getSpecies() { return (String) getAttribute(SPECIES_TAG); }
    public void setSpecies(final String value) { setAttribute(SPECIES_TAG, value); }

    public String getMd5() { return (String) getAttribute(MD5_TAG); }
    public void setMd5(final String value) { setAttribute(MD5_TAG, value); }

    public String getDescription() { return getAttribute(DESCRIPTION_TAG);}
    public void setDescription(final String value) { setAttribute(DESCRIPTION_TAG, value);}

    /**
     * @return Index of this record in the sequence dictionary it lives in. 
     */
    public int getSequenceIndex() { return mSequenceIndex; }

    // Private state used only by SAM implementation.
    public void setSequenceIndex(final int value) { mSequenceIndex = value; }

    /**
     * Looser comparison than equals().  We look only at sequence index, sequence length, and MD5 tag value
     * (or sequence names, if there is no MD5 tag in either record.
     */
    public boolean isSameSequence(final SAMSequenceRecord that) {
        if (this == that) return true;
        if (that == null) return false;

        if (mSequenceIndex != that.mSequenceIndex) return false;
        // PIC-439.  Allow undefined length.
        if (mSequenceLength != UNKNOWN_SEQUENCE_LENGTH && that.mSequenceLength != UNKNOWN_SEQUENCE_LENGTH && mSequenceLength != that.mSequenceLength)
            return false;
        if (this.getAttribute(SAMSequenceRecord.MD5_TAG) != null && that.getAttribute(SAMSequenceRecord.MD5_TAG) != null) {
            final BigInteger thisMd5 = new BigInteger((String)this.getAttribute(SAMSequenceRecord.MD5_TAG), 16);
            final BigInteger thatMd5 = new BigInteger((String)that.getAttribute(SAMSequenceRecord.MD5_TAG), 16);
            if (!thisMd5.equals(thatMd5)) {
                return false;
            }
        }
        else {
            if (mSequenceName != that.mSequenceName) return false; // Compare using == since we intern() the Strings
        }

        return true;
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) return true;
        if (!(o instanceof SAMSequenceRecord)) return false;

        final SAMSequenceRecord that = (SAMSequenceRecord) o;

        if (mSequenceIndex != that.mSequenceIndex) return false;
        if (mSequenceLength != that.mSequenceLength) return false;
        if (!attributesEqual(that)) return false;
        if (mSequenceName != that.mSequenceName) return false; // Compare using == since we intern() the name

        return true;
    }

    @Override
    public int hashCode() {
        return mSequenceName != null ? mSequenceName.hashCode() : 0;
    }

    @Override
    Set getStandardTags() {
        return STANDARD_TAGS;
    }

    @Override
    public final SAMSequenceRecord clone() {
        final SAMSequenceRecord ret = new SAMSequenceRecord(this.mSequenceName, this.mSequenceLength);
        ret.mSequenceIndex = this.mSequenceIndex;
        for (final Map.Entry entry : this.getAttributes()) {
            ret.setAttribute(entry.getKey(), entry.getValue());
        }
        return ret;
    }

    /**
     * Truncate sequence name at first whitespace.
     */
    public static String truncateSequenceName(final String sequenceName) {
        /*
         * Instead of using regex split, do it manually for better performance.
        return SEQUENCE_NAME_SPLITTER.split(sequenceName, 2)[0];
        */
        int truncateAt = sequenceName.length();
        for (final char c : WHITESPACE_CHARS) {
            int index = sequenceName.indexOf(c);
            if (index != -1 && index < truncateAt) {
                truncateAt = index;
            }
        }
        return sequenceName.substring(0, truncateAt);
    }

    /**
     * Throw an exception if the sequence name is not valid.
     */
    public static void validateSequenceName(final String name) {
        if (RESERVED_MRNM_SEQUENCE_NAME.equals(name)) {
            throw new SAMException("'" + RESERVED_MRNM_SEQUENCE_NAME + "' is not a valid sequence name");
        }
    }

    @Override
    public String toString() {
        return String.format(
                "SAMSequenceRecord(name=%s,length=%s,dict_index=%s,assembly=%s)", 
                getSequenceName(), 
                getSequenceLength(), 
                getSequenceIndex(),
                getAssembly()
        );
    }

    @Override
    public String getSAMString() {
        return new SAMTextHeaderCodec().getSQLine(this);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy