All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.variant.variantcontext.Allele Maven / Gradle / Ivy

There is a newer version: 4.1.3
Show newest version
/*
 * Copyright (c) 2012 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package htsjdk.variant.variantcontext;

import java.io.Serializable;
import java.nio.charset.StandardCharsets;

/**
 * Immutable representation of an allele.
 *

* Types of alleles: *

*
 Ref: a t C g a // C is the reference base
 : a t G g a // C base is a G in some individuals
 : a t - g a // C base is deleted w.r.t. the reference
 : a t CAg a // A base is inserted w.r.t. the reference sequence
 
*

In these cases, where are the alleles?

*
    *
  • SNP polymorphism of C/G -> { C , G } -> C is the reference allele
  • *
  • 1 base deletion of C -> { tC , t } -> C is the reference allele and we include the preceding reference base (null alleles are not allowed)
  • *
  • 1 base insertion of A -> { C ; CA } -> C is the reference allele (because null alleles are not allowed)
  • *
*

* Suppose I see a the following in the population: *

*
 Ref: a t C g a // C is the reference base
 : a t G g a // C base is a G in some individuals
 : a t - g a // C base is deleted w.r.t. the reference
 
*

* How do I represent this? There are three segregating alleles: *

*
* { C , G , - } *
*

and these are represented as:

*
* { tC, tG, t } *
*

* Now suppose I have this more complex example:

 Ref: a t C g a // C is the reference base
 : a t - g a
 : a t - - a
 : a t CAg a
 
*

* There are actually four segregating alleles: *

*
* { Cg , -g, --, and CAg } over bases 2-4 *
*

represented as:

*
* { tCg, tg, t, tCAg } *
*

* Critically, it should be possible to apply an allele to a reference sequence to create the * correct haplotype sequence:

*
* Allele + reference => haplotype *
*

* For convenience, we are going to create Alleles where the GenomeLoc of the allele is stored outside of the * Allele object itself. So there's an idea of an A/C polymorphism independent of it's surrounding context. * * Given list of alleles it's possible to determine the "type" of the variation

 A / C @ loc => SNP
 - / A => INDEL
 
*

* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion. *

*

* Alelle also supports is concept of a NO_CALL allele. This Allele represents a haplotype that couldn't be * determined. This is usually represented by a '.' allele. *

*

* Note that Alleles store all bases as bytes, in **UPPER CASE**. So 'atc' == 'ATC' from the perspective of an * Allele. *

* @author gatk_team. */ public interface Allele extends Comparable, Serializable { /** A generic static NO_CALL allele for use */ String NO_CALL_STRING = "."; /** A generic static SPAN_DEL allele for use */ String SPAN_DEL_STRING = "*"; /** Non ref allele representations */ char SINGLE_BREAKEND_INDICATOR = '.'; char BREAKEND_EXTENDING_RIGHT = '['; char BREAKEND_EXTENDING_LEFT = ']'; char SYMBOLIC_ALLELE_START = '<'; char SYMBOLIC_ALLELE_END = '>'; String NON_REF_STRING = ""; String UNSPECIFIED_ALTERNATE_ALLELE_STRING = "<*>"; Allele REF_A = new SimpleAllele("A", true); Allele ALT_A = new SimpleAllele("A", false); Allele REF_C = new SimpleAllele("C", true); Allele ALT_C = new SimpleAllele("C", false); Allele REF_G = new SimpleAllele("G", true); Allele ALT_G = new SimpleAllele("G", false); Allele REF_T = new SimpleAllele("T", true); Allele ALT_T = new SimpleAllele("T", false); Allele REF_N = new SimpleAllele("N", true); Allele ALT_N = new SimpleAllele("N", false); Allele SPAN_DEL = new SimpleAllele(SPAN_DEL_STRING, false); Allele NO_CALL = new SimpleAllele(NO_CALL_STRING, false); Allele NON_REF_ALLELE = new SimpleAllele(NON_REF_STRING, false); Allele UNSPECIFIED_ALTERNATE_ALLELE = new SimpleAllele(UNSPECIFIED_ALTERNATE_ALLELE_STRING, false); // for simple deletion, e.g. "ALT==" (note that the spec allows, for now at least, alt alleles like ) @SuppressWarnings("unused") Allele SV_SIMPLE_DEL = StructuralVariantType.DEL.toSymbolicAltAllele(); // for simple insertion, e.g. "ALT==" @SuppressWarnings("unused") Allele SV_SIMPLE_INS = StructuralVariantType.INS.toSymbolicAltAllele(); // for simple inversion, e.g. "ALT==" @SuppressWarnings("unused") Allele SV_SIMPLE_INV = StructuralVariantType.INV.toSymbolicAltAllele(); // for simple generic cnv, e.g. "ALT==" @SuppressWarnings("unused") Allele SV_SIMPLE_CNV = StructuralVariantType.CNV.toSymbolicAltAllele(); // for simple duplication, e.g. "ALT==" @SuppressWarnings("unused") Allele SV_SIMPLE_DUP = StructuralVariantType.DUP.toSymbolicAltAllele(); /** * Create a new Allele that includes bases and if tagged as the reference allele if isRef == true. If bases * == '-', a Null allele is created. If bases == '.', a no call Allele is created. If bases == '*', a spanning deletions Allele is created. * * @param bases the DNA sequence of this variation, '-', '.', or '*' * @param isRef should we make this a reference allele? * @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformated */ static Allele create(byte[] bases, boolean isRef) { if ( bases == null ) throw new IllegalArgumentException("create: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele"); if ( bases.length == 1 ) { // optimization to return a static constant Allele for each single base object switch (bases[0]) { case '.': if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); return NO_CALL; case '*': if ( isRef ) throw new IllegalArgumentException("Cannot tag a spanning deletions allele as the reference allele"); return SPAN_DEL; case 'A': case 'a' : return isRef ? REF_A : ALT_A; case 'C': case 'c' : return isRef ? REF_C : ALT_C; case 'G': case 'g' : return isRef ? REF_G : ALT_G; case 'T': case 't' : return isRef ? REF_T : ALT_T; case 'N': case 'n' : return isRef ? REF_N : ALT_N; default: throw new IllegalArgumentException("Illegal base [" + (char)bases[0] + "] seen in the allele"); } } else { return new SimpleAllele(bases.clone(), isRef); } } static Allele create(byte base, boolean isRef) { return create( new byte[]{ base }, isRef); } static Allele create(byte base) { return create( base, false ); } static Allele extend(Allele left, byte[] right) { if (left.isSymbolic()) throw new IllegalArgumentException("Cannot extend a symbolic allele"); byte[] bases = new byte[left.length() + right.length]; System.arraycopy(left.getBases(), 0, bases, 0, left.length()); System.arraycopy(right, 0, bases, left.length(), right.length); return create(bases, left.isReference()); } /** * @param bases bases representing an allele * @return true if the bases represent the null allele */ @Deprecated static boolean wouldBeNullAllele(byte[] bases) { return (bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.NULL_ALLELE) || bases.length == 0; } /** * @param bases bases representing an allele * @return true if the bases represent the SPAN_DEL allele */ @Deprecated static boolean wouldBeStarAllele(byte[] bases) { return bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.SPANNING_DELETION_ALLELE; } /** * @param bases bases representing an allele * @return true if the bases represent the NO_CALL allele */ @Deprecated static boolean wouldBeNoCallAllele(byte[] bases) { return bases.length == 1 && bases[0] == htsjdk.variant.vcf.VCFConstants.NO_CALL_ALLELE; } /** * @param bases bases representing an allele * @return true if the bases represent a symbolic allele, including breakpoints and breakends */ @Deprecated static boolean wouldBeSymbolicAllele(byte[] bases) { if ( bases.length <= 1 ) return false; else { return bases[0] == Allele.SYMBOLIC_ALLELE_START || bases[bases.length - 1] == Allele.SYMBOLIC_ALLELE_END || wouldBeBreakpoint(bases) || wouldBeSingleBreakend(bases); } } /** * @param bases bases representing an allele * @return true if the bases represent a symbolic allele in breakpoint notation, (ex: G]17:198982] or ]13:123456]T ) */ @Deprecated static boolean wouldBeBreakpoint(byte[] bases) { if (bases.length <= 1) { return false; } for (final byte base : bases) { if (base == Allele.BREAKEND_EXTENDING_LEFT || base == Allele.BREAKEND_EXTENDING_RIGHT) { return true; } } return false; } /** * @param bases bases representing an allele * @return true if the bases represent a symbolic allele in single breakend notation (ex: .A or A. ) */ @Deprecated static boolean wouldBeSingleBreakend(byte[] bases) { if ( bases.length <= 1 ) return false; else { return bases[0] == Allele.SINGLE_BREAKEND_INDICATOR || bases[bases.length - 1] == Allele.SINGLE_BREAKEND_INDICATOR; } } /** * @param bases bases representing a reference allele * @return true if the bases represent the well formatted allele */ static boolean acceptableAlleleBases(String bases) { return acceptableAlleleBases(bases.getBytes(), true); } /** * @param bases bases representing an allele * @param isReferenceAllele is a reference allele * @return true if the bases represent the well formatted allele */ static boolean acceptableAlleleBases(String bases, boolean isReferenceAllele) { return acceptableAlleleBases(bases.getBytes(StandardCharsets.UTF_8), isReferenceAllele); } /** * @param bases bases representing a reference allele * @return true if the bases represent the well formatted allele */ static boolean acceptableAlleleBases(byte[] bases) { return acceptableAlleleBases(bases, true); } /** * * @param bases bases representing an allele * @param isReferenceAllele true if a reference allele * @return true if the bases represent the well formatted allele */ static boolean acceptableAlleleBases(byte[] bases, boolean isReferenceAllele) { if ( wouldBeNullAllele(bases) ) return false; if ( wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) return true; if ( wouldBeStarAllele(bases) ) return !isReferenceAllele; for (byte base : bases ) { switch (base) { case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't': case 'N' : case 'n' : break; default: return false; } } return true; } /** * Returns an allele with the given bases and reference status. * * @param bases bases representing an allele * @param isRef is this the reference allele? */ static Allele create(String bases, boolean isRef) { return create(bases.getBytes(), isRef); } /** * Creates a non-Ref allele. @see Allele(byte[], boolean) for full information * * @param bases bases representing an allele */ static Allele create(String bases) { return create(bases, false); } /** * Creates a non-Ref allele. @see Allele(byte[], boolean) for full information * * @param bases bases representing an allele */ static Allele create(byte[] bases) { return create(bases, false); } /** * Creates a new allele based on the provided one. Ref state will be copied unless ignoreRefState is true * (in which case the returned allele will be non-Ref). * * This method is efficient because it can skip the validation of the bases (since the original allele was already validated) * * @param allele the allele from which to copy the bases * @param ignoreRefState should we ignore the reference state of the input allele and use the default ref state? */ static Allele create(Allele allele, boolean ignoreRefState) { return new SimpleAllele(allele.getBases(), allele.isReference() && !ignoreRefState); } static boolean oneIsPrefixOfOther(final Allele a1, final Allele a2) { if ( a2.length() >= a1.length() ) return a1.isPrefixOf(a2); else return a2.isPrefixOf(a1); } boolean isPrefixOf(final Allele other); /** @return true if this is the NO_CALL allele */ boolean isNoCall(); // Returns true if this is not the NO_CALL allele boolean isCalled(); /** @return true if this Allele is the reference allele */ boolean isReference(); /** @return true if this Allele is not the reference allele */ boolean isNonReference(); /** @return true if this Allele is symbolic (i.e. no well-defined base sequence), this includes breakpoints and breakends */ boolean isSymbolic(); /** @return true if this Allele is a breakpoint ( ex: G]17:198982] or ]13:123456]T ) */ boolean isBreakpoint(); /** @return true if this Allele is a single breakend (ex: .A or A.) */ boolean isSingleBreakend(); // Returns a nice string representation of this object String toString(); byte[] getBases(); String getBaseString(); String getDisplayString(); byte[] getDisplayBases(); boolean equals(Object other); int hashCode(); boolean equals(Allele other, boolean ignoreRefState); boolean basesMatch(byte[] test); boolean basesMatch(String test); boolean basesMatch(Allele test); int length(); boolean isNonRefAllele(); }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy