All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.charite.compbio.jannovar.annotation.VariantEffect Maven / Gradle / Ivy

package de.charite.compbio.jannovar.annotation;

import com.google.common.base.Function;
import com.google.common.base.Predicate;

// TODO(holtgrew): For now, only insert most specific variants by default, add switch that adds transitive changes
// TODO(holtgrew): structural variants need more refinement

/**
 * These codes reflect the possible types of variants that we call for an exome.
 *
 * The values in this enum are given in the putative order of impact (more severe to less severe). The documentation
 * gives the sequence ontology (SO) ID and the SO description. Also, the documentation of each value explains whether
 * Jannovar generates this annotation or not.
 *
 * @author Peter Robinson
 * @author Marten Jaeger
 * @author Manuel Holtgrewe
 * @author Max Schubach
 */
public enum VariantEffect {

	//
	// HIGH Putative Impact
	//

	// change of feature structure or larger units
	/**
	 * SO:1000182 A kind of chromosome
	 * variation where the chromosome complement is not an exact multiple of the haploid number (is a
	 * chromosome_variation).
	 *
	 * Not used in Jannovar annotations.
	 */
	CHROMOSOME_NUMBER_VARIATION,
	/**
	 * SO:0001893 A feature ablation
	 * whereby the deleted region includes a transcript feature (is a: feature_ablation)
	 *
	 * Not used in Jannovar annotations.
	 */
	TRANSCRIPT_ABLATION,
	/**
	 * SO:0001572 A sequence variant
	 * whereby an exon is lost from the transcript (is a: {@link #SPLICING_VARIANT}, {@link #TRANSCRIPT_VARIANT}).
	 *
	 * Not used in Jannovar annotations.
	 */
	EXON_LOSS_VARIANT,

	// high impact changes in the coding region
	/**
	 * SO:0001909 A frameshift variant
	 * that causes the translational reading frame to be extended relative to the reference feature (is a
	 * {@link #FRAMESHIFT_VARIANT}, internal_feature_elongation).
	 */
	FRAMESHIFT_ELONGATION,
	/**
	 * SO:0001910 A frameshift variant
	 * that causes the translational reading frame to be shortened relative to the reference feature (is a
	 * {@link #FRAMESHIFT_VARIANT}, internal_feature_truncation).
	 */
	FRAMESHIFT_TRUNCATION,
	/**
	 * SO:0001589 A sequence variant
	 * which causes a disruption of the translational reading frame, because the number of nucleotides inserted or
	 * deleted is not a multiple of three (is a: protein_altering_variant).
	 *
	 * Used for frameshift variant for the case where there is no stop codon any more and the rare case in which the
	 * transcript length is retained.
	 */
	FRAMESHIFT_VARIANT,
	/**
	 * SO:0001908 A sequence variant
	 * that causes the extension of a genomic feature from within the feature rather than from the terminus of the
	 * feature, with regard to the reference sequence.
	 *
	 * In Jannovar, used to annotate a {@link #COMPLEX_SUBSTITUTION} that does not lead to a frameshift and increases
	 * the transcript length.
	 */
	INTERNAL_FEATURE_ELONGATION,
	/**
	 * SO:0001906 A sequence variant
	 * that causes the reduction of a genomic feature, with regard to the reference sequence (is a: feature_variant).
	 *
	 * The term 
	 * INTERNAL_FEATURE_TRUNCATION would be more fitting but is not available in SO.
	 *
	 * In Jannovar, used to annotate a {@link #COMPLEX_SUBSTITUTION} that does not lead to a frameshift and decreases
	 * the transcript length.
	 */
	FEATURE_TRUNCATION,
	/**
	 * SO:0002007 An MNV is a multiple
	 * nucleotide variant (substitution) in which the inserted sequence is the same length as the replaced sequence (is
	 * a: substitution).
	 */
	MNV,
	/**
	 * SO:1000005 When no simple or
	 * well defined DNA mutation event describes the observed DNA change, the keyword "complex" should be used. Usually
	 * there are multiple equally plausible explanations for the change (is a: substitution).
	 *
	 * Used together with {@link #INTERNAL_FEATURE_ELONGATION} or {@link #FEATURE_TRUNCATION} to describe a variant
	 * that does not lead to a frameshift but to a changed transcript length. Used together with
	 * {@link #FRAMESHIFT_ELONGATION} or {@link #FRAMESHIFT_TRUNCATION} if the substitution leads to a frameshift
	 * variant.
	 */
	COMPLEX_SUBSTITUTION,
	/**
	 * SO:0001587 A sequence variant
	 * whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened
	 * transcript (is a: nonsynonymous_variant, feature_truncation).
	 */
	STOP_GAINED,
	/**
	 * SO:0001578 A sequence variant
	 * where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript (is a:
	 * nonsynonymous variant, terminator_codon_variant, feature_elongation)
	 */
	STOP_LOST,
	/**
	 * SO:0002012 A codon variant that
	 * changes at least one base of the canonical start codon (is a: initiator_codon_variant).
	 */
	START_LOST,

	// splicing changes, might change splicing
	/**
	 * SO:0001574 A splice variant
	 * that changes the 2 base region at the 3' end of an intron (is a {@link #SPLICE_REGION_VARIANT}).
	 */
	SPLICE_ACCEPTOR_VARIANT,
	/**
	 * SO:0001575 A splice variant
	 * that changes the 2 base pair region at the 5' end of an intron (is a {@link #SPLICE_REGION_VARIANT}).
	 */
	SPLICE_DONOR_VARIANT,

	// change in rare amino acids, exotic variant
	/**
	 * SO:0002008 A sequence variant
	 * whereby at least one base of a codon encoding a rare amino acid is changed, resulting in a different encoded
	 * amino acid (children: selenocysteine_loss, pyrrolysine_loss).
	 *
	 * Not used in Jannovar annotations.
	 */
	RARE_AMINO_ACID_VARIANT,
	/**
	 * Marker for smallest {@link VariantEffect} with {@link PutativeImpact#HIGH} impact.
	 */
	_SMALLEST_HIGH_IMPACT,

	//
	// MODERATE Putative Impact
	//

	// moderate impact changes in the coding region
	/**
	 * SO:0001583 A sequence variant,
	 * that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved.
	 */
	MISSENSE_VARIANT,
	/**
	 * SO:0001821 An inframe non
	 * synonymous variant that inserts bases into the coding sequence (is a: inframe_indel,
	 * internal_feature_elongation).
	 */
	INFRAME_INSERTION,
	/**
	 * SO:0001824 An inframe increase
	 * in cds length that inserts one or more codons into the coding sequence within an existing codon (is a:
	 * {@link #INFRAME_INSERTION}).
	 */
	DISRUPTIVE_INFRAME_INSERTION,
	/**
	 * SO:0001822 An inframe non
	 * synonymous variant that deletes bases from the coding sequence (is a: inframe_indel, feature_truncation).
	 */
	INFRAME_DELETION,
	/**
	 * SO:0001826 An inframe decrease
	 * in cds length that deletes bases from the coding sequence starting within an existing codon (is a:
	 * {@link #INFRAME_DELETION}).
	 */
	DISRUPTIVE_INFRAME_DELETION,

	// changes in the UTR
	/**
	 * SO:0002013 A sequence variant
	 * that causes the reduction of the 5' UTR with regard to the reference sequence (is a:
	 * {@link #FIVE_PRIME_UTR_EXON_VARIANT} or {@link #FIVE_PRIME_UTR_INTRON_VARIANT}).
	 *
	 * Jannovar does not use this at the moment.
	 */
	FIVE_PRIME_UTR_TRUNCATION,
	/**
	 * SO:0002015 A sequence variant
	 * that causes the reduction of the 3' UTR with regard to the reference sequence (is a:
	 * {@link #THREE_PRIME_UTR_EXON_VARIANT} or {@link #THREE_PRIME_UTR_INTRON_VARIANT}).
	 *
	 * Jannovar does not use this at the moment.
	 */
	THREE_PRIME_UTR_TRUNCATION,

	// changes in the splicing region
	/**
	 * SO:0001630 A sequence variant
	 * in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8
	 * bases of the intron (is a: {@link #SPLICING_VARIANT}).
	 */
	SPLICE_REGION_VARIANT,
	/**
	 * Marker for smallest {@link VariantEffect} with {@link PutativeImpact#MODERATE} impact.
	 */
	_SMALLEST_MODERATE_IMPACT,

	//
	// LOW Putative Impact
	//

	/**
	 * SO:0001567 A sequence variant
	 * where at least one base in the terminator codon is changed, but the terminator remains (is a:
	 * {@link #SYNONYMOUS_VARIANT}, terminator_codon_variant).
	 */
	STOP_RETAINED_VARIANT,
	/**
	 * SO:0001582 A codon variant that
	 * changes at least one base of the first codon of a transcript (is a: {@link #CODING_SEQUENCE_VARIANT}, children:
	 * start_retained_variant, start_lost).
	 */
	INITIATOR_CODON_VARIANT,
	/**
	 * SO:0001819 A sequence variant
	 * where there is no resulting change to the encoded amino acid (is a: {@link #CODING_SEQUENCE_VARIANT}, children:
	 * start_retained_variant, stop_retained_variant).
	 */
	SYNONYMOUS_VARIANT,

	// changes in coding transcripts, exons/introns
	/**
	 * SO:0001969 A sequence variant
	 * that changes intron sequence in a coding transcript (is a: {@link #CODING_TRANSCRIPT_VARIANT},
	 * {@link #INTRON_VARIANT}).
	 */
	CODING_TRANSCRIPT_INTRON_VARIANT,

	// changes in non-coding transcripts, exons/introns
	/**
	 * SO:0001792 A sequence variant
	 * that changes non-coding exon sequence in a non-coding transcript (is a: {@link #NON_CODING_TRANSCRIPT_VARIANT},
	 * {@link #EXON_VARIANT}).
	 */
	NON_CODING_TRANSCRIPT_EXON_VARIANT,
	/**
	 * SO:0001970 A sequence variant
	 * that changes non-coding intron sequence in a non-coding transcript (is a: {@link #NON_CODING_TRANSCRIPT_VARIANT},
	 * {@link #INTRON_VARIANT}).
	 */
	NON_CODING_TRANSCRIPT_INTRON_VARIANT,

	/**
	 * SO:0001983 A 5' UTR variant
	 * where a premature start codon is introduced, moved or lost (is a: {@link #FIVE_PRIME_UTR_EXON_VARIANT} or
	 * {@link #FIVE_PRIME_UTR_INTRON_VARIANT}).
	 *
	 * Not used in Jannovar annotations.
	 */
	// TODO(holtgrem): use
	FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT,
	/**
	 * SO:0002092 A UTR variant of the
	 * 5' UTR (is a: 5_prime_UTR_variant; is a: UTR_variant).
	 */
	FIVE_PRIME_UTR_EXON_VARIANT,
	/**
	 * SO:0002089 A UTR variant of the
	 * 3' UTR (is a: 3_prime_UTR_variant; is a: UTR_variant).
	 */
	THREE_PRIME_UTR_EXON_VARIANT,
	/**
	 * SO:0002091 A UTR variant
	 * between 5' UTRs (is a: 5_prime_UTR_variant; is a: UTR_variant).
	 */
	FIVE_PRIME_UTR_INTRON_VARIANT,
	/**
	 * SO:0002090 A UTR variant
	 * between 3' UTRs (is a: 3_prime_UTR_variant; is a: UTR_variant).
	 */
	THREE_PRIME_UTR_INTRON_VARIANT,

	/**
	 * Marker for smallest {@link VariantEffect} with {@link PutativeImpact#LOW} impact.
	 */
	_SMALLEST_LOW_IMPACT,

	//
	// MODIFIER Putative Impact
	//

	// duplication marker
	/**
	 * SO:1000039 A tandem duplication
	 * where the individual regions are in the same orientation (is a: tandem_duplication).
	 *
	 * In Jannovar, used as an additional marker to describe that an insertion is a duplication.
	 */
	DIRECT_TANDEM_DUPLICATION,

	// variant in custom region
	/**
	 * Variant in a user-specified custom region.
	 *
	 * Not used in Jannovar annotations.
	 */
	// TODO(holtgrem): use?
	CUSTOM,

	// variants with distances to genes/transcripts
	/**
	 * SO:0001631 A sequence variant
	 * located 5' of a gene (is a: {@link #INTERGENIC_VARIANT}).
	 */
	UPSTREAM_GENE_VARIANT,
	/**
	 * SO:0001632 A sequence variant
	 * located 3' of a gene (is a: {@link #INTERGENIC_VARIANT}).
	 */
	DOWNSTREAM_GENE_VARIANT,
	/**
	 * SO:0001628 A sequence variant
	 * located in the intergenic region, between genes (is a: feature_variant).
	 */
	INTERGENIC_VARIANT,

	// regulatory / TFBS variants
	/**
	 * SO:0001782 A sequence variant
	 * located within a transcription factor binding site (is a: {@link #REGULATORY_REGION_VARIANT}).
	 *
	 * Not used in Jannovar annotations.
	 */
	TF_BINDING_SITE_VARIANT,
	/**
	 * SO:0001566 A sequence variant
	 * located within a regulatory region (is a: feature_variant).
	 *
	 * Not used in Jannovar annotations.
	 */
	REGULATORY_REGION_VARIANT,

	// variant in intronic regions
	/**
	 * SO:0002018 A transcript variant
	 * occurring within a conserved region of an intron (is a: {@link #INTRON_VARIANT}).
	 *
	 * Not used in Jannovar annotations.
	 */
	CONSERVED_INTRON_VARIANT,

	/**
	 * SO:0002011 A variant that
	 * occurs within a gene but falls outside of all transcript features. This occurs when alternate transcripts of a
	 * gene do not share overlapping sequence (is a: {@link #TRANSCRIPT_VARIANT} ).
	 */
	// TODO(holtgrem): use?
	INTRAGENIC_VARIANT,
	/**
	 * SO:0002017 A sequence variant
	 * located in a conserved intergenic region, between genes (is a: {@link #INTERGENIC_VARIANT}).
	 */
	CONSERVED_INTERGENIC_VARIANT,

	// general variant types
	/**
	 * SO:0001537 A sequence variant
	 * that changes one or more sequence features (is a: sequence variant).
	 */
	STRUCTURAL_VARIANT,
	/**
	 * SO:0001580 A sequence variant
	 * that changes the coding sequence (is a: {@link #CODING_TRANSCRIPT_VARIANT}, {@link #EXON_VARIANT}).
	 *
	 * Sequence Ontology does not have a term CODING_TRANSCRIPT_EXON_VARIANT, so we use this.
	 *
	 * Not used in Jannovar annotations.
	 */
	CODING_SEQUENCE_VARIANT,
	/**
	 * SO:0001627 A transcript variant
	 * occurring within an intron (is a: {@link #TRANSCRIPT_VARIANT}).
	 *
	 * Jannovar uses {@link #CODING_TRANSCRIPT_INTRON_VARIANT} and {@link #NON_CODING_TRANSCRIPT_INTRON_VARIANT}
	 * instead.
	 */
	INTRON_VARIANT,
	/**
	 * SO:0001791 A sequence variant
	 * that changes exon sequence (is a: {@link #TRANSCRIPT_VARIANT}).
	 */
	EXON_VARIANT,
	/**
	 * SO:0001568 A sequence variant
	 * that changes the process of splicing (is a: {@link #GENE_VARIANT}).
	 *
	 * Not used in Jannovar annotations.
	 */
	SPLICING_VARIANT,
	/**
	 * SO:0000276 Variant affects a
	 * miRNA (is a: miRNA_primary_transcript, small_regulatory_ncRNA).
	 *
	 * Not used in Jannovar annotations.
	 */
	// TODO(holtgrem): use?
	MIRNA,
	/**
	 * SO:0001564 A sequence variant
	 * where the structure of the gene is changed (is a: feature_variant).
	 *
	 * Not used in Jannovar annotations.
	 */
	GENE_VARIANT,
	/**
	 * SO:0001968 A transcript variant
	 * of a protein coding gene (is a: {@link #TRANSCRIPT_VARIANT}).
	 *
	 * Not used in Jannovar annotations.
	 */
	CODING_TRANSCRIPT_VARIANT,
	/**
	 * SO:0001619 (is a:
	 * {@link #TRANSCRIPT_VARIANT}).
	 *
	 * Used for marking splicing variants as non-coding.
	 */
	NON_CODING_TRANSCRIPT_VARIANT,
	/**
	 * SO:0001576 A sequence variant
	 * that changes the structure of the transcript (is a: {@link #GENE_VARIANT}).
	 *
	 * Not used in Jannovar annotations.
	 */
	TRANSCRIPT_VARIANT,
	/**
	 * SO:0000605 A region containing
	 * or overlapping no genes that is bounded on either side by a gene, or bounded by a gene and the end of the
	 * chromosome (is a: biological_region).
	 *
	 * Not used in Jannovar annotations.
	 */
	INTERGENIC_REGION,
	/**
	 * SO:0000340 Structural unit
	 * composed of a nucleic acid molecule which controls its own replication through the interaction of specific
	 * proteins at one or more origins of replication (is a: replicon).
	 *
	 * Not used in Jannovar annotations.
	 */
	CHROMOSOME,

	/**
	 * SO:0001060 Top level term for
	 * variants, can be used for marking "unknown effect".
	 *
	 * Not used in Jannovar annotations.
	 */
	SEQUENCE_VARIANT;

	/**
	 * {@link Predicate} for testing whether a {@link VariantEffect} is related to splicing.
	 *
	 * Delegates to {@link #isSplicing()}. The explicit type argument is required: with a raw {@link Predicate} the
	 * method <code>apply(VariantEffect)</code> would not override <code>apply(Object)</code>.
	 */
	public static final Predicate<VariantEffect> IS_SPLICING = new Predicate<VariantEffect>() {
		@Override
		public boolean apply(VariantEffect arg) {
			return arg.isSplicing();
		}
	};

	/**
	 * {@link Predicate} for testing whether a {@link VariantEffect} is intronic.
	 *
	 * Delegates to {@link #isIntronic()}. The explicit type argument is required: with a raw {@link Predicate} the
	 * method <code>apply(VariantEffect)</code> would not override <code>apply(Object)</code>.
	 */
	public static final Predicate<VariantEffect> IS_INTRONIC = new Predicate<VariantEffect>() {
		@Override
		public boolean apply(VariantEffect arg) {
			return arg.isIntronic();
		}
	};

	/**
	 * {@link Function} for converting from {@link VariantEffect} to SO term String.
	 *
	 * Delegates to {@link #getSequenceOntologyTerm()}, so the result is <code>null</code> wherever that method
	 * returns <code>null</code>. The explicit type arguments are required: with a raw {@link Function} the method
	 * <code>apply(VariantEffect)</code> would not override <code>apply(Object)</code>.
	 */
	public static final Function<VariantEffect, String> TO_SO_TERM = new Function<VariantEffect, String>() {
		@Override
		public String apply(VariantEffect arg) {
			return arg.getSequenceOntologyTerm();
		}
	};

	/**
	 * {@link Function} for converting from {@link VariantEffect} to legacy term.
	 *
	 * Delegates to {@link #getLegacyTerm()}, so the result is <code>null</code> wherever that method returns
	 * <code>null</code>. The explicit type arguments are required: with a raw {@link Function} the method
	 * <code>apply(VariantEffect)</code> would not override <code>apply(Object)</code>.
	 */
	public static final Function<VariantEffect, String> TO_LEGACY_NAME = new Function<VariantEffect, String>() {
		@Override
		public String apply(VariantEffect arg) {
			return arg.getLegacyTerm();
		}
	};

	/**
	 * Tests whether this effect is one of the two length-changing frameshift effects.
	 *
	 * NOTE(review): {@link #FRAMESHIFT_VARIANT} is not matched here -- confirm that this is intended.
	 *
	 * @return <code>true</code> for {@link #FRAMESHIFT_ELONGATION} and {@link #FRAMESHIFT_TRUNCATION},
	 *         <code>false</code> for every other value.
	 */
	boolean isFrameshiftVariant() {
		return this == FRAMESHIFT_ELONGATION || this == FRAMESHIFT_TRUNCATION;
	}

	/**
	 * Maps this effect to the identifier used by old Jannovar versions.
	 *
	 * Several effects share one legacy identifier. Effects without a legacy counterpart (including the
	 * <code>_SMALLEST_*</code> marker values) yield <code>null</code>.
	 *
	 * @return Legacy (old Jannovar) id, or <code>null</code> if there is none for this effect.
	 */
	public String getLegacyTerm() {
		final String legacy;
		switch (this) {
		// frameshift changes
		case FRAMESHIFT_ELONGATION:
			legacy = "FS_INSERTION";
			break;
		case FRAMESHIFT_TRUNCATION:
			legacy = "FS_DELETION";
			break;
		case FRAMESHIFT_VARIANT:
			legacy = "FS_SUBSTITUTION";
			break;

		// changes that keep the reading frame
		case INFRAME_INSERTION:
		case DISRUPTIVE_INFRAME_INSERTION:
		case INTERNAL_FEATURE_ELONGATION:
			legacy = "NON_FS_INSERTION";
			break;
		case INFRAME_DELETION:
		case DISRUPTIVE_INFRAME_DELETION:
		case FEATURE_TRUNCATION:
		case DIRECT_TANDEM_DUPLICATION: // the duplication marker is reported with the deletion id
			legacy = "NON_FS_DELETION";
			break;
		case MNV:
			legacy = "NON_FS_SUBSTITUTION";
			break;

		// codon level changes
		case MISSENSE_VARIANT:
		case RARE_AMINO_ACID_VARIANT:
			legacy = "MISSENSE";
			break;
		case SYNONYMOUS_VARIANT:
		case STOP_RETAINED_VARIANT:
			legacy = "SYNONYMOUS";
			break;
		case START_LOST:
		case INITIATOR_CODON_VARIANT:
			legacy = "STARTLOSS";
			break;
		case STOP_GAINED:
			legacy = "STOPGAIN";
			break;
		case STOP_LOST:
			legacy = "STOPLOSS";
			break;

		// splicing
		case SPLICE_ACCEPTOR_VARIANT:
		case SPLICE_DONOR_VARIANT:
		case SPLICE_REGION_VARIANT:
		case SPLICING_VARIANT:
			legacy = "SPLICING";
			break;

		// UTR
		case FIVE_PRIME_UTR_EXON_VARIANT:
		case FIVE_PRIME_UTR_TRUNCATION:
		case FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT:
			legacy = "UTR5";
			break;
		case THREE_PRIME_UTR_EXON_VARIANT:
		case THREE_PRIME_UTR_TRUNCATION:
			legacy = "UTR3";
			break;

		// introns
		case INTRON_VARIANT:
		case CODING_TRANSCRIPT_INTRON_VARIANT:
		case CONSERVED_INTRON_VARIANT:
		case FIVE_PRIME_UTR_INTRON_VARIANT:
		case THREE_PRIME_UTR_INTRON_VARIANT:
			legacy = "INTRONIC";
			break;

		// non-coding transcripts
		case NON_CODING_TRANSCRIPT_EXON_VARIANT:
			legacy = "ncRNA_EXONIC";
			break;
		case NON_CODING_TRANSCRIPT_INTRON_VARIANT:
			legacy = "ncRNA_INTRONIC";
			break;

		// positions relative to genes
		case UPSTREAM_GENE_VARIANT:
			legacy = "UPSTREAM";
			break;
		case DOWNSTREAM_GENE_VARIANT:
			legacy = "DOWNSTREAM";
			break;
		case INTERGENIC_VARIANT:
		case CONSERVED_INTERGENIC_VARIANT:
			legacy = "INTERGENIC";
			break;
		case INTRAGENIC_VARIANT:
			legacy = "INTRAGENIC";
			break;

		// larger events and fallback
		case TRANSCRIPT_ABLATION:
			legacy = "TRANSCRIPT_ABLATION";
			break;
		case SEQUENCE_VARIANT:
			legacy = "UNKNOWN";
			break;

		// everything else has no legacy identifier
		default:
			legacy = null;
			break;
		}
		return legacy;
	}

	/**
	 * Derives the impact tier from the position of this value in the enum declaration.
	 *
	 * The <code>_SMALLEST_*_IMPACT</code> marker values delimit the tiers; a marker itself belongs to the tier it
	 * closes. Everything declared after {@link #_SMALLEST_LOW_IMPACT} is a modifier.
	 *
	 * @return {@link PutativeImpact} of this effect annotation.
	 */
	public PutativeImpact getImpact() {
		final int rank = ordinal();
		if (rank > _SMALLEST_LOW_IMPACT.ordinal())
			return PutativeImpact.MODIFIER;
		if (rank > _SMALLEST_MODERATE_IMPACT.ordinal())
			return PutativeImpact.LOW;
		if (rank > _SMALLEST_HIGH_IMPACT.ordinal())
			return PutativeImpact.MODERATE;
		return PutativeImpact.HIGH;
	}

	/**
	 * Maps this effect to its term name as used by this class for the Sequence Ontology.
	 *
	 * The cases are listed in the order in which the values are declared. {@link #CUSTOM} maps to the empty
	 * string; the <code>_SMALLEST_*</code> marker values map to <code>null</code>.
	 *
	 * @return String with the Sequence Ontology term, possibly empty or <code>null</code> as described above.
	 */
	public String getSequenceOntologyTerm() {
		switch (this) {
		// HIGH putative impact
		case CHROMOSOME_NUMBER_VARIATION:
			return "chromosome_number_variation";
		case TRANSCRIPT_ABLATION:
			return "transcript_ablation";
		case EXON_LOSS_VARIANT:
			return "exon_loss_variant";
		case FRAMESHIFT_ELONGATION:
			return "frameshift_elongation";
		case FRAMESHIFT_TRUNCATION:
			return "frameshift_truncation";
		case FRAMESHIFT_VARIANT:
			return "frameshift_variant";
		case INTERNAL_FEATURE_ELONGATION:
			return "internal_feature_elongation";
		case FEATURE_TRUNCATION:
			return "feature_truncation";
		case MNV:
			return "mnv";
		case COMPLEX_SUBSTITUTION:
			return "complex_substitution";
		case STOP_GAINED:
			return "stop_gained";
		case STOP_LOST:
			return "stop_lost";
		case START_LOST:
			return "start_lost";
		case SPLICE_ACCEPTOR_VARIANT:
			return "splice_acceptor_variant";
		case SPLICE_DONOR_VARIANT:
			return "splice_donor_variant";
		case RARE_AMINO_ACID_VARIANT:
			return "rare_amino_acid_variant";

		// MODERATE putative impact
		case MISSENSE_VARIANT:
			return "missense_variant";
		case INFRAME_INSERTION:
			return "inframe_insertion";
		case DISRUPTIVE_INFRAME_INSERTION:
			return "disruptive_inframe_insertion";
		case INFRAME_DELETION:
			return "inframe_deletion";
		case DISRUPTIVE_INFRAME_DELETION:
			return "disruptive_inframe_deletion";
		case FIVE_PRIME_UTR_TRUNCATION:
			return "5_prime_UTR_truncation";
		case THREE_PRIME_UTR_TRUNCATION:
			return "3_prime_UTR_truncation";
		case SPLICE_REGION_VARIANT:
			return "splice_region_variant";

		// LOW putative impact
		case STOP_RETAINED_VARIANT:
			return "stop_retained_variant";
		case INITIATOR_CODON_VARIANT:
			return "initiator_codon_variant";
		case SYNONYMOUS_VARIANT:
			return "synonymous_variant";
		case CODING_TRANSCRIPT_INTRON_VARIANT:
			return "coding_transcript_intron_variant";
		case NON_CODING_TRANSCRIPT_EXON_VARIANT:
			return "non_coding_transcript_exon_variant";
		case NON_CODING_TRANSCRIPT_INTRON_VARIANT:
			return "non_coding_transcript_intron_variant";
		case FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT:
			return "5_prime_UTR_premature_start_codon_gain_variant";
		case FIVE_PRIME_UTR_EXON_VARIANT:
			return "5_prime_UTR_exon_variant";
		case THREE_PRIME_UTR_EXON_VARIANT:
			return "3_prime_UTR_exon_variant";
		case FIVE_PRIME_UTR_INTRON_VARIANT:
			return "5_prime_UTR_intron_variant";
		case THREE_PRIME_UTR_INTRON_VARIANT:
			return "3_prime_UTR_intron_variant";

		// MODIFIER putative impact
		case DIRECT_TANDEM_DUPLICATION:
			return "direct_tandem_duplication";
		case CUSTOM:
			return "";
		case UPSTREAM_GENE_VARIANT:
			return "upstream_gene_variant";
		case DOWNSTREAM_GENE_VARIANT:
			return "downstream_gene_variant";
		case INTERGENIC_VARIANT:
			return "intergenic_variant";
		case TF_BINDING_SITE_VARIANT:
			return "tf_binding_site_variant";
		case REGULATORY_REGION_VARIANT:
			return "regulatory_region_variant";
		case CONSERVED_INTRON_VARIANT:
			return "conserved_intron_variant";
		case INTRAGENIC_VARIANT:
			return "intragenic_variant";
		case CONSERVED_INTERGENIC_VARIANT:
			return "conserved_intergenic_variant";
		case STRUCTURAL_VARIANT:
			return "structural_variant";
		case CODING_SEQUENCE_VARIANT:
			return "coding_sequence_variant";
		case INTRON_VARIANT:
			return "intron_variant";
		case EXON_VARIANT:
			return "exon_variant";
		case SPLICING_VARIANT:
			return "splicing_variant";
		case MIRNA:
			return "miRNA";
		case GENE_VARIANT:
			return "gene_variant";
		case CODING_TRANSCRIPT_VARIANT:
			return "coding_transcript_variant";
		case NON_CODING_TRANSCRIPT_VARIANT:
			return "non_coding_transcript_variant";
		case TRANSCRIPT_VARIANT:
			return "transcript_variant";
		case INTERGENIC_REGION:
			return "intergenic_region";
		case CHROMOSOME:
			return "chromosome";
		case SEQUENCE_VARIANT:
			return "sequence_variant";

		// marker values carry no term
		case _SMALLEST_HIGH_IMPACT:
		case _SMALLEST_MODERATE_IMPACT:
		case _SMALLEST_LOW_IMPACT:
		default:
			return null;
		}
	}

	/**
	 * Maps this effect to its Sequence Ontology accession (<code>SO:nnnnnnn</code>).
	 *
	 * The cases are listed in the order in which the values are declared. {@link #CUSTOM} maps to the empty
	 * string; the <code>_SMALLEST_*</code> marker values map to <code>null</code>.
	 *
	 * @return String with the Sequence Ontology ID, possibly empty or <code>null</code> as described above.
	 */
	public String getSequenceOID() {
		final String accession;
		switch (this) {
		// HIGH putative impact
		case CHROMOSOME_NUMBER_VARIATION:
			accession = "SO:1000182";
			break;
		case TRANSCRIPT_ABLATION:
			accession = "SO:0001893";
			break;
		case EXON_LOSS_VARIANT:
			accession = "SO:0001572";
			break;
		case FRAMESHIFT_ELONGATION:
			accession = "SO:0001909";
			break;
		case FRAMESHIFT_TRUNCATION:
			accession = "SO:0001910";
			break;
		case FRAMESHIFT_VARIANT:
			accession = "SO:0001589";
			break;
		case INTERNAL_FEATURE_ELONGATION:
			accession = "SO:0001908";
			break;
		case FEATURE_TRUNCATION:
			accession = "SO:0001906";
			break;
		case MNV:
			accession = "SO:0002007";
			break;
		case COMPLEX_SUBSTITUTION:
			accession = "SO:1000005";
			break;
		case STOP_GAINED:
			accession = "SO:0001587";
			break;
		case STOP_LOST:
			accession = "SO:0001578";
			break;
		case START_LOST:
			accession = "SO:0002012";
			break;
		case SPLICE_ACCEPTOR_VARIANT:
			accession = "SO:0001574";
			break;
		case SPLICE_DONOR_VARIANT:
			accession = "SO:0001575";
			break;
		case RARE_AMINO_ACID_VARIANT:
			accession = "SO:0002008";
			break;

		// MODERATE putative impact
		case MISSENSE_VARIANT:
			accession = "SO:0001583";
			break;
		case INFRAME_INSERTION:
			accession = "SO:0001821";
			break;
		case DISRUPTIVE_INFRAME_INSERTION:
			accession = "SO:0001824";
			break;
		case INFRAME_DELETION:
			accession = "SO:0001822";
			break;
		case DISRUPTIVE_INFRAME_DELETION:
			accession = "SO:0001826";
			break;
		case FIVE_PRIME_UTR_TRUNCATION:
			accession = "SO:0002013";
			break;
		case THREE_PRIME_UTR_TRUNCATION:
			accession = "SO:0002015";
			break;
		case SPLICE_REGION_VARIANT:
			accession = "SO:0001630";
			break;

		// LOW putative impact
		case STOP_RETAINED_VARIANT:
			accession = "SO:0001567";
			break;
		case INITIATOR_CODON_VARIANT:
			accession = "SO:0001582";
			break;
		case SYNONYMOUS_VARIANT:
			accession = "SO:0001819";
			break;
		case CODING_TRANSCRIPT_INTRON_VARIANT:
			accession = "SO:0001969";
			break;
		case NON_CODING_TRANSCRIPT_EXON_VARIANT:
			accession = "SO:0001792";
			break;
		case NON_CODING_TRANSCRIPT_INTRON_VARIANT:
			accession = "SO:0001970";
			break;
		case FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT:
			accession = "SO:0001983";
			break;
		case FIVE_PRIME_UTR_EXON_VARIANT:
			accession = "SO:0002092";
			break;
		case THREE_PRIME_UTR_EXON_VARIANT:
			accession = "SO:0002089";
			break;
		case FIVE_PRIME_UTR_INTRON_VARIANT:
			accession = "SO:0002091";
			break;
		case THREE_PRIME_UTR_INTRON_VARIANT:
			accession = "SO:0002090";
			break;

		// MODIFIER putative impact
		case DIRECT_TANDEM_DUPLICATION:
			accession = "SO:1000039";
			break;
		case CUSTOM:
			accession = "";
			break;
		case UPSTREAM_GENE_VARIANT:
			accession = "SO:0001631";
			break;
		case DOWNSTREAM_GENE_VARIANT:
			accession = "SO:0001632";
			break;
		case INTERGENIC_VARIANT:
			accession = "SO:0001628";
			break;
		case TF_BINDING_SITE_VARIANT:
			accession = "SO:0001782";
			break;
		case REGULATORY_REGION_VARIANT:
			accession = "SO:0001566";
			break;
		case CONSERVED_INTRON_VARIANT:
			accession = "SO:0002018";
			break;
		case INTRAGENIC_VARIANT:
			accession = "SO:0002011";
			break;
		case CONSERVED_INTERGENIC_VARIANT:
			accession = "SO:0002017";
			break;
		case STRUCTURAL_VARIANT:
			accession = "SO:0001537";
			break;
		case CODING_SEQUENCE_VARIANT:
			accession = "SO:0001580";
			break;
		case INTRON_VARIANT:
			accession = "SO:0001627";
			break;
		case EXON_VARIANT:
			accession = "SO:0001791";
			break;
		case SPLICING_VARIANT:
			accession = "SO:0001568";
			break;
		case MIRNA:
			accession = "SO:0000276";
			break;
		case GENE_VARIANT:
			accession = "SO:0001564";
			break;
		case CODING_TRANSCRIPT_VARIANT:
			accession = "SO:0001968";
			break;
		case NON_CODING_TRANSCRIPT_VARIANT:
			accession = "SO:0001619";
			break;
		case TRANSCRIPT_VARIANT:
			accession = "SO:0001576";
			break;
		case INTERGENIC_REGION:
			accession = "SO:0000605";
			break;
		case CHROMOSOME:
			accession = "SO:0000340";
			break;
		case SEQUENCE_VARIANT:
			accession = "SO:0001060";
			break;

		// marker values carry no accession
		case _SMALLEST_HIGH_IMPACT:
		case _SMALLEST_MODERATE_IMPACT:
		case _SMALLEST_LOW_IMPACT:
		default:
			accession = null;
			break;
		}
		return accession;
	}

	/**
	 * Exposes the declaration position of this value, i.e. the result of {@link #ordinal()}.
	 *
	 * @return int with the number used for sorting values of type {@link VariantEffect}.
	 */
	public int getNumber() {
		return this.ordinal();
	}

	/**
	 * Tests for the single structural value, {@link #STRUCTURAL_VARIANT}.
	 *
	 * @return <code>true</code> if this {@link VariantEffect} annotates structural variants.
	 */
	public boolean isStructural() {
		switch (this) {
		case STRUCTURAL_VARIANT:
			return true;
		default:
			return false;
		}
	}

	/**
	 * Tests whether this value is one of {@link #SPLICING_VARIANT}, {@link #SPLICE_ACCEPTOR_VARIANT},
	 * {@link #SPLICE_DONOR_VARIANT} or {@link #SPLICE_REGION_VARIANT}.
	 *
	 * @return <code>true</code> if this {@link VariantEffect} could affect splicing.
	 */
	public boolean isSplicing() {
		return this == SPLICING_VARIANT || this == SPLICE_ACCEPTOR_VARIANT || this == SPLICE_DONOR_VARIANT
				|| this == SPLICE_REGION_VARIANT;
	}

	/**
	 * Tests for the two intron effects that distinguish coding from non-coding transcripts. Other intron-related
	 * values such as {@link #INTRON_VARIANT} are not matched.
	 *
	 * @return <code>true</code> if equal to {@link #CODING_TRANSCRIPT_INTRON_VARIANT} or
	 *         {@link #NON_CODING_TRANSCRIPT_INTRON_VARIANT}.
	 */
	public boolean isIntronic() {
		switch (this) {
		case CODING_TRANSCRIPT_INTRON_VARIANT:
		case NON_CODING_TRANSCRIPT_INTRON_VARIANT:
			return true;
		default:
			return false;
		}
	}

	/**
	 * Tests whether this effect lies outside the set of effects treated as affecting the exome.
	 *
	 * The on-exome effects are listed explicitly below; every value not listed is off-exome.
	 * {@link #FRAMESHIFT_VARIANT} is listed next to {@link #FRAMESHIFT_ELONGATION} and
	 * {@link #FRAMESHIFT_TRUNCATION}, because a frameshift in the coding sequence is an exome change regardless
	 * of whether the transcript length changes.
	 *
	 * @return <code>true</code> if the variant effect does not indicate a change affecting the exome,
	 *         {@link #CUSTOM} is considered on-exome
	 * @see #isOffTranscript
	 */
	public boolean isOffExome() {
		// Note that this function is called by isOffTranscript() which allows intronic and UTR changes.
		switch (this) {
		case CUSTOM:
		case DISRUPTIVE_INFRAME_DELETION:
		case DISRUPTIVE_INFRAME_INSERTION:
		case EXON_LOSS_VARIANT:
		case EXON_VARIANT:
		case FEATURE_TRUNCATION:
		case FIVE_PRIME_UTR_PREMATURE_START_CODON_GAIN_VARIANT:
		case FRAMESHIFT_ELONGATION:
		case FRAMESHIFT_TRUNCATION:
		case FRAMESHIFT_VARIANT: // previously fell through to default and was reported as off-exome
		case INFRAME_DELETION:
		case INFRAME_INSERTION:
		case INITIATOR_CODON_VARIANT:
		case INTERNAL_FEATURE_ELONGATION:
		case INTRON_VARIANT:
		case MISSENSE_VARIANT:
		case MNV:
		case NON_CODING_TRANSCRIPT_EXON_VARIANT:
		case RARE_AMINO_ACID_VARIANT:
		case SPLICE_ACCEPTOR_VARIANT:
		case SPLICE_DONOR_VARIANT:
		case SPLICE_REGION_VARIANT:
		case SPLICING_VARIANT:
		case START_LOST:
		case STOP_GAINED:
		case STOP_LOST:
		case STOP_RETAINED_VARIANT:
		case SYNONYMOUS_VARIANT:
		case TRANSCRIPT_ABLATION:
			return false;
		default:
			return true;
		}
	}

	/**
	 * Tests whether this effect lies outside of any transcript.
	 *
	 * Anything that {@link #isOffExome()} treats as on-exome is on-transcript as well. In addition, the intronic
	 * and UTR effects listed below, and {@link #NON_CODING_TRANSCRIPT_VARIANT}, count as on-transcript.
	 *
	 * @return <code>true</code> if the variant effect does not indicate a change affecting a transcript,
	 *         {@link #CUSTOM} is considered on-transcript
	 * @see #isOffExome
	 */
	public boolean isOffTranscript() {
		boolean offTranscript = isOffExome();
		if (offTranscript) {
			// Off-exome, but possibly still within a transcript (intron or UTR).
			switch (this) {
			case CODING_TRANSCRIPT_INTRON_VARIANT:
			case NON_CODING_TRANSCRIPT_INTRON_VARIANT:
			case NON_CODING_TRANSCRIPT_VARIANT:
			case FIVE_PRIME_UTR_TRUNCATION:
			case FIVE_PRIME_UTR_EXON_VARIANT:
			case FIVE_PRIME_UTR_INTRON_VARIANT:
			case THREE_PRIME_UTR_TRUNCATION:
			case THREE_PRIME_UTR_EXON_VARIANT:
			case THREE_PRIME_UTR_INTRON_VARIANT:
				offTranscript = false;
				break;
			default:
				break;
			}
		}
		return offTranscript;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy