All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.charite.compbio.jannovar.annotation.Annotation Maven / Gradle / Ivy

package de.charite.compbio.jannovar.annotation;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import de.charite.compbio.jannovar.Immutable;
import de.charite.compbio.jannovar.hgvs.AminoAcidCode;
import de.charite.compbio.jannovar.hgvs.nts.change.NucleotideChange;
import de.charite.compbio.jannovar.hgvs.protein.change.ProteinChange;
import de.charite.compbio.jannovar.reference.GenomeVariant;
import de.charite.compbio.jannovar.reference.Strand;
import de.charite.compbio.jannovar.reference.TranscriptModel;
import de.charite.compbio.jannovar.reference.VariantDescription;

import java.util.Collection;
import java.util.EnumSet;

// TODO(holtgrem): Test me!

/**
 * Collect the information for one variant's annotation.
 * 

* Implements the {@link VariantDescription} interface for quicker access to the variant description information. * * @author Peter N Robinson * @author Manuel Holtgrewe */ @Immutable public final class Annotation implements VariantDescription, Comparable { private static final ImmutableSet EMPTY_VARIANT_EFFECTS = Sets.immutableEnumSet( EnumSet.noneOf(VariantEffect.class)); private static final ImmutableSet EMPTY_ANNOTATION_MESSAGES = Sets.immutableEnumSet( EnumSet.noneOf(AnnotationMessage.class)); /** * This line is added to the output of a VCF file annotated by Jannovar and describes the new field for the INFO * section entitled EFFECT, which decribes the effects of variants (splicing,missense,stoploss, etc). */ public static final String INFO_EFFECT = "" + "variant effect (UTR5,UTR3,intronic,splicing,missense,stoploss,stopgain," + "startloss,duplication,frameshift-insertion,frameshift-deletion,non-frameshift-deletion," + "non-frameshift-insertion,synonymous)"; /** * This line is added to the output of a VCF file annotated by Jannovar and describes the new field for the INFO * section entitled HGVS, which provides the HGVS encoded variant corresponding to the chromosomal variant in the * original VCF file. */ public static final String INFO_HGVS = "HGVS Nomenclature"; /** * The DESCRIPTION string to use in the VCF header for VCFVariantAnnotation objects */ public static final String VCF_ANN_DESCRIPTION_STRING = "Functional annotations:'Allele|Annotation|" + "Annotation_Impact|Gene_Name|Gene_ID|Feature_Type|Feature_ID|Transcript_BioType|Rank|HGVS.c|HGVS.p|" + "cDNA.pos / cDNA.length|CDS.pos / CDS.length|AA.pos / AA.length|Distance|ERRORS / WARNINGS / INFO'"; /** * the annotated {@link GenomeVariant} */ private final GenomeVariant change; /** * variant types, sorted by internal pathogenicity score */ private final ImmutableSet effects; /** * errors and warnings */ private final ImmutableSet messages; /** * location of the annotation, null if not even nearby a {@link TranscriptModel} */ private final AnnotationLocation annoLoc; /** * Chromosome/genome-level change, to be prepended with "g." */ private final NucleotideChange genomicNTChange; /** * CDS-level {@link NucleotideChange} for coding transcripts (to be prependend with "c.") and transcript level for * non-coding transcripts (to be prepended with "n.") */ private final NucleotideChange cdsNTChange; /** * change on the protein level */ private final ProteinChange proteinChange; /** * the transcript, null for {@link VariantEffect#INTERGENIC_VARIANT} annotations */ private final TranscriptModel transcript; /** * Initialize object with messages only. * * @param messages {@link AnnotationMessage}s to use in this annotation */ public Annotation(Collection messages) { this(null, null, null, null, null, null, null, messages); } /** * Initialize the {@link Annotation} with the given values. *

* The constructor will sort effects by pathogenicity before storing. * * @param change the annotated {@link GenomeVariant} * @param transcript transcript for this annotation * @param effects type of the variants * @param annoLoc location of the variant * @param genomicNTChange ghromosome/genome-level change, to be prepended with "g." * @param cdsNTChange CDS-level {@link NucleotideChange} * @param proteinChange predicted {@link ProteinChange} */ public Annotation(TranscriptModel transcript, GenomeVariant change, Collection effects, AnnotationLocation annoLoc, NucleotideChange genomicNTChange, NucleotideChange cdsNTChange, ProteinChange proteinChange) { this(transcript, change, effects, annoLoc, genomicNTChange, cdsNTChange, proteinChange, EMPTY_ANNOTATION_MESSAGES); } /** * Initialize the {@link Annotation} with the given values. *

* The constructor will sort effects by pathogenicity before storing. * * @param transcript transcript for this annotation * @param change the annotated {@link GenomeVariant} * @param varTypes type of the variants * @param annoLoc location of the variant * @param genomicNTChange ghromosome/genome-level change, to be prepended with "g." * @param cdsNTChange CDS-level {@link NucleotideChange} * @param proteinChange {@link ProteinChange} with a predicted protein change * @param messages {@link Collection} of {@link AnnotationMessage} objects */ public Annotation(TranscriptModel transcript, GenomeVariant change, Collection varTypes, AnnotationLocation annoLoc, NucleotideChange genomicNTChange, NucleotideChange cdsNTChange, ProteinChange proteinChange, Collection messages) { // enforce forward strand this.change = change == null ? null : change.withStrand(Strand.FWD); this.effects = varTypes == null ? EMPTY_VARIANT_EFFECTS : Sets.immutableEnumSet(varTypes); this.annoLoc = annoLoc; this.genomicNTChange = genomicNTChange; this.cdsNTChange = cdsNTChange; this.proteinChange = proteinChange; this.transcript = transcript; this.messages = Sets.immutableEnumSet(messages); } /** * @return the annotated {@link GenomeVariant} */ public GenomeVariant getGenomeVariant() { return change; } /** * @return variant types, sorted by internal pathogenicity score */ public ImmutableSet getEffects() { return effects; } /** * @return errors and warnings */ public ImmutableSet getMessages() { return messages; } /** * @return location of the annotation, null if not even nearby a {@link TranscriptModel} */ public AnnotationLocation getAnnoLoc() { return annoLoc; } /** * @return {@link NucleotideChange} with genomic changes */ public NucleotideChange getGenomicNTChange() { return genomicNTChange; } /** * @return genomic nucleotide change String, including the "g." prefix. */ public String getGenomicNTChangeStr() { return "g." + genomicNTChange.toHGVSString(); } /** * @return {@link NucleotideChange} with changes on the CDS level for coding transcripts and on the transcript level * otherwise, null if the change does not affect any transcript */ public NucleotideChange getCDSNTChange() { return cdsNTChange; } /** * @return CDS nucleotide change String, including the "p." prefix or the empty string if there is no annotation. */ public String getCDSNTChangeStr() { if (cdsNTChange == null || transcript == null) return ""; else if (transcript.isCoding()) return "c." + cdsNTChange.toHGVSString(); else return "n." + cdsNTChange.toHGVSString(); } /** * @return predicted {@link ProteinChange} */ public ProteinChange getProteinChange() { return proteinChange; } /** * @param code Three ore one letter amino acid code * @return protein change String, including the "p." prefix or the empty string if there is no annotation. */ public String getProteinChangeStr(AminoAcidCode code) { if (proteinChange == null) return ""; else return "p." + proteinChange.toHGVSString(code); } /** * @return protein change String, including the "p." prefix or the empty string if there is no annotation, uses * default amino acid code from {@link ProteinChange#toHGVSString()}. */ public String getProteinChangeStr() { if (proteinChange == null) return ""; else return "p." + proteinChange.toHGVSString(); } /** * @return the transcript, null for {@link VariantEffect#INTERGENIC_VARIANT} annotations */ public TranscriptModel getTranscript() { return transcript; } /** * @return highest {@link PutativeImpact} of all {@link #getEffects}. */ public PutativeImpact getPutativeImpact() { if (effects.isEmpty()) return null; VariantEffect worst = effects.iterator().next(); for (VariantEffect vt : effects) if (worst.getImpact().compareTo(vt.getImpact()) > 0) worst = vt; return worst.getImpact(); } /** * Return the standardized VCF variant string for the given ALT allele. *

* The ALT allele has to be given to this function since we trim away at least the first base of * REF/ALT. * * @param alt alt allele * @param code Three or one letter amino acid code * @param escape whether or not to escape the invalid VCF characters, e.g. '='. * @return VCF annotation string */ public String toVCFAnnoString(String alt, boolean escape, AminoAcidCode code) { // public static final String VCF_SVANN_DESCRIPTION_STRING = "Functional SV Annotation:'Effects|Gene_Name|Gene_ID|" + // "Feature_Type|Feature_ID|Transcript_BioType|ERRORS / WARNINGS / INFO'"; VCFAnnotationData data = new VCFAnnotationData(); data.effects = effects; data.impact = getPutativeImpact(); data.setTranscriptAndChange(transcript, change); data.setAnnoLoc(annoLoc); data.isCoding = (transcript == null) ? false : transcript.isCoding(); data.cdsNTChange = cdsNTChange; data.proteinChange = proteinChange; data.messages = messages; if (escape) return data.toString(alt, code); else return data.toUnescapedString(alt, code); } /** * Forward to {@link #toVCFAnnoString(String, boolean, AminoAcidCode) toVCFAnnoString(alt, true, code)}. * * @param alt alternateve allele * @param code Three ore one letter amino acid code * @return vcf annotation string */ public String toVCFAnnoString(String alt, AminoAcidCode code) { return toVCFAnnoString(alt, true, code); } /** * Return the gene annotation or "." if it has no transcript. * * @return gene symbol or "." */ public String getGeneSymbol() { if (transcript == null || transcript.getGeneSymbol() == null) return "."; else return transcript.getGeneSymbol(); } /** * Return the full annotation with the gene symbol. *

* If this annotation does not have a symbol (e.g., for an intergenic annotation) then just return the annotation * string, e.g., "KIAA1751:uc001aim.1:exon18:c.T2287C:p.X763Q". * * @param code Three ore one letter amino acid code * @return full annotation string or null if {@link #transcript} is null */ public String getSymbolAndAnnotation(AminoAcidCode code) { if (transcript == null) return null; return Joiner.on(":").skipNulls().join(transcript.getGeneSymbol(), transcript.getAccession(), getCDSNTChangeStr(), getProteinChangeStr(code)); } /** * @return most pathogenic {@link VariantEffect} link {@link #effects}, null if none. */ // TODO: rename to getMostPathogenicVariantEffect public VariantEffect getMostPathogenicVarType() { if (effects.isEmpty()) return null; return effects.iterator().next(); } @Override public String getChrName() { return change.getChrName(); } @Override public int getChr() { return change.getChr(); } @Override public int getPos() { return change.getPos(); } @Override public String getRef() { return change.getRef(); } @Override public String getAlt() { return change.getAlt(); } @Override public int compareTo(Annotation other) { if (getMostPathogenicVarType() == null && getMostPathogenicVarType() == other.getMostPathogenicVarType()) return 0; else if (other.getMostPathogenicVarType() == null) return -1; else if (getMostPathogenicVarType() == null) return 1; int result = getMostPathogenicVarType().ordinal() - other.getMostPathogenicVarType().ordinal(); if (result != 0) return result; if (transcript == null && other.transcript == null) return 0; else if (other.transcript == null) return -1; else if (transcript == null) return 1; return transcript.compareTo(other.transcript); } @Override public String toString() { return "Annotation [change=" + change + ", effects=" + effects + ", cdsNTChange=" + cdsNTChange + ", proteinChange=" + (proteinChange == null ? "." : proteinChange.toHGVSString()) + ", transcript.getAccession()=" + transcript.getAccession() + "]"; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((proteinChange == null) ? 0 : proteinChange.hashCode()); result = prime * result + ((annoLoc == null) ? 0 : annoLoc.hashCode()); result = prime * result + ((effects == null) ? 0 : effects.hashCode()); result = prime * result + ((messages == null) ? 0 : messages.hashCode()); result = prime * result + ((cdsNTChange == null) ? 0 : cdsNTChange.hashCode()); result = prime * result + ((transcript == null) ? 0 : transcript.hashCode()); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; Annotation other = (Annotation) obj; if (proteinChange == null) { if (other.proteinChange != null) return false; } else if (!proteinChange.equals(other.proteinChange)) return false; if (annoLoc == null) { if (other.annoLoc != null) return false; } else if (!annoLoc.equals(other.annoLoc)) return false; if (effects == null) { if (other.effects != null) return false; } else if (!effects.equals(other.effects)) return false; if (messages == null) { if (other.messages != null) return false; } else if (!messages.equals(other.messages)) return false; if (cdsNTChange == null) { if (other.cdsNTChange != null) return false; } else if (!cdsNTChange.equals(other.cdsNTChange)) return false; if (transcript == null) { if (other.transcript != null) return false; } else if (!transcript.equals(other.transcript)) return false; return true; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy