// de.charite.compbio.jannovar.annotation.Annotation (source listing: Maven / Gradle / Ivy)
package de.charite.compbio.jannovar.annotation;
import java.util.Collection;
import java.util.Objects;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSortedSet;
import de.charite.compbio.jannovar.Immutable;
import de.charite.compbio.jannovar.hgvs.AminoAcidCode;
import de.charite.compbio.jannovar.hgvs.nts.change.NucleotideChange;
import de.charite.compbio.jannovar.hgvs.protein.change.ProteinChange;
import de.charite.compbio.jannovar.reference.GenomeVariant;
import de.charite.compbio.jannovar.reference.Strand;
import de.charite.compbio.jannovar.reference.TranscriptModel;
import de.charite.compbio.jannovar.reference.VariantDescription;
// TODO(holtgrem): Test me!
/**
 * Collect the information for one variant's annotation.
 *
 * Implements the {@link VariantDescription} interface for quicker access to the variant description information.
 *
 * All fields are final and the collections are stored as {@link ImmutableSortedSet}s. Several fields may be
 * {@code null} (see the individual accessors); in particular, the messages-only constructor leaves everything but
 * the messages unset.
 *
 * Note that {@link #compareTo(Annotation)} orders by most pathogenic effect and transcript only, while
 * {@link #equals(Object)} also looks at the protein change, location, messages and CDS change, so the natural
 * ordering is not consistent with equals.
 *
 * @author Peter N Robinson
 * @author Manuel Holtgrewe
 */
@Immutable
public final class Annotation implements VariantDescription, Comparable<Annotation> {

    /**
     * This line is added to the output of a VCF file annotated by Jannovar and describes the new field for the INFO
     * section entitled EFFECT, which describes the effects of variants (splicing,missense,stoploss, etc).
     */
    public static final String INFO_EFFECT = ""
            + "variant effect (UTR5,UTR3,intronic,splicing,missense,stoploss,stopgain,"
            + "startloss,duplication,frameshift-insertion,frameshift-deletion,non-frameshift-deletion,"
            + "non-frameshift-insertion,synonymous)";

    /**
     * This line is added to the output of a VCF file annotated by Jannovar and describes the new field for the INFO
     * section entitled HGVS, which provides the HGVS encoded variant corresponding to the chromosomal variant in the
     * original VCF file.
     */
    public static final String INFO_HGVS = "HGVS Nomenclature";

    /** The DESCRIPTION string to use in the VCF header for VCFVariantAnnotation objects */
    public static final String VCF_ANN_DESCRIPTION_STRING = "Functional annotations:'Allele|Annotation|"
            + "Annotation_Impact|Gene_Name|Gene_ID|Feature_Type|Feature_ID|Transcript_BioType|Rank|HGVS.c|HGVS.p|"
            + "cDNA.pos / cDNA.length|CDS.pos / CDS.length|AA.pos / AA.length|Distance|ERRORS / WARNINGS / INFO'";

    /** the annotated {@link GenomeVariant}, stored on the forward strand; may be {@code null} */
    private final GenomeVariant change;

    /** variant types, sorted by internal pathogenicity score; never {@code null} */
    private final ImmutableSortedSet<VariantEffect> effects;

    /** errors and warnings; never {@code null} */
    private final ImmutableSortedSet<AnnotationMessage> messages;

    /** location of the annotation, {@code null} if not even nearby a {@link TranscriptModel} */
    private final AnnotationLocation annoLoc;

    /** Chromosome/genome-level change, to be prepended with "g."; may be {@code null} */
    private final NucleotideChange genomicNTChange;

    /**
     * CDS-level {@link NucleotideChange} for coding transcripts (to be prepended with "c.") and transcript level for
     * non-coding transcripts (to be prepended with "n."); may be {@code null}
     */
    private final NucleotideChange cdsNTChange;

    /** change on the protein level; may be {@code null} */
    private final ProteinChange proteinChange;

    /** the transcript, {@code null} for intergenic annotations */
    private final TranscriptModel transcript;

    /**
     * Initialize object with messages only.
     *
     * All other fields are left {@code null} (or empty, in the case of the effects).
     *
     * @param messages
     *            {@link AnnotationMessage}s to use in this annotation, {@code null} is treated as empty
     */
    public Annotation(Collection<AnnotationMessage> messages) {
        this(null, null, null, null, null, null, null, messages);
    }

    /**
     * Initialize the {@link Annotation} with the given values and no messages.
     *
     * The constructor will sort {@code effects} by pathogenicity before storing.
     *
     * @param transcript
     *            transcript for this annotation
     * @param change
     *            the annotated {@link GenomeVariant}
     * @param effects
     *            type of the variants, {@code null} is treated as empty
     * @param annoLoc
     *            location of the variant
     * @param genomicNTChange
     *            chromosome/genome-level change, to be prepended with "g."
     * @param cdsNTChange
     *            CDS-level {@link NucleotideChange}
     * @param proteinChange
     *            predicted {@link ProteinChange}
     */
    public Annotation(TranscriptModel transcript, GenomeVariant change, Collection<VariantEffect> effects,
            AnnotationLocation annoLoc, NucleotideChange genomicNTChange, NucleotideChange cdsNTChange,
            ProteinChange proteinChange) {
        this(transcript, change, effects, annoLoc, genomicNTChange, cdsNTChange, proteinChange,
                ImmutableSortedSet.<AnnotationMessage> of());
    }

    /**
     * Initialize the {@link Annotation} with the given values.
     *
     * The constructor will sort {@code varTypes} by pathogenicity before storing.
     *
     * @param transcript
     *            transcript for this annotation
     * @param change
     *            the annotated {@link GenomeVariant}, converted to the forward strand if not {@code null}
     * @param varTypes
     *            type of the variants, {@code null} is treated as empty
     * @param annoLoc
     *            location of the variant
     * @param genomicNTChange
     *            chromosome/genome-level change, to be prepended with "g."
     * @param cdsNTChange
     *            CDS-level {@link NucleotideChange}
     * @param proteinChange
     *            {@link ProteinChange} with a predicted protein change
     * @param messages
     *            {@link Collection} of {@link AnnotationMessage} objects, {@code null} is treated as empty
     */
    public Annotation(TranscriptModel transcript, GenomeVariant change, Collection<VariantEffect> varTypes,
            AnnotationLocation annoLoc, NucleotideChange genomicNTChange, NucleotideChange cdsNTChange,
            ProteinChange proteinChange, Collection<AnnotationMessage> messages) {
        // enforce forward strand
        this.change = (change == null) ? null : change.withStrand(Strand.FWD);

        if (varTypes == null) {
            this.effects = ImmutableSortedSet.<VariantEffect> of();
        } else {
            this.effects = ImmutableSortedSet.copyOf(varTypes);
        }

        this.annoLoc = annoLoc;
        this.genomicNTChange = genomicNTChange;
        this.cdsNTChange = cdsNTChange;
        this.proteinChange = proteinChange;
        this.transcript = transcript;

        // Mirror the null handling of varTypes instead of failing inside copyOf().
        if (messages == null) {
            this.messages = ImmutableSortedSet.<AnnotationMessage> of();
        } else {
            this.messages = ImmutableSortedSet.copyOf(messages);
        }
    }

    /** @return the annotated {@link GenomeVariant} */
    public GenomeVariant getGenomeVariant() {
        return change;
    }

    /** @return variant types, sorted by internal pathogenicity score */
    public ImmutableSortedSet<VariantEffect> getEffects() {
        return effects;
    }

    /** @return errors and warnings */
    public ImmutableSortedSet<AnnotationMessage> getMessages() {
        return messages;
    }

    /** @return location of the annotation, {@code null} if not even nearby a {@link TranscriptModel} */
    public AnnotationLocation getAnnoLoc() {
        return annoLoc;
    }

    /**
     * @return {@link NucleotideChange} with genomic changes
     */
    public NucleotideChange getGenomicNTChange() {
        return genomicNTChange;
    }

    /**
     * @return genomic nucleotide change String, including the "g." prefix, or the empty string if there is no
     *         genomic change.
     */
    public String getGenomicNTChangeStr() {
        if (genomicNTChange == null) {
            return "";
        }
        return "g." + genomicNTChange.toHGVSString();
    }

    /**
     * @return {@link NucleotideChange} with changes on the CDS level for coding transcripts and on the transcript level
     *         otherwise, {@code null} if the change does not affect any transcript
     */
    public NucleotideChange getCDSNTChange() {
        return cdsNTChange;
    }

    /**
     * @return CDS nucleotide change String, including the "c." prefix for coding transcripts or the "n." prefix for
     *         non-coding ones, or the empty string if there is no annotation or no transcript.
     */
    public String getCDSNTChangeStr() {
        if (cdsNTChange == null || transcript == null) {
            return "";
        }
        final String prefix = transcript.isCoding() ? "c." : "n.";
        return prefix + cdsNTChange.toHGVSString();
    }

    /** @return predicted {@link ProteinChange} */
    public ProteinChange getProteinChange() {
        return proteinChange;
    }

    /**
     * @param code
     *            the protein change code to use.
     * @return protein change String, including the "p." prefix or the empty string if there is no annotation.
     */
    public String getProteinChangeStr(AminoAcidCode code) {
        if (proteinChange == null) {
            return "";
        }
        return "p." + proteinChange.toHGVSString(code);
    }

    /**
     * @return protein change String, including the "p." prefix or the empty string if there is no annotation, uses
     *         default amino acid code from {@link ProteinChange#toHGVSString()}.
     */
    public String getProteinChangeStr() {
        if (proteinChange == null) {
            return "";
        }
        return "p." + proteinChange.toHGVSString();
    }

    /** @return the transcript, {@code null} for {@link VariantEffect#INTERGENIC_VARIANT} annotations */
    public TranscriptModel getTranscript() {
        return transcript;
    }

    /**
     * @return highest {@link PutativeImpact} of all {@link #getEffects}, i.e., the impact that sorts first in the
     *         natural order of {@link PutativeImpact}; {@code null} if there are no effects.
     */
    public PutativeImpact getPutativeImpact() {
        if (effects.isEmpty()) {
            return null;
        }
        VariantEffect worst = effects.first();
        for (VariantEffect candidate : effects) {
            // keep the effect whose impact compares lowest
            if (worst.getImpact().compareTo(candidate.getImpact()) > 0) {
                worst = candidate;
            }
        }
        return worst.getImpact();
    }

    /**
     * Return the standardized VCF variant string for the given {@code ALT} allele.
     *
     * The {@code ALT} allele has to be given to this function since we trim away at least the first base of
     * {@code REF}/{@code ALT}.
     *
     * @param alt
     *            alt allele
     * @param escape
     *            whether or not to escape the invalid VCF characters, e.g. {@code '='}.
     * @return VCF annotation string
     */
    public String toVCFAnnoString(String alt, boolean escape) {
        final VCFAnnotationData data = new VCFAnnotationData();
        data.effects = effects;
        data.impact = getPutativeImpact();
        data.setTranscriptAndChange(transcript, change);
        data.setAnnoLoc(annoLoc);
        data.isCoding = (transcript != null) && transcript.isCoding();
        data.cdsNTChange = cdsNTChange;
        data.proteinChange = proteinChange;
        data.messages = messages;
        return escape ? data.toString(alt) : data.toUnescapedString(alt);
    }

    /**
     * Forward to {@link #toVCFAnnoString(String, boolean) toVCFAnnoString(alt, true)}.
     *
     * @param alt
     *            alternative allele
     * @return vcf annotation string
     */
    public String toVCFAnnoString(String alt) {
        return toVCFAnnoString(alt, true);
    }

    /**
     * Return the gene annotation or {@code "."} if it has no transcript (or the transcript has no gene symbol).
     *
     * @return gene symbol or {@code "."}
     */
    public String getGeneSymbol() {
        if (transcript == null || transcript.getGeneSymbol() == null) {
            return ".";
        }
        return transcript.getGeneSymbol();
    }

    /**
     * Return the full annotation with the gene symbol.
     *
     * The result joins gene symbol, transcript accession, CDS change and protein change with {@code ":"}, skipping
     * {@code null} components, e.g., {@code "KIAA1751:uc001aim.1:exon18:c.T2287C:p.X763Q"}.
     *
     * @param code
     *            the amino acid code
     *
     * @return full annotation string or {@code null} if {@link #transcript} is {@code null}
     */
    public String getSymbolAndAnnotation(AminoAcidCode code) {
        if (transcript == null) {
            return null;
        }
        return Joiner.on(":").skipNulls().join(transcript.getGeneSymbol(), transcript.getAccession(),
                getCDSNTChangeStr(), getProteinChangeStr(code));
    }

    /**
     * @return most pathogenic {@link VariantEffect} in {@link #effects} (the first element of the sorted set),
     *         {@code null} if none.
     */
    public VariantEffect getMostPathogenicVarType() {
        return effects.isEmpty() ? null : effects.first();
    }

    /**
     * @return chromosome name of the annotated variant
     * @throws NullPointerException
     *             if this annotation has no {@link GenomeVariant}
     */
    @Override
    public String getChrName() {
        return change.getChrName();
    }

    /**
     * @return chromosome id of the annotated variant
     * @throws NullPointerException
     *             if this annotation has no {@link GenomeVariant}
     */
    @Override
    public int getChr() {
        return change.getChr();
    }

    /**
     * @return position of the annotated variant
     * @throws NullPointerException
     *             if this annotation has no {@link GenomeVariant}
     */
    @Override
    public int getPos() {
        return change.getPos();
    }

    /**
     * @return reference allele of the annotated variant
     * @throws NullPointerException
     *             if this annotation has no {@link GenomeVariant}
     */
    @Override
    public String getRef() {
        return change.getRef();
    }

    /**
     * @return alternative allele of the annotated variant
     * @throws NullPointerException
     *             if this annotation has no {@link GenomeVariant}
     */
    @Override
    public String getAlt() {
        return change.getAlt();
    }

    /**
     * Order by most pathogenic effect first, then by transcript.
     *
     * Annotations without any effect sort after those with an effect; on equal effect, annotations without a
     * transcript sort after those with one.
     */
    @Override
    public int compareTo(Annotation other) {
        final VariantEffect mine = getMostPathogenicVarType();
        final VariantEffect theirs = other.getMostPathogenicVarType();
        if (mine == null && theirs == null) {
            return 0;
        } else if (theirs == null) {
            return -1;
        } else if (mine == null) {
            return 1;
        }

        // Enum#compareTo orders by declaration order, same as comparing ordinals.
        final int byEffect = mine.compareTo(theirs);
        if (byEffect != 0) {
            return byEffect;
        }

        if (transcript == null && other.transcript == null) {
            return 0;
        } else if (other.transcript == null) {
            return -1;
        } else if (transcript == null) {
            return 1;
        }
        return transcript.compareTo(other.transcript);
    }

    @Override
    public String toString() {
        // transcript is null for transcript-less annotations; print "." as done for a missing protein change
        return "Annotation [change=" + change + ", effects=" + effects + ", cdsNTChange=" + cdsNTChange
                + ", proteinChange=" + (proteinChange == null ? "." : proteinChange.toHGVSString())
                + ", transcript.getAccession()=" + (transcript == null ? "." : transcript.getAccession()) + "]";
    }

    /**
     * Hash over the same fields that {@link #equals(Object)} compares; the genome variant and the genomic
     * nucleotide change are not part of it.
     */
    @Override
    public int hashCode() {
        return Objects.hash(proteinChange, annoLoc, effects, messages, cdsNTChange, transcript);
    }

    /**
     * Two annotations are equal if protein change, location, effects, messages, CDS change and transcript are
     * equal; the genome variant and the genomic nucleotide change are not compared.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final Annotation other = (Annotation) obj;
        return Objects.equals(proteinChange, other.proteinChange)
                && Objects.equals(annoLoc, other.annoLoc)
                && Objects.equals(effects, other.effects)
                && Objects.equals(messages, other.messages)
                && Objects.equals(cdsNTChange, other.cdsNTChange)
                && Objects.equals(transcript, other.transcript);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy