All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.charite.compbio.jannovar.reference.GenomeVariant Maven / Gradle / Ivy

package de.charite.compbio.jannovar.reference;

import com.google.common.base.Joiner;
import com.google.common.collect.ComparisonChain;

import de.charite.compbio.jannovar.Immutable;
import de.charite.compbio.jannovar.annotation.Annotation;
import de.charite.compbio.jannovar.impl.util.DNAUtils;

// TODO(holtgrewe): We only want genome changes on the forward strand, make sure this does not lead to problems downstream.
// TODO(holtgrewe): Add support for symbolic alleles in all the members.

/**
 * Denote a change with a "REF" and an "ALT" string using genome coordinates.
 *
 * GenomeVariant objects are immutable, the members are automatically adjusted for the longest common suffix and prefix
 * in REF and ALT.
 *
 * Symbolic alleles, as in the VCF standard, are also possible, but methods like {@link #getType} etc. do not return
 * sensible results. For symbolic alleles, position and allele strings are stored without prefix/suffix correction.
 *
 * Equality and hash code are computed on the forward-strand representation, so the same variant expressed on
 * different strands compares equal.
 *
 * @author Manuel Holtgrewe
 * @author Max Schubach
 * @author Peter N Robinson
 */
@Immutable
public final class GenomeVariant implements VariantDescription {

	/** position of the change */
	private final GenomePosition pos;
	/** nucleic acid reference string */
	private final String ref;
	/** nucleic acid alternative string */
	private final String alt;

	/**
	 * Construct object given the position, reference, and alternative nucleic acid string.
	 *
	 * On construction, pos, ref, and alt are automatically adjusted to the right/incremented by the length of the
	 * longest common prefix and suffix of ref and alt. If either allele is symbolic, all three values are stored
	 * unchanged.
	 *
	 * @param pos
	 *            position of the change, its strand is kept
	 * @param ref
	 *            reference allele string
	 * @param alt
	 *            alternative allele string
	 */
	public GenomeVariant(GenomePosition pos, String ref, String alt) {
		if (wouldBeSymbolicAllele(ref) || wouldBeSymbolicAllele(alt)) {
			// Symbolic alleles cannot be trimmed, store everything as given.
			this.pos = pos;
			this.ref = ref;
			this.alt = alt;
			return;
		}

		final VariantDataCorrector corr = new VariantDataCorrector(ref, alt, pos.getPos());
		this.pos = new GenomePosition(pos.getRefDict(), pos.getStrand(), pos.getChr(), corr.position,
				PositionType.ZERO_BASED);
		this.ref = corr.ref;
		this.alt = corr.alt;
	}

	/**
	 * Construct object given the position, reference, alternative nucleic acid string, and strand.
	 *
	 * On construction, pos, ref, and alt are automatically adjusted to the right/incremented by the length of the
	 * longest common prefix and suffix of ref and alt. Further, the position is adjusted to the given strand and the
	 * alleles are reverse-complemented if <code>strand</code> differs from the strand of <code>pos</code>.
	 *
	 * The result is the same as constructing with {@link #GenomeVariant(GenomePosition, String, String)} and then
	 * calling {@link #withStrand}.
	 *
	 * @param pos
	 *            position of the change
	 * @param ref
	 *            reference allele string, given on the strand of <code>pos</code>
	 * @param alt
	 *            alternative allele string, given on the strand of <code>pos</code>
	 * @param strand
	 *            strand that the constructed object should be on
	 */
	public GenomeVariant(GenomePosition pos, String ref, String alt, Strand strand) {
		if (wouldBeSymbolicAllele(ref) || wouldBeSymbolicAllele(alt)) {
			this.pos = pos.withStrand(strand);
			if (strand == pos.getStrand()) {
				this.ref = ref;
				this.alt = alt;
			} else {
				// NOTE(review): symbolic allele strings are passed through reverseComplement on a strand change;
				// confirm that DNAUtils handles such strings as intended.
				this.ref = DNAUtils.reverseComplement(ref);
				this.alt = DNAUtils.reverseComplement(alt);
			}
			return;
		}

		// Correct variant data.
		final VariantDataCorrector corr = new VariantDataCorrector(ref, alt, pos.getPos());
		final boolean sameStrand = (strand == pos.getStrand());
		if (sameStrand) {
			this.ref = corr.ref;
			this.alt = corr.alt;
		} else {
			this.ref = DNAUtils.reverseComplement(corr.ref);
			this.alt = DNAUtils.reverseComplement(corr.alt);
		}

		// When changing strands, first move to the last base of the *corrected* reference allele (this yields -1 for
		// an empty reference, i.e., an insertion). The corrected length has to be used here because corr.position
		// already accounts for the trimmed prefix; the parameter "ref" still holds the untrimmed allele.
		final int delta = sameStrand ? 0 : this.ref.length() - 1;

		this.pos = new GenomePosition(pos.getRefDict(), pos.getStrand(), pos.getChr(), corr.position,
				PositionType.ZERO_BASED).shifted(delta).withStrand(strand);
	}

	/**
	 * Construct object and enforce strand.
	 *
	 * If <code>strand</code> differs from the strand of <code>other</code>, the alleles are reverse-complemented and
	 * the position is shifted by the reference length minus one before it is converted to the other strand.
	 *
	 * @param other
	 *            variant to copy the data from
	 * @param strand
	 *            strand that the constructed object should be on
	 */
	public GenomeVariant(GenomeVariant other, Strand strand) {
		if (strand == other.pos.getStrand()) {
			this.ref = other.ref;
			this.alt = other.alt;
			this.pos = other.pos;
		} else {
			this.ref = DNAUtils.reverseComplement(other.ref);
			this.alt = DNAUtils.reverseComplement(other.alt);
			this.pos = other.pos.shifted(this.ref.length() - 1).withStrand(strand);
		}
	}

	/**
	 * @return true if this is a symbolic allele, i.e., if REF or ALT is a symbolic allele string
	 */
	public boolean isSymbolic() {
		return (wouldBeSymbolicAllele(ref) || wouldBeSymbolicAllele(alt));
	}

	/**
	 * Strings of length zero or one are never considered symbolic.
	 *
	 * @param allele
	 *            allele string to check
	 * @return true if the given allele string describes a symbolic allele (events not
	 *         described by replacement of bases, e.g. break-ends or duplications that are described in one line).
	 */
	private static boolean wouldBeSymbolicAllele(String allele) {
		if (allele.length() <= 1)
			return false;

		final char first = allele.charAt(0);
		final char last = allele.charAt(allele.length() - 1);
		return (first == '<' || last == '>') || // symbolic or large insertion
				(first == '.' || last == '.') || // single breakend
				(allele.contains("[") || allele.contains("]")); // mated breakend
	}

	/**
	 * @return name of the chromosome this change is on, looked up in the reference dictionary of the position
	 */
	@Override
	public String getChrName() {
		return this.pos.getRefDict().getContigIDToName().get(this.pos.getChr());
	}

	/**
	 * @return position of the change as {@link GenomePosition}
	 */
	public GenomePosition getGenomePos() {
		return this.pos;
	}

	/**
	 * @return numeric position of the change, as returned by the underlying {@link GenomePosition}
	 */
	@Override
	public int getPos() {
		return this.pos.getPos();
	}

	/**
	 * @return nucleic acid reference string
	 */
	@Override
	public String getRef() {
		return this.ref;
	}

	/**
	 * @return nucleic acid alternative string
	 */
	@Override
	public String getAlt() {
		return this.alt;
	}

	/**
	 * @return numeric ID of chromosome this change is on
	 */
	@Override
	public int getChr() {
		return pos.getChr();
	}

	/**
	 * @return interval of the genome change, of length 1 for symbolic alleles and of the reference length otherwise
	 */
	public GenomeInterval getGenomeInterval() {
		final int length = isSymbolic() ? 1 : ref.length();
		return new GenomeInterval(pos, length);
	}

	/**
	 * @param strand
	 *            strand that the returned object should be on
	 * @return the GenomeVariant on the given strand
	 */
	public GenomeVariant withStrand(Strand strand) {
		return new GenomeVariant(this, strand);
	}

	/**
	 * The description is always generated for the forward strand.
	 *
	 * @return human-readable {@link String} describing the genome change
	 */
	@Override
	public String toString() {
		if (pos.getStrand() != Strand.FWD)
			return withStrand(Strand.FWD).toString();

		final Joiner joiner = Joiner.on("");
		final int refLen = ref.length();

		if (refLen == 0) // handle insertion as special case
			return joiner.join(getChrName(), ":g.", getPos(), "_", getPos() + 1, "ins", alt);

		if (alt.length() == 0) {
			if (refLen == 1) // single-base deletion
				return joiner.join(getChrName(), ":g.", getPos() + 1, "del", ref);
			// multi-base deletion
			return joiner.join(getChrName(), ":g.", getPos() + 1, "_", getPos() + refLen, "del", ref);
		}

		if (refLen == 1 && alt.length() > 1)
			return joiner.join(getChrName(), ":g.", getPos() + 1, "del", ref, "ins", alt);
		if (refLen > 1)
			return joiner.join(getChrName(), ":g.", getPos() + 1, "_", getPos() + refLen, "del", ref, "ins", alt);

		// Only single-base REF and ALT can reach this point, so neither string is empty here.
		return joiner.join(pos, ref, ">", alt);
	}

	/**
	 * The type is derived from the lengths of REF and ALT only.
	 *
	 * @return the {@link GenomeVariantType} of this GenomeVariant
	 */
	public GenomeVariantType getType() {
		if (ref.length() > 0 && alt.length() == 0)
			return GenomeVariantType.DELETION;
		else if (ref.length() == 0 && alt.length() > 0)
			return GenomeVariantType.INSERTION;
		else if (ref.length() == 1 && alt.length() == 1)
			return GenomeVariantType.SNV;
		else
			return GenomeVariantType.BLOCK_SUBSTITUTION;
	}

	/**
	 * A transition is between purine and purine or between pyrimidine and pyrimidine. Only applies to single nucleotide
	 * substitutions. The comparison is done on upper-case "A", "C", "G", and "T" only.
	 *
	 * @return true if the variant is a SNV and a transition.
	 */
	public boolean isTransition() {
		if (getType() != GenomeVariantType.SNV)
			return false;

		// purine to purine change
		final boolean purineChange = (ref.equals("A") && alt.equals("G")) || (ref.equals("G") && alt.equals("A"));
		// pyrimidine to pyrimidine change
		final boolean pyrimidineChange = (ref.equals("C") && alt.equals("T")) || (ref.equals("T") && alt.equals("C"));
		return purineChange || pyrimidineChange;
	}

	/**
	 * A transversion is purine <-> pyrimidine. Only applies to single nucleotide substitutions.
	 *
	 * Every SNV that is not recognized as a transition by {@link #isTransition} is reported as a transversion.
	 *
	 * @return true if the variant is a SNV and a transversion.
	 */
	public boolean isTransversion() {
		return getType() == GenomeVariantType.SNV && !isTransition();
	}

	/*
	 * (non-Javadoc)
	 *
	 * The hash code is computed on the forward strand representation so that it is consistent with equals().
	 *
	 * @see java.lang.Object#hashCode()
	 */
	@Override
	public int hashCode() {
		if (pos != null && pos.getStrand() != null && pos.getStrand().isReverse())
			return withStrand(Strand.FWD).hashCode();
		final int prime = 31;
		int result = 1;
		result = prime * result + ((alt == null) ? 0 : alt.hashCode());
		result = prime * result + ((pos == null) ? 0 : pos.hashCode());
		result = prime * result + ((ref == null) ? 0 : ref.hashCode());
		return result;
	}

	/*
	 * (non-Javadoc)
	 *
	 * Both objects are converted to the forward strand before their members are compared.
	 *
	 * @see java.lang.Object#equals(java.lang.Object)
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj)
			return true;
		if (obj == null)
			return false;
		if (getClass() != obj.getClass())
			return false;

		// Normalize this object to the forward strand first, then compare again.
		if (pos != null && pos.getStrand() != Strand.FWD)
			return withStrand(Strand.FWD).equals(obj);

		final GenomeVariant other = ((GenomeVariant) obj).withStrand(Strand.FWD);
		return nullSafeEquals(alt, other.alt) && nullSafeEquals(pos, other.pos) && nullSafeEquals(ref, other.ref);
	}

	/**
	 * @return true if both arguments are null or if <code>lhs.equals(rhs)</code>
	 */
	private static boolean nullSafeEquals(Object lhs, Object rhs) {
		return (lhs == null) ? (rhs == null) : lhs.equals(rhs);
	}

	/**
	 * Compare position, REF, and ALT (in this order) with the values of the given {@link Annotation}.
	 *
	 * NOTE(review): the first comparison uses the {@link GenomePosition} member and <code>other.getPos()</code>; if
	 * <code>Annotation#getPos()</code> returns an <code>int</code> (as {@link #getPos} of this class does), the two
	 * values are of different types. Confirm against <code>Annotation</code>.
	 */
	@Override
	public int compareTo(Annotation other) {
		return ComparisonChain.start().compare(pos, other.getPos()).compare(ref, other.getRef())
				.compare(alt, other.getAlt()).result();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy