All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.snpeff.interval.ExonSpliceCharacterizer Maven / Gradle / Ivy

The newest version!
package org.snpeff.interval;

import java.util.HashMap;

import org.snpeff.interval.Exon.ExonSpliceType;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.stats.CountByType;
import org.snpeff.util.Gpr;
import org.snpeff.util.Timer;

/**
 * Characterize exons based on alternative splicing
 *
 * References: "Alternative splicing and evolution - diversification, exon definition and function"  (see Box 1)
 *
 * @author pablocingolani
 */
public class ExonSpliceCharacterizer {

	public static final int MAX_EXONS = 1000; // Do not characterize transcripts having more than this number of exons
	public static final int SHOW_EVERY = 1000;

	boolean verbose = false;
	Genome genome;
	HashMap typeByExon;
	CountByType countByType = new CountByType();

	public ExonSpliceCharacterizer(Genome genome) {
		this.genome = genome;
		typeByExon = new HashMap();
	}

	public ExonSpliceCharacterizer(String genomeVer) {
		Config config = new Config(genomeVer);
		SnpEffectPredictor snpEffectPredictor = config.loadSnpEffectPredictor();
		genome = snpEffectPredictor.getGenome();
		typeByExon = new HashMap();
	}

	/**
	 * Characterize all exons
	 */
	public CountByType characterize() {
		type();
		return countByType;
	}

	/**
	 * Count number of exons
	 */
	int countExons() {
		int count = 0;
		for (Gene g : genome.getGenes())
			for (Transcript tr : g)
				count += tr.numChilds();
		return count;
	}

	/**
	 * Does the marker intersect any exon in 'tr'?
	 * @param m
	 * @param tr
	 * @return
	 */
	boolean intersectsAnyExon(Marker m, Transcript tr) {
		for (Exon e : tr)
			if (m.intersects(e)) return true;
		return false;
	}

	/**
	 * Is thins an ALTERNATIVE_3SS exon?
	 * @param exon
	 * @param gene
	 * @return
	 */
	boolean isAlt3ss(Exon exon, Gene gene) {
		for (Transcript tr : gene)
			for (Exon e : tr) {
				if (exon.intersects(e)) {
					if (exon.isStrandPlus()) {
						// Same exon end, different exon start?
						if ((exon.getStart() != e.getStart()) && (exon.getEnd() == e.getEnd())) return true;
					} else {
						// Same exon end, different exon start? (negative strand)
						if ((exon.getStart() == e.getStart()) && (exon.getEnd() != e.getEnd())) return true;
					}
				}
			}

		return false;
	}

	/**
	 * Is thins an ALTERNATIVE_5SS exon?
	 * @param exon
	 * @param gene
	 * @return
	 */
	boolean isAlt5ss(Exon exon, Gene gene) {
		for (Transcript tr : gene)
			for (Exon e : tr) {
				if (exon.intersects(e)) {
					if (exon.isStrandPlus()) {
						// Same exon start, different exon end?
						if ((exon.getStart() == e.getStart()) && (exon.getEnd() != e.getEnd())) return true;
					} else {
						// Same exon start, different exon end? (negative strand)
						if ((exon.getStart() != e.getStart()) && (exon.getEnd() == e.getEnd())) return true;
					}
				}
			}

		return false;
	}

	/**
	 * Is this exon mutually exclusive with another exon?
	 * @param exon
	 * @param gene
	 * @return
	 */
	boolean isMutEx(Exon exon, Gene gene) {
		if (gene.numChilds() <= 1) return false;

		//---
		// Make a list of all unique exons
		//---
		String exonKey = key(exon);
		HashMap uniqEx = new HashMap();
		for (Transcript tr : gene)
			for (Exon e : tr) {
				String ekey = key(e);
				if (!exonKey.equals(ekey)) uniqEx.put(ekey, e);
			}

		//---
		// For each unique exon, compare if it is mutually exclusive with 'exon'
		//---
		Transcript exonTr = (Transcript) exon.getParent();
		for (Exon e : uniqEx.values()) {
			ExonSpliceType type = typeByExon.get(e);;
			if (type == Exon.ExonSpliceType.SKIPPED) { // Only check for these exons (avoid all ALT*)
				boolean xor = true;
				for (Transcript tr : gene) {
					if (exonTr.intersects(tr) && exon.intersects(tr) && e.intersects(exonTr)) { // Make sure both exons intersect both transcripts (otherwise they cannot be mutually exclusive)
						xor &= intersectsAnyExon(e, tr) ^ intersectsAnyExon(exon, tr);
					} else xor = false;
				}

				// XOR is true? => Mutually exclusive
				if (xor) return true;
			}
		}

		return false;
	}

	/**
	 * Create a simple hash key based on choromosomal position
	 * @param m
	 * @return
	 */
	String key(Marker m) {
		return m.getChromosomeName() + ":" + m.getStart() + "-" + m.getEnd();
	}

	public void setVerbose(boolean verbose) {
		this.verbose = verbose;
	}

	/**
	 * Mark exons types
	 */
	void type() {
		if (verbose) {
			Timer.showStdErr("Caracterizing exons by splicing (stage 1) : ");
			System.out.print("\t");
		}

		// Find retained exons
		int numExon = 1;
		for (Gene g : genome.getGenes()) {

			// Count exons
			CountByType count = new CountByType();
			for (Transcript tr : g)
				for (Exon e : tr)
					count.inc(key(e));

			// Label exons
			int countTr = g.numChilds();
			for (Transcript tr : g) {
				for (Exon e : tr) {
					if (verbose) Gpr.showMark(numExon++, SHOW_EVERY, "\t");

					String eKey = key(e);
					int countEx = (int) count.get(eKey);

					// Is this exon maintained in all transcripts?
					if (countEx == countTr) type(e, Exon.ExonSpliceType.RETAINED);
					else {
						if (isAlt3ss(e, g)) type(e, Exon.ExonSpliceType.ALTTENATIVE_3SS);
						else if (isAlt5ss(e, g)) type(e, Exon.ExonSpliceType.ALTTENATIVE_5SS);
						else if (tr.numChilds() > 1) {
							if (e.getRank() == 1) type(e, Exon.ExonSpliceType.ALTTENATIVE_PROMOMOTER);
							else if (e.getRank() == tr.numChilds()) type(e, Exon.ExonSpliceType.ALTTENATIVE_POLY_A);
							else type(e, Exon.ExonSpliceType.SKIPPED);
						}
					}
				}
			}
		}

		if (verbose) {
			System.err.println("");
			Timer.showStdErr("Caracterizing exons by splicing (stage 2) : ");
			System.out.print("\t");
		}

		// Now analyze if there are mutually exclusive exons
		numExon = 1;
		for (Gene g : genome.getGenes()) {
			for (Transcript tr : g) {
				if (tr.numChilds() < MAX_EXONS) {
					for (Exon e : tr) {
						if (verbose) Gpr.showMark(numExon++, SHOW_EVERY, "\t");
						ExonSpliceType type = typeByExon.get(e);
						if (type == ExonSpliceType.SKIPPED) { // Try to re-annotate only these
							if (isMutEx(e, g)) type(e, Exon.ExonSpliceType.MUTUALLY_EXCLUSIVE);
						}
					}
				} else {
					System.err.println("");
					Gpr.debug("WARNING: Gene '" + g.getId() + "', transcript '" + tr.getId() + "' has too many exons (" + tr.numChilds() + " exons). Skipped");
				}
			}
		}

		if (verbose) Timer.showStdErr("done.");
	}

	/**
	 * Mark this exons as 'type'
	 * @param e
	 * @param type
	 */
	void type(Exon e, Exon.ExonSpliceType type) {
		e.spliceType = type;
		countByType.inc(type.toString());
		typeByExon.put(e, type);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy