All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.snpeff.snpEffect.testCases.integration.CompareToEnsembl Maven / Gradle / Ivy

The newest version!
package org.snpeff.snpEffect.testCases.integration;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Random;

import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Genome;
import org.snpeff.interval.Marker;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Variant;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.snpEffect.VariantEffect;
import org.snpeff.snpEffect.VariantEffects;
import org.snpeff.util.Gpr;
import org.snpeff.util.GprSeq;
import org.snpeff.util.Timer;

/**
 * Compare our results to ENSEML's Variant Effect predictor's output
 *
 * @author pcingola
 */
public class CompareToEnsembl {

	boolean throwException = false;
	boolean verbose = false;
	Random rand;
	Config config;
	Genome genome;
	SnpEffectPredictor snpEffectPredictor;

	/**
	 * Main
	 */
	public static void main(String args[]) {
		//---
		// Parse command line arguments
		//---
		if ((args.length != 2) && (args.length != 3)) {
			System.err.println("Usage: " + CompareToEnsembl.class.getSimpleName() + " genomeName ensemblFile [transcriptId]");
			System.exit(1);
		}

		String genomeName = args[0];
		String ensemblFile = args[1];
		String trName = null;
		if (args.length > 2) trName = args[2];

		//---
		// Run
		//---
		CompareToEnsembl compareToEnsembl = new CompareToEnsembl(genomeName, false);
		compareToEnsembl.compareEnsembl(ensemblFile, trName);
	}

	public CompareToEnsembl(String genomeName, boolean throwException) {
		this.throwException = throwException;
		if (verbose) Timer.showStdErr("Loading predictor");
		config = new Config(genomeName, Config.DEFAULT_CONFIG_FILE);
		config.loadSnpEffectPredictor();
		snpEffectPredictor = config.getSnpEffectPredictor();
		genome = config.getGenome();
		snpEffectPredictor.buildForest();
	}

	/**
	 * Transform 'change' into an ENSEMBL-like string
	 */
	String change2str(VariantEffect change) {
		String str = effTranslate(change.getEffectType());

		if (change.getCodonsRef().isEmpty() && change.getCodonsAlt().isEmpty()) str += " -";
		else str += " " + change.getCodonsRef() + "/" + change.getCodonsAlt();

		if (change.getAaRef().isEmpty() && change.getAaAlt().isEmpty()) str += " -";
		else if (change.getAaRef().equals(change.getAaAlt())) str += " " + change.getAaAlt();
		else str += " " + change.getAaRef() + "/" + change.getAaAlt();

		return str;
	}

	/**
	 * Compare our results to some ENSEMBL annotations
	 */
	public void compareEnsembl(String ensemblFile, String trName) {
		HashMap seqChanges = readEnsemblFile(ensemblFile);
		ArrayList list = new ArrayList();
		list.addAll(seqChanges.keySet());
		Collections.sort(list);

		for (Variant seqChange : list) {
			VariantEffects changes = snpEffectPredictor.variantEffect(seqChange);

			boolean ok = false;
			StringBuffer changesSb = new StringBuffer();
			StringBuffer changesAllSb = new StringBuffer();

			// Compare to all changes found by SnpEff
			for (VariantEffect change : changes) {
				Marker m = change.getMarker();

				// Find transcript
				Transcript tr = null;
				while ((m != null) && (tr == null)) {
					if (m instanceof Transcript) tr = (Transcript) m;
					m = m.getParent();
				}

				// Compare changes?
				if ((tr != null) && ((trName == null) || tr.getId().equals(trName))) {
					String id = change2str(change);
					changesAllSb.append("\tSnpEff  :\t" + change + "\n");

					if (id.equals(seqChange.getId())) {
						changesSb.append(id + "\t");
						ok = true;
					}
				}
			}

			// Was the change found?
			if (verbose) if (ok && verbose) System.out.println("OK   :\t" + seqChange + "\t'" + changesSb + "'\n\tEnsembl :\t" + seqChanges.get(seqChange) + "\n" + changesAllSb);
			else {
				String line = "DIFF :\t" + seqChange + "\t'" + changesSb + "'\n\tEnsembl :\t" + seqChanges.get(seqChange) + "\n" + changesAllSb;
				if (verbose) System.out.println(line);
				if (throwException) throw new RuntimeException(line);
			}
		}
	}

	/**
	 * Translate an effect to make it compatible to ENSEMBL's outputS
	 */
	String effTranslate(EffectType eff) {
		switch (eff) {
		case UTR_5_PRIME:
		case START_GAINED:
			return "5PRIME_UTR";
		case UTR_3_PRIME:
			return "3PRIME_UTR";
		case NON_SYNONYMOUS_START:
		case START_LOST:
			return "NON_SYNONYMOUS_CODING";
		case INTRON:
			return "INTRONIC";
		}
		return eff.toString();
	}

	/**
	 * Find a transcript
	 */
	Transcript findTranscriptByName(String trName) {
		for (Gene gene : genome.getGenes()) {
			for (Transcript tr : gene)
				if (tr.getId().equals(trName)) return tr;
		}
		return null;
	}

	/**
	 * Read a file and create a list of SeqChanges
	 */
	HashMap readEnsemblFile(String fileName) {
		String lines[] = Gpr.readFile(fileName).split("\n");

		if (lines.length <= 0) throw new RuntimeException("Cannot open file '" + fileName + "' (or it's empty).");

		HashMap seqChanges = new HashMap();

		for (String line : lines) {
			Variant seqChange = str2seqChange(line);
			seqChanges.put(seqChange, line);
		}

		return seqChanges;
	}

	public void setVerbose(boolean verbose) {
		this.verbose = verbose;
	}

	/**
	 * Create a SeqChange from an ENSEMBL line
	 */
	Variant str2seqChange(String line) {
		try {
			String recs[] = line.split("\t");

			// Parse chomo, position, ref and alt from recs[1]
			String chrPos[] = recs[0].split("_");

			Chromosome chromo = genome.getChromosome(chrPos[0]);
			int pos = Gpr.parseIntSafe(chrPos[1]) - 1;

			// Parse 'ALT'
			String alt = chrPos[2];
			if (chrPos[2].indexOf('/') > 0) {
				String ra[] = chrPos[2].split("/");
				alt = ra[1];
			}

			// We don't care about the reference (as long as it's different that 'ALT'
			String ref = "A";
			for (char base : GprSeq.BASES) {
				ref = "" + base;
				if (!ref.equals(alt)) break;
			}

			// ID
			String eff = recs[6];
			if (eff.indexOf(',') > 0) eff = eff.split(",")[0];
			String id = eff + " " + recs[11] + " " + recs[10];

			// Create SeqChange
			Variant seqChange = new Variant(chromo, pos, ref, alt, id);
			return seqChange;
		} catch (Exception e) {
			throw new RuntimeException("Error parsing line:\n" + line, e);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy