org.snpeff.snpEffect.testCases.integration.TestCasesIntegrationBase Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.snpEffect.testCases.integration;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.snpeff.SnpEff;
import org.snpeff.fileIterator.VcfFileIterator;
import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Exon;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Genome;
import org.snpeff.interval.Marker;
import org.snpeff.interval.Markers;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Variant;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.snpEffect.Hgvs;
import org.snpeff.snpEffect.LossOfFunction;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.snpEffect.VariantEffect;
import org.snpeff.snpEffect.VariantEffect.EffectImpact;
import org.snpeff.snpEffect.VariantEffect.ErrorWarningType;
import org.snpeff.snpEffect.VariantEffects;
import org.snpeff.snpEffect.commandLine.SnpEffCmdBuild;
import org.snpeff.snpEffect.commandLine.SnpEffCmdEff;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryEmbl;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryGenBank;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryGff3;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryGtf22;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryRefSeq;
import org.snpeff.util.Diff;
import org.snpeff.util.Gpr;
import org.snpeff.vcf.EffFormatVersion;
import org.snpeff.vcf.VcfEffect;
import org.snpeff.vcf.VcfEntry;

import junit.framework.Assert;

/**
 * Base class: Provides common methods used for testing
 */
public class TestCasesIntegrationBase {

	public static int SHOW_EVERY = 10;
	public static final String BASE_DIR = "tests";

	public boolean debug = false;
	public boolean verbose = false || debug;

	protected boolean ignoreErrors = false;
	protected boolean shiftHgvs; // Do or do not shift variants according to HGVS notation (for test cases that
									// were created before the feature was implemented)

	public String testsDir;
	protected String testType;
	protected List prefixes;

	public TestCasesIntegrationBase() {
		super();
		init();
		testType = "integration";

		prefixes = new LinkedList<>();
		prefixes.add("TestCases");
		prefixes.add("TestsCase");
		prefixes.add("Unit");
		prefixes.add("Integration");
	}

	/**
	 * Apply a variant to a transcript
	 */
	public Transcript applyTranscript(String genome, String trId, String vcfFileName) {
		// Load database
		SnpEffectPredictor sep = loadSnpEffectPredictor(genome, false);

		// Find transcript
		Transcript tr = sep.getGenome().getGenes().findTranscript(trId);
		if (tr == null)
			throw new RuntimeException("Could not find transcript ID '" + trId + "'");

		// Apply first variant
		VcfFileIterator vcf = new VcfFileIterator(vcfFileName);
		for (VcfEntry ve : vcf) {
			for (Variant var : ve.variants()) {
				Transcript trNew = tr.apply(var);
				if (debug)
					Gpr.debug(trNew);
				return trNew;
			}
		}

		throw new RuntimeException("Could not apply any variant!");
	}

	/**
	 * Build a genome
	 */
	public SnpEffectPredictor build(String genome) {
		String args[] = { "build", genome };
		SnpEff snpeff = new SnpEff(args);
		snpeff.setVerbose(verbose);
		SnpEffCmdBuild snpeffBuild = (SnpEffCmdBuild) snpeff.cmd();
		snpeffBuild.run();
		return snpeffBuild.getConfig().getSnpEffectPredictor();
	}

	/**
	 * Build a genome from a RefSeq file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildAndCompare(String genome, String refSeqFile, String fastaFile, String resultFile,
			boolean hideProtein) {
		String expectedResult = Gpr.readFile(resultFile).trim();

		// Build
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryRefSeq factory = new SnpEffPredictorFactoryRefSeq(config);
		factory.setFileName(refSeqFile);
		factory.setVerbose(verbose);

		// Set fasta file (or don't read sequences)
		if (fastaFile != null)
			factory.setFastaFile(fastaFile);
		else
			factory.setReadSequences(false);

		SnpEffectPredictor sep = factory.create();

		// Compare result
		String result = showTranscripts(sep.getGenome(), hideProtein).trim();
		if (verbose)
			System.out.println(result);
		Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result));

		return sep;
	}

	/**
	 * Build a genome from a embl file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildEmbl(String genome, String emblFile) {
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryEmbl sepEmbl = new SnpEffPredictorFactoryEmbl(config, emblFile);
		SnpEffectPredictor sep = sepEmbl.create();
		return sep;
	}

	public SnpEffectPredictor buildGeneBank(String genome, String genBankFile) {
		return buildGeneBank(genome, genBankFile, false);
	}

	/**
	 * Build a genome from a genbank file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildGeneBank(String genome, String genBankFile, boolean circularCorrectlargeGap) {
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryGenBank sepfg = new SnpEffPredictorFactoryGenBank(config, genBankFile);
		sepfg.setVerbose(verbose);
		sepfg.setCircularCorrectLargeGap(circularCorrectlargeGap);
		SnpEffectPredictor sep = sepfg.create();
		return sep;
	}

	/**
	 * Build a genome from a GFF3 file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildGff3AndCompare(String genome, String gff3File, String resultFile, boolean readSeqs,
			boolean createRandSequences) {

		// Build
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryGff3 fgff3 = new SnpEffPredictorFactoryGff3(config);
		fgff3.setVerbose(verbose);
		if (gff3File != null)
			fgff3.setFileName(gff3File);
		fgff3.setReadSequences(readSeqs);
		fgff3.setCreateRandSequences(createRandSequences);
		fgff3.setRandom(new Random(20140410)); // Note: we want consistent results in our test cases, so we always
												// initialize the random generator in the same way

		SnpEffectPredictor sep = fgff3.create();

		if (resultFile != null) {
			// Compare result
			String result = showTranscripts(sep.getGenome()).trim();
			String expectedResult = Gpr.readFile(resultFile).trim();

			// Remove spaces and compare
			String erNs = Gpr.noSpaces(expectedResult);
			String rNs = Gpr.noSpaces(result);
			if (verbose || !erNs.equals(rNs)) {
				System.out.println("Result:\n----------\n" + result + "\n----------\n");
				System.out.println("Expected (" + resultFile + "):\n----------\n" + expectedResult + "\n----------\n");
				System.out.println(new Diff(expectedResult, result));
			}
			Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result));
		}

		return sep;
	}

	/**
	 * Build a genome from a GTF file and compare results to 'expected' results
	 */
	public void buildGtfAndCompare(String genome, String gtf22, String fastaFile, String resultFile) {
		String expectedResult = Gpr.readFile(resultFile).trim();

		// Build
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryGtf22 fgtf22 = new SnpEffPredictorFactoryGtf22(config);
		fgtf22.setFileName(gtf22);
		fgtf22.setVerbose(verbose);

		// Set fasta file (or don't read sequences)
		if (fastaFile != null)
			fgtf22.setFastaFile(fastaFile);
		else
			fgtf22.setReadSequences(false);

		SnpEffectPredictor sep = fgtf22.create();

		// Compare result
		String result = showTranscripts(sep.getGenome(), true).trim();
		if (verbose)
			System.out.println(result);
		Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result));
	}

	Marker cdsMarker(Transcript tr) {
		int start = tr.isStrandPlus() ? tr.getCdsStart() : tr.getCdsEnd();
		int end = tr.isStrandPlus() ? tr.getCdsEnd() : tr.getCdsStart();
		return new Marker(tr.getParent(), start, end, false, "");
	}

	void checkAnnotations(SnpEffectPredictor sep, String chr, int pos, String ref, String alt, String hgvsP,
			String hgvsC, String eff) {
		Genome genome = sep.getGenome();
		Variant var = new Variant(genome.getChromosome(chr), pos, ref, alt, "");
		VariantEffects varEffs = sep.variantEffect(var);
		for (VariantEffect varEff : varEffs) {
			VcfEffect vcfEff = new VcfEffect(varEff, EffFormatVersion.FORMAT_ANN_1);
			if (verbose)
				System.out.println("\t" + vcfEff);

			Assert.assertEquals(hgvsP, vcfEff.getHgvsProt());
			Assert.assertEquals(hgvsC, vcfEff.getHgvsDna());
			Assert.assertEquals(eff, vcfEff.getEffectsStrSo());
		}
	}

	/**
	 * Check HGVS annotations
	 */
	public void checkHgvs(String genome, String vcfFile, int minCheck) {
		List list = snpEffect(genome, vcfFile, null);

		int countCheck = 0;
		for (VcfEntry ve : list) {
			if (verbose)
				System.out.println(ve);

			String transcriptId = ve.getInfo("TR");
			if (verbose)
				System.out.println("\tLooking for transcript '" + transcriptId + "'");
			for (VcfEffect veff : ve.getVcfEffects()) {

				if (veff.getTranscriptId().equals(transcriptId)) {
					if (verbose) {
						System.out.println("\t" + veff);
						System.out.println("\t\tHGVS.p: " + veff.getHgvsP() + "\t\tHGVS.c: " + veff.getHgvsC());
					}

					// Compare against expected result
					String expectedHgvsC = ve.getInfo("HGVSC");
					if (expectedHgvsC != null) {
						String actualHgvsC = veff.getHgvsC();
						Assert.assertEquals("HGVS.c mismatch", expectedHgvsC, actualHgvsC);
						countCheck++;
					}

					String expectedHgvsP = ve.getInfo("HGVSP");
					if (expectedHgvsP != null) {
						String actualHgvsP = veff.getHgvsP();
						Assert.assertEquals("HGVS.p mismatch", expectedHgvsP, actualHgvsP);
						countCheck++;
					}
				}
			}
		}

		if (verbose)
			System.out.println("Total checked: " + countCheck);
		Assert.assertTrue("Too few variants checked: " + countCheck, countCheck >= minCheck);
	}

	// Check variant's HGVS.c nomenclature
	void checkHgvscForTr(List list, String trId) {
		boolean found = false;
		for (VcfEntry ve : list) {
			if (verbose)
				System.out.println(ve);

			for (VcfEffect veff : ve.getVcfEffects()) {
				if (veff.getTranscriptId().equals(trId)) {
					if (verbose) {
						System.out.println("\t" + veff);
						System.out.println("\t\tHGVS.c: " + veff.getHgvsC());
					}

					// Compare against expected result
					String expectedHgvsC = ve.getInfo("HGVSC");
					String actualHgvsC = veff.getHgvsC();
					Assert.assertEquals(expectedHgvsC, actualHgvsC);
					found = true;
				}
			}
		}

		Assert.assertTrue("No annotations found for transcript " + trId, found);
	}

	void checkMotif(String genomeVer, String vcfFile, String effectDetails, EffectImpact impact, boolean useAnn) {
		String args[] = { "-classic", "-motif", "-ud", "0", genomeVer, vcfFile };
		String argsAnn[] = { "-ud", "0", genomeVer, vcfFile };
		if (useAnn)
			args = argsAnn;

		SnpEff cmd = new SnpEff(args);

		// Run
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);
		List vcfEntries = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		// Check results
		int numNextProt = 0;
		for (VcfEntry ve : vcfEntries) {
			for (VcfEffect veff : ve.getVcfEffects()) {
				if (verbose)
					System.out.println("\t" + veff.getVcfFieldString());

				// Is it motif?
				if (veff.getEffectType() == EffectType.MOTIF) {

					boolean ok = false;
					if (useAnn) {
						// Motif ID and impact match?
						ok = effectDetails.equals(veff.getFeatureId()) && (impact == veff.getImpact());
					} else {
						// Motif ID and impact match?
						ok = effectDetails.equals(veff.getEffectDetails()) && (impact == veff.getImpact());
					}

					if (ok)
						numNextProt++;
				}
			}
		}

		Assert.assertEquals(1, numNextProt);
	}

	void checkNextProt(String genomeVer, String vcfFile, String effectDetails, EffectImpact impact, boolean useAnn) {
		String args[] = { "-classic", "-nextProt", genomeVer, vcfFile };
		String argsAnn[] = { genomeVer, vcfFile };
		if (useAnn)
			args = argsAnn;

		SnpEff cmd = new SnpEff(args);
		cmd.setVerbose(verbose);
		cmd.setSupressOutput(!verbose);

		// Run
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		List vcfEntries = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		// Check results
		int numNextProt = 0;
		for (VcfEntry ve : vcfEntries) {
			for (VcfEffect veff : ve.getVcfEffects()) {

				if ((veff.hasEffectType(EffectType.NEXT_PROT)) // Is it nextProt?
						&& (impact == veff.getImpact())) // Is impact OK?
				{
					// Are details OK?
					boolean match = false;
					if (!useAnn && effectDetails.equals(veff.getEffectDetails()))
						match = true;
					if (useAnn && effectDetails.equals(veff.getFeatureType()))
						match = true;

					if (match)
						numNextProt++;
				}

				if (verbose) //
					System.out.println("\t" + veff //
							+ "\n\t\tEffect            : " + veff.getVcfFieldString() //
							+ "\n\t\tEffect type       : " + veff.getEffectType() //
							+ "\n\t\tEffect details    : '" + veff.getEffectDetails() + "'" //
							+ "\n\t\tEffect impact     : '" + veff.getImpact() + "'" //
							+ "\n\t\tExpected details  : '" + effectDetails + "'" //
							+ "\n\t\tExpected impact   : '" + impact + "'" //
							+ "\n\t\tCount matches     : " + numNextProt //
							+ "\thasEffectType : " + veff.hasEffectType(EffectType.NEXT_PROT) //
							+ "\tmatch details : " + effectDetails.equals(veff.getEffectDetails()) //
							+ "\tmatch impact: " + (impact == veff.getImpact()) //
					);
			}
		}

		Assert.assertEquals(1, numNextProt);
	}

	/**
	 * Check that NMD works for a given transcript
	 */
	void checkNmd(Config config, Gene gene, Transcript tr) {
		int pos = 0;
		boolean isNmd[] = new boolean[tr.cds().length()];
		HashSet codingExons = new HashSet<>();

		StringBuilder nmdStr = new StringBuilder();
		StringBuilder nmdStrSimple = new StringBuilder();
		for (Exon exon : tr.sortedStrand()) {
			int step = exon.isStrandPlus() ? 1 : -1;
			int from = exon.isStrandPlus() ? exon.getStart() : exon.getEnd();

			for (int expos = from; (exon.getStart() <= expos) && (expos <= exon.getEnd()); expos += step) {
				// Not in UTR? => Test
				if (!tr.isUtr(expos)) {
					codingExons.add(exon);

					// Create a seqChange
					// SeqChange seqChange = new SeqChange(tr.getChromosome(), expos, expos, "");
					Variant variant = new Variant(tr.getChromosome(), expos, "A", "C"); // Create a seqChange

					// Create a STOP_GAIN effect
					VariantEffect variantEffect = new VariantEffect(variant);
					variantEffect.set(exon, EffectType.STOP_GAINED, EffectType.STOP_GAINED.effectImpact(), "");
					LinkedList changeEffects = new LinkedList<>();
					changeEffects.add(variantEffect);

					// Create a LOF object and analyze the effect
					LossOfFunction lof = new LossOfFunction(config, changeEffects);
					isNmd[pos] = lof.isNmd();

					nmdStr.append(isNmd[pos] ? '+' : '.');
					nmdStrSimple.append(isNmd[pos] ? '+' : '.');
					pos++;
				} else
					nmdStr.append('U');
			}
			nmdStr.append('\t');
			nmdStrSimple.append('\t');
		}

		// Show string
		if (verbose)
			System.err.println(nmdStr);
		if (debug)
			System.err.println("\tCoding Exons:" + codingExons.size());

		// ---
		// Check that NMP prediction is 'correct'
		// ---
		// We need a splice event in the coding part
		if (codingExons.size() > 1) {
			// Use the 'simple' string to check
			StringBuilder sb = new StringBuilder();
			String ex[] = nmdStrSimple.toString().split("\t");
			for (int i = 0; i < (ex.length - 1); i++)
				sb.append(ex[i]);

			// Check that last 50 bases are '.'
			String simpleNoLast = sb.toString();
			int lastNmd = Math.max(0, simpleNoLast.length() - LossOfFunction.MND_BASES_BEFORE_LAST_JUNCTION);
			String points = simpleNoLast.substring(lastNmd) + ex[ex.length - 1];
			String plus = simpleNoLast.substring(0, lastNmd);

			if (debug)
				System.err.println("\tPoints: " + points + "\n\tPlus :" + plus);

			// Check
			Assert.assertEquals(0, points.replace('.', ' ').trim().length());
			Assert.assertEquals(0, plus.replace('+', ' ').trim().length());
		}
	}

	public void checkNoChange(String args[]) {
		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff snpeff = (SnpEffCmdEff) cmd.cmd();
		snpeff.setVerbose(verbose);
		snpeff.setSupressOutput(!verbose);
		List vcfEntries = snpeff.run(true);

		for (VcfEntry ve : vcfEntries) {
			if (verbose)
				System.out.println(ve);
			for (VcfEffect veff : ve.getVcfEffects()) {
				EffectImpact imp = veff.getImpact();
				if (verbose)
					System.out.println("\t" + imp + "\t" + veff);
				Assert.assertEquals(EffectImpact.MODIFIER, imp);
			}
		}
	}

	/**
	 * Run a predictor and check if the expected warnings appear
	 */
	public void checkTranscriptError(String args[], ErrorWarningType warningType) {
		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff snpeff = (SnpEffCmdEff) cmd.cmd();
		snpeff.setVerbose(verbose);
		snpeff.setSupressOutput(!verbose);
		List vcfEntries = snpeff.run(true);

		boolean hasWarning = false;
		for (VcfEntry ve : vcfEntries) {
			if (verbose)
				System.out.println(ve);
			for (VcfEffect veff : ve.getVcfEffects()) {
				EffectImpact imp = veff.getImpact();
				if (verbose)
					System.out.println("\t" + imp + "\t" + veff);

				// Check if the warning type we expect is there
				if (veff.getErrorsWarning() != null)
					hasWarning |= veff.getErrorsWarning().indexOf(warningType.toString()) >= 0;
			}
		}

		Assert.assertEquals(true, hasWarning);
	}

	public void compareHgvs(String genome, String vcfFileName) {
		compareHgvs(genome, vcfFileName, true);
	}

	public void compareHgvs(String genome, String vcfFileName, boolean compareProt) {
		// Create SnpEff
		String args[] = { genome, vcfFileName };
		SnpEffCmdEff snpeff = new SnpEffCmdEff();
		snpeff.parseArgs(args);
		snpeff.setDebug(debug);
		snpeff.setVerbose(verbose);
		snpeff.setSupressOutput(!verbose);
		snpeff.setUpDownStreamLength(0);
		snpeff.setShiftHgvs(shiftHgvs);
		snpeff.setFormatVersion(EffFormatVersion.FORMAT_EFF_4);

		// Run & get result (single line)
		List results = snpeff.run(true);
		Set trNotFoundSet = new HashSet<>();

		// Make sure entries are annotated as expected
		int countOkC = 0, countErrC = 0, countOkP = 0, countErrP = 0, countTrFound = 0;
		for (VcfEntry ve : results) {
			// Extract expected HGVS values
			String hgvsCexp = ve.getInfo("HGVS_C") != null ? ve.getInfo("HGVS_C") : "";
			String trIdC = Hgvs.parseTranscript(hgvsCexp);
			hgvsCexp = Hgvs.removeTranscript(hgvsCexp);

			String hgvsPexp = "";
			String trIdP = "";
			if (compareProt) {
				hgvsPexp = ve.getInfo("HGVS_P") != null ? ve.getInfo("HGVS_P") : "";
				trIdP = Hgvs.parseTranscript(hgvsPexp);
				hgvsPexp = Hgvs.removeTranscript(hgvsPexp);
			}

			if (verbose) {
				System.out.println(ve);
				if (trIdC != null)
					System.out.println("\tExpected HGVS_C: " + trIdC + ":" + hgvsCexp);
				if (trIdP != null)
					System.out.println("\tExpected HGVS_P: " + trIdP + ":" + hgvsPexp + "\n");
			}

			// Check all effects
			boolean okC = false, okP = false, trFound = false;
			for (VcfEffect veff : ve.getVcfEffects()) {
				// Parse calculated HGVS values
				String trId = veff.getTranscriptId();
				String hgvsCactual = veff.getHgvsDna() != null ? veff.getHgvsDna() : "";
				String hgvsPactual = veff.getHgvsProt() != null ? veff.getHgvsProt() : "";

				// Compare results for HGVS_DNA
				boolean foundC = false, foundP = false;
				if (trId != null && trId.equals(trIdC)) {
					trFound = true;
					if (!hgvsCexp.equals(hgvsCactual)) {
						if (!ignoreErrors)
							Assert.assertEquals(hgvsCexp, hgvsCactual);
						countErrC++;
					} else {
						okC = foundC = true;
						countOkC++;
					}
				}

				// Compare results for HGVS_PROT
				if (compareProt && trId != null && trId.equals(trIdP)) {
					if (!hgvsPexp.equals(hgvsPactual)) {
						if (!ignoreErrors)
							Assert.assertEquals(hgvsPexp, hgvsPactual);
						countErrP++;
					} else {
						okP = foundP = true;
						countOkP++;
					}
				}

				if (verbose) {
					System.out.println("\t" + veff //
							+ "\n\t\tEFF    : " + veff.getEffectsStr() //
							+ "\n\t\tHGVS_C : " + trId + ":" + hgvsCactual + "\t\tExpected: " + trIdC + ":" + hgvsCexp
							+ "\t" + (foundC ? "OK" : "NO") //
							+ (compareProt
									? "\n\t\tHGVS_P : " + trId + ":" + hgvsPactual + "\t\tExpected: " + trIdP + ":"
											+ hgvsPexp + "\t" + (foundP ? "OK" : "NO")
									: "") //
							+ "\n");
				}

			}

			if (!trFound) {
				System.out.println("Transcript '" + trIdC + "' not found.");
				countTrFound++;
				trNotFoundSet.add(trIdC);
			}

			if (!ignoreErrors) {
				Assert.assertTrue("HGVS (DNA) not found: '" + hgvsCexp + "'", okC);
				if (!hgvsPexp.isEmpty())
					Assert.assertTrue("HGVS (Protein) not found: '" + hgvsPexp + "'", okP);
			} else {
				// Show errors
				if (!okC)
					System.err.println("HGVS (DNA) not found : '" + hgvsCexp + "', vcf entry:\t" + ve);
				if (compareProt && !okP)
					System.err.println("HGVS (Prot) not found: '" + hgvsPexp + "', vcf entry:\t" + ve);
			}
		}

		if (verbose || ignoreErrors) {
			System.out.println("Count OKs   :\tHGVS (DNA): " + countOkC + "\tHGVS (Protein): " + countOkP);
			System.out.println("Count Errors:\tHGVS (DNA): " + countErrC + "\tHGVS (Protein): " + countErrP);
			System.out.println("Transcripts not found:\t" + countTrFound + ", unique: " + trNotFoundSet.size() + "\n"
					+ trNotFoundSet);
		}
	}

	/**
	 * Compare with results from ENSEMBL's VEP on transcript ENST00000268124
	 */
	public void compareVep(String genome, String vcf, String trId) {
		String args[] = { "-classic", genome, vcf };

		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setSupressOutput(!verbose);

		List vcfEnties = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		for (VcfEntry ve : vcfEnties) {

			StringBuilder msg = new StringBuilder();

			// Check effects
			boolean ok = false;
			for (VcfEffect veff : ve.getVcfEffects()) {
				// Find transcript
				if (veff.getTranscriptId() != null && veff.getTranscriptId().equals(trId)) {
					// Check that reported effect is the same
					String vep = ve.getInfo("EFF_V");
					String eff = veff.getEffectType().toString();

					if (vep.equals(eff))
						ok = true;
					else {
						if (vep.equals("CODON_INSERTION") && eff.equals("CODON_CHANGE_PLUS_CODON_INSERTION"))
							ok = true; // OK. I consider these the same
						else if (vep.equals("STOP_GAINED,CODON_INSERTION") && eff.equals("STOP_GAINED"))
							ok = true; // OK. I consider these the same
						else if (eff.equals("SPLICE_SITE_REGION"))
							ok = true; // OK. I'm not checking these
						else {
							String line = "\n" + ve + "\n\tSnpEff:" + veff + "\n\tVEP   :" + ve.getInfo("EFF_V") + "\t"
									+ ve.getInfo("AA") + "\t" + ve.getInfo("CODON") + "\n";
							msg.append(line);
						}
					}
				}
			}

			if (!ok)
				throw new RuntimeException(msg.toString());
		}
	}

	/**
	 * Benchmarking: Compare with results from ENSEMBL's VEP
	 */
	public void compareVepSO(String genome, String vcf, String trId) {
		String args[] = { "-classic", "-sequenceOntology", genome, vcf };

		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);

		List vcfEnties = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		for (VcfEntry ve : vcfEnties) {
			// Create a set of found variants
			HashSet vepSos = new HashSet<>();
			String vepSo = ve.getInfo("SO");
			for (String so : vepSo.split(",")) {
				if (so.equals("feature_elongation"))
					so = null; // This one is useless, if the variant is an insertion, it obviously causes an
								// elongation
				else if (so.equals("feature_truncation"))
					so = null; // This one is useless, if the variant is an insertion, it obviously causes an
								// elongation

				if (so != null)
					vepSos.add(so);
			}

			// Get effects for transcript 'trId'
			HashSet effSos = new HashSet<>();
			List veffs = ve.getVcfEffects();
			for (VcfEffect veff : veffs) {
				if (veff.getTranscriptId().equals(trId)) {
					String effs = veff.getEffString();

					for (String eff : effs.split("\\" + EffFormatVersion.EFFECT_TYPE_SEPARATOR_OLD)) {
						// OK. I consider these the same
						if (eff.equals("5_prime_UTR_premature_start_codon_gain_variant"))
							eff = "5_prime_UTR_variant";
						if (eff.equals("disruptive_inframe_insertion"))
							eff = "inframe_insertion";
						if (eff.equals("conservative_inframe_insertion"))
							eff = "inframe_insertion";
						if (eff.equals("start_lost"))
							eff = "initiator_codon_variant";
						effSos.add(eff);
					}
				}
			}

			// Make sure both sets are equal
			boolean error = !effSos.containsAll(vepSos);

			if (error) {
				String msg = "\n" + ve;
				msg += "\n\tSnpEff    : ";
				for (String e : effSos)
					msg += e + " ";

				msg += "\n\tVEP       : ";
				for (String e : vepSos)
					msg += e + " ";

				msg += "\n\tMarker    : " + ve.getChromosomeName() + ":" + ve.getStart() + "-" + ve.getEnd();
				Gpr.debug(msg);
				throw new RuntimeException(msg);
			}

		}
	}

	/**
	 * Find a marker that intersects variant
	 */
	Marker findMarker(SnpEffectPredictor sep, Variant variant, EffectType effectType, Transcript tr,
			Marker markerFilter) {
		Markers markers = sep.queryDeep(variant);

		for (Marker m : markers) {
			Marker mfilter = null;
			if (markerFilter != null)
				mfilter = (Marker) m.findParent(markerFilter.getClass());

			Transcript mtr = (Transcript) m.findParent(Transcript.class);

			if (debug)
				Gpr.debug("\tLooking for '" + effectType + "' in '"
						+ (markerFilter != null ? markerFilter.getId() : "NULL") //
						+ "', class: " + (markerFilter != null ? markerFilter.getClass().getSimpleName() : "") //
						+ "\t\tFound: '" + m.getType() + "', mfilter: " + (mfilter != null ? mfilter.getId() : "NULL") //
						+ ", parent: " + m.getParent().getClass().getSimpleName() //
				);

			if ((m.getType() == effectType) && (mfilter != null) && (mtr != null)) {
				if (markerFilter != null) {
					// Id filter?
					if (mfilter.getId().equals(markerFilter.getId()))
						return m;
				} else if (tr != null) {
					// Transcript filter?
					if (mtr.getId().equals(tr.getId()))
						return m;
				} else
					return m; // No exon reference? => just return this
			}
		}

		throw new RuntimeException("Cannot find '" + effectType + "' "
				+ (markerFilter != null ? "for exon " + markerFilter.getId() : "") + ", seqChange: " + variant);
	}

	public void init() {
		// Nothing done
	}

	/**
	 * Load predictor and create interval forest
	 */
	public SnpEffectPredictor loadSnpEffectPredictor(String genome, boolean build) {
		Config config = new Config(genome);
		SnpEffectPredictor sep = SnpEffectPredictor.load(config);
		sep.createGenomicRegions();
		if (build)
			sep.buildForest();
		return sep;
	}

	public String path(String fileName) {
		return BASE_DIR + "/" + testType + "/" + pathClassName() + "/" + fileName;
	}

	protected String pathClassName() {
		String sname = this.getClass().getSimpleName();
		for (String prefix : prefixes)
			if (sname.startsWith(prefix))
				sname = sname.substring(prefix.length());
		return sname.substring(0, 1).toLowerCase() + sname.substring(1);
	}

	/**
	 * Used to migrate test files in old path
	 * 
	 * @param fileName
	 * @return
	 */
	public String pathMigrate(String fileName) {
		String dir = BASE_DIR + "/" + testType + "/" + pathClassName();
		String path = dir + "/" + fileName;
		String oldPath = BASE_DIR + "/old/" + fileName;

		if (!Gpr.exists(dir)) {
			Gpr.debug("File migration: Creating dir:" + dir);
			File d = new File(dir);
			d.mkdir();
		}

		if (!Gpr.exists(oldPath)) {
			if (Gpr.exists(oldPath + ".gz")) {
				oldPath += ".gz";
				path += ".gz";
			} else
				Gpr.debug("File migration: Cannot find original file:" + oldPath);
		}

		if (!Gpr.exists(path) && Gpr.exists(oldPath)) {
			Gpr.debug("File migration: Moving file:" + path);
			try {
				FileUtils.moveFile(new File(oldPath), new File(path));
			} catch (IOException e) {
				throw new RuntimeException("Cannot copy files:\n\tsrc: " + oldPath + "\n\tdst: " + path, e);

			}
		}
		return path;
	}

	public String showTranscripts(Genome genome) {
		return showTranscripts(genome, false);
	}

	/**
	 * Show a genome in a 'standard' way
	 */
	public String showTranscripts(Genome genome, boolean hideProtein) {
		StringBuilder sb = new StringBuilder();

		// Genome
		sb.append(genome.getVersion() + "\n");

		// Chromosomes
		for (Chromosome chr : genome)
			sb.append(chr + "\n");

		// Genes
		ArrayList genes = new ArrayList<>();

		// Sort genes
		for (Gene gene : genome.getGenes())
			genes.add(gene);
		Collections.sort(genes);

		// We don't compare protein codding in this test
		// Show genes
		for (Gene gene : genes) {

			if (hideProtein) { // Don't show protein information
				for (Transcript tr : gene.sortedStrand())
					tr.setProteinCoding(false);
			}

			sb.append(gene);

			for (Transcript tr : gene.sortedStrand()) {
				sb.append("\t\tCDS '" + tr.getId() + "': " + tr.cds() + "\n");
			}
		}

		return sb.toString();
	}

	/**
	 * Calculate snp effect for an input VCF file
	 */
	public List snpEffect(String genome, String vcfFile, String otherArgs[]) {
		return snpEffect(genome, vcfFile, otherArgs, EffFormatVersion.FORMAT_EFF_4);
	}

	/**
	 * Calculate snp effect for an input VCF file
	 */
	public List snpEffect(String genome, String vcfFile, String otherArgs[],
			EffFormatVersion effFormatVersion) {
		// Arguments
		ArrayList args = new ArrayList<>();
		if (otherArgs != null) {
			for (String a : otherArgs)
				args.add(a);
		}
		args.add(genome);
		args.add(vcfFile);

		SnpEff cmd = new SnpEff(args.toArray(new String[0]));
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);
		if (effFormatVersion != null)
			cmdEff.setFormatVersion(effFormatVersion);

		// Run command
		List list = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		return list;
	}

	/**
	 * Calculate snp effect for a list of snps using cancer samples
	 */
	public void snpEffectCancer(String vcfFile, String txtFile, String genome, boolean classic, String hgsvP,
			String hgvsC, String genotype, String trId) {
		// Create command
		List argList = new ArrayList<>();
		argList.add("-cancer");
		argList.add("-hgvs");
		if (classic)
			argList.add("-classic");
		if (txtFile != null) {
			argList.add("-cancerSamples");
			argList.add(txtFile);
		}
		argList.add(genome);
		argList.add(vcfFile);

		String args[] = argList.toArray(new String[0]);
		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);

		// Run command
		List list = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		// Find AA change for a genotype
		boolean found = false;
		for (VcfEntry vcfEntry : list) {
			if (debug)
				System.err.println(vcfEntry);
			for (VcfEffect eff : vcfEntry.getVcfEffects()) {
				if (debug)
					System.err.println("\t" + eff + "\n\t\tHGVS.p : " + eff.getHgvsProt() + "\n\t\tHGVS.c : "
							+ eff.getHgvsDna() + "\n\t\tGenotype: " + eff.getGenotype());
				if (trId != null && !trId.equals(eff.getTranscriptId()))
					continue;
				if (genotype.equals(eff.getGenotype())) {
					if (hgsvP != null)
						Assert.assertEquals(hgsvP, eff.getHgvsP());
					if (hgvsC != null)
						Assert.assertEquals(hgvsC, eff.getHgvsDna());
					found = true;
				}
			}
		}

		// Not found? Error
		if (!found)
			throw new RuntimeException("Genotype '" + genotype + "' not found.");
	}

	/**
	 * Create change effects
	 */
	LinkedList variantEffects(Variant variant, EffectType effectType, Marker marker) {
		VariantEffect changeEffect = new VariantEffect(variant);
		changeEffect.set(marker, effectType, effectType.effectImpact(), "");
		LinkedList variantEffects = new LinkedList<>();
		variantEffects.add(changeEffect);
		return variantEffects;
	}

}