All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.snpeff.snpEffect.testCases.integration.TestCasesIntegrationBase Maven / Gradle / Ivy

The newest version!
package org.snpeff.snpEffect.testCases.integration;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.snpeff.SnpEff;
import org.snpeff.fileIterator.VcfFileIterator;
import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Exon;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Genome;
import org.snpeff.interval.Marker;
import org.snpeff.interval.Markers;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Variant;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.snpEffect.Hgvs;
import org.snpeff.snpEffect.LossOfFunction;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.snpEffect.VariantEffect;
import org.snpeff.snpEffect.VariantEffect.EffectImpact;
import org.snpeff.snpEffect.VariantEffect.ErrorWarningType;
import org.snpeff.snpEffect.VariantEffects;
import org.snpeff.snpEffect.commandLine.SnpEffCmdBuild;
import org.snpeff.snpEffect.commandLine.SnpEffCmdEff;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryEmbl;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryGenBank;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryGff3;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryGtf22;
import org.snpeff.snpEffect.factory.SnpEffPredictorFactoryRefSeq;
import org.snpeff.util.Diff;
import org.snpeff.util.Gpr;
import org.snpeff.vcf.EffFormatVersion;
import org.snpeff.vcf.VcfEffect;
import org.snpeff.vcf.VcfEntry;

import junit.framework.Assert;

/**
 * Base class: Provides common methods used for testing
 */
public class TestCasesIntegrationBase {

	public static int SHOW_EVERY = 10;
	public static final String BASE_DIR = "tests";

	public boolean debug = false;
	public boolean verbose = false || debug;

	protected boolean ignoreErrors = false;
	protected boolean shiftHgvs; // Do or do not shift variants according to HGVS notation (for test cases that
									// were created before the feature was implemented)

	public String testsDir;
	protected String testType;
	protected List prefixes;

	public TestCasesIntegrationBase() {
		super();
		init();
		testType = "integration";

		prefixes = new LinkedList<>();
		prefixes.add("TestCases");
		prefixes.add("TestsCase");
		prefixes.add("Unit");
		prefixes.add("Integration");
	}

	/**
	 * Apply a variant to a transcript
	 */
	public Transcript applyTranscript(String genome, String trId, String vcfFileName) {
		// Load database
		SnpEffectPredictor sep = loadSnpEffectPredictor(genome, false);

		// Find transcript
		Transcript tr = sep.getGenome().getGenes().findTranscript(trId);
		if (tr == null)
			throw new RuntimeException("Could not find transcript ID '" + trId + "'");

		// Apply first variant
		VcfFileIterator vcf = new VcfFileIterator(vcfFileName);
		for (VcfEntry ve : vcf) {
			for (Variant var : ve.variants()) {
				Transcript trNew = tr.apply(var);
				if (debug)
					Gpr.debug(trNew);
				return trNew;
			}
		}

		throw new RuntimeException("Could not apply any variant!");
	}

	/**
	 * Build a genome
	 */
	public SnpEffectPredictor build(String genome) {
		String args[] = { "build", genome };
		SnpEff snpeff = new SnpEff(args);
		snpeff.setVerbose(verbose);
		SnpEffCmdBuild snpeffBuild = (SnpEffCmdBuild) snpeff.cmd();
		snpeffBuild.run();
		return snpeffBuild.getConfig().getSnpEffectPredictor();
	}

	/**
	 * Build a genome from a RefSeq file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildAndCompare(String genome, String refSeqFile, String fastaFile, String resultFile,
			boolean hideProtein) {
		String expectedResult = Gpr.readFile(resultFile).trim();

		// Build
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryRefSeq factory = new SnpEffPredictorFactoryRefSeq(config);
		factory.setFileName(refSeqFile);
		factory.setVerbose(verbose);

		// Set fasta file (or don't read sequences)
		if (fastaFile != null)
			factory.setFastaFile(fastaFile);
		else
			factory.setReadSequences(false);

		SnpEffectPredictor sep = factory.create();

		// Compare result
		String result = showTranscripts(sep.getGenome(), hideProtein).trim();
		if (verbose)
			System.out.println(result);
		Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result));

		return sep;
	}

	/**
	 * Build a genome from a embl file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildEmbl(String genome, String emblFile) {
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryEmbl sepEmbl = new SnpEffPredictorFactoryEmbl(config, emblFile);
		SnpEffectPredictor sep = sepEmbl.create();
		return sep;
	}

	public SnpEffectPredictor buildGeneBank(String genome, String genBankFile) {
		return buildGeneBank(genome, genBankFile, false);
	}

	/**
	 * Build a genome from a genbank file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildGeneBank(String genome, String genBankFile, boolean circularCorrectlargeGap) {
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryGenBank sepfg = new SnpEffPredictorFactoryGenBank(config, genBankFile);
		sepfg.setVerbose(verbose);
		sepfg.setCircularCorrectLargeGap(circularCorrectlargeGap);
		SnpEffectPredictor sep = sepfg.create();
		return sep;
	}

	/**
	 * Build a genome from a GFF3 file and compare results to 'expected' results
	 */
	public SnpEffectPredictor buildGff3AndCompare(String genome, String gff3File, String resultFile, boolean readSeqs,
			boolean createRandSequences) {

		// Build
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryGff3 fgff3 = new SnpEffPredictorFactoryGff3(config);
		fgff3.setVerbose(verbose);
		if (gff3File != null)
			fgff3.setFileName(gff3File);
		fgff3.setReadSequences(readSeqs);
		fgff3.setCreateRandSequences(createRandSequences);
		fgff3.setRandom(new Random(20140410)); // Note: we want consistent results in our test cases, so we always
												// initialize the random generator in the same way

		SnpEffectPredictor sep = fgff3.create();

		if (resultFile != null) {
			// Compare result
			String result = showTranscripts(sep.getGenome()).trim();
			String expectedResult = Gpr.readFile(resultFile).trim();

			// Remove spaces and compare
			String erNs = Gpr.noSpaces(expectedResult);
			String rNs = Gpr.noSpaces(result);
			if (verbose || !erNs.equals(rNs)) {
				System.out.println("Result:\n----------\n" + result + "\n----------\n");
				System.out.println("Expected (" + resultFile + "):\n----------\n" + expectedResult + "\n----------\n");
				System.out.println(new Diff(expectedResult, result));
			}
			Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result));
		}

		return sep;
	}

	/**
	 * Build a genome from a GTF file and compare results to 'expected' results
	 */
	public void buildGtfAndCompare(String genome, String gtf22, String fastaFile, String resultFile) {
		String expectedResult = Gpr.readFile(resultFile).trim();

		// Build
		Config config = new Config(genome, Config.DEFAULT_CONFIG_FILE);
		SnpEffPredictorFactoryGtf22 fgtf22 = new SnpEffPredictorFactoryGtf22(config);
		fgtf22.setFileName(gtf22);
		fgtf22.setVerbose(verbose);

		// Set fasta file (or don't read sequences)
		if (fastaFile != null)
			fgtf22.setFastaFile(fastaFile);
		else
			fgtf22.setReadSequences(false);

		SnpEffectPredictor sep = fgtf22.create();

		// Compare result
		String result = showTranscripts(sep.getGenome(), true).trim();
		if (verbose)
			System.out.println(result);
		Assert.assertEquals(Gpr.noSpaces(expectedResult), Gpr.noSpaces(result));
	}

	Marker cdsMarker(Transcript tr) {
		int start = tr.isStrandPlus() ? tr.getCdsStart() : tr.getCdsEnd();
		int end = tr.isStrandPlus() ? tr.getCdsEnd() : tr.getCdsStart();
		return new Marker(tr.getParent(), start, end, false, "");
	}

	void checkAnnotations(SnpEffectPredictor sep, String chr, int pos, String ref, String alt, String hgvsP,
			String hgvsC, String eff) {
		Genome genome = sep.getGenome();
		Variant var = new Variant(genome.getChromosome(chr), pos, ref, alt, "");
		VariantEffects varEffs = sep.variantEffect(var);
		for (VariantEffect varEff : varEffs) {
			VcfEffect vcfEff = new VcfEffect(varEff, EffFormatVersion.FORMAT_ANN_1);
			if (verbose)
				System.out.println("\t" + vcfEff);

			Assert.assertEquals(hgvsP, vcfEff.getHgvsProt());
			Assert.assertEquals(hgvsC, vcfEff.getHgvsDna());
			Assert.assertEquals(eff, vcfEff.getEffectsStrSo());
		}
	}

	/**
	 * Check HGVS annotations
	 */
	public void checkHgvs(String genome, String vcfFile, int minCheck) {
		List list = snpEffect(genome, vcfFile, null);

		int countCheck = 0;
		for (VcfEntry ve : list) {
			if (verbose)
				System.out.println(ve);

			String transcriptId = ve.getInfo("TR");
			if (verbose)
				System.out.println("\tLooking for transcript '" + transcriptId + "'");
			for (VcfEffect veff : ve.getVcfEffects()) {

				if (veff.getTranscriptId().equals(transcriptId)) {
					if (verbose) {
						System.out.println("\t" + veff);
						System.out.println("\t\tHGVS.p: " + veff.getHgvsP() + "\t\tHGVS.c: " + veff.getHgvsC());
					}

					// Compare against expected result
					String expectedHgvsC = ve.getInfo("HGVSC");
					if (expectedHgvsC != null) {
						String actualHgvsC = veff.getHgvsC();
						Assert.assertEquals("HGVS.c mismatch", expectedHgvsC, actualHgvsC);
						countCheck++;
					}

					String expectedHgvsP = ve.getInfo("HGVSP");
					if (expectedHgvsP != null) {
						String actualHgvsP = veff.getHgvsP();
						Assert.assertEquals("HGVS.p mismatch", expectedHgvsP, actualHgvsP);
						countCheck++;
					}
				}
			}
		}

		if (verbose)
			System.out.println("Total checked: " + countCheck);
		Assert.assertTrue("Too few variants checked: " + countCheck, countCheck >= minCheck);
	}

	// Check variant's HGVS.c nomenclature
	void checkHgvscForTr(List list, String trId) {
		boolean found = false;
		for (VcfEntry ve : list) {
			if (verbose)
				System.out.println(ve);

			for (VcfEffect veff : ve.getVcfEffects()) {
				if (veff.getTranscriptId().equals(trId)) {
					if (verbose) {
						System.out.println("\t" + veff);
						System.out.println("\t\tHGVS.c: " + veff.getHgvsC());
					}

					// Compare against expected result
					String expectedHgvsC = ve.getInfo("HGVSC");
					String actualHgvsC = veff.getHgvsC();
					Assert.assertEquals(expectedHgvsC, actualHgvsC);
					found = true;
				}
			}
		}

		Assert.assertTrue("No annotations found for transcript " + trId, found);
	}

	void checkMotif(String genomeVer, String vcfFile, String effectDetails, EffectImpact impact, boolean useAnn) {
		String args[] = { "-classic", "-motif", "-ud", "0", genomeVer, vcfFile };
		String argsAnn[] = { "-ud", "0", genomeVer, vcfFile };
		if (useAnn)
			args = argsAnn;

		SnpEff cmd = new SnpEff(args);

		// Run
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);
		List vcfEntries = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		// Check results
		int numNextProt = 0;
		for (VcfEntry ve : vcfEntries) {
			for (VcfEffect veff : ve.getVcfEffects()) {
				if (verbose)
					System.out.println("\t" + veff.getVcfFieldString());

				// Is it motif?
				if (veff.getEffectType() == EffectType.MOTIF) {

					boolean ok = false;
					if (useAnn) {
						// Motif ID and impact match?
						ok = effectDetails.equals(veff.getFeatureId()) && (impact == veff.getImpact());
					} else {
						// Motif ID and impact match?
						ok = effectDetails.equals(veff.getEffectDetails()) && (impact == veff.getImpact());
					}

					if (ok)
						numNextProt++;
				}
			}
		}

		Assert.assertEquals(1, numNextProt);
	}

	void checkNextProt(String genomeVer, String vcfFile, String effectDetails, EffectImpact impact, boolean useAnn) {
		String args[] = { "-classic", "-nextProt", genomeVer, vcfFile };
		String argsAnn[] = { genomeVer, vcfFile };
		if (useAnn)
			args = argsAnn;

		SnpEff cmd = new SnpEff(args);
		cmd.setVerbose(verbose);
		cmd.setSupressOutput(!verbose);

		// Run
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		List vcfEntries = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		// Check results
		int numNextProt = 0;
		for (VcfEntry ve : vcfEntries) {
			for (VcfEffect veff : ve.getVcfEffects()) {

				if ((veff.hasEffectType(EffectType.NEXT_PROT)) // Is it nextProt?
						&& (impact == veff.getImpact())) // Is impact OK?
				{
					// Are details OK?
					boolean match = false;
					if (!useAnn && effectDetails.equals(veff.getEffectDetails()))
						match = true;
					if (useAnn && effectDetails.equals(veff.getFeatureType()))
						match = true;

					if (match)
						numNextProt++;
				}

				if (verbose) //
					System.out.println("\t" + veff //
							+ "\n\t\tEffect            : " + veff.getVcfFieldString() //
							+ "\n\t\tEffect type       : " + veff.getEffectType() //
							+ "\n\t\tEffect details    : '" + veff.getEffectDetails() + "'" //
							+ "\n\t\tEffect impact     : '" + veff.getImpact() + "'" //
							+ "\n\t\tExpected details  : '" + effectDetails + "'" //
							+ "\n\t\tExpected impact   : '" + impact + "'" //
							+ "\n\t\tCount matches     : " + numNextProt //
							+ "\thasEffectType : " + veff.hasEffectType(EffectType.NEXT_PROT) //
							+ "\tmatch details : " + effectDetails.equals(veff.getEffectDetails()) //
							+ "\tmatch impact: " + (impact == veff.getImpact()) //
					);
			}
		}

		Assert.assertEquals(1, numNextProt);
	}

	/**
	 * Check that NMD works for a given transcript
	 */
	void checkNmd(Config config, Gene gene, Transcript tr) {
		int pos = 0;
		boolean isNmd[] = new boolean[tr.cds().length()];
		HashSet codingExons = new HashSet<>();

		StringBuilder nmdStr = new StringBuilder();
		StringBuilder nmdStrSimple = new StringBuilder();
		for (Exon exon : tr.sortedStrand()) {
			int step = exon.isStrandPlus() ? 1 : -1;
			int from = exon.isStrandPlus() ? exon.getStart() : exon.getEnd();

			for (int expos = from; (exon.getStart() <= expos) && (expos <= exon.getEnd()); expos += step) {
				// Not in UTR? => Test
				if (!tr.isUtr(expos)) {
					codingExons.add(exon);

					// Create a seqChange
					// SeqChange seqChange = new SeqChange(tr.getChromosome(), expos, expos, "");
					Variant variant = new Variant(tr.getChromosome(), expos, "A", "C"); // Create a seqChange

					// Create a STOP_GAIN effect
					VariantEffect variantEffect = new VariantEffect(variant);
					variantEffect.set(exon, EffectType.STOP_GAINED, EffectType.STOP_GAINED.effectImpact(), "");
					LinkedList changeEffects = new LinkedList<>();
					changeEffects.add(variantEffect);

					// Create a LOF object and analyze the effect
					LossOfFunction lof = new LossOfFunction(config, changeEffects);
					isNmd[pos] = lof.isNmd();

					nmdStr.append(isNmd[pos] ? '+' : '.');
					nmdStrSimple.append(isNmd[pos] ? '+' : '.');
					pos++;
				} else
					nmdStr.append('U');
			}
			nmdStr.append('\t');
			nmdStrSimple.append('\t');
		}

		// Show string
		if (verbose)
			System.err.println(nmdStr);
		if (debug)
			System.err.println("\tCoding Exons:" + codingExons.size());

		// ---
		// Check that NMP prediction is 'correct'
		// ---
		// We need a splice event in the coding part
		if (codingExons.size() > 1) {
			// Use the 'simple' string to check
			StringBuilder sb = new StringBuilder();
			String ex[] = nmdStrSimple.toString().split("\t");
			for (int i = 0; i < (ex.length - 1); i++)
				sb.append(ex[i]);

			// Check that last 50 bases are '.'
			String simpleNoLast = sb.toString();
			int lastNmd = Math.max(0, simpleNoLast.length() - LossOfFunction.MND_BASES_BEFORE_LAST_JUNCTION);
			String points = simpleNoLast.substring(lastNmd) + ex[ex.length - 1];
			String plus = simpleNoLast.substring(0, lastNmd);

			if (debug)
				System.err.println("\tPoints: " + points + "\n\tPlus :" + plus);

			// Check
			Assert.assertEquals(0, points.replace('.', ' ').trim().length());
			Assert.assertEquals(0, plus.replace('+', ' ').trim().length());
		}
	}

	public void checkNoChange(String args[]) {
		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff snpeff = (SnpEffCmdEff) cmd.cmd();
		snpeff.setVerbose(verbose);
		snpeff.setSupressOutput(!verbose);
		List vcfEntries = snpeff.run(true);

		for (VcfEntry ve : vcfEntries) {
			if (verbose)
				System.out.println(ve);
			for (VcfEffect veff : ve.getVcfEffects()) {
				EffectImpact imp = veff.getImpact();
				if (verbose)
					System.out.println("\t" + imp + "\t" + veff);
				Assert.assertEquals(EffectImpact.MODIFIER, imp);
			}
		}
	}

	/**
	 * Run a predictor and check if the expected warnings appear
	 */
	public void checkTranscriptError(String args[], ErrorWarningType warningType) {
		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff snpeff = (SnpEffCmdEff) cmd.cmd();
		snpeff.setVerbose(verbose);
		snpeff.setSupressOutput(!verbose);
		List vcfEntries = snpeff.run(true);

		boolean hasWarning = false;
		for (VcfEntry ve : vcfEntries) {
			if (verbose)
				System.out.println(ve);
			for (VcfEffect veff : ve.getVcfEffects()) {
				EffectImpact imp = veff.getImpact();
				if (verbose)
					System.out.println("\t" + imp + "\t" + veff);

				// Check if the warning type we expect is there
				if (veff.getErrorsWarning() != null)
					hasWarning |= veff.getErrorsWarning().indexOf(warningType.toString()) >= 0;
			}
		}

		Assert.assertEquals(true, hasWarning);
	}

	public void compareHgvs(String genome, String vcfFileName) {
		compareHgvs(genome, vcfFileName, true);
	}

	public void compareHgvs(String genome, String vcfFileName, boolean compareProt) {
		// Create SnpEff
		String args[] = { genome, vcfFileName };
		SnpEffCmdEff snpeff = new SnpEffCmdEff();
		snpeff.parseArgs(args);
		snpeff.setDebug(debug);
		snpeff.setVerbose(verbose);
		snpeff.setSupressOutput(!verbose);
		snpeff.setUpDownStreamLength(0);
		snpeff.setShiftHgvs(shiftHgvs);
		snpeff.setFormatVersion(EffFormatVersion.FORMAT_EFF_4);

		// Run & get result (single line)
		List results = snpeff.run(true);
		Set trNotFoundSet = new HashSet<>();

		// Make sure entries are annotated as expected
		int countOkC = 0, countErrC = 0, countOkP = 0, countErrP = 0, countTrFound = 0;
		for (VcfEntry ve : results) {
			// Extract expected HGVS values
			String hgvsCexp = ve.getInfo("HGVS_C") != null ? ve.getInfo("HGVS_C") : "";
			String trIdC = Hgvs.parseTranscript(hgvsCexp);
			hgvsCexp = Hgvs.removeTranscript(hgvsCexp);

			String hgvsPexp = "";
			String trIdP = "";
			if (compareProt) {
				hgvsPexp = ve.getInfo("HGVS_P") != null ? ve.getInfo("HGVS_P") : "";
				trIdP = Hgvs.parseTranscript(hgvsPexp);
				hgvsPexp = Hgvs.removeTranscript(hgvsPexp);
			}

			if (verbose) {
				System.out.println(ve);
				if (trIdC != null)
					System.out.println("\tExpected HGVS_C: " + trIdC + ":" + hgvsCexp);
				if (trIdP != null)
					System.out.println("\tExpected HGVS_P: " + trIdP + ":" + hgvsPexp + "\n");
			}

			// Check all effects
			boolean okC = false, okP = false, trFound = false;
			for (VcfEffect veff : ve.getVcfEffects()) {
				// Parse calculated HGVS values
				String trId = veff.getTranscriptId();
				String hgvsCactual = veff.getHgvsDna() != null ? veff.getHgvsDna() : "";
				String hgvsPactual = veff.getHgvsProt() != null ? veff.getHgvsProt() : "";

				// Compare results for HGVS_DNA
				boolean foundC = false, foundP = false;
				if (trId != null && trId.equals(trIdC)) {
					trFound = true;
					if (!hgvsCexp.equals(hgvsCactual)) {
						if (!ignoreErrors)
							Assert.assertEquals(hgvsCexp, hgvsCactual);
						countErrC++;
					} else {
						okC = foundC = true;
						countOkC++;
					}
				}

				// Compare results for HGVS_PROT
				if (compareProt && trId != null && trId.equals(trIdP)) {
					if (!hgvsPexp.equals(hgvsPactual)) {
						if (!ignoreErrors)
							Assert.assertEquals(hgvsPexp, hgvsPactual);
						countErrP++;
					} else {
						okP = foundP = true;
						countOkP++;
					}
				}

				if (verbose) {
					System.out.println("\t" + veff //
							+ "\n\t\tEFF    : " + veff.getEffectsStr() //
							+ "\n\t\tHGVS_C : " + trId + ":" + hgvsCactual + "\t\tExpected: " + trIdC + ":" + hgvsCexp
							+ "\t" + (foundC ? "OK" : "NO") //
							+ (compareProt
									? "\n\t\tHGVS_P : " + trId + ":" + hgvsPactual + "\t\tExpected: " + trIdP + ":"
											+ hgvsPexp + "\t" + (foundP ? "OK" : "NO")
									: "") //
							+ "\n");
				}

			}

			if (!trFound) {
				System.out.println("Transcript '" + trIdC + "' not found.");
				countTrFound++;
				trNotFoundSet.add(trIdC);
			}

			if (!ignoreErrors) {
				Assert.assertTrue("HGVS (DNA) not found: '" + hgvsCexp + "'", okC);
				if (!hgvsPexp.isEmpty())
					Assert.assertTrue("HGVS (Protein) not found: '" + hgvsPexp + "'", okP);
			} else {
				// Show errors
				if (!okC)
					System.err.println("HGVS (DNA) not found : '" + hgvsCexp + "', vcf entry:\t" + ve);
				if (compareProt && !okP)
					System.err.println("HGVS (Prot) not found: '" + hgvsPexp + "', vcf entry:\t" + ve);
			}
		}

		if (verbose || ignoreErrors) {
			System.out.println("Count OKs   :\tHGVS (DNA): " + countOkC + "\tHGVS (Protein): " + countOkP);
			System.out.println("Count Errors:\tHGVS (DNA): " + countErrC + "\tHGVS (Protein): " + countErrP);
			System.out.println("Transcripts not found:\t" + countTrFound + ", unique: " + trNotFoundSet.size() + "\n"
					+ trNotFoundSet);
		}
	}

	/**
	 * Compare with results from ENSEMBL's VEP on transcript ENST00000268124
	 */
	public void compareVep(String genome, String vcf, String trId) {
		String args[] = { "-classic", genome, vcf };

		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setSupressOutput(!verbose);

		List vcfEnties = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		for (VcfEntry ve : vcfEnties) {

			StringBuilder msg = new StringBuilder();

			// Check effects
			boolean ok = false;
			for (VcfEffect veff : ve.getVcfEffects()) {
				// Find transcript
				if (veff.getTranscriptId() != null && veff.getTranscriptId().equals(trId)) {
					// Check that reported effect is the same
					String vep = ve.getInfo("EFF_V");
					String eff = veff.getEffectType().toString();

					if (vep.equals(eff))
						ok = true;
					else {
						if (vep.equals("CODON_INSERTION") && eff.equals("CODON_CHANGE_PLUS_CODON_INSERTION"))
							ok = true; // OK. I consider these the same
						else if (vep.equals("STOP_GAINED,CODON_INSERTION") && eff.equals("STOP_GAINED"))
							ok = true; // OK. I consider these the same
						else if (eff.equals("SPLICE_SITE_REGION"))
							ok = true; // OK. I'm not checking these
						else {
							String line = "\n" + ve + "\n\tSnpEff:" + veff + "\n\tVEP   :" + ve.getInfo("EFF_V") + "\t"
									+ ve.getInfo("AA") + "\t" + ve.getInfo("CODON") + "\n";
							msg.append(line);
						}
					}
				}
			}

			if (!ok)
				throw new RuntimeException(msg.toString());
		}
	}

	/**
	 * Benchmarking: Compare with results from ENSEMBL's VEP
	 */
	public void compareVepSO(String genome, String vcf, String trId) {
		String args[] = { "-classic", "-sequenceOntology", genome, vcf };

		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);

		List vcfEnties = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		for (VcfEntry ve : vcfEnties) {
			// Create a set of found variants
			HashSet vepSos = new HashSet<>();
			String vepSo = ve.getInfo("SO");
			for (String so : vepSo.split(",")) {
				if (so.equals("feature_elongation"))
					so = null; // This one is useless, if the variant is an insertion, it obviously causes an
								// elongation
				else if (so.equals("feature_truncation"))
					so = null; // This one is useless, if the variant is an insertion, it obviously causes an
								// elongation

				if (so != null)
					vepSos.add(so);
			}

			// Get effects for transcript 'trId'
			HashSet effSos = new HashSet<>();
			List veffs = ve.getVcfEffects();
			for (VcfEffect veff : veffs) {
				if (veff.getTranscriptId().equals(trId)) {
					String effs = veff.getEffString();

					for (String eff : effs.split("\\" + EffFormatVersion.EFFECT_TYPE_SEPARATOR_OLD)) {
						// OK. I consider these the same
						if (eff.equals("5_prime_UTR_premature_start_codon_gain_variant"))
							eff = "5_prime_UTR_variant";
						if (eff.equals("disruptive_inframe_insertion"))
							eff = "inframe_insertion";
						if (eff.equals("conservative_inframe_insertion"))
							eff = "inframe_insertion";
						if (eff.equals("start_lost"))
							eff = "initiator_codon_variant";
						effSos.add(eff);
					}
				}
			}

			// Make sure both sets are equal
			boolean error = !effSos.containsAll(vepSos);

			if (error) {
				String msg = "\n" + ve;
				msg += "\n\tSnpEff    : ";
				for (String e : effSos)
					msg += e + " ";

				msg += "\n\tVEP       : ";
				for (String e : vepSos)
					msg += e + " ";

				msg += "\n\tMarker    : " + ve.getChromosomeName() + ":" + ve.getStart() + "-" + ve.getEnd();
				Gpr.debug(msg);
				throw new RuntimeException(msg);
			}

		}
	}

	/**
	 * Find a marker that intersects variant
	 */
	Marker findMarker(SnpEffectPredictor sep, Variant variant, EffectType effectType, Transcript tr,
			Marker markerFilter) {
		Markers markers = sep.queryDeep(variant);

		for (Marker m : markers) {
			Marker mfilter = null;
			if (markerFilter != null)
				mfilter = (Marker) m.findParent(markerFilter.getClass());

			Transcript mtr = (Transcript) m.findParent(Transcript.class);

			if (debug)
				Gpr.debug("\tLooking for '" + effectType + "' in '"
						+ (markerFilter != null ? markerFilter.getId() : "NULL") //
						+ "', class: " + (markerFilter != null ? markerFilter.getClass().getSimpleName() : "") //
						+ "\t\tFound: '" + m.getType() + "', mfilter: " + (mfilter != null ? mfilter.getId() : "NULL") //
						+ ", parent: " + m.getParent().getClass().getSimpleName() //
				);

			if ((m.getType() == effectType) && (mfilter != null) && (mtr != null)) {
				if (markerFilter != null) {
					// Id filter?
					if (mfilter.getId().equals(markerFilter.getId()))
						return m;
				} else if (tr != null) {
					// Transcript filter?
					if (mtr.getId().equals(tr.getId()))
						return m;
				} else
					return m; // No exon reference? => just return this
			}
		}

		throw new RuntimeException("Cannot find '" + effectType + "' "
				+ (markerFilter != null ? "for exon " + markerFilter.getId() : "") + ", seqChange: " + variant);
	}

	public void init() {
		// Nothing done
	}

	/**
	 * Load predictor and create interval forest
	 */
	public SnpEffectPredictor loadSnpEffectPredictor(String genome, boolean build) {
		Config config = new Config(genome);
		SnpEffectPredictor sep = SnpEffectPredictor.load(config);
		sep.createGenomicRegions();
		if (build)
			sep.buildForest();
		return sep;
	}

	public String path(String fileName) {
		return BASE_DIR + "/" + testType + "/" + pathClassName() + "/" + fileName;
	}

	protected String pathClassName() {
		String sname = this.getClass().getSimpleName();
		for (String prefix : prefixes)
			if (sname.startsWith(prefix))
				sname = sname.substring(prefix.length());
		return sname.substring(0, 1).toLowerCase() + sname.substring(1);
	}

	/**
	 * Used to migrate test files in old path
	 * 
	 * @param fileName
	 * @return
	 */
	public String pathMigrate(String fileName) {
		String dir = BASE_DIR + "/" + testType + "/" + pathClassName();
		String path = dir + "/" + fileName;
		String oldPath = BASE_DIR + "/old/" + fileName;

		if (!Gpr.exists(dir)) {
			Gpr.debug("File migration: Creating dir:" + dir);
			File d = new File(dir);
			d.mkdir();
		}

		if (!Gpr.exists(oldPath)) {
			if (Gpr.exists(oldPath + ".gz")) {
				oldPath += ".gz";
				path += ".gz";
			} else
				Gpr.debug("File migration: Cannot find original file:" + oldPath);
		}

		if (!Gpr.exists(path) && Gpr.exists(oldPath)) {
			Gpr.debug("File migration: Moving file:" + path);
			try {
				FileUtils.moveFile(new File(oldPath), new File(path));
			} catch (IOException e) {
				throw new RuntimeException("Cannot copy files:\n\tsrc: " + oldPath + "\n\tdst: " + path, e);

			}
		}
		return path;
	}

	public String showTranscripts(Genome genome) {
		return showTranscripts(genome, false);
	}

	/**
	 * Show a genome in a 'standard' way
	 */
	public String showTranscripts(Genome genome, boolean hideProtein) {
		StringBuilder sb = new StringBuilder();

		// Genome
		sb.append(genome.getVersion() + "\n");

		// Chromosomes
		for (Chromosome chr : genome)
			sb.append(chr + "\n");

		// Genes
		ArrayList genes = new ArrayList<>();

		// Sort genes
		for (Gene gene : genome.getGenes())
			genes.add(gene);
		Collections.sort(genes);

		// We don't compare protein codding in this test
		// Show genes
		for (Gene gene : genes) {

			if (hideProtein) { // Don't show protein information
				for (Transcript tr : gene.sortedStrand())
					tr.setProteinCoding(false);
			}

			sb.append(gene);

			for (Transcript tr : gene.sortedStrand()) {
				sb.append("\t\tCDS '" + tr.getId() + "': " + tr.cds() + "\n");
			}
		}

		return sb.toString();
	}

	/**
	 * Calculate snp effect for an input VCF file
	 */
	public List snpEffect(String genome, String vcfFile, String otherArgs[]) {
		return snpEffect(genome, vcfFile, otherArgs, EffFormatVersion.FORMAT_EFF_4);
	}

	/**
	 * Calculate snp effect for an input VCF file
	 */
	public List snpEffect(String genome, String vcfFile, String otherArgs[],
			EffFormatVersion effFormatVersion) {
		// Arguments
		ArrayList args = new ArrayList<>();
		if (otherArgs != null) {
			for (String a : otherArgs)
				args.add(a);
		}
		args.add(genome);
		args.add(vcfFile);

		SnpEff cmd = new SnpEff(args.toArray(new String[0]));
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);
		if (effFormatVersion != null)
			cmdEff.setFormatVersion(effFormatVersion);

		// Run command
		List list = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		return list;
	}

	/**
	 * Calculate snp effect for a list of snps using cancer samples
	 */
	public void snpEffectCancer(String vcfFile, String txtFile, String genome, boolean classic, String hgsvP,
			String hgvsC, String genotype, String trId) {
		// Create command
		List argList = new ArrayList<>();
		argList.add("-cancer");
		argList.add("-hgvs");
		if (classic)
			argList.add("-classic");
		if (txtFile != null) {
			argList.add("-cancerSamples");
			argList.add(txtFile);
		}
		argList.add(genome);
		argList.add(vcfFile);

		String args[] = argList.toArray(new String[0]);
		SnpEff cmd = new SnpEff(args);
		SnpEffCmdEff cmdEff = (SnpEffCmdEff) cmd.cmd();
		cmdEff.setVerbose(verbose);
		cmdEff.setSupressOutput(!verbose);

		// Run command
		List list = cmdEff.run(true);
		Assert.assertTrue("Errors while executing SnpEff", cmdEff.getTotalErrs() <= 0);

		// Find AA change for a genotype
		boolean found = false;
		for (VcfEntry vcfEntry : list) {
			if (debug)
				System.err.println(vcfEntry);
			for (VcfEffect eff : vcfEntry.getVcfEffects()) {
				if (debug)
					System.err.println("\t" + eff + "\n\t\tHGVS.p : " + eff.getHgvsProt() + "\n\t\tHGVS.c : "
							+ eff.getHgvsDna() + "\n\t\tGenotype: " + eff.getGenotype());
				if (trId != null && !trId.equals(eff.getTranscriptId()))
					continue;
				if (genotype.equals(eff.getGenotype())) {
					if (hgsvP != null)
						Assert.assertEquals(hgsvP, eff.getHgvsP());
					if (hgvsC != null)
						Assert.assertEquals(hgvsC, eff.getHgvsDna());
					found = true;
				}
			}
		}

		// Not found? Error
		if (!found)
			throw new RuntimeException("Genotype '" + genotype + "' not found.");
	}

	/**
	 * Create change effects
	 */
	LinkedList variantEffects(Variant variant, EffectType effectType, Marker marker) {
		VariantEffect changeEffect = new VariantEffect(variant);
		changeEffect.set(marker, effectType, effectType.effectImpact(), "");
		LinkedList variantEffects = new LinkedList<>();
		variantEffects.add(changeEffect);
		return variantEffects;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy