All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.snpeff.snpEffect.testCases.integration.TestCasesIntegrationApply Maven / Gradle / Ivy

The newest version!
package org.snpeff.snpEffect.testCases.integration;

import java.util.Random;

import org.junit.Assert;
import org.junit.Test;
import org.snpeff.interval.Exon;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Genome;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Variant;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.util.Gpr;
import org.snpeff.util.GprSeq;

/**
 * Test 'apply' method (apply variant to marker)
 *
 * @author pcingola
 */
public class TestCasesIntegrationApply extends TestCasesIntegrationBase {

	public static int SHOW_EVERY = 10;

	public TestCasesIntegrationApply() {
		super();
	}

	/**
	 * Test 'apply' on exons (test sequence changes)
	 * Only using SNPs variants
	 */
	@Test
	public void test_01_Exon_SNPs() {
		Gpr.debug("Test");
		Config config = new Config("testHg3765Chr22");
		SnpEffectPredictor snpEffectPredictor = config.loadSnpEffectPredictor();

		Random random = new Random(20130214);

		// All genes
		Genome genome = snpEffectPredictor.getGenome();
		int count = 1;
		for (Gene g : genome.getGenes()) {

			if (g.isProteinCoding()) { // Only protein coding ones...
				if (verbose) System.out.println(g.getGeneName());

				// All transcripts
				for (Transcript t : g) {
					if (verbose) System.out.println("\t" + t.getId());

					// All exons
					for (Exon ex : t) {
						// Positive strand sequence
						String seq = ex.getSequence();
						seq = ex.isStrandPlus() ? seq : GprSeq.reverseWc(seq);

						// Skip some exons, otherwise test takes too much time
						if (random.nextInt(10) > 1) continue; // Randomly some exons
						if (ex.size() > 1000) continue; // Skip exon if too long

						if (verbose) System.out.println("\t\t" + ex.getId() + "\tStrand: " + ex.getStrand() + "\tSize: " + ex.size());

						// Change each base
						for (int i = ex.getStart(), idx = 0; i < ex.getEnd(); i++, idx++) {
							// Create a fake SNP. Random REF and ALT bases
							char ref = Character.toUpperCase(seq.charAt(idx));
							char alt = ref;
							do {
								alt = GprSeq.randBase(random);
							} while (ref == alt);

							// Resulting sequence
							String altStr = alt + "";
							String newSeq = seq.substring(0, idx) + altStr + seq.substring(idx + 1);
							newSeq = ex.isStrandPlus() ? newSeq : GprSeq.reverseWc(newSeq);
							newSeq = newSeq.toLowerCase();

							Variant variant = new Variant(t.getChromosome(), i, ref + "", alt + "", "");

							Exon exNew = ex.apply(variant);

							if (!exNew.getSequence().equals(newSeq)) throw new RuntimeException("Error:" //
									+ "\n\t\tVariant   : " + variant //
									+ "\n\t\tOriginal  : " + ex //
									+ "\n\t\tNew       : " + exNew //
									+ "\n\t\tNew seq   : " + newSeq //
							);

							Gpr.showMark(count++, 1000);
						}
					}
				}
			}
		}
		System.err.println("");
	}

	/**
	 * Test 'apply' on exons (test sequence changes)
	 * Only using insertion variants
	 */
	@Test
	public void test_02_Exon_INS() {
		Gpr.debug("Test");
		Config config = new Config("testHg3765Chr22");
		SnpEffectPredictor snpEffectPredictor = config.loadSnpEffectPredictor();

		Random random = new Random(20130214);

		// All genes
		Genome genome = snpEffectPredictor.getGenome();
		int count = 1;
		for (Gene g : genome.getGenes()) {

			if (g.isProteinCoding()) { // Only protein coding ones...
				if (verbose) System.out.println(g.getGeneName());

				// All transcripts
				for (Transcript t : g) {
					if (verbose) System.out.println("\t" + t.getId());

					// All exons
					for (Exon ex : t) {
						// Positive strand sequence
						String seq = ex.getSequence();
						seq = ex.isStrandPlus() ? seq : GprSeq.reverseWc(seq);

						// Skip some exons, otherwise test takes too much time
						if (random.nextInt(10) > 1) continue; // Randomly some exons
						if (ex.size() > 1000) continue; // Skip exon if too long

						if (verbose) System.out.println("\t\t" + ex.getId() + "\tStrand: " + ex.getStrand() + "\tSize: " + ex.size());

						// Change each base
						for (int i = ex.getStart(), idx = 0; i < ex.getEnd(); i++, idx++) {
							// Create a fake INS.

							// Random ALT
							int insLen = 1 + random.nextInt(8);
							StringBuilder altsb = new StringBuilder();
							for (int j = 0; j < insLen; j++)
								altsb.append(GprSeq.randBase(random));

							// Resulting sequence
							String newSeq;
							if (idx > 0) newSeq = seq.substring(0, idx) + altsb.toString() + seq.substring(idx);
							else newSeq = altsb.toString() + seq;
							newSeq = ex.isStrandPlus() ? newSeq : GprSeq.reverseWc(newSeq);
							newSeq = newSeq.toLowerCase();

							Variant variant = new Variant(t.getChromosome(), i, "", altsb.toString(), "");
							if (debug) Gpr.debug("variant: " + variant.getVariantType() + "\t" + variant);

							Exon exNew = ex.apply(variant);

							if (!exNew.getSequence().equals(newSeq)) {
								String msg = "Error:" //
										+ "\n\t\tIndex               : " + idx //
										+ "\n\t\tVariant             : " + variant //
										+ "\n\t\tOriginal            : " + ex //
										+ "\n\t\tSequence (expected) : " + newSeq + "'" //
										+ "\n\t\tSequence            : " + exNew.getSequence() + "'" //
										;
								System.err.println(msg);
								throw new RuntimeException(msg);
							}

							Assert.assertEquals(newSeq, exNew.getSequence());

							Gpr.showMark(count++, 1000);
						}
					}
				}
			}
		}
		System.err.println("");
	}

	/**
	 * Test 'apply' on exons (test sequence changes)
	 * Only using deletions variants
	 */
	@Test
	public void test_03_Exon_DEL() {
		Gpr.debug("Test");
		Config config = new Config("testHg3765Chr22");
		SnpEffectPredictor snpEffectPredictor = config.loadSnpEffectPredictor();

		Random random = new Random(20130214);

		// All genes
		Genome genome = snpEffectPredictor.getGenome();
		int count = 1;
		for (Gene g : genome.getGenes()) {

			if (g.isProteinCoding()) { // Only protein coding ones...
				if (verbose) System.out.println(g.getGeneName());

				// All transcripts
				for (Transcript t : g) {
					if (verbose) System.out.println("\t" + t.getId());

					// All exons
					for (Exon ex : t) {
						// Positive strand sequence
						String seq = ex.getSequence();
						seq = ex.isStrandPlus() ? seq : GprSeq.reverseWc(seq);

						// Skip some exons, otherwise test takes too much time
						if (random.nextInt(10) > 1) continue; // Randomly some exons
						if (ex.size() > 1000) continue; // Skip exon if too long

						if (verbose) System.out.println("\t\t" + ex.getId() + "\tStrand: " + ex.getStrand() + "\tSize: " + ex.size());

						// Change each base
						for (int i = ex.getStart(), idx = 0; i < ex.getEnd(); i++, idx++) {
							// Create a fake DEL:  Random REF (since it's a deletion, alt="")
							int delLen = 1 + random.nextInt(8);
							int end = idx + delLen;
							String ref = end < seq.length() ? seq.substring(idx, end) : seq.substring(idx);

							// Resulting sequence
							String newSeq = "";
							if (idx > 0) newSeq = seq.substring(0, idx);
							newSeq += (end < seq.length() ? seq.substring(end) : "");

							newSeq = ex.isStrandPlus() ? newSeq : GprSeq.reverseWc(newSeq);
							newSeq = newSeq.toLowerCase();

							Variant variant = new Variant(t.getChromosome(), i, ref, "", "");
							if (debug) Gpr.debug("variant: " + variant.getVariantType() + "\t" + variant);

							Exon exNew = ex.apply(variant);

							String newExSeq = (exNew != null ? exNew.getSequence() : "");
							Assert.assertEquals(newSeq, newExSeq);
							if (!newExSeq.equals(newSeq)) {
								String msg = "Error:" //
										+ "\n\t\tVariant   : " + variant //
										+ "\n\t\tOriginal  : " + ex //
										+ "\n\t\tNew       : " + exNew //
										+ "\n\t\tNew seq   : " + newSeq //
										;
								System.err.println(msg);
								throw new RuntimeException(msg);
							}

							Gpr.showMark(count++, 1000);

						}
					}
				}
			}
		}
		System.err.println("");

	}

	/**
	 * Test 'apply' on exons (test sequence changes)
	 * Only using deletions variants
	 */
	@Test
	public void test_04_Exon_MNP() {
		Gpr.debug("Test");
		Config config = new Config("testHg3765Chr22");
		SnpEffectPredictor snpEffectPredictor = config.loadSnpEffectPredictor();

		Random random = new Random(20130214);

		// All genes
		Genome genome = snpEffectPredictor.getGenome();
		int count = 1;
		for (Gene g : genome.getGenes()) {

			if (g.isProteinCoding()) { // Only protein coding ones...
				if (verbose) System.out.println(g.getGeneName());

				// All transcripts
				for (Transcript t : g) {
					if (verbose) System.out.println("\t" + t.getId());

					// All exons
					for (Exon ex : t) {
						// Positive strand sequence
						String seq = ex.getSequence();
						seq = ex.isStrandPlus() ? seq : GprSeq.reverseWc(seq);

						// Skip some exons, otherwise test takes too much time
						if (random.nextInt(10) > 1) continue; // Randomly some exons
						if (ex.size() > 1000) continue; // Skip exon if too long

						if (verbose) System.out.println("\t\t" + ex.getId() + "\tStrand: " + ex.getStrand() + "\tSize: " + ex.size());

						// Change each base
						for (int i = ex.getStart(), idx = 0; i < ex.getEnd(); i++, idx++) {
							// Create a fake MNP. Random REF and ALT bases
							int len = random.nextInt(9) + 1;
							int end = idx + len;

							StringBuilder altsb, refsb;
							while (true) {
								altsb = new StringBuilder();
								refsb = new StringBuilder();
								for (int j = 0; j < len; j++) {
									if ((idx + j) < seq.length()) refsb.append(seq.charAt(idx + j));
									else refsb.append(GprSeq.randBase(random));
									altsb.append(GprSeq.randBase(random));
								}

								if (refsb.length() != altsb.length()) throw new RuntimeException("This should never happen!");

								// Are we satisfied with this MNP?
								String alt = altsb.toString().toUpperCase();
								String ref = refsb.toString().toUpperCase();
								if (!alt.equals(ref) && alt.charAt(0) != ref.charAt(0)) break;
							}

							// Resulting sequence
							String newSeq = "";
							if (idx > 0) newSeq = seq.substring(0, idx);
							int maxlen = Math.min(ex.getEnd() - i + 1, len);
							newSeq += altsb.substring(0, maxlen);
							newSeq += (end < seq.length() ? seq.substring(end) : "");

							newSeq = ex.isStrandPlus() ? newSeq : GprSeq.reverseWc(newSeq);
							newSeq = newSeq.toLowerCase();

							// Create variant and apply
							Variant variant = new Variant(t.getChromosome(), i, refsb.toString(), altsb.toString(), "");
							if (debug) Gpr.debug("variant: " + variant);
							Exon exNew = ex.apply(variant);

							// Check
							if (!exNew.getSequence().equals(newSeq)) {
								String msg = "Error:" //
										+ "\n\t\tVariant   : " + variant //
										+ "\n\t\tOriginal  : " + ex //
										+ "\n\t\tNew       : " + exNew //
										+ "\n\t\tNew seq   : " + newSeq //
										;
								System.err.println(msg);
								throw new RuntimeException(msg);
							}

							Gpr.showMark(count++, 1000);
						}
					}
				}
			}
		}
		System.err.println("");
	}

	/**
	 * Exon completely removed by a deletion.
	 * Bug triggers a null pointer on 'Transcript.apply()': Fixed
	 */
	@Test
	public void test_apply_05_delete_whole_exon() {
		Gpr.debug("Test");
		snpEffect("testHg19Chr1", path("test_apply_05_delete_whole_exon.vcf"), null);
	}

	/**
	 * Upstream region is completely removed by a deletion.
	 * Bug triggers a null pointer: Fixed
	 */
	@Test
	public void test_apply_06_delete_upstream() {
		Gpr.debug("Test");
		Transcript trNew = applyTranscript("testHg19Chr11", "NM_001004460.1", path("test_apply_06_delete_upstream.vcf"));

		// Check expected sequence
		Assert.assertEquals("atgagcttctcttccctgcctactgaaatacagtcattactctttctgacatttctaaccatctacctggtcaccctgatgggaaactgcctcatcattctggttaccctagctgaccccatgctacacagccccatgtacttcttcctcagaaacttatctttcctggagattggcttcaacctagtcattgtgcccaaaatgctggggaccctgcttgcccaggacacaaccatctccttccttggctgtgccactcagatgtatttcttcttct" //
				, trNew.mRna());
	}

	/**
	 * Upstream region is completely removed by a deletion.
	 * Bug triggers a null pointer: Fixed
	 */
	@Test
	public void test_apply_07_delete_upstream() {
		Gpr.debug("Test");
		Transcript trNew = applyTranscript("testHg3775Chr11", "ENST00000379829", path("test_apply_07_delete_upstream.vcf"));

		// Check expected sequence
		Assert.assertEquals("ttttttggggctgctgagtgctgcctcctggccaccatggcatatgaccgctacgtggccatctgtgaccccttgcactacccagtcatcatgggccacatatcctgtgcccagctggcagctgcctcttggttctcagggttttcagtggccactgtgcaaaccacatggattttcagtttccctttttgtggccccaacagggtgaaccacttcttctgtgacagccctcctgttattgcactggtctgtgctgacacctctgtgtttgaactggaggctctgacagccactgtcctattcattctctttcctttcttgctgatcctgggatcctatgtccgcatcctctccactatcttcaggatgccgtcagctgaggggaaacatcaggcattctccacctgttccgcccacctcttggttgtctctctcttctatagcactgccatcctcacgtatttccgaccccaatccagtgcctcttctgagagcaagaagctgctgtcactctcttccacagtggtgactcccatgttgaaccccatcatctacagctcaaggaataaagaagtgaaggctgcactgaagcggcttatccacaggaccctgggctctcagaaactatga" //
				, trNew.cds());
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy