
org.snpeff.snpEffect.testCases.integration.TestCasesIntegrationLof Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.snpEffect.testCases.integration;
import java.util.LinkedList;
import java.util.Random;
import org.junit.After;
import org.junit.Test;
import org.snpeff.SnpEff;
import org.snpeff.interval.Exon;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Intron;
import org.snpeff.interval.Marker;
import org.snpeff.interval.SpliceSite;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Variant;
import org.snpeff.interval.Variant.VariantType;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.snpEffect.LossOfFunction;
import org.snpeff.snpEffect.VariantEffect;
import org.snpeff.util.Gpr;
import junit.framework.Assert;
/**
* Test Loss of Function prediction
*
* @author pcingola
*/
public class TestCasesIntegrationLof extends TestCasesIntegrationBase {
public static final int NUM_DEL_TEST = 10; // number of random test per transcript
Config config;
Random random = new Random(20121030);
public TestCasesIntegrationLof() {
super();
}
@After
public void after() {
config = null;
}
@Override
Marker cdsMarker(Transcript tr) {
int start = tr.isStrandPlus() ? tr.getCdsStart() : tr.getCdsEnd();
int end = tr.isStrandPlus() ? tr.getCdsEnd() : tr.getCdsStart();
return new Marker(tr.getParent(), start, end, false, "");
}
/**
* Check that LOF works for a given transcript
*/
void checkLof(Transcript tr) {
// Don't check non-protein coding
if (!tr.isProteinCoding()) return;
checkLofSplice(tr);
checkLofStartLost(tr);
checkLofExonDeleted(tr);
checkLofFrameShift(tr);
}
/**
* Check that exon deleted is LOF
*/
void checkLofExonDeleted(Transcript tr) {
// First coding exont
checkLofExonDeletedFirstExon(tr);
// More than half protein is lost
checkLofExonDeletedHalf(tr);
}
/**
* First coding exon produces a LOF
*/
void checkLofExonDeletedFirstExon(Transcript tr) {
Exon ex = tr.getFirstCodingExon();
Variant Variant = new Variant(tr.getChromosome(), ex.getStart(), "AC", "A");
Variant.setStart(ex.getStart());
Variant.setEnd(ex.getEnd());
Variant.setVariantType(VariantType.DEL);
if (verbose) Gpr.debug("Variant:" + Variant);
LinkedList changeEffects = variantEffects(Variant, EffectType.EXON_DELETED, ex);
// Calculate LOF
LossOfFunction lof = new LossOfFunction(config, changeEffects);
boolean islof = lof.isLof();
Assert.assertEquals(true, islof);
}
/**
* If more than half protein is lost => LOF
*/
void checkLofExonDeletedHalf(Transcript tr) {
// Calculate coding part of the transcript
Marker cds = cdsMarker(tr);
for (int i = 0; i < NUM_DEL_TEST; i++) {
// Create a random Variant
int delStart = random.nextInt(tr.size() - 1) + tr.getStart();
int delEnd = random.nextInt(tr.getEnd() - delStart) + delStart + 1;
Variant Variant = new Variant(tr.getChromosome(), delStart, "AC", "A");
Variant.setStart(delStart);
Variant.setEnd(delEnd);
if (verbose) Gpr.debug("Variant:" + Variant);
Variant.setVariantType(VariantType.DEL);
// How many coding bases are affected?
Marker codingDel = cds.intersect(Variant);
if (codingDel != null) { // Does it intersect?
int numBases = 0;
for (Exon ex : tr)
numBases += codingDel.intersectSize(ex);
// Percent of coding bases?
double perc = numBases / ((double) tr.cds().length());
boolean delIsLof = perc > LossOfFunction.DEFAULT_DELETE_PROTEIN_CODING_BASES;
// Calculate LOF
LinkedList changeEffects = variantEffects(Variant, EffectType.TRANSCRIPT, tr); // Notice that we don't care what type of effect is, so we just use 'TRANSCRIPT'
LossOfFunction lof = new LossOfFunction(config, changeEffects);
boolean islof = lof.isLof();
Assert.assertEquals(delIsLof, islof);
}
}
}
/**
* Frame shifts are LOF
*/
void checkLofFrameShift(Transcript tr) {
Marker cds = cdsMarker(tr);
int codingBase = 0;
for (Exon ex : tr.sortedStrand()) {
int start = tr.isStrandPlus() ? ex.getStart() : ex.getEnd();
int step = tr.isStrandPlus() ? 1 : -1;
// All exonic positions
for (int pos = start; ex.intersects(pos); pos += step) {
// Create a Variant
Variant Variant;
boolean ins = random.nextBoolean(); // Randomly choose INS or DEL
if (ins) Variant = new Variant(tr.getChromosome(), pos, "A", "AC");
else Variant = new Variant(tr.getChromosome(), pos, "AC", "A");
Variant.setVariantType(ins ? VariantType.INS : VariantType.DEL);
if (verbose) Gpr.debug("Variant:" + Variant);
// Create change effect
LinkedList changeEffects = variantEffects(Variant, EffectType.FRAME_SHIFT, ex);
VariantEffect changeEffect = changeEffects.get(0);
changeEffect.setCodons("", "", codingBase / 3, codingBase % 3); // Set codon affected
int aaLen = changeEffect.getAaLength();
// Should this be a LOF?
boolean isFsLof = false;
if (cds.intersects(pos)) {
double perc = (codingBase / 3) / ((double) aaLen);
isFsLof = (LossOfFunction.DEFAULT_IGNORE_PROTEIN_CODING_BEFORE <= perc) && (perc <= LossOfFunction.DEFAULT_IGNORE_PROTEIN_CODING_AFTER);
codingBase++;
}
// Is LOF as expected?
LossOfFunction lof = new LossOfFunction(config, changeEffects);
boolean islof = lof.isLof();
Assert.assertEquals(isFsLof, islof);
}
}
}
void checkLofSplice(Transcript tr) {
// All transcripts in exon
for (Intron intron : tr.introns()) {
checkSpliceDonor(tr, intron);
checkSpliceAcceptor(tr, intron);
}
}
/**
* Check that START_LOST is LOF
*/
void checkLofStartLost(Transcript tr) {
// Find start codon position
int pos = tr.getCdsStart();
Variant Variant = new Variant(tr.getChromosome(), pos, "A", "C"); // Create a Variant
if (verbose) Gpr.debug("Variant:" + Variant);
// Finr exon
Exon exon = null;
for (Exon ex : tr)
if (ex.intersects(pos)) exon = ex;
if (exon == null) throw new RuntimeException("Cannot find first exon for transcript " + tr.getId());
// Create a LOF object and analyze the effect
LinkedList changeEffects = variantEffects(Variant, EffectType.START_LOST, exon);
LossOfFunction lof = new LossOfFunction(config, changeEffects);
boolean islof = lof.isLof();
Assert.assertEquals(true, islof);
}
/**
* Check that Core Splice Site acceptors are considered LOF
*/
void checkSpliceAcceptor(Transcript tr, Intron intron) {
int step = tr.isStrandPlus() ? +1 : -1;
if (intron.getRank() > 1) {
// Position
int posDonor = tr.isStrandPlus() ? intron.getEnd() : intron.getStart();
// Splice site size
int maxSize = Math.min(intron.size(), SpliceSite.CORE_SPLICE_SITE_SIZE);
posDonor -= step * (maxSize - 1);
if (verbose) Gpr.debug("Intron size: " + intron.size());
if (maxSize <= 0) throw new RuntimeException("Max splice size is " + maxSize);
//---
// For all position on splice site donor positions, make sure it is LOF
//---
for (int pos = posDonor, i = 0; i < maxSize; i++, pos += step) {
Variant Variant = new Variant(tr.getChromosome(), pos, "A", "C"); // Create a Variant
Marker marker = findMarker(config.getSnpEffectPredictor(), Variant, EffectType.SPLICE_SITE_ACCEPTOR, null, intron);
LinkedList changeEffects = variantEffects(Variant, EffectType.SPLICE_SITE_ACCEPTOR, marker); // Create a SPLICE_SITE_ACCEPTOR effect
if (verbose) Gpr.debug("Variant:" + Variant);
// Create a LOF object and analyze the effect
LossOfFunction lof = new LossOfFunction(config, changeEffects);
boolean islof = lof.isLof();
Assert.assertEquals(true, islof);
}
}
}
/**
* Check that Core Splice Donor acceptors are considered LOF
*/
void checkSpliceDonor(Transcript tr, Intron intron) {
int step = tr.isStrandPlus() ? 1 : -1;
int maxRank = tr.numChilds();
if (intron.getRank() < maxRank) {
// Position
int posDonor = tr.isStrandPlus() ? intron.getStart() : intron.getEnd();
// Splice site size
int maxSize = Math.min(intron.size(), SpliceSite.CORE_SPLICE_SITE_SIZE);
if (verbose) Gpr.debug("Intron size: " + intron.size() + "\tmaxSize: " + maxSize);
if (maxSize <= 0) throw new RuntimeException("Max splice size is non-positive: " + maxSize);
//---
// For all position on splice site donor positions, make sure it is LOF
//---
for (int pos = posDonor, i = 0; i < maxSize; i++, pos += step) {
if (verbose) Gpr.debug("Position: " + tr.getChromosome().getId() + ":" + posDonor);
Variant variant = new Variant(tr.getChromosome(), pos, "A", "C"); // Create a Variant
Marker marker = findMarker(config.getSnpEffectPredictor(), variant, EffectType.SPLICE_SITE_DONOR, null, intron);
LinkedList changeEffects = variantEffects(variant, EffectType.SPLICE_SITE_DONOR, marker); // Create a SPLICE_DONOR effect
if (verbose) Gpr.debug("Variant:" + variant);
// Create a LOF object and analyze the effect
LossOfFunction lof = new LossOfFunction(config, changeEffects);
boolean islof = lof.isLof();
Assert.assertEquals(true, islof);
}
}
}
@Test
public void test_01() {
Gpr.debug("Test");
// Load database
String genomeVer = "testHg3766Chr1";
config = new Config(genomeVer, Config.DEFAULT_CONFIG_FILE);
config.loadSnpEffectPredictor();
config.setTreatAllAsProteinCoding(true); // For historical reasons...
config.getSnpEffectPredictor().buildForest();
// For each gene, transcript, check that NMD works
int i = 1;
for (Gene gene : config.getGenome().getGenes()) {
Gpr.showMark(i++, 10);
for (Transcript tr : gene) {
if (verbose) System.err.println(tr);
checkLof(tr);
}
}
}
/**
* We should be able to annotate a BED file
*/
@Test
public void test_02() {
String args[] = { "testHg3775Chr22", "-noLog", "-i", "bed", path("test_lof_02.bed") };
SnpEff snpeff = new SnpEff(args);
snpeff.setVerbose(verbose);
snpeff.setSupressOutput(!verbose);
boolean ok = snpeff.run();
Assert.assertEquals(true, ok);
}
/**
* Create change effects
*/
@Override
LinkedList variantEffects(Variant variant, EffectType effectType, Marker marker) {
VariantEffect changeEffect = new VariantEffect(variant);
changeEffect.set(marker, effectType, effectType.effectImpact(), "");
LinkedList changeEffects = new LinkedList();
changeEffects.add(changeEffect);
return changeEffects;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy