
org.snpeff.snpEffect.testCases.integration.TestCasesIntegrationTranscript Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.snpEffect.testCases.integration;
import java.util.List;
import org.junit.Test;
import org.snpeff.codons.CodonTable;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Utr5prime;
import org.snpeff.snpEffect.Config;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.util.Gpr;
import org.snpeff.util.GprSeq;
import org.snpeff.util.Timer;
import junit.framework.Assert;
/**
* Test random SNP changes
*
* @author pcingola
*/
public class TestCasesIntegrationTranscript {
boolean debug = false;
boolean verbose = false || debug;
@Test
public void test_01_mRnaSequence() {
Gpr.debug("Test");
String genome = "testHg3766Chr1";
Config config = new Config(genome);
if (verbose) Timer.showStdErr("Loading genome " + genome);
SnpEffectPredictor sep = config.loadSnpEffectPredictor();
if (verbose) Timer.showStdErr("Building interval forest");
sep.buildForest();
if (verbose) Timer.showStdErr("Done");
int count = 1;
for (Gene gene : sep.getGenome().getGenes()) {
for (Transcript tr : gene) {
if (!tr.isProteinCoding()) continue;
if (tr.hasErrorOrWarning()) continue;
String mRna = tr.mRna().toLowerCase();
String cds = tr.cds().toLowerCase();
// Get UTR sequence
List utrs5 = tr.get5primeUtrs();
if (utrs5.size() <= 0) continue;
if (verbose) Gpr.showMark(count++, 1);
Utr5prime utr5 = utrs5.get(0);
String utr5Str = utr5.getSequence().toLowerCase();
// Sanity check
if (!mRna.startsWith(utr5Str)) throw new RuntimeException("ERROR mRna does not start with UTR5");
if (!mRna.startsWith(utr5Str + cds)) throw new RuntimeException("ERROR mRna does not start with UTR+CDS");
}
}
}
@Test
public void test_02_mapping_mRna_Cds() {
Gpr.debug("Test");
String genome = "testHg3766Chr1";
Config config = new Config(genome);
if (verbose) Timer.showStdErr("Loading genome " + genome);
SnpEffectPredictor sep = config.loadSnpEffectPredictor();
if (verbose) Timer.showStdErr("Building interval forest");
sep.buildForest();
if (verbose) Timer.showStdErr("Done");
int count = 1;
for (Gene gene : sep.getGenome().getGenes()) {
for (Transcript tr : gene) {
if (!tr.isProteinCoding()) continue;
if (tr.hasErrorOrWarning()) continue;
String mRna = tr.mRna().toLowerCase();
String cds = tr.cds().toLowerCase();
// Get UTR sequence
List utrs5 = tr.get5primeUtrs();
if (utrs5.size() <= 0) continue;
if (verbose) Gpr.showMark(count++, 1);
Utr5prime utr5 = utrs5.get(0);
String utr5Str = utr5.getSequence().toLowerCase();
// Sanity check
if (!mRna.startsWith(utr5Str)) throw new RuntimeException("ERROR mRna does not start with UTR5");
if (!mRna.startsWith(utr5Str + cds)) throw new RuntimeException("ERROR mRna does not start with UTR+CDS");
}
}
}
@Test
public void test_03_baseNumberCds2Codon() {
Gpr.debug("Test");
String genome = "testHg19Chr1";
Config config = new Config(genome);
if (verbose) Timer.showStdErr("Loading genome " + genome);
SnpEffectPredictor sep = config.loadSnpEffectPredictor();
if (verbose) Timer.showStdErr("Done");
int countOk = 0;
for (Gene gene : sep.getGenome().getGenes()) {
for (Transcript tr : gene) {
if (!tr.isProteinCoding()) continue;
if (tr.hasErrorOrWarning()) continue;
if (debug) Gpr.debug(tr);
CodonTable codonTable = tr.codonTable();
String cds = tr.cds().toLowerCase();
String protein = tr.protein();
// Check each AA <-> codon mapping
for (int cdsBaseNum = 0, aaNum = 0; cdsBaseNum < cds.length(); cdsBaseNum++) {
String codon = tr.baseNumberCds2Codon(cdsBaseNum);
if (codon == null) continue;
String aa = codonTable.aa(codon);
String aaReal = "" + protein.charAt(aaNum);
if (debug) Gpr.debug("CDS base: " + cdsBaseNum + "\taaNum: " + aaNum + "\tAA: " + aa + " / " + aaReal + "\tcodon: " + codon);
if (aaNum == 0 && aaReal.equals("M") && !aaReal.equals(aa)) {
// First AA not always codes to 'M', we allow this exception
} else {
// Check that coding AA is the same
if (!aaReal.equals(aa)) {
String msg = "Difference in expected codon/AA:" //
+ "\n\tCDS base : " + cdsBaseNum //
+ "\n\tAA number : " + aaNum //
+ "\n\tCodon : " + codon //
+ "\n\tAA : " + aa //
+ "\n\tAA [real] : " + protein.charAt(aaNum) //
+ "\n\tCount OK : " + countOk //
+ "\n\n" + tr //
;
Gpr.debug(msg);
Assert.assertEquals(msg, aa, aaReal);
}
}
if (cdsBaseNum % 3 == 2) aaNum++;
countOk++;
}
}
}
Assert.assertTrue("No codon/AA checked!", countOk > 1);
}
@Test
public void test_04_codonNumber2Pos() {
Gpr.debug("Test");
String genome = "testHg19Chr1";
Config config = new Config(genome);
if (verbose) Timer.showStdErr("Loading genome " + genome);
SnpEffectPredictor sep = config.loadSnpEffectPredictor();
if (verbose) Timer.showStdErr("Done");
int countOk = 0;
for (Gene gene : sep.getGenome().getGenes()) {
for (Transcript tr : gene) {
if (!tr.isProteinCoding()) continue;
if (tr.hasErrorOrWarning()) continue;
if (debug) Gpr.debug(tr);
CodonTable codonTable = tr.codonTable();
String protein = tr.protein();
// Check each AA <-> codon mapping
for (int aaNum = 0; aaNum < protein.length(); aaNum++) {
// Create a codon using 'codonNumber2Pos(i)' mapping and compare to protein sequence
// Get codon coordinates
int codon[] = tr.codonNumber2Pos(aaNum);
// Get bases for each coordinate and build codon sequence
StringBuilder codonStr = new StringBuilder();
for (int j = 0; j < 3; j++) {
if (codon[j] < 0) break; // Negative codon number is error
String base = tr.baseAt(codon[j]);
if (base == null) break;
codonStr.append(base);
}
// Skip incomplete codons
if (codonStr.length() != 3) continue;
// Get 'real' and
String aaReal = "" + protein.charAt(aaNum);
String cstr = codonStr.toString();
if (tr.isStrandMinus()) cstr = GprSeq.reverseWc(cstr);
String aa = codonTable.aa(cstr);
// First AA not always codes to 'M', we allow this exception
if (aaNum == 0 && aaReal.equals("M") && !aaReal.equals(aa)) continue;
// Check
if (!aaReal.equals(aa)) {
String msg = "Difference in expected codon/AA:" //
+ "\n\tCodon numer: " + aaNum //
+ "\n\tCodon : " + codon[0] + ", " + codon[1] + ", " + codon[2] //
+ "\n\tBases : " + tr.baseAt(codon[0]) + ", " + tr.baseAt(codon[1]) + ", " + tr.baseAt(codon[2]) //
+ "\n\tCodonStr : " + codonStr + "\t" + cstr + "\t" + tr.baseNumberCds2Codon(3 * aaNum) //
+ "\n\tAA [real] : " + aaReal //
+ "\n\tAA : " + aa //
+ "\n\n" + tr //
;
Gpr.debug(msg);
Assert.assertEquals(msg, aaReal, aa);
}
countOk++;
}
}
}
Assert.assertTrue("No codon/AA checked!", countOk > 0);
}
@Test
public void test_05_codonNumber_aaNumber() {
Gpr.debug("Test");
String genome = "testHg19Chr1";
Config config = new Config(genome);
if (verbose) Timer.showStdErr("Loading genome " + genome);
SnpEffectPredictor sep = config.loadSnpEffectPredictor();
if (verbose) Timer.showStdErr("Done");
int countOk = 0;
for (Gene gene : sep.getGenome().getGenes()) {
for (Transcript tr : gene) {
if (!tr.isProteinCoding()) continue;
if (tr.hasErrorOrWarning()) continue;
if (debug) Gpr.debug(tr);
String protein = tr.protein();
int aanum2pos[] = tr.aaNumber2Pos();
// Check each AA <-> codon mapping
for (int aaNum = 0; aaNum < protein.length(); aaNum++) {
// Get codon coordinateshrow new RuntimeException("FINISH THIS TEST CASE!!!!" + aanum2pos);
int codon[] = tr.codonNumber2Pos(aaNum);
int codonPos = (tr.isStrandPlus() ? codon[0] : codon[2]);
Assert.assertEquals("Genomic locations do not matcn:" //
+ "\n\taaNum : " + aaNum //
+ "\n\tcodonNumber2Pos : " + codonPos //
+ "\n\taanum2pos : " + aanum2pos[aaNum] //
, codonPos //
, aanum2pos[aaNum]);
countOk++;
if (verbose) Gpr.showMark(countOk, 1);
}
}
}
Assert.assertTrue("No codon/AA checked!", countOk > 0);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy