
org.snpeff.snpEffect.testCases.unity.TestCasesDnaOverlap Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.snpEffect.testCases.unity;
import java.util.HashSet;
import java.util.Random;
import junit.framework.Assert;
import org.junit.Test;
import org.snpeff.binseq.BinarySequence;
import org.snpeff.binseq.DnaAndQualitySequence;
import org.snpeff.binseq.DnaSequence;
import org.snpeff.binseq.coder.DnaCoder;
import org.snpeff.binseq.comparator.DnaQualSubsequenceComparator;
import org.snpeff.binseq.comparator.DnaSubsequenceComparator;
import org.snpeff.fastq.FastqVariant;
import org.snpeff.util.Gpr;
public class TestCasesDnaOverlap {
public static boolean verbose = false;
/**
* Create random changes in a sequence
* @param overlap
* @param overlapChanges
* @return
*/
String change(String sequence, int numChanges, Random rand) {
HashSet changedPos = new HashSet();
char chars[] = sequence.toCharArray();
for (int i = 0; i < numChanges;) {
int pos = rand.nextInt(chars.length);
if (!changedPos.contains(pos)) { // Already changed?
int newCode = rand.nextInt() & 0x03;
char newBase = DnaCoder.get().toBase(newCode);
if (chars[pos] != newBase) { // Base is different?
chars[pos] = newBase;
changedPos.add(pos);
i++;
}
}
}
return new String(chars);
}
/**
* Perform a test of DnaCode.copyBases() method
* @param seqLenSrc
* @param seqLenDst
* @param rand
*/
void dnaCoderCopyBases(int seqLenSrc, int seqLenDst, Random rand) {
String srcStr = randSeq(seqLenSrc, rand);
String dstStr = randSeq(seqLenDst, rand);
// Create sequence
DnaSequence src = new DnaSequence(srcStr);
DnaSequence dst = null;
dst = new DnaSequence(dstStr);
long signature = src.getCodes()[0];
// Random start & length
int srcStart = rand.nextInt(src.length());
int dstStart = rand.nextInt(dst.length());
int len = rand.nextInt((Math.min(src.length() - srcStart, dst.length() - dstStart))) + 1;
// Sub strings to compare
String subStr = srcStr.substring(srcStart, srcStart + len);
String dstBefore = dst.getSequence().substring(0, dstStart);
String dstAfter = dst.getSequence().substring(dstStart + len);
// Copy test
DnaCoder.get().copyBases(src.getCodes(), srcStart, dst.getCodes(), dstStart, len);
String subStrDst = dst.getSequence().substring(dstStart, dstStart + len);
String dstBefore2 = dst.getSequence().substring(0, dstStart);
String dstAfter2 = dst.getSequence().substring(dstStart + len);
if (!subStr.equals(subStrDst)) { throw new RuntimeException("Substrings do not match! signature=" + Long.toHexString(signature) + " \n\t" + subStr + "\n\t" + subStrDst); }
if (!dstBefore2.equals(dstBefore)) { throw new RuntimeException("Substrings 'before' do not match! signature=" + Long.toHexString(signature) + "\n\tdstBefore:\t" + dstBefore + "\n\tdstBefore2:\t" + dstBefore2); }
if (!dstAfter2.equals(dstAfter)) { throw new RuntimeException("Substrings 'after' do not match! signature=" + Long.toHexString(signature) + "\n\tdstAfter:\t" + dstAfter + "\n\tdstAfter2:\t" + dstAfter2); }
}
/**
* Overlap two sequences
* @param seq1
* @param seq2
* @param start
* @param result
*/
void overlap(String seq1, String seq2, int start, String result, String resultQ) {
overlapDnaSequence(seq1, seq2, start, result);
overlapDnaAndQualitySequence(seq1, seq2, start, result, resultQ);
}
/**
* Test an overlap between two DnaAndQualitySequences
*/
void overlapDnaAndQualitySequence(String seq1, String seq2, int start, String result, String resultQ) {
// Overlapping with DnaAndQualitySequence
DnaAndQualitySequence s1 = new DnaAndQualitySequence(seq1, q(seq1.length(), 2), FastqVariant.FASTQ_SANGER);
DnaAndQualitySequence s2 = new DnaAndQualitySequence(seq2, q(seq2.length(), 3), FastqVariant.FASTQ_SANGER);
DnaAndQualitySequence s3 = s1.overlap(s2, start);
Assert.assertEquals(result, s3.getSequence());
if (resultQ != null) Assert.assertEquals(resultQ, s3.getQuality());
}
/**
* Test an overlap between two DnaSequences
*/
void overlapDnaSequence(String seq1, String seq2, int start, String result) {
// Overlapping with DnaSequence
DnaSequence s1 = new DnaSequence(seq1);
DnaSequence s2 = new DnaSequence(seq2);
BinarySequence s3 = s1.overlap(s2, start);
Assert.assertEquals(result, s3.getSequence());
}
/**
* Create random sequences and overlap them
* @param maxLen
* @param minLen
* @param rand
*/
void overlapRandTest(int maxLen, int minLen, Random rand) {
// First sequence
int len1 = rand.nextInt(maxLen) + minLen;
String seq1 = randSeq(len1, rand);
// Second sequence
int over = rand.nextInt(len1 - minLen + 1);
int start = over;
String overlap = "", nonOverlap = "", seq2 = "", result = "";
int len2 = rand.nextInt(maxLen - over);
if (rand.nextBoolean()) {
// Second sequence (overlapping at the end of the first one)
overlap = seq1.substring(over);
nonOverlap = randSeq(len2, rand);
seq2 = overlap + nonOverlap;
result = seq1 + nonOverlap; // Expected result
} else {
// Second sequence (overlapping at the beginning of the first one)
overlap = seq1.substring(0, over);
nonOverlap = randSeq(len2, rand);
seq2 = nonOverlap + overlap;
start = -nonOverlap.length();
result = nonOverlap + seq1;
// Expected result
}
overlap(seq1, seq2, start, result, null);
}
/**
* Create a quality string 'len' bases long
* @param len
* @return
*/
String q(int len, int quality) {
char q[] = new char[len];
for (int i = 0; i < len; i++)
q[i] = (char) ('!' + quality);
return new String(q);
}
/**
* Create a random sequence of length 'len'
* @param len
* @param rand
* @return
*/
String randSeq(int len, Random rand) {
StringBuilder sb = new StringBuilder();
// Create a random sequence
for (int i = 0; i < len; i++) {
int r = rand.nextInt() & 0x03;
sb.append(DnaCoder.get().toBase(r));
}
return sb.toString();
}
void score(String seq1, String seq2, int start, int threshold, int result) {
scoreDnaSequence(seq1, seq2, start, threshold, result);
scoreDnaAndQualitySequence(seq1, seq2, start, threshold, result);
}
void scoreDnaAndQualitySequence(String seq1, String seq2, int start, int threshold, int result) {
// Overlapping with DnaAndQualitySequence
DnaAndQualitySequence s1 = new DnaAndQualitySequence(seq1);
DnaAndQualitySequence s2 = new DnaAndQualitySequence(seq2);
DnaQualSubsequenceComparator compartor = new DnaQualSubsequenceComparator(true, threshold);
int idx1 = (start >= 0 ? start : 0);
int idx2 = (start >= 0 ? 0 : -start);
int score = compartor.score(s1, idx1, s2, idx2);
Assert.assertEquals(result, score);
}
void scoreDnaSequence(String seq1, String seq2, int start, int threshold, int result) {
// Overlapping with DnaSequence
DnaSequence s1 = new DnaSequence(seq1);
DnaSequence s2 = new DnaSequence(seq2);
DnaSubsequenceComparator compartor = new DnaSubsequenceComparator(true, threshold);
int idx1 = (start >= 0 ? start : 0);
int idx2 = (start >= 0 ? 0 : -start);
int score = compartor.score(s1, idx1, s2, idx2);
Assert.assertEquals(result, score);
}
/**
* Test DnaCoder.score() method
*
* @param maxLen
* @param minLen
* @param rand
* @param dnaCoder
* @param comparator
*/
void scoreRandTest(int maxLen, int minLen, Random rand, DnaCoder dnaCoder, DnaSubsequenceComparator comparator) {
// First sequence
int len1 = rand.nextInt(maxLen) + minLen;
String seq1 = randSeq(len1, rand);
// Second sequence
int over = rand.nextInt(len1 - minLen + 1);
int start = over;
String overlap = "", nonOverlap = "", seq2 = "";
int len2 = rand.nextInt(maxLen - over);
if (rand.nextBoolean()) {
// Second sequence (overlapping at the end of the first one)
overlap = seq1.substring(over);
nonOverlap = randSeq(len2, rand);
seq2 = overlap + nonOverlap;
} else {
// Second sequence (overlapping at the beginning of the first one)
overlap = seq1.substring(0, over);
nonOverlap = randSeq(len2, rand);
seq2 = nonOverlap + overlap;
start = -nonOverlap.length();
}
// Score should be zero for any 'start' after overlap
DnaSequence s1 = new DnaSequence(seq1);
DnaSequence s2 = new DnaSequence(seq2);
int threshold = 0;
boolean found = !(over > 0); // false, except if overlap is empty
for (int i = 0; (i < 10) || (!found); i++) { // Iterate until we find a non-zero score (but not less than 10 iterations)
int score1 = 0, score2 = 0, len = 0, starti;
if (start > 0) {
starti = rand.nextInt(s1.length());
len = Math.min(seq2.length(), seq1.length() - starti);
score1 = dnaCoder.score(s2.getCodes(), s1.getCodes(), starti, len, threshold);
score2 = comparator.scoreSlow(s2, 0, s1, starti);
} else {
starti = rand.nextInt(s2.length());
len = Math.min(seq1.length(), seq2.length() - starti);
score1 = dnaCoder.score(s1.getCodes(), s2.getCodes(), starti, len, threshold);
score2 = comparator.scoreSlow(s1, 0, s2, starti);
}
if (score1 != score2) throw new RuntimeException("Scores do not match!\n\tscore1: " + score1 + "\n\tscore2: " + score2 + "\n\tstarti: " + starti + "\n\tstart: " + start + "\n\tlen: " + len);
if (score1 > 0) found = true;
}
}
/**
* Create random sequences and calculate score (using a threshold)
* @param maxLen
* @param minLen
* @param rand
*/
void scoreRandTestThreshold(int maxLen, int minLen, Random rand, int threshold, int overlapChanges) {
// First sequence
int len1 = rand.nextInt(maxLen) + minLen;
String seq1 = randSeq(len1, rand);
if (verbose) System.out.println("\nseq1:\t" + seq1);
// Second sequence
int over = rand.nextInt(len1 - minLen + 1);
int start = over;
String overlap = "", nonOverlap = "", seq2 = "";
int expectedScore = 0;
int len2 = rand.nextInt(maxLen - over);
if (rand.nextBoolean()) {
// Second sequence (overlapping at the end of the first one)
overlap = seq1.substring(over);
overlapChanges = Math.min(overlapChanges, overlap.length());
overlap = change(overlap, overlapChanges, rand);
if (verbose) System.out.println("over:\t" + overlap);
nonOverlap = randSeq(len2, rand);
seq2 = overlap + nonOverlap;
} else {
// Second sequence (overlapping at the beginning of the first one)
overlap = seq1.substring(0, over);
overlapChanges = Math.min(overlapChanges, overlap.length());
overlap = change(overlap, overlapChanges, rand);
if (verbose) System.out.println("over:\t" + overlap);
nonOverlap = randSeq(len2, rand);
seq2 = nonOverlap + overlap;
start = -nonOverlap.length();
}
if (verbose) System.out.println("seq2:\t" + seq2);
// Expected result
expectedScore = overlapChanges <= threshold ? overlap.length() - overlapChanges : 0;
if (verbose) System.out.println("start:\t" + start + "\tthreshold: " + threshold + "\toverlapChanges: " + overlapChanges + "\tscore: " + expectedScore);
// Caclualte
score(seq1, seq2, start, threshold, expectedScore);
}
@Test
public void test_07_overlap() {
Gpr.debug("Test");
overlap(//
"catagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg"// First sequence
, "tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctc" // Second sequence
, -47 // Start
, "tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg" // Expected result
, "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&################################################################");
}
@Test
public void test_08_overlap() {
Gpr.debug("Test");
overlap(//
"tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctc" // First sequence
, "catagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg"// Second sequence
, 47 // Start
, "tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg" // Expected result
, "###############################################&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" //
);
}
@Test
public void test_09_overlap() {
Gpr.debug("Test");
overlap(//
"tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctc" // First sequence
, "catagaaaccaacagccatataactggtagctttaagcggctc"// Second sequence
, 47 // Start
, "tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctc" // Expected result
, "###############################################&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" //
);
}
@Test
public void test_10_overlap() {
Gpr.debug("Test");
overlap(//
"catagaaaccaacagccatataactggtagctttaagcggctc"//
, "tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctc" //
, -47 // Start
, "tttagcagcaaggtccatatctgactttttgttaacgtatttagccacatagaaaccaacagccatataactggtagctttaagcggctc" // Expected result
, "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" //
);
}
@Test
public void test_11_overlap() {
Gpr.debug("Test");
overlap(//
"catagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg"//
, "catagaaaccaacagccatataactggtagctttaagcggctc" //
, 0 // Start
, "catagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg" // Expected result
, "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&################################################################" //
);
}
@Test
public void test_12_overlap() {
Gpr.debug("Test");
overlap(//
"catagaaaccaacagccatataactggtagctttaagcggctc" //
, "catagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg"//
, 0 // Start
, "catagaaaccaacagccatataactggtagctttaagcggctcacctttagcatcaacaggccacaaccaaccagaacgtgaaaaagcgtcctgcgtgtagcgaactg" // Expected result
, "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$" //
);
}
@Test
public void test_13_overlap() {
Gpr.debug("Test");
overlap(//
"caggagcaggaaagcgagggtatcctacaaagtccagcgtaccataaacgcaagcctcaacgcagcgacgagcacgagagcggtcagtagcaatccaaac" //
, "aaagtccagcgtaccataaacgcaagcctcaacgcagcgacgagcacgagagcggtcagtagcaatccaa"//
, 28 // Start
, "caggagcaggaaagcgagggtatcctacaaagtccagcgtaccataaacgcaagcctcaacgcagcgacgagcacgagagcggtcagtagcaatccaaac" // Expected result
, "############################&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&#" //
);
}
@Test
public void test_15_DnaCoder_copy_1() {
Gpr.debug("Test");
DnaCoder dnaCoder = DnaCoder.get();
// Rand
long seed = 20100809;
Random rand = new Random(seed);
int numTests = 10000;
int maxLen = 1000;
for (int i = 1; i < numTests; i++) {
int seqlen = rand.nextInt(maxLen) + 10;
String srcStr = randSeq(seqlen, rand);
String dstStr = randSeq(seqlen, rand);
// Create sequences
DnaSequence src = new DnaSequence(srcStr);
DnaSequence dst = new DnaSequence(dstStr);
// Random start & length
int srcStart = rand.nextInt(src.length());
int dstStart = srcStart;
int len = rand.nextInt((Math.min(src.length() - srcStart, dst.length() - dstStart)));
// Sub strings to compare
String subStr = srcStr.substring(srcStart, srcStart + len);
String dstBefore = dst.getSequence().substring(0, dstStart);
String dstAfter = dst.getSequence().substring(dstStart + len);
if (verbose) {
System.out.println("\n-----------------------------------------------------------------------------------------------");
System.out.println("src:\t" + src);
System.out.println("dst:\t" + dst);
System.out.println("sub:\t" + subStr);
System.out.println("bef:\t" + dstBefore);
System.out.println("aft:\t" + dstAfter);
}
// Copy test
dnaCoder.copyBases(src.getCodes(), dst.getCodes(), srcStart, len);
String subStrDst = dst.getSequence().substring(dstStart, dstStart + len);
String dstBefore2 = dst.getSequence().substring(0, dstStart);
String dstAfter2 = dst.getSequence().substring(dstStart + len);
if (verbose) {
System.out.println("\ndst:\t" + dst);
System.out.println("sub:\t" + subStrDst);
}
if (!subStr.equals(subStrDst)) { throw new RuntimeException("Substrings do not match!"); }
if (!dstBefore2.equals(dstBefore)) { throw new RuntimeException("Substrings 'before' do not match!\n\tdstBefore:\t" + dstBefore + "\n\tdstBefore2:\t" + dstBefore2); }
if (!dstAfter2.equals(dstAfter)) { throw new RuntimeException("Substrings 'after' do not match!"); }
Gpr.showMarkStderr(i, 1000);
}
}
@Test
public void test_16_DnaCoder_copy_2() {
Gpr.debug("Test");
long seed = 20100812;
Random rand = new Random(seed);
int numTests = 100000;
int maxLen, minLen = 10;
System.err.print("\nDnaCoder.copyBases test:");
for (int numWords = 1; numWords < 10; numWords++) {
System.err.print("\n\tMax words: " + numWords + "\t");
for (int i = 1; i < numTests; i++) {
maxLen = 32 * numWords;
int seqlensrc = Math.max(rand.nextInt(maxLen), minLen);
int seqlendst = Math.max(rand.nextInt(maxLen), minLen);
dnaCoderCopyBases(seqlensrc, seqlendst, rand);
Gpr.showMarkStderr(i, 10000);
}
}
System.err.print("\nDone.\n");
}
@Test
public void test_17_overlap_rand() {
Gpr.debug("Test");
int numTests = 10;
int minLen = 10;
System.err.print("\nOverlap random test:\n");
Random rand = new Random(20100812);
for (int maxlen = 10, i = 1; maxlen < 10000; maxlen += 10, i++) {
for (int it = 0; it < numTests; it++)
overlapRandTest(maxlen, minLen, rand);
Gpr.showMarkStderr(i, 1);
}
System.err.print("\nDone.\n");
}
@Test
public void test_18_DnaCoder_score_rand() {
Gpr.debug("Test");
int numTests = 10;
int minLen = 10;
System.err.println("DnaCoder.score test:");
DnaCoder dnaCoder = DnaCoder.get();
DnaSubsequenceComparator comparator = new DnaSubsequenceComparator(true);
Random rand = new Random(20100812);
for (int maxlen = 10, i = 1; maxlen < 10000; maxlen += 10, i++) {
for (int it = 0; it < numTests; it++)
scoreRandTest(maxlen, minLen, rand, dnaCoder, comparator);
Gpr.showMarkStderr(i, 1);
}
System.err.println("Done.");
}
@Test
public void test_19_score_threshold_rand() {
Gpr.debug("Test");
int thresholdMax = 5;
int changesMax = 6;
int minLen = thresholdMax + changesMax + 10; // Big enough so that we don't run into problems creating random changes
System.err.print("\nScore (threshold) random test:\n");
Random rand = new Random(20100821);
for (int maxLen = 10, i = 1; maxLen < 10000; maxLen += 10, i++) {
for (int threshold = 0; threshold < thresholdMax; threshold++) {
for (int changes = 0; changes < changesMax; changes++) {
scoreRandTestThreshold(maxLen, minLen, rand, threshold, changes);
}
}
Gpr.showMarkStderr(i, 1);
}
System.err.print("\nDone.\n");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy