net.maizegenetics.analysis.gbs.PolymorphismFinder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tassel Show documentation
Show all versions of tassel Show documentation
TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage
disequilibrium.
/*
* PolymorphismFinder
*/
package net.maizegenetics.analysis.gbs;
import cern.colt.GenericSorting;
import cern.colt.Swapper;
import cern.colt.function.IntComparator;
import java.util.ArrayList;
import java.util.Arrays;
import net.maizegenetics.dna.tag.ReadsByTaxa;
/**
*
* @author fl262
*/
public class PolymorphismFinder {
ReadsByTaxa rbt;
long[] lookup;
int[] indexOfRbt;
public PolymorphismFinder(ReadsByTaxa rbt) {
this.rbt = rbt;
initialize();
}
public void initialize() {
lookup = new long[rbt.haplotypeNum * 2];
indexOfRbt = new int[rbt.haplotypeNum * 2];
for (int i = 0; i < rbt.haplotypeNum; i++) {
for (int j = 0; j < 2; j++) {
int dex = 2 * i + j;
lookup[dex] = rbt.haplotype[j][i];
indexOfRbt[dex] = i;
}
}
GenericSorting.quickSort(0, lookup.length, comp, swapper);
this.reduceDuplicates();
System.out.println("Initialization is done");
}
public ArrayList findOneMismatch(long[] queryLongSeq) {
ArrayList hitIndex = new ArrayList();
for (int i = 0; i < queryLongSeq.length; i++) {
int hit = Arrays.binarySearch(lookup, queryLongSeq[i]);
if (hit < 0) {
continue;
}
while ((hit > 0) && (queryLongSeq[i] == lookup[hit - 1])) {
hit--;
}
while ((hit < lookup.length) && (lookup[hit] == queryLongSeq[i])) {
int count = getMismatchInLong(queryLongSeq[0], rbt.haplotype[0][indexOfRbt[hit]]) + getMismatchInLong(queryLongSeq[1], rbt.haplotype[1][indexOfRbt[hit]]);
if (count == 1) {
hitIndex.add(indexOfRbt[hit]);
}
hit++;
}
}
return hitIndex;
}
public byte getMismatchInLong(long longSeq1, long longSeq2) {
long mask = 3;
long diff = longSeq1 ^ longSeq2;
byte count = 0;
for (int i = 0; i < 32; i++) {
if ((diff & mask) > 0) {
count++;
}
diff = diff >> 2;
}
return count;
}
private void reduceDuplicates() {
int start = 0, end = -1, duplicated = 0;
long currHap = lookup[0];
for (int i = 0; i < lookup.length; i++) {
if (lookup[i] == currHap) {
end = i;
} else {
if (((end - start) > 1000)) {
//System.out.println(BaseEncoder.getSequenceFromInt(currHap)+" "+(end-start));
for (int j = start; j <= end; j++) {
lookup[j] = Long.MAX_VALUE;
duplicated++;
}
}
currHap = lookup[i];
start = end = i;
}
}
GenericSorting.quickSort(0, lookup.length, comp, swapper);
long[] newlookup = new long[lookup.length - duplicated];
int[] newindexOfRbt = new int[lookup.length - duplicated];
System.arraycopy(lookup, 0, newlookup, 0, lookup.length - duplicated);
System.arraycopy(indexOfRbt, 0, newindexOfRbt, 0, indexOfRbt.length - duplicated);
System.out.println("Old Lookup Size:" + lookup.length + " new size:" + newlookup.length);
lookup = newlookup;
indexOfRbt = newindexOfRbt;
}
Swapper swapper = new Swapper() {
public void swap(int a, int b) {
long tl;
int ti, tb;
tl = lookup[a];
lookup[a] = lookup[b];
lookup[b] = tl;
ti = indexOfRbt[a];
indexOfRbt[a] = indexOfRbt[b];
indexOfRbt[b] = ti;
}
};
IntComparator comp = new IntComparator() {
public int compare(int a, int b) {
if (lookup[a] < lookup[b]) {
return -1;
} else if (lookup[a] > lookup[b]) {
return 1;
} else {
return 0;
}
}
};
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy