net.maizegenetics.pangenome.pipelineTests.EvaluateGVCFWithIBDTest Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.pipelineTests;
import com.google.common.collect.*;
import net.maizegenetics.util.Tuple;
import org.junit.*;
import java.io.IOException;
import java.util.Map;
/**
* Evaluate GVCF calls in regions with IBD to the reference genome.
* Requires known list of IBD regions. The scoring is effectively
* the number of SNPs scored divided by the total number of bases scored
* across the regions.
*
*/
public class EvaluateGVCFWithIBDTest {
//private static String workingDir ="/Users/sej65/Documents/Hackathon_Jun17/unitTestFiles/";
private static String workingDir = "/Users/edbuckler/Box Sync/Hackhaton_files_June_2017/W22_unit_test_files/";
private static String gvcfFile = workingDir + "W22_haplotype_caller_output.g.vcf.gz";
private static String referenceGenomeFile = workingDir + "Zea_mays.AGPv4.dna.toplevel.fa.gz";
private static SimpleGVCFReader simpleGVCFReader;
private static int minDepth = 3;
@BeforeClass
public static void setUpClass() throws Exception {
System.out.println("EvaluateGVCFbyKnownSNPTest.setUpClass");
System.out.println("Reading GVCF file: " + gvcfFile);
simpleGVCFReader = new SimpleGVCFReader(gvcfFile, referenceGenomeFile, 100000000, minDepth);
}
@Test
public void testIBDinGVCF() {
RangeSet> ibdRegions= ImmutableRangeSet.>builder()
.add(Range.closed(new Tuple<>(8,122_000_000),new Tuple<>(8,142_000_000)))
.build();
testIBDinGVCF(ibdRegions,false, true);
// RangeSet> ibdRegions2= ImmutableRangeSet.>builder()
// .add(Range.closed(new Tuple<>(8,12_000_000),new Tuple<>(8,32_000_000)))
// .build();
//testIBDinGVCF(ibdRegions2,false, true);
}
private void testIBDinGVCF(RangeSet> ibdRegions, boolean errorReport, boolean regionReport) {
int snpCount=0, rangeRefCount=0;
for (Range> ibdRegion : ibdRegions.asRanges()) {
RangeMap, String> ibdMap=simpleGVCFReader.getSubMap(ibdRegion);
for (Map.Entry>,String> rangeCall : ibdMap.asMapOfRanges().entrySet()) {
if(rangeCall.getValue().equals("REFRANGE")) {
rangeRefCount+=rangeCall.getKey().upperEndpoint().getY()-rangeCall.getKey().lowerEndpoint().getY();
} else {
String call=rangeCall.getValue();
if(call.length()>1) continue;
if(call.equals(simpleGVCFReader.reference(rangeCall.getKey().upperEndpoint().getX(),rangeCall.getKey().upperEndpoint().getY()))) continue;
snpCount++;
System.out.println("rangeCall = " + rangeCall.getKey()+"->"+rangeCall.getValue());
}
}
}
System.out.println("snpCount = " + snpCount);
System.out.println("rangeRefCount = " + rangeRefCount);
System.out.println("snpCount/rangeRefCount = " + (double)snpCount/rangeRefCount);
System.out.println("TODO These errors rates will drop more if the gvcf is pre-filtered for bad SNPs and duplicated regions.");
}
}