All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.pipelineTests.EvaluateGVCFWithIBDTest Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.pipelineTests;

import com.google.common.collect.*;
import net.maizegenetics.util.Tuple;
import org.junit.*;

import java.io.IOException;
import java.util.Map;

/**
 * Evaluate GVCF calls in regions with IBD to the reference genome.
 * Requires known list of IBD regions.  The scoring is effectively
 * the number of SNPs scored divided by the total number of bases scored
 * across the regions.
 *
 */
public class EvaluateGVCFWithIBDTest {

    //private static String workingDir ="/Users/sej65/Documents/Hackathon_Jun17/unitTestFiles/";
    private static String workingDir = "/Users/edbuckler/Box Sync/Hackhaton_files_June_2017/W22_unit_test_files/";

    private static String gvcfFile = workingDir + "W22_haplotype_caller_output.g.vcf.gz";
    private static String referenceGenomeFile = workingDir + "Zea_mays.AGPv4.dna.toplevel.fa.gz";
    private static SimpleGVCFReader simpleGVCFReader;
    private static int minDepth = 3;

    @BeforeClass
    public static void setUpClass() throws Exception {
        System.out.println("EvaluateGVCFbyKnownSNPTest.setUpClass");
        System.out.println("Reading GVCF file: " + gvcfFile);
        simpleGVCFReader = new SimpleGVCFReader(gvcfFile, referenceGenomeFile, 100000000, minDepth);

    }

    @Test
    public void testIBDinGVCF() {
        RangeSet> ibdRegions= ImmutableRangeSet.>builder()
                .add(Range.closed(new Tuple<>(8,122_000_000),new Tuple<>(8,142_000_000)))
                .build();

        testIBDinGVCF(ibdRegions,false, true);
//        RangeSet> ibdRegions2= ImmutableRangeSet.>builder()
//                .add(Range.closed(new Tuple<>(8,12_000_000),new Tuple<>(8,32_000_000)))
//                .build();

        //testIBDinGVCF(ibdRegions2,false, true);

    }

    private void testIBDinGVCF(RangeSet> ibdRegions, boolean errorReport, boolean regionReport) {
        int snpCount=0, rangeRefCount=0;
        for (Range> ibdRegion : ibdRegions.asRanges()) {
            RangeMap, String> ibdMap=simpleGVCFReader.getSubMap(ibdRegion);
            for (Map.Entry>,String> rangeCall : ibdMap.asMapOfRanges().entrySet()) {
                if(rangeCall.getValue().equals("REFRANGE")) {
                    rangeRefCount+=rangeCall.getKey().upperEndpoint().getY()-rangeCall.getKey().lowerEndpoint().getY();
                } else {
                    String call=rangeCall.getValue();
                    if(call.length()>1) continue;
                    if(call.equals(simpleGVCFReader.reference(rangeCall.getKey().upperEndpoint().getX(),rangeCall.getKey().upperEndpoint().getY()))) continue;
                    snpCount++;
                    System.out.println("rangeCall = " + rangeCall.getKey()+"->"+rangeCall.getValue());
                }
            }
        }
        System.out.println("snpCount = " + snpCount);
        System.out.println("rangeRefCount = " + rangeRefCount);
        System.out.println("snpCount/rangeRefCount = " + (double)snpCount/rangeRefCount);
        System.out.println("TODO These errors rates will drop more if the gvcf is pre-filtered for bad SNPs and duplicated regions.");


    }

    }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy