All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.processAssemblyGenomes.AssemblyProcessingUtils Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
/**
 * 
 */
package net.maizegenetics.pangenome.processAssemblyGenomes;

import java.io.*;
import java.sql.Connection;
import java.sql.ResultSet;
import java.util.*;
import java.util.stream.Collector;
import java.util.stream.Collectors;

import com.google.common.collect.*;
import htsjdk.variant.variantcontext.*;
import net.maizegenetics.dna.map.GenomeSequence;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.api.CreateGraphUtils;
import net.maizegenetics.pangenome.db_loading.*;
import org.apache.log4j.Logger;

import net.maizegenetics.analysis.gbs.neobio.BasicScoringScheme;
import net.maizegenetics.analysis.gbs.neobio.IncompatibleScoringSchemeException;
import net.maizegenetics.analysis.gbs.neobio.InvalidSequenceException;
import net.maizegenetics.analysis.gbs.neobio.PairwiseAlignment;
import net.maizegenetics.analysis.gbs.neobio.PairwiseAlignmentAlgorithm;
import net.maizegenetics.analysis.gbs.neobio.ScoringScheme;
import net.maizegenetics.analysis.gbs.neobio.SmithWaterman;
import net.maizegenetics.analysis.gbs.repgen.AlignmentInfo;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.pangenome.api.ReferenceRange;
import net.maizegenetics.util.Tuple;
import net.maizegenetics.util.Utils;

/**
 * This class contains methods useful for processing assembly haplotypes.
 * 
 * @author lcj34
 *
 */
public class AssemblyProcessingUtils {

    private static final Logger myLogger = Logger.getLogger(AssemblyProcessingUtils.class);    

    /**
     * Create range map from list of entries from a Mummer coordinates file 
     * 
     * @param coordsList  List containing tab-delimited strings from a Mummer coordinates file
     * @param chrom String with chromosome name 
     * @return  RangeMap:  A rangemap of the coordinate values form the mummer coords file list
     */
    public static RangeMap> getCoordsRangeMap(List coordsList, String chrom) {
        // Create RangeSet        
        RangeMap> coordsRangeMap = TreeRangeMap.create();
        Chromosome chr = Chromosome.instance(chrom);
        for (String entry : coordsList) {
            //  S1 E1 S2 E2 len1 len2 %id tag1 tag2
            int tabIndex1 = entry.indexOf("\t");
            int tabIndex2 = entry.indexOf("\t",tabIndex1+1);
            int tabIndex3 = entry.indexOf("\t",tabIndex2+1);
            int tabIndex4 = entry.indexOf("\t",tabIndex3+1);
           
            int refStart;
            int refEnd;
            refStart = Integer.parseInt(entry.substring(0,tabIndex1));
            refEnd = Integer.parseInt(entry.substring(tabIndex1+1,tabIndex2));
            Range refPositionRange = Range.closed(Position.of(chr,refStart), Position.of(chr, refEnd));
            int asmStart = Integer.parseInt(entry.substring(tabIndex2+1,tabIndex3));
            int asmEnd = Integer.parseInt(entry.substring(tabIndex3+1,tabIndex4));
            
            if (asmStart > asmEnd) { // asm could be ascending or descending
                int temp = asmEnd;
                asmEnd = asmStart;
                asmStart = temp;                       
            }   
            
            coordsRangeMap.put(refPositionRange, Arrays.asList(Position.of(chr,asmStart),Position.of(chr,asmEnd)));
                     
        }
        return coordsRangeMap;
    }
    

    /**
     * Returns the value for a specific column in a tab-delimited string
     * @param mline
     * @param entryColumn - the column number (1-based) of the entry the caller wants
     * @param totalColumns - total number of tab-delimited columns in the line
     * @return String containing the column contents
     */
    public static String getEntryFromTabDelimitedLine(String mline, int entryColumn, int totalColumns) {
        int[] tabPos = new int[totalColumns-1];
        int len = mline.length();
        int tabIndex = 0;
        for (int idx = 0; (tabIndex < totalColumns) && (idx < len); idx++) {
            if (mline.charAt(idx) == '\t') {
                tabPos[tabIndex++] = idx;
            }
        }
        int begin;
        if (entryColumn == 1) {
            begin = 0;
        } else {
            begin = tabPos[entryColumn-2] + 1; // -2 for list is 0-based, and 1st tab is after first column
        }
        int end;
        if (entryColumn == totalColumns) {
            end = mline.length();
        } else {
            end = tabPos[entryColumn-1];
        }
        return mline.substring(begin,end);
    }
    
    
    /**
     * Find the start/end coordinates from a tab-delmimited Mummer4 coords file entry
     * @param entry:  Line from a mummer coords file
     * @param ref Boolean: if true, get ref start end.  Otherwise, get assembly start/end
     * @return
     */
    public static Tuple getStartEndCoordinates(String entry, boolean ref) {
        // Finds the start and end coordinate position from the input file.
        // For ref, these occur in the 1st and 2nd fields of the tab-delimited string
        // For assembly, these occur in the 3rd and 4th fields of the tab-delimited string.
        int tabIndex1 = entry.indexOf("\t");
        int tabIndex2 = entry.indexOf("\t",tabIndex1+1);
        int tabIndex3 = entry.indexOf("\t",tabIndex2+1);
        int tabIndex4 = entry.indexOf("\t",tabIndex3+1);
        int start;
        int end;
        if (ref) {
            start = Integer.parseInt(entry.substring(0,tabIndex1));
            end = Integer.parseInt(entry.substring(tabIndex1+1,tabIndex2));
        } else {
            start = Integer.parseInt(entry.substring(tabIndex2+1,tabIndex3));
            end = Integer.parseInt(entry.substring(tabIndex3+1,tabIndex4));
        }
        
        return new Tuple(start,end);
    }
    
    /**
     * Calculates distance between 2 sets of mummer coords file entries
     * This is called on coordinates that are both either ascending (start < end)
     * or both descending (start > end) so "sign" of entries is not checked here.
     * 
     * @param prev
     * @param current
     * @return
     */
    public static double calculateCoordDistance(Tuple prev, Tuple current) {
        int endStartDiff = Math.abs(prev.y-current.x);
        double distance = (double)endStartDiff/prev.y;
        
        return distance;
    }
    /**
     * Verifies if the positions from a Mummer4 snp file fall within the range map
     * of reference and assembly positions created from the Mummer4 coordinates files.
     * @param mline
     * @return
     */
    public static boolean checkSnpEntryInRange(String mline, RangeMap> coordsRangeMap, String chrom) {
        int totalSnpColumns = 12; // total number of columns in tab-delimited show-snps file WITHOUT -C option
        int refPosCol = 1; // column containing reference position in mummer4 SNP file
        int asmPosCol = 4; // column containing assembly position in mummer4 SNP file

        // get snp positions for ref and assembly
        int refPos = Integer.parseInt(getEntryFromTabDelimitedLine(mline, refPosCol, totalSnpColumns));
        int asmPos = Integer.parseInt(getEntryFromTabDelimitedLine(mline, asmPosCol, totalSnpColumns));

        Position refAsPos = Position.of(chrom, refPos);
        RangeMap> rangeWithPosMap = coordsRangeMap.subRangeMap(Range.closed(refAsPos, refAsPos));
        if ( rangeWithPosMap.asMapOfRanges().isEmpty()) return false; // reference position not included in map

        List asmCoords = rangeWithPosMap.get(refAsPos);
        if (asmCoords == null) return false; // ref position not in range map
        // If asm position falls in range that mapped to this reference position, return true
        if (asmPos >= asmCoords.get(0).getPosition() && asmPos <= asmCoords.get(1).getPosition() ){
            return true; 
        }
            
        return false; // asm coordinate does not map to reference coordinate alignments that are stored.
    }       

    
    public static List findVCListForAnchor(RangeMap positionRangeToVariantContextMap, Position refStart, Position refEnd) {
        List parsedVCList = new ArrayList();
        RangeMap anchorSubRangeMap
              = positionRangeToVariantContextMap.subRangeMap(Range.closed(refStart,refEnd));
        
        anchorSubRangeMap.asMapOfRanges().values().stream().forEach(vc -> {
            parsedVCList.add(vc);
        });
        
        return parsedVCList;
    }

    /**
     * This method takes a RangeSet of integers, and a single range.  It finds all
     * the ranges in the set that intersect the targetRange.
     * Calculate both the number of bases from the targetRange that are represented
     * in the rangeSet, and the percentage of the bases represented.  Return a Tuple with
     * this information.
     * 
     * @param rangeSet
     * @param targetRange
     * @return
     */
    public static Tuple getRegionCoverage(RangeSet rangeSet, Range targetRange) {
        
        int rangeSize = (targetRange.upperEndpoint() - targetRange.lowerEndpoint()) +1;
        
        // This just gives overlaps.
        // Not looking for the exact ranges in the set, but rather the number of positions
        // from the targetRange that are included in the rangeSet
        RangeSet numberSubRangeSet = rangeSet.subRangeSet(targetRange);
        int numBases = 0;
        for (Range range : numberSubRangeSet.asRanges()) {            
            numBases += ((range.upperEndpoint() - range.lowerEndpoint()) +1);
        }
        
        double percentRangeCovered = ((double)(numBases)/rangeSize) * 100 ;  
        return new Tuple(numBases,percentRangeCovered);
    }

    /**
     * Method to parse the Mummer SNP file into a rangemap
     * The first String in the tuple is for the reference call
     * The second String is for the assembly call
     * @param fileName
     * @param chromosome
     * @return
     */
    public static RangeMap> parseMummerSNPFile(String fileName, String chromosome) {
        RangeMap> snpMap = TreeRangeMap.create();
        
        try(BufferedReader reader = Utils.getBufferedReader(fileName)){
            String currentLine = reader.readLine();
            int tabIndex1 = currentLine.indexOf("\t");
            int tabIndex2 = currentLine.indexOf("\t",tabIndex1+1);
            int tabIndex3 = currentLine.indexOf("\t",tabIndex2+1);
            int tabIndex4 = currentLine.indexOf("\t",tabIndex3+1);
            int prevRefPos = Integer.parseInt(currentLine.substring(0,tabIndex1));
            int prevRefPosStart = prevRefPos;
            int prevAsmPos = Integer.parseInt(currentLine.substring(tabIndex3+1,tabIndex4));
            String currentRefCall = removeMissing(currentLine.substring(tabIndex1+1,tabIndex2));
            String currentCall = removeMissing(currentLine.substring(tabIndex2+1,tabIndex3));

            while((currentLine = reader.readLine()) != null) {
                tabIndex1 = currentLine.indexOf("\t");
                tabIndex2 = currentLine.indexOf("\t",tabIndex1+1);
                tabIndex3 = currentLine.indexOf("\t",tabIndex2+1);
                tabIndex4 = currentLine.indexOf("\t",tabIndex3+1);
                int currRefPos = Integer.parseInt(currentLine.substring(0,tabIndex1));
                int currAsmPos = Integer.parseInt(currentLine.substring(tabIndex3+1,tabIndex4));

                String refAllele = removeMissing(currentLine.substring(tabIndex1+1,tabIndex2));
                String altAllele = removeMissing(currentLine.substring(tabIndex2+1,tabIndex3));

                //Need to check this as we can have overlapping
                if((currRefPos == prevRefPos && refAllele.length() == 0) || (currAsmPos == prevAsmPos && altAllele.length() == 0)) {
                    currentRefCall += refAllele;
                    currentCall += altAllele;
                }
                else {
                    //Add the call to the snpMap
                    snpMap.put(Range.closed(Position.of(chromosome,prevRefPosStart),Position.of(chromosome,prevRefPos)),new Tuple(currentRefCall,currentCall));
                    prevRefPosStart = currRefPos;
                    currentRefCall = refAllele;
                    currentCall = altAllele;
                }

                prevRefPos = currRefPos;
                prevAsmPos = currAsmPos;
            }
            snpMap.put(Range.closed(Position.of(chromosome,prevRefPosStart),Position.of(chromosome,prevRefPos)),new Tuple(currentRefCall,currentCall));

        }
        catch(Exception e) {
            throw new IllegalStateException(e);
        }
        return snpMap;
    }

    private static String removeMissing(String call) {
        if(call.equals(".")) {
            return "";
        }
        else {
            return call;
        }
    }

    /**
     * Method to parse out the reference coordinates into a map which along with the SNP data can then be used to create Variants.
     * @param coordFile
     * @param chromosome
     * @return
     */
    public static Map,List> parseCoordinateRegions(String coordFile, String chromosome) {
        Map,List> mappedRegions = new LinkedHashMap<>();

        try(BufferedReader reader = Utils.getBufferedReader(coordFile)) {
            String currentLine = "";

            while((currentLine = reader.readLine()) != null) {
                int tabIndex1 = currentLine.indexOf("\t");
                int tabIndex2 = currentLine.indexOf("\t",tabIndex1+1);
                int tabIndex3 = currentLine.indexOf("\t",tabIndex2+1);
                int tabIndex4 = currentLine.indexOf("\t",tabIndex3+1);

                int refStart = Integer.parseInt(currentLine.substring(0,tabIndex1));
                int refEnd = Integer.parseInt(currentLine.substring(tabIndex1+1,tabIndex2));

                int asmStart = Integer.parseInt(currentLine.substring(tabIndex2+1,tabIndex3));
                int asmEnd = Integer.parseInt(currentLine.substring(tabIndex3+1,tabIndex4));


                mappedRegions.put(Range.closed(Position.of(chromosome, refStart),Position.of(chromosome, refEnd)),
                        Arrays.asList(Position.of(chromosome,asmStart),Position.of(chromosome,asmEnd)));
            }
        }
        catch(Exception e) {
            throw new IllegalStateException(e);
        }

        return mappedRegions;
    }


    /**
     * Creates a RangeMap of asm positions from the given reference range map,
     * using lower reference position as the value.
     * 
     * @param refCoords
     * @return
     */
    public static RangeMap createAsmCoordinatesRangeMap (Map,List> refCoords) {
        RangeMap asmMap = TreeRangeMap.create();
        
        
        refCoords.entrySet().stream().forEach(entry -> {
            List asmCoords = entry.getValue();
            Position refPos = entry.getKey().lowerEndpoint();
            
            Position asmCurrentStart = asmCoords.get(0);
            Position asmCurrentEnd = asmCoords.get(1);
            boolean currentDirection = (asmCurrentStart.compareTo(asmCurrentEnd) <= 0 );
            
            Range asmRange = (currentDirection)? Range.closed(asmCurrentStart,asmCurrentEnd) : Range.closed(asmCurrentEnd,asmCurrentStart);
            // Storing ref staring position as the value for the asm range entry
            asmMap.put(asmRange, refPos);
        });
        
        return asmMap;
    }
    
    /**
     * Given a map of ranges and a range, calculate the number
     * of positions within the given range that are represented
     * in the RangeMap.
     * 
     * @param asmCoveredMap
     * @param asmRange
     * @return
     */
    public static int calculateRegionCovered(RangeMap asmCoveredMap, Range asmRange) {
        
        Map, Position> asMap = asmCoveredMap.asMapOfRanges();
        
        int totalPositions = 0;
        for (Map.Entry, Position> entry : asMap.entrySet()) {
            int upperPos = entry.getKey().upperEndpoint().getPosition();
            int lowerPos = entry.getKey().lowerEndpoint().getPosition();
            int count = upperPos - lowerPos;
            totalPositions += count;
        }

        return totalPositions;
    }
    
    /**
     * Test method to try to merge overlapping coordinates.
     * We thought Show-SNPs would recall the SNPs in the delta file based on this information, but this was not the case.
     * @param coordFile
     * @param chromosome
     * @return
     */
    @Deprecated
    public static Map,List> mergeCoords(String coordFile, String chromosome) {
        Map,List> mappedRegions = new LinkedHashMap<>();
        RangeSet coveredRefRanges = TreeRangeSet.create();
        try(BufferedReader reader = Utils.getBufferedReader(coordFile)) {
            String currentLine = "";

            while((currentLine = reader.readLine()) != null) {
                int tabIndex1 = currentLine.indexOf("\t");
                int tabIndex2 = currentLine.indexOf("\t",tabIndex1+1);
                int tabIndex3 = currentLine.indexOf("\t",tabIndex2+1);
                int tabIndex4 = currentLine.indexOf("\t",tabIndex3+1);

                int refStart = Integer.parseInt(currentLine.substring(0,tabIndex1));
                int refEnd = Integer.parseInt(currentLine.substring(tabIndex1+1,tabIndex2));

                int asmStart = Integer.parseInt(currentLine.substring(tabIndex2+1,tabIndex3));
                int asmEnd = Integer.parseInt(currentLine.substring(tabIndex3+1,tabIndex4));

                boolean updatedRange = false;
                for(int i = refStart; i <= refEnd; i++) {
                    if(coveredRefRanges.contains(Position.of(chromosome, i))) {
                        //We need to merge this one with the the range which overlaps
                        Range overlappingRefRange = coveredRefRanges.rangeContaining(Position.of(chromosome,i));
                        List asmCoordinates = mappedRegions.get(overlappingRefRange);
                        myLogger.debug("Merging:"+overlappingRefRange);
                        if((asmCoordinates.get(0).getPosition() <= asmCoordinates.get(1).getPosition() && asmStart <= asmEnd) ||
                                (asmCoordinates.get(0).getPosition() >= asmCoordinates.get(1).getPosition() && asmStart >= asmEnd) ) {

                            if(!overlappingRefRange.encloses(Range.closed(Position.of(chromosome, refStart),Position.of(chromosome, refEnd)))) {

                                Range newRefRange = Range.closed(overlappingRefRange.lowerEndpoint(), Position.of(chromosome, refEnd));
                                List newASMRange = Arrays.asList(asmCoordinates.get(0), Position.of(chromosome, asmEnd));

                                mappedRegions.remove(overlappingRefRange);
                                mappedRegions.put(newRefRange, newASMRange);
                                coveredRefRanges.add(newRefRange);
                                updatedRange = true;
                            }
                        }
                        else{
                            //ignore as the assembly coordinates do not agree on direction
                        }
                        break;
                    }
                }

                if(!updatedRange) {
                    coveredRefRanges.add(Range.closed(Position.of(chromosome, refStart),Position.of(chromosome, refEnd)));
                    mappedRegions.put(Range.closed(Position.of(chromosome, refStart),Position.of(chromosome, refEnd)),
                            Arrays.asList(Position.of(chromosome,asmStart),Position.of(chromosome,asmEnd)));
                }
            }
        }
        catch(Exception e) {
            throw new IllegalStateException(e);
        }
        return mappedRegions;
    }

    /**
     * Test method to resize the coordinate files so they are not overlapping
     * @param coordFile
     * @param chromosome
     * @return
     */
    public static Map,List> resizeCoords(String coordFile, String chromosome) {
        Map,List> mappedRegions = new LinkedHashMap<>();
        RangeSet coveredRefRanges = TreeRangeSet.create();
        try(BufferedReader reader = Utils.getBufferedReader(coordFile)) {
            String currentLine = "";

            while((currentLine = reader.readLine()) != null) {
                int tabIndex1 = currentLine.indexOf("\t");
                int tabIndex2 = currentLine.indexOf("\t",tabIndex1+1);
                int tabIndex3 = currentLine.indexOf("\t",tabIndex2+1);
                int tabIndex4 = currentLine.indexOf("\t",tabIndex3+1);

                int refStart = Integer.parseInt(currentLine.substring(0,tabIndex1));
                int refEnd = Integer.parseInt(currentLine.substring(tabIndex1+1,tabIndex2));

                int asmStart = Integer.parseInt(currentLine.substring(tabIndex2+1,tabIndex3));
                int asmEnd = Integer.parseInt(currentLine.substring(tabIndex3+1,tabIndex4));

                boolean updatedRange = false;
                for(int i = refStart; i <= refEnd; i++) {
                    if(coveredRefRanges.contains(Position.of(chromosome, i))) {
                        //We have an overlap so we need to scale this referenceRange to the part which is not overlapping


                        //We need to resize this one based on the range which overlaps
                        Range overlappingRefRange = coveredRefRanges.rangeContaining(Position.of(chromosome,i));
                        List asmCoordinates = mappedRegions.get(overlappingRefRange);

                        myLogger.debug("Resizing:"+overlappingRefRange);
                        if(!overlappingRefRange.encloses(Range.closed(Position.of(chromosome, refStart),Position.of(chromosome, refEnd)))) {
                            int lengthOfOverlap = overlappingRefRange.upperEndpoint().getPosition() - refStart +1;

                            Range newRefRange = Range.closed(Position.of(chromosome, refStart + lengthOfOverlap), Position.of(chromosome, refEnd));
                            List newASMRange = Arrays.asList(Position.of(chromosome,asmStart + lengthOfOverlap), Position.of(chromosome, asmEnd));

                            mappedRegions.put(newRefRange, newASMRange);
                            coveredRefRanges.add(newRefRange);
                            updatedRange = true;
                        }
                        break;
                    }
                }

                if(!updatedRange) {
                    coveredRefRanges.add(Range.closed(Position.of(chromosome, refStart),Position.of(chromosome, refEnd)));
                    mappedRegions.put(Range.closed(Position.of(chromosome, refStart),Position.of(chromosome, refEnd)),
                            Arrays.asList(Position.of(chromosome,asmStart),Position.of(chromosome,asmEnd)));
                }
            }
        }
        catch(Exception e) {
            throw new IllegalStateException(e);
        }
        return mappedRegions;
    }

    /**
     * Utility method to export out the merged regions
     * @param mergedCoords
     * @param fileName
     */
    @Deprecated
    public static void exportMergedRegions(Map,List> mergedCoords, String fileName) {
        try(BufferedWriter writer = Utils.getBufferedWriter(fileName)) {
            Set> refPositionRanged = mergedCoords.keySet();
            for(Range currentRefPositionRange : refPositionRanged) {

                List asmCoordinateRange = mergedCoords.get(currentRefPositionRange);

                writer.write(""+currentRefPositionRange.lowerEndpoint().getPosition());
                writer.write("\t");
                writer.write(""+currentRefPositionRange.upperEndpoint().getPosition());
                writer.write("\t");
                writer.write(""+asmCoordinateRange.get(0).getPosition());
                writer.write("\t");
                writer.write(""+asmCoordinateRange.get(1).getPosition());
                writer.write("\t");
                writer.write(""+(currentRefPositionRange.upperEndpoint().getPosition()-currentRefPositionRange.lowerEndpoint().getPosition()));
                writer.write("\t");
                writer.write(""+(asmCoordinateRange.get(1).getPosition()-asmCoordinateRange.get(0).getPosition()));
                writer.write("\t9\tchr9\n");

            }
        }
        catch(Exception e) {
            throw new IllegalStateException(e);
        }
    }


    /**
     * Method to fill in the unmapped regions coming from nucmer.  This method will create multi-bp indels between the mapped regions which can then be added to the SNP list for processing into Variants.
     * @param coordinates
     * @param refSequence
     * @param asmSequence
     * @return
     */
    public static RangeMap> setupIndelVariants(Map,List> coordinates, GenomeSequence refSequence, GenomeSequence asmSequence) {
        RangeMap> snpMap = TreeRangeMap.create();
        List> referencePositionCoordinates = coordinates.keySet().stream().collect(Collectors.toList());
        
        // Create RangeMap of Assembly positions for verifying which have already been covered        
        RangeMap asmRanges = createAsmCoordinatesRangeMap (coordinates);
        
        Range previousRange = referencePositionCoordinates.get(0);
        //Set the direction if increasing direction = true
        boolean direction = (coordinates.get(previousRange).get(0).compareTo(coordinates.get(previousRange).get(1)) <= 0 );
        int asmRangeCounter = 0;
        int lengthOfOverlap = 0;
        int numAlreadyCovered = 0;
        int totalPossibleIndels = 0;

        int refRangeOverlaps = 0;
        int directionChanged = 0;
            
        for(int rangeIndex = 1; rangeIndex < referencePositionCoordinates.size(); rangeIndex++) {
            Range currentRange = referencePositionCoordinates.get(rangeIndex);

            Position previousEnd = previousRange.upperEndpoint();
            Position currentStart = currentRange.lowerEndpoint();

            Position currentEnd = currentRange.upperEndpoint();

            Position asmPreviousStart = coordinates.get(previousRange).get(0);
            Position asmPreviousEnd = coordinates.get(previousRange).get(1);
            Position asmCurrentStart = coordinates.get(currentRange).get(0);
            Position asmCurrentEnd = coordinates.get(currentRange).get(1);

            boolean currentDirection = (asmCurrentStart.compareTo(asmCurrentEnd) <= 0 );
            totalPossibleIndels++;
            
            if(currentDirection != direction) {
                //this means we swapped direction
                myLogger.debug("AssemblyProcessingUtils setupIndelVariants: Not creating indel between mapped regions, Assembly direction switching:"+createUserReadableStringPositionRanges(previousRange,currentRange));
                direction = (coordinates.get(currentRange).get(0).compareTo(coordinates.get(currentRange).get(1)) <= 0 );
                previousRange = currentRange;
                directionChanged++;
                continue;
            }
            
            if(currentEnd.getPosition() < previousEnd.getPosition()) {
                //Skip these as the whole mappings reference positions are covered by the previous mapping
                refRangeOverlaps++;
                myLogger.debug("AssemblyProcessingUtils setupIndelVariants: Not creating indel between mapped regions, Reference ranges overlap:"+createUserReadableStringPositionRanges(previousRange,currentRange));
                continue;
            }

            Range currentASMRange = (currentDirection)? Range.closed(asmCurrentStart,asmCurrentEnd) : Range.closed(asmCurrentEnd,asmCurrentStart);
            if(currentASMRange.contains(asmPreviousStart) || currentASMRange.contains(asmPreviousEnd)) {

                myLogger.debug("AssemblyProcessingUtils setupIndelVariants: Not creating indel between mapped regions, Assembly ranges overlap:"+createUserReadableStringPositionRanges(previousRange,currentRange));
                asmRangeCounter++;
                lengthOfOverlap+= Math.abs(asmCurrentStart.getPosition() - asmPreviousEnd.getPosition());
                previousRange = currentRange;
                continue;
            }

            // see if range is already represented in part of the map.
            // This attempts to remove long indels.

            Range asmRange = null;
            Chromosome chrom = asmCurrentStart.getChromosome();
            int prevEnd = asmPreviousEnd.getPosition();
            int curStart = asmCurrentStart.getPosition();
            if ((asmPreviousEnd.compareTo(asmCurrentStart) > 0)) {
                if ((curStart+1) == prevEnd) {
                    asmRange = null; // there is no gap between the ranges
                } else {
                    asmRange = Range.closed(Position.of(chrom, asmCurrentStart.getPosition()+1), Position.of(chrom, asmPreviousEnd.getPosition()-1));
                }               
            } else if (asmPreviousEnd.compareTo(asmCurrentStart) < 0) {
                if ((prevEnd + 1) == curStart) {
                    asmRange = null;
                } else {
                    asmRange = Range.closed(Position.of(chrom, asmPreviousEnd.getPosition()+1), Position.of(chrom, asmCurrentStart.getPosition()-1));
                }                
            }
            
            double percentCovered = 0;
            int totalPositionsCovered = 0;
            if (asmRange != null) {
                int asmRangeSize = asmRange.upperEndpoint().getPosition() - asmRange.lowerEndpoint().getPosition();
                RangeMap asmCoveredMap = asmRanges.subRangeMap(asmRange);           
                totalPositionsCovered = calculateRegionCovered( asmCoveredMap, asmRange);
                percentCovered = (double)totalPositionsCovered/asmRangeSize;
            }
             
            if (percentCovered >= 0.20) {
                //this means assembly mapping was out of order.  Don't create VC for the jump between mappings
                myLogger.debug("AssemblyProcessingUtils setupIndelVariants: Not creating indel between mapped regions, Assembly mapped out of order, percent already covered:" + percentCovered +":"+createUserReadableStringPositionRanges(previousRange,currentRange));
                previousRange = currentRange;
                numAlreadyCovered++;
                continue;
            }
            
            // Made it here, we are adding the range.  Add these coordinates to the
            // asmCoveredMap so they don't get added again if asm mappings in coords file are 
            // skipping around.
            if (asmRange != null) {
                asmRanges.put(asmRange,currentRange.lowerEndpoint());
            }
                        
            String asmCalls = "";
            //Assembly is out of order we need to reverse
            if(asmPreviousEnd.compareTo(asmCurrentStart) > 0) {
                asmCalls = asmSequence.genotypeAsString(asmCurrentStart.getChromosome(),asmCurrentStart.getPosition()+1,asmPreviousEnd.getPosition()-1);
            }
            else if(asmPreviousEnd.compareTo(asmCurrentStart) < 0) {
                asmCalls = asmSequence.genotypeAsString(asmCurrentStart.getChromosome(),asmPreviousEnd.getPosition()+1,asmCurrentStart.getPosition()-1);
            }

            if (asmCalls.length() > 1000000) {
                String endStart = asmPreviousEnd.getPosition() + "\t" + asmCurrentStart.getPosition();
                myLogger.debug("setupIndels: TOO BIG: asmPreviousEnd: " + asmPreviousEnd.getPosition()
                        + ", asmCurrentStart position: " + asmCurrentStart.getPosition() 
                        + ", total positions already covered " + totalPositionsCovered + ", percent covered: " + percentCovered);                        
            }
            
            Position newEndOfInterMapRegion = Position.of(currentStart.getChromosome(),currentStart.getPosition()-1);
            Position newStartOfInterMapRegion = Position.of(previousEnd.getChromosome(),previousEnd.getPosition()+1);
            
            String refCalls = "";
            if(newStartOfInterMapRegion.compareTo(newEndOfInterMapRegion) < 0) {
                refCalls = refSequence.genotypeAsString(previousEnd.getChromosome(),previousEnd.getPosition()+1,currentStart.getPosition()-1);
                if(asmCalls.length() > refCalls.length()) {
                    //Insertion case as we have more assembly alleles than reference
                    //Need to add range [previousEndPosition - (currentStart-1)]
                    snpMap.put(Range.closed(previousEnd,newEndOfInterMapRegion),new Tuple(refCalls,asmCalls));
                }
                else if(asmCalls.length() < refCalls.length()) {
                    //Deletion case as we have more reference alleles than assembly
                    //Need to add range [previousEndPosition+1 - (currentStart-1)]
                    snpMap.put(Range.closed(newStartOfInterMapRegion,newEndOfInterMapRegion),new Tuple(refCalls,asmCalls));
                }
                else {
                    //Simple multi allele SNP case
                    //Need to add Range [previousEndPosition - (currentStart-1)] but add the first allele to the start
                    String starterAllele = refSequence.genotypeAsString(previousEnd.getChromosome(),previousEnd.getPosition(),previousEnd.getPosition());
                    snpMap.put(Range.closed(previousEnd,newEndOfInterMapRegion),new Tuple(starterAllele+refCalls,starterAllele+asmCalls));
                }

            }
            else if(previousEnd.compareTo(currentStart) == 0 || newStartOfInterMapRegion.compareTo(newEndOfInterMapRegion) >= 0){
                //Slide up both 1 from previous end so it matches SNP file
                if(!(asmCalls.equals("") && refCalls.equals(""))) {
                    Position insertionPosition = Position.of(previousEnd.getChromosome(), previousEnd.getPosition());
                    snpMap.put(Range.closed(insertionPosition, insertionPosition), new Tuple(refCalls, asmCalls));
                }
            }
            else {
                //we have an overlapping range.  we do not add anything in for this case.
            }

            previousRange = currentRange;
        }
        
        myLogger.debug("Number Of overlapping assemblies: "+ asmRangeCounter);
        myLogger.debug("Avg length of overlap: "+((double)lengthOfOverlap/asmRangeCounter));
        myLogger.debug("Total possible indels: " + totalPossibleIndels + " Number refOverlaps: " + refRangeOverlaps + ", num sequence already covered at 40 percent: " + numAlreadyCovered);
        return snpMap;
    }
    
    public static RangeSet getIndelRanges(RangeMap> coordinates) {
        List> referencePositionCoordinates = coordinates.asMapOfRanges().keySet().stream().collect(Collectors.toList());

        RangeSet indelRegions = TreeRangeSet.create();

        Range previousRange = referencePositionCoordinates.get(0);

        for(int rangeIndex = 1; rangeIndex < referencePositionCoordinates.size(); rangeIndex++) {
            Range currentRange = referencePositionCoordinates.get(rangeIndex);

            Position previousEnd = previousRange.upperEndpoint();
            Position currentStart = currentRange.lowerEndpoint();

            Position asmPreviousEnd = coordinates.asMapOfRanges().get(previousRange).get(1);
            Position asmCurrentStart = coordinates.asMapOfRanges().get(currentRange).get(0);

            //Check to see if we have a gap in the assembly
            if(asmPreviousEnd.compareTo(asmCurrentStart)<0) {
                //Check for diagonal gap
                if(previousEnd.compareTo(currentStart)<0) {
                    //Add one to the start and subtract one from the end as we do not have overlapping endpoints
                    indelRegions.add(Range.closed(Position.of(previousEnd.getChromosome(),previousEnd.getPosition()+1),
                            Position.of(currentStart.getChromosome(),currentStart.getPosition()-1)));
                }
                else if(previousEnd.compareTo(currentStart)==0) {
                    //Means a long insertion broke up the clusters.  We need to return a range of one element
                    indelRegions.add(Range.closed(previousEnd,currentStart));
                }
            }
            else if( asmPreviousEnd.compareTo(asmCurrentStart)==0)
            {
                //We have a deletion
                //Make sure the reference Coordiantes are not equal as well, if so RangeSet will coalesce correctly
                if(previousEnd.compareTo(currentStart)!=0) {
                    indelRegions.add(Range.closed(previousEnd,currentStart));
                }
            }

            previousRange = currentRange;
        }
        return indelRegions;
    }

    /**
     * Method to build list of VariantContexts as RefRangeVCs - used when the reference and
     * assembly have identical chromosome data
     * @param refSequence
     * @param assemblyName
     * @param anchors
     * @return
     */
    public static List createVCasRefBlock(GenomeSequence refSequence, String assemblyName, RangeSet anchors, Map,List> refMappings ) {
        List rangeVCList = new ArrayList<>();

        // This should only be called from AssemblyHapltoypesMUltiThreadPlugin or AssemblyHaplotypesPlugin
        // when the refMappings contained a single element and there is no SNP data.
        if (refMappings.keySet().size() > 1) {
            // This is a developer directive -
            throw new IllegalArgumentException("createVCasRefBlock only applies to singly mapped coordinates file. Alter code if you want it to apply otherwise");
        }

        Collection> coordsKeys = refMappings.keySet();

        for (Range key : coordsKeys){
            List asmPositions = refMappings.get(key);
            int refStart = key.lowerEndpoint().getPosition();
            int refEnd = key.upperEndpoint().getPosition();
            int asmStart = asmPositions.get(0).getPosition();
            int asmEnd = asmPositions.get(1).getPosition();
            if (refStart == asmStart && refEnd == asmEnd) {
                // Run a loop through all the anchors
                anchors.asRanges().stream().forEach(range -> {
                    VariantContext vc = AssemblyProcessingUtils.createRefRangeVC(refSequence, assemblyName, range.lowerEndpoint(), range.upperEndpoint(), range.lowerEndpoint(), range.upperEndpoint());
                    rangeVCList.add(vc);
                });
            } else {
                throw new IllegalArgumentException("createVCasRefBlock: ref and asm coords do not match - this should never happen here");
            }
        }
        return rangeVCList;
    }


        /**
         * Method to build the list of VariantContexts based on the mapped coordinates and the SNPs
         * @param refSequence
         * @param assemblyName
         * @param anchors
         * @param refMappings
         * @param snps
         * @return
         */
    public static List extractAnchorVariantContextsFromAssemblyAlignments(GenomeSequence refSequence, String assemblyName, RangeSet anchors, Map,List> refMappings , RangeMap> snps) {
        List anchorVariants = new ArrayList<>();

        List> mappedCoordinateRanges = refMappings.keySet().stream().collect(Collectors.toList());

        //TODO Add in the anchors for the first and last unmapped regions if they exist.

        Range currentRange = mappedCoordinateRanges.get(0);
        int currentRangeCounter = 1;
        Position refRangeStartPosition = currentRange.lowerEndpoint();
        List currentRangeASMPositions = refMappings.get(currentRange);
        Position lastProcessedASMPosition = Position.of(currentRangeASMPositions.get(0).getChromosome(),currentRangeASMPositions.get(0).getPosition()-1);
        boolean isASMCoordIncreasing = (currentRangeASMPositions.get(1).getPosition() >= lastProcessedASMPosition.getPosition());

        try { 
        //Walk through SNP ranges
        for(Range currentSNP : snps.asMapOfRanges().keySet()) {
            Tuple refAndAssemblyCalls = snps.get(currentSNP.lowerEndpoint());
            Range nextRange = (currentRangeCounter < mappedCoordinateRanges.size())?mappedCoordinateRanges.get(currentRangeCounter) : null;
            
            if (refAndAssemblyCalls.getX().equals(refAndAssemblyCalls.getY())) {
                // The ref and asm alleles are identical.  This can happen with entries created
                // in setupIndelVariants.  These are either regions that Mummer4 did not align, or
                // that Mummer4 did align, but were filtered out with PHG processing.  
                myLogger.info("extractAnchorVariantContextsFromAssemblyAlignments- refCall equals dup string at lower position " + currentSNP.lowerEndpoint().getPosition());
                Tuple asmStartAndEnd = determineGVCFRefBlockASMStartAndEnd(currentRange,refRangeStartPosition,isASMCoordIncreasing,lastProcessedASMPosition);
                anchorVariants.add(createRefRangeVC(refSequence, assemblyName, refRangeStartPosition,currentRange.upperEndpoint(),asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                
                continue;
            }
                       
            //if snpPosition is greater than currentRange.upperEndpoint
            if(currentSNP.lowerEndpoint().compareTo(currentRange.upperEndpoint())>0) {

                if(nextRange != null && nextRange.contains(currentSNP.lowerEndpoint())) {                   
                    //Finish off the range variant context:
                    //create RefRange VCF between refRangeStartPosition and currentRange.upperEndpoint
                    Tuple asmStartAndEnd = determineGVCFRefBlockASMStartAndEnd(currentRange,refRangeStartPosition,isASMCoordIncreasing,lastProcessedASMPosition);
                    if (refRangeStartPosition.getPosition() > currentRange.upperEndpoint().getPosition()) {
                        myLogger.info("extractAnchorVariantContext: no refRange VCF between refRangeStartPosition and currentRange.upperEndpoint " + 
                                refRangeStartPosition.getPosition() + "/" + currentRange.upperEndpoint().getPosition());
                    } else {
                        anchorVariants.add(createRefRangeVC(refSequence, assemblyName, refRangeStartPosition,currentRange.upperEndpoint(),asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                    }
                    
                }
                else if(currentRange.upperEndpoint().getPosition() + 1 == currentSNP.lowerEndpoint().getPosition()) { //We have a deletion covering the gap between the mapped ranges
                    //This means that we have a deletion as deletions are stored in the SNP file as occurring at the site of the gap
                    //We need to process this like a normal deletion snp then shift the current range up and continue on so we do not put the SNP in 2x
                    Position refRangeEndPosition = Position.of(currentSNP.lowerEndpoint().getChromosome(), currentSNP.lowerEndpoint().getPosition() - 2);
                    
                    // Handle the case where the SNP is at refRangeEnd-1, e.g SNP at 90733989, refRangEnd = 90733990.  
                    // In this case, subtracting 2 makes the end position LESS than the start position.
                    // Don't create a refRangeVC.  It means we miss 1 BP of ref data, which instead is included
                    // in the SNPVC created below.  The asmStartEnd do not change when we don't create the refRangeVC.
                    Tuple asmStartAndEnd = determineGVCFRefBlockASMStartAndEnd(refRangeStartPosition, refRangeEndPosition, isASMCoordIncreasing,lastProcessedASMPosition);
                    if (refRangeEndPosition.getPosition() >= refRangeStartPosition.getPosition()) {                                               
                        anchorVariants.add(createRefRangeVC(refSequence, assemblyName, refRangeStartPosition, refRangeEndPosition ,asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                                             
                    } else {
                        System.out.println("extractAnchorVariantContext - case refRangeStart > end: currentSNP.lowerEndPoint "
                                + currentSNP.lowerEndpoint().getPosition() + ", upper SNP endpoint: " + currentSNP.upperEndpoint().getPosition());
                    }
                    //Update lastProcessedASMPosition so we know what the next refRange should start at
                    lastProcessedASMPosition = asmStartAndEnd.getY();  
                    
                    //Create indel vcf record
                    //Add in the previous bp to both ref and alt
                    String firstAllele = refSequence.genotypeAsString(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()-1);
                    Tuple indelCalls = new Tuple(firstAllele+refAndAssemblyCalls.getX(), firstAllele + refAndAssemblyCalls.getY());

                    asmStartAndEnd = determineSNPASMStartAndEnd(indelCalls,lastProcessedASMPosition,isASMCoordIncreasing);

                    anchorVariants.add(createSNPVC(assemblyName,Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()-1),
                            Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.upperEndpoint().getPosition()),indelCalls,asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                    lastProcessedASMPosition = Position.of(asmStartAndEnd.getY().getChromosome(),asmStartAndEnd.getY().getPosition());
                    refRangeStartPosition = Position.of(currentSNP.upperEndpoint().getChromosome(),currentSNP.upperEndpoint().getPosition()+1);

                }
                else{
                    Tuple asmStartAndEnd = determineGVCFRefBlockASMStartAndEnd(currentRange,refRangeStartPosition,isASMCoordIncreasing,lastProcessedASMPosition);                   
                    if (refRangeStartPosition.getPosition() > currentRange.upperEndpoint().getPosition()) {
                        myLogger.info("extractAnchorVariantContext: no refRange VCF between refRangeStartPosition and currentRange.upperEndpoint " + 
                                refRangeStartPosition.getPosition() + "/" + currentRange.upperEndpoint().getPosition());
                    } else {
                        anchorVariants.add(createRefRangeVC(refSequence, assemblyName, refRangeStartPosition,currentRange.upperEndpoint(),asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                    }

                }

                if(currentRangeCounter < mappedCoordinateRanges.size()) {

                    while(currentSNP.lowerEndpoint().compareTo(currentRange.upperEndpoint())>=0) {
                        currentRange = mappedCoordinateRanges.get(currentRangeCounter);
                        currentRangeCounter++;
                        refRangeStartPosition = currentRange.lowerEndpoint();
                        //Update Assembly coordinate variables
                        currentRangeASMPositions = refMappings.get(currentRange);
                        lastProcessedASMPosition = Position.of(currentRangeASMPositions.get(0).getChromosome(),currentRangeASMPositions.get(0).getPosition()-1);
                        isASMCoordIncreasing = (currentRangeASMPositions.get(1).getPosition() >= lastProcessedASMPosition.getPosition());
                    }
                }
                else{
                    //It is outside of an anchor so we should stop processing
                    break;
                }
            }
            //if snpPosition is above refRangeStartPosition(it likely is)
            if(currentSNP.lowerEndpoint().compareTo(refRangeStartPosition) >= 0) {
                //if variant is insertion
                if(refAndAssemblyCalls.getX().length() < refAndAssemblyCalls.getY().length()) {
                    //In nucmer the last shared bp of the insertion is the one stored in the map,
                    //create RefRangeVCF between refRangeStartPosition and snpPosition-1
                    if(refRangeStartPosition.compareTo(Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()-1)) <= 0) {
                        Position refRangeEndPosition = Position.of(currentSNP.lowerEndpoint().getChromosome(), currentSNP.lowerEndpoint().getPosition() - 1);
                        Tuple asmStartAndEnd = determineGVCFRefBlockASMStartAndEnd(refRangeStartPosition,refRangeEndPosition,isASMCoordIncreasing,lastProcessedASMPosition);

                        anchorVariants.add(createRefRangeVC(refSequence, assemblyName, refRangeStartPosition, refRangeEndPosition, asmStartAndEnd.getX(),asmStartAndEnd.getY()));

                        //Update lastProcessedASMPosition so we know what the next refRange should start at
                        lastProcessedASMPosition = asmStartAndEnd.getY();
                    }

                    //Create indel vcf record
                    //Add in the previous bp to both ref and alt
                    String firstAllele = refSequence.genotypeAsString(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition());
                    Tuple indelCalls = new Tuple(firstAllele+refAndAssemblyCalls.getX(), firstAllele + refAndAssemblyCalls.getY());

                    Tuple asmStartAndEnd = determineSNPASMStartAndEnd(indelCalls,lastProcessedASMPosition,isASMCoordIncreasing);


                    anchorVariants.add(createSNPVC(assemblyName,Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()),
                            Position.of(currentSNP.upperEndpoint().getChromosome(),currentSNP.upperEndpoint().getPosition()),indelCalls,asmStartAndEnd.getX(),asmStartAndEnd.getY()));

                    lastProcessedASMPosition = asmStartAndEnd.getY();
                    //For the insertion case we need to set refRangeStart to currentPosition+1 as its the next available position
                    refRangeStartPosition = Position.of(currentSNP.upperEndpoint().getChromosome(),currentSNP.upperEndpoint().getPosition()+1);
                }
                //If Variant is deletion
                else if( refAndAssemblyCalls.getX().length() > refAndAssemblyCalls.getY().length()) {
                    //create RefRangeVCF between refRangeStartPosition and snpPosition-2
                    if(refRangeStartPosition.compareTo(Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()-2)) <= 0) {

                        Position refRangeEndPosition = Position.of(currentSNP.lowerEndpoint().getChromosome(), currentSNP.lowerEndpoint().getPosition() - 2);
                        Tuple asmStartAndEnd = determineGVCFRefBlockASMStartAndEnd(refRangeStartPosition, refRangeEndPosition, isASMCoordIncreasing,lastProcessedASMPosition);
                        anchorVariants.add(createRefRangeVC(refSequence, assemblyName, refRangeStartPosition, refRangeEndPosition ,asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                        //Update lastProcessedASMPosition so we know what the next refRange should start at
                        lastProcessedASMPosition = asmStartAndEnd.getY();
                    }
                    //Create indel vcf record
                    //Add in the previous bp to both ref and alt
                    String firstAllele = refSequence.genotypeAsString(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()-1);
                    Tuple indelCalls = new Tuple(firstAllele+refAndAssemblyCalls.getX(), firstAllele + refAndAssemblyCalls.getY());


                    Tuple asmStartAndEnd = determineSNPASMStartAndEnd(indelCalls,lastProcessedASMPosition,isASMCoordIncreasing);

                    anchorVariants.add(createSNPVC(assemblyName,Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()-1),
                            Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.upperEndpoint().getPosition()),indelCalls,asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                    lastProcessedASMPosition = asmStartAndEnd.getY();
                    refRangeStartPosition = Position.of(currentSNP.upperEndpoint().getChromosome(),currentSNP.upperEndpoint().getPosition()+1);
                }
                else {
                    //Its a normal SNP

                    //create RefRangeVCF between refRangeStartPosition and snpPosition-1
                    if(refRangeStartPosition.compareTo(Position.of(currentSNP.lowerEndpoint().getChromosome(),currentSNP.lowerEndpoint().getPosition()-1)) <= 0) {

                        Position refRangeEndPosition = Position.of(currentSNP.lowerEndpoint().getChromosome(), currentSNP.lowerEndpoint().getPosition() - 1);
                        Tuple asmStartAndEnd = determineGVCFRefBlockASMStartAndEnd(refRangeStartPosition, refRangeEndPosition, isASMCoordIncreasing,lastProcessedASMPosition);
                        anchorVariants.add(createRefRangeVC(refSequence, assemblyName, refRangeStartPosition, refRangeEndPosition, asmStartAndEnd.getX(), asmStartAndEnd.getY()));
                        //Update lastProcessedASMPosition so we know what the next refRange should start at
                        lastProcessedASMPosition = asmStartAndEnd.getY();
                    }
                    //Create SNP vcf record
                    //figure out the assembly coordinates
                    Tuple asmStartAndEnd = determineSNPASMStartAndEnd(refAndAssemblyCalls,lastProcessedASMPosition,isASMCoordIncreasing);
                    anchorVariants.add(createSNPVC(assemblyName,currentSNP.lowerEndpoint(),currentSNP.upperEndpoint(),refAndAssemblyCalls, asmStartAndEnd.getX(),asmStartAndEnd.getY()));
                    lastProcessedASMPosition = asmStartAndEnd.getY();
                    refRangeStartPosition = Position.of(currentSNP.upperEndpoint().getChromosome(),currentSNP.upperEndpoint().getPosition()+1);
                }

                //Need to check to see if the currentSNP was at the end of the mapped region
                //if so we need to update currentRange
                if(currentSNP.lowerEndpoint().compareTo(currentRange.upperEndpoint())==0 && currentRangeCounter < mappedCoordinateRanges.size()) {
                    while(currentSNP.lowerEndpoint().compareTo(currentRange.upperEndpoint())>=0) {
                        currentRange = mappedCoordinateRanges.get(currentRangeCounter);
                        currentRangeCounter++;
                        refRangeStartPosition = currentRange.lowerEndpoint();
                    }
                }
                if(currentRangeCounter >= mappedCoordinateRanges.size()){
                    //It is outside of an anchor so we should stop processing
                    break;
                }
            }
            
            // Handle the odd case where the SNP position 
            if (currentSNP.lowerEndpoint().compareTo(currentRange.upperEndpoint())==0 &&
                    refRangeStartPosition.getPosition() ==  currentRange.upperEndpoint().getPosition()+1) {
                myLogger.info("extractAnchorVariants: SNP at last bp of range: currentSNP lower " 
                    + currentSNP.lowerEndpoint().getPosition() + ", currentRange upper " + currentRange.upperEndpoint().getPosition()
                    + ", nextRangeStart: " + refRangeStartPosition.getPosition());
                
                // Just slide this up.  This is an odd case where the SNP falls as the
                // last bp of a range, and the next range starts at the next BP - no gap.
                // WIthout skipping here, we end up attempting to create a refRangeVC with
                // the start > end by 1.
                //Need to check to see if the currentSNP was at the end of the mapped region
                //if so we need to update currentRange
                if(currentSNP.lowerEndpoint().compareTo(currentRange.upperEndpoint())==0 && currentRangeCounter < mappedCoordinateRanges.size()) {
                    while(currentSNP.lowerEndpoint().compareTo(currentRange.upperEndpoint())>=0) {
                        currentRange = mappedCoordinateRanges.get(currentRangeCounter);
                        currentRangeCounter++;
                        refRangeStartPosition = currentRange.lowerEndpoint();
                    }
                }
                if(currentRangeCounter >= mappedCoordinateRanges.size()){
                    //It is outside of an anchor so we should stop processing
                    break;
                }
            }
        }
        } catch (Exception exc) {
            myLogger.debug("extractAnchorVariantContextsFromAssemblyAlignments failed ", exc);
            throw exc;
        }
        //Get a list of ranges which are anchor regions
        return anchorVariants;
    }

    /**
     * Method to split up the reference range by anchor mappings.  Basically this method will take the variant contexts and the anchor coordinates.
     * If a variant context is a reference block which is spanning the start or end of the anchor(should happen frequently if anchor ends are truly conserved), 
     * we need to break up the variant context into two adjacent reference blocks with the end point being the start or end of one of the variants.
     * This will allow for easy querying of the list of Variants when attempting to load into the db.
     * @param variantContexts
     * @param anchorMapping
     * @param refSequence
     * @return
     */
    public static List splitRefRange(List variantContexts, Map anchorMapping, GenomeSequence refSequence) {

        RangeMap positionRangeToVariantContextMap = TreeRangeMap.create();
        for(VariantContext variantContext : variantContexts) {
            positionRangeToVariantContextMap.put(Range.closed(Position.of(variantContext.getContig(),variantContext.getStart()),
                    Position.of(variantContext.getContig(),variantContext.getEnd())),
                    variantContext);
        }

        for(Integer anchorId : anchorMapping.keySet()) {
            ReferenceRange referenceRange = anchorMapping.get(anchorId);
            Position stPos = Position.of(referenceRange.chromosome(),referenceRange.start());
            Position endPos = Position.of(referenceRange.chromosome(),referenceRange.end());

            VariantContext overlappingStartVC = positionRangeToVariantContextMap.get(stPos);

            // Check to make sure this is a reference GVCF block
            if(overlappingStartVC != null && isRefBlock(overlappingStartVC) && overlappingStartVC.getStart() != stPos.getPosition()) {
                List resizedVariants = resizeRefBlock(overlappingStartVC,refSequence, stPos, true);
                //Figure out the offset for the reference
                //Resize the RefRangeVC

                //remove the overlappingStartVC from the list/map
                Range overlappingRange = positionRangeToVariantContextMap.getEntry(stPos).getKey();
                positionRangeToVariantContextMap.remove(overlappingRange);
                //Add in the new 2 reference ranges
                positionRangeToVariantContextMap.put(Range.closed(Position.of(resizedVariants.get(0).getContig(),resizedVariants.get(0).getStart()),
                        Position.of(resizedVariants.get(0).getContig(),resizedVariants.get(0).getEnd())),
                        resizedVariants.get(0));
                positionRangeToVariantContextMap.put(Range.closed(Position.of(resizedVariants.get(1).getContig(),resizedVariants.get(1).getStart()),
                        Position.of(resizedVariants.get(1).getContig(),resizedVariants.get(1).getEnd())),
                        resizedVariants.get(1));
            }

            VariantContext overlappingEndVC = positionRangeToVariantContextMap.get(endPos);

            if(overlappingEndVC != null && isRefBlock(overlappingEndVC) && overlappingEndVC.getEnd() != endPos.getPosition()) {
                List resizedVariants = resizeRefBlock(overlappingEndVC,refSequence, endPos, false);
                //Figure out the offset for the reference
                //Resize the RefRangeVC

                //remove the overlappingStartVC from the list/map
                Range overlappingRange = positionRangeToVariantContextMap.getEntry(endPos).getKey();
                positionRangeToVariantContextMap.remove(overlappingRange);
                //Add in the new 2 reference ranges
                positionRangeToVariantContextMap.put(Range.closed(Position.of(resizedVariants.get(0).getContig(),resizedVariants.get(0).getStart()),
                        Position.of(resizedVariants.get(0).getContig(),resizedVariants.get(0).getEnd())),
                        resizedVariants.get(0));
                positionRangeToVariantContextMap.put(Range.closed(Position.of(resizedVariants.get(1).getContig(),resizedVariants.get(1).getStart()),
                        Position.of(resizedVariants.get(1).getContig(),resizedVariants.get(1).getEnd())),
                        resizedVariants.get(1));
            }

        }

        Map,VariantContext> finalVariantMapping = positionRangeToVariantContextMap.asMapOfRanges();

        return finalVariantMapping.keySet().stream().map(refRange -> finalVariantMapping.get(refRange)).collect(Collectors.toList());
    }

    /**
     * Helper method to create a Reference Range VariantContext for assemblies.  The DP value
     * is defaulted to 0 for assemblies.  If this is not set, -1 is used as default in
     * GenotypeBuilder.   That causes assembly problems down the line when storing the
     * value as a byte in a long.
     * @param refSequence
     * @param assemblyTaxon
     * @param refRangeStart
     * @param refRangeEnd
     * @param asmStart
     * @param asmEnd
     * @return
     */
    public static VariantContext createRefRangeVC(GenomeSequence refSequence, String assemblyTaxon, Position refRangeStart, Position refRangeEnd, Position asmStart, Position asmEnd) {
        Allele firstRefAllele = Allele.create(refSequence.genotypeAsString(refRangeStart.getChromosome(),refRangeStart.getPosition()),true);
        Genotype gt = new GenotypeBuilder().name(assemblyTaxon).alleles(Arrays.asList(firstRefAllele)).DP(2).AD(new int[]{2,0}).make();

        if (refRangeStart.getPosition() > refRangeEnd.getPosition()) {
            throw new IllegalStateException("createRefRangeVC - start postion greater than end: start=" + 
                    refRangeStart.getPosition() + " end=" + refRangeEnd.getPosition());
        }
        VariantContextBuilder vcb = new VariantContextBuilder()
                .chr(refRangeStart.getChromosome().getName())
                .start(refRangeStart.getPosition())
                .stop(refRangeEnd.getPosition())
                .attribute("END",refRangeEnd.getPosition())

                .alleles(Arrays.asList(firstRefAllele, Allele.NON_REF_ALLELE))
                .genotypes(gt);

        if(asmStart != null &&  asmEnd != null) {
            // Set the asm coordinates as VC record attributes
            vcb = vcb.attribute("ASM_Start",asmStart.getPosition());
            vcb = vcb.attribute("ASM_End",asmEnd.getPosition());
//            if (asmStart.getPosition() > asmEnd.getPosition()) {
//                vcb = vcb.attribute("ASM_Start",asmEnd.getPosition());
//                vcb = vcb.attribute("ASM_End",asmStart.getPosition());
//            } else {
//                vcb = vcb.attribute("ASM_Start",asmStart.getPosition());
//                vcb = vcb.attribute("ASM_End",asmEnd.getPosition());
//            }
        }


        return vcb.make();
    }

    /**
     * Helper method to create a SNP Variant context for assemblies.  The DP value
     * is defaulted to 0 for assemblies.  If this is not set, -1 is used as default in
     * GenotypeBuilder.   That causes assembly problems down the line when storing the
     * value as a byte in a long.
     * @param assemblyTaxon
     * @param startPosition
     * @param endPosition
     * @param calls
     * @param asmStart
     * @param asmEnd
     * @return
     */
    public static VariantContext createSNPVC(String assemblyTaxon, Position startPosition, Position endPosition, Tuple calls, Position asmStart, Position asmEnd) {
        Allele refCall = Allele.create(calls.getX(),true);
        Allele altCall = Allele.create(calls.getY(),false);
        
        if (startPosition.getPosition() > endPosition.getPosition()) {
            throw new IllegalStateException("createSNPVC - start postion greater than end: start=" + 
                startPosition.getPosition() + " end=" + endPosition.getPosition());
        }
        //Need to add AD for Alt >0 here so that the API will work correctly.  Otherwise it is treated as missing as it thinks AD = 0,0.
        // When coming from an assembly it should always use the ALT in a SNP pos
        Genotype gt = new GenotypeBuilder().name(assemblyTaxon).alleles(Arrays.asList(altCall)).DP(2).AD(new int[]{0,2,0}).make();
         
        VariantContextBuilder vcb = new VariantContextBuilder()
                .chr(startPosition.getChromosome().getName())
                .start(startPosition.getPosition())
                .stop(endPosition.getPosition())
                .alleles(Arrays.asList(refCall,altCall, Allele.NON_REF_ALLELE))
                .genotypes(gt);


        //Only add in the Assembly start and end if they exist
        if(asmStart != null &&  asmEnd != null) {
            // Set the asm coordinates as VC record attributes
            vcb = vcb.attribute("ASM_Start",asmStart.getPosition());
            vcb = vcb.attribute("ASM_End",asmEnd.getPosition());
//            if (asmStart.getPosition() > asmEnd.getPosition()) {
//                vcb = vcb.attribute("ASM_Start",asmEnd.getPosition());
//                vcb = vcb.attribute("ASM_End",asmStart.getPosition());
//            } else {
//                vcb = vcb.attribute("ASM_Start",asmStart.getPosition());
//                vcb = vcb.attribute("ASM_End",asmEnd.getPosition());
//            }
        }

        return vcb.make();
    }

    /**
     * Helper method to easily determine the assembly start and end for a GVCF reference block
     * @param currentRange
     * @param refRangeStartPosition
     * @param isASMCoordIncreasing
     * @param lastProcessedASMPosition
     * @return
     */
    private static Tuple determineGVCFRefBlockASMStartAndEnd(Range currentRange, Position refRangeStartPosition, boolean isASMCoordIncreasing, Position lastProcessedASMPosition) {
        int lengthOfReferenceCovered = currentRange.upperEndpoint().getPosition() - refRangeStartPosition.getPosition();
        int asmStartIndex = (isASMCoordIncreasing) ? lastProcessedASMPosition.getPosition()+1 : lastProcessedASMPosition.getPosition()-1;
        int asmEndIndex = (isASMCoordIncreasing) ? asmStartIndex + lengthOfReferenceCovered : asmStartIndex - lengthOfReferenceCovered;
        Position vcASMStart = Position.of(lastProcessedASMPosition.getChromosome(),asmStartIndex);
        Position vcASMEnd = Position.of(vcASMStart.getChromosome(),asmEndIndex);

        return new Tuple<>(vcASMStart,vcASMEnd);
    }

    /**
     * Helper method to easily determine the assembly start and end for a GVCF reference block.  This version allows you to just use a reference range Start position rather than the whole range.
     * @param refRangeStartPosition
     * @param refRangeEndPosition
     * @param isASMCoordIncreasing
     * @param lastProcessedASMPosition
     * @return
     */
    private static Tuple determineGVCFRefBlockASMStartAndEnd(Position refRangeStartPosition, Position refRangeEndPosition, boolean isASMCoordIncreasing, Position lastProcessedASMPosition) {
        int lengthOfReferenceCovered = refRangeEndPosition.getPosition() - refRangeStartPosition.getPosition();
        int asmStartIndex = (isASMCoordIncreasing) ? lastProcessedASMPosition.getPosition()+1 : lastProcessedASMPosition.getPosition()-1;
        int asmEndIndex = (isASMCoordIncreasing) ? asmStartIndex + lengthOfReferenceCovered : asmStartIndex - lengthOfReferenceCovered;
        Position vcASMStart = Position.of(lastProcessedASMPosition.getChromosome(),asmStartIndex);
        Position vcASMEnd = Position.of(vcASMStart.getChromosome(),asmEndIndex);

        return new Tuple<>(vcASMStart,vcASMEnd);
    }

    /**
     * Helper method to easily determine the Assembly start and end for a SNP or INDEL
     * @param refAndAssemblyCalls
     * @param lastProcessedASMPosition
     * @param isASMCoordIncreasing
     * @return
     */
    private static Tuple determineSNPASMStartAndEnd(Tuple refAndAssemblyCalls, Position lastProcessedASMPosition, boolean isASMCoordIncreasing) {
        int lengthOfSNP = refAndAssemblyCalls.getY().length();
        int asmStartIndex = (isASMCoordIncreasing) ? lastProcessedASMPosition.getPosition()+1 : lastProcessedASMPosition.getPosition()-1;
        int asmEndIndex = (isASMCoordIncreasing) ? asmStartIndex + lengthOfSNP-1 : asmStartIndex - lengthOfSNP+1;
        Position vcASMStart = Position.of(lastProcessedASMPosition.getChromosome(),asmStartIndex);
        Position vcASMEnd = Position.of(vcASMStart.getChromosome(),asmEndIndex);

        return new Tuple<>(vcASMStart,vcASMEnd);
    }

    /**
     * Helper method to make a more readable debug statement for when a intermapped INDEL is not able to be created
     * @param firstRange
     * @param secondRange
     * @return
     */
    private static String createUserReadableStringPositionRanges(Range firstRange, Range secondRange) {
        StringBuilder sb = new StringBuilder();

        sb.append("\n\tFirstRange:[Chr: ");
        sb.append(""+firstRange.lowerEndpoint().getChromosome().getName());
        sb.append(" StPos: ");
        sb.append(""+firstRange.lowerEndpoint().getPosition());
        sb.append(" - Chr: ");
        sb.append(""+firstRange.upperEndpoint().getChromosome().getName());
        sb.append(" EndPos: ");
        sb.append(""+firstRange.upperEndpoint().getPosition());
        sb.append("\n\tSecondRange:[Chr: ");
        sb.append(""+secondRange.lowerEndpoint().getChromosome().getName());
        sb.append(" StPos: ");
        sb.append(""+secondRange.lowerEndpoint().getPosition());
        sb.append(" - Chr: ");
        sb.append(""+secondRange.upperEndpoint().getChromosome().getName());
        sb.append(" EndPos: ");
        sb.append(""+secondRange.upperEndpoint().getPosition());
        sb.append("\n");

        return sb.toString();
    }

    /**
     * Simple method to determine if the current variant context is a reference block or not.
     * @param vc
     * @return
     */
    public static boolean isRefBlock(VariantContext vc) {
      //if only 1 allele in reference but stop position is higher than start position
        if(vc.getReference().getBaseString().length()==1 && vc.getEnd() - vc.getStart() >0) {
            return true;
        }
        else {
            return false;
        }
    }

    /**
     * Method which will take a variant Context which needs to be split and will output 2 new variants while updating ASM_* annotations.
     *
     * Depending on if the splitting position is a start or end or if the assembly is increasing or decreasing, it will have to handle things differently.
     * @param vc
     * @param refSequence
     * @param positionToSplit
     * @param isStart
     * @return
     */
    public static List resizeRefBlock(VariantContext vc, GenomeSequence refSequence, Position positionToSplit, boolean isStart) {
        //Check to see if the ordering of the assembly is increasing or decreasing

        int asmStart = vc.getAttributeAsInt("ASM_Start",-1);
        int asmEnd = vc.getAttributeAsInt("ASM_End",-1);

        boolean hasASM = (asmStart!=-1 && asmEnd!=-1);

        List splitVariants = new ArrayList<>();
        Position newEndPosition = (isStart)?Position.of(vc.getContig(),positionToSplit.getPosition()-1):Position.of(vc.getContig(),positionToSplit.getPosition());
        Position newStartPosition = (isStart)?Position.of(vc.getContig(),positionToSplit.getPosition()):Position.of(vc.getContig(),positionToSplit.getPosition()+1);

        int bpOffset = newEndPosition.getPosition()-vc.getStart();

        if(hasASM) {
            //Figure out if the assembly is increasing or decreasing.  We need to check as we either need to add or subtract the number of bps we are splitting
            if(asmEnd>=asmStart) {
                Position newASMEndPosition = (isStart)?Position.of(vc.getContig(),asmStart + bpOffset):Position.of(vc.getContig(),asmStart+bpOffset);
                Position newASMStartPosition = (isStart)?Position.of(vc.getContig(),asmStart + bpOffset+1):Position.of(vc.getContig(),asmStart+bpOffset+1);

                splitVariants.add(createRefRangeVC(refSequence,vc.getSampleNamesOrderedByName().get(0),Position.of(vc.getContig(),vc.getStart()),newEndPosition,Position.of(vc.getContig(),asmStart),newASMEndPosition));
                splitVariants.add(createRefRangeVC(refSequence,vc.getSampleNamesOrderedByName().get(0),newStartPosition,Position.of(vc.getContig(),vc.getEnd()),newASMStartPosition,Position.of(vc.getContig(),asmEnd)));
            }
            else {
                Position newASMEndPosition = (isStart)?Position.of(vc.getContig(),asmStart - bpOffset + 1):Position.of(vc.getContig(),asmStart-bpOffset);
                Position newASMStartPosition = (isStart)?Position.of(vc.getContig(),asmStart - bpOffset):Position.of(vc.getContig(),asmStart-bpOffset-1);

                splitVariants.add(createRefRangeVC(refSequence,vc.getSampleNamesOrderedByName().get(0),Position.of(vc.getContig(),vc.getStart()),newEndPosition,Position.of(vc.getContig(),asmStart),newASMEndPosition));
                splitVariants.add(createRefRangeVC(refSequence,vc.getSampleNamesOrderedByName().get(0),newStartPosition,Position.of(vc.getContig(),vc.getEnd()),newASMStartPosition,Position.of(vc.getContig(),asmEnd)));
            }
        }
        else {
            splitVariants.add(createRefRangeVC(refSequence,vc.getSampleNamesOrderedByName().get(0),Position.of(vc.getContig(),vc.getStart()),newEndPosition,null,null));
            splitVariants.add(createRefRangeVC(refSequence,vc.getSampleNamesOrderedByName().get(0),newStartPosition,Position.of(vc.getContig(),vc.getEnd()),null,null));
        }

        return splitVariants;
    }
    
    /**
     * Find the lowest reference start entry from a mummer snp file list of entries.
     * There could be more than 1 string of indels for this asm snp.  Find the start
     * and end of the string of indels whose positions overlap the reference position
     * for the SNP in question.  Return the start position and the length of this string
     * of indels
     * @param refSnpPos
     * @param snpEntries
     * @return
     */
    public static Tuple findRefIndelStart(int refSnpPos,Collection snpEntries){
        int refStart = Integer.MAX_VALUE;
        List refPosList = new ArrayList();
        try {
            for (String currentEntry : snpEntries) {
                int tabIndex1 = currentEntry.indexOf("\t");
                int refPos = Integer.parseInt(currentEntry.substring(0,tabIndex1));
                refPosList.add(refPos);               
            }
        } catch (Exception exc) {
            throw new IllegalArgumentException("AssemblyProcessingUtils:findRefIndelStart: error finding ref start: " + exc.getMessage());
        }

        Collections.sort(refPosList); 
        
        // Walk through the sorted list, find the string of indels
        // that contain the reference position where we have our SNP.
        // Due to multiple alignmentsin the mummer coords file, the same assembly position
        // can map as a deletion to different places on the reference.
        int prevRefPos = refPosList.get(0);
        int curRefPos = prevRefPos;
        refStart = prevRefPos;
        int len=1;
 
        for (int refIdx = 1; refIdx < refPosList.size(); refIdx++) {
            curRefPos = refPosList.get(refIdx);            
            if ((curRefPos-prevRefPos) != 1){ // a gap in positions means multiple strings of indels
                if (curRefPos <= refSnpPos) {
                    refStart = curRefPos; // start counting from this gap
                    len = 0; // this is incremented to 1 below
                } else { // reference position falls within the previous string of indels                   
                    return new Tuple(refStart,len);
                }
            } 
            len++;
            prevRefPos = curRefPos;
        }
        // the full list has been processed, return the latest SNP start position
        // and indel length 
        return new Tuple(refStart,len);
    }
    
    // Find the lowest assembly (query) start entry from a mummer snp file list of entries
    public static int findAsmIndelStart(Collection snpEntries) {
        int asmStart = Integer.MAX_VALUE;
        try {
            for (String currentEntry : snpEntries) {
                int tabIndex1 = currentEntry.indexOf("\t");
                int tabIndex2 = currentEntry.indexOf("\t",tabIndex1+1);
                int tabIndex3 = currentEntry.indexOf("\t",tabIndex2+1);
                int tabIndex4 = currentEntry.indexOf("\t",tabIndex3+1);
                int tempAsm = Integer.parseInt(currentEntry.substring(tabIndex3+1, tabIndex4));
                if (tempAsm < asmStart) asmStart = tempAsm;
            }
        } catch (Exception exc) {
            throw new IllegalArgumentException("AssemblyProcessingUtils:findAsmIndelStart: error finding ref start: " + exc.getMessage());
        }

        return asmStart;
    }

    /**
     * Create a RangeSet from a map of ranges
     * @param anchorEntries
     * @return
     */
    public static RangeSet  getAnchorRangeSet(Map anchorEntries) {

        RangeSet anchors = TreeRangeSet.create();
        // For each value on the map, create an entry in the anchors rangeSet.
        // RangeSet coalesces entries, but there should be no overlaps in the anchors.
        for (ReferenceRange refRange : anchorEntries.values()) {
            anchors.add(Range.closed(Position.of(refRange.chromosome(),refRange.start()),Position.of(refRange.chromosome(),refRange.end())));
        }
        return anchors;
    }

    /**
     * Find all reference ranges for a particular chromosome
     * Query pulls all reference ranges for that chrom from the reference_ranges table.
     * The assembly should be processed against all defined reference ranges.
     *
     * @param database
     * @param chrom
     * @return A map of all ReferenceRange data for a specific chromosome.  The map is keyed by referenceRangeID.
     */
    public static Map referenceRangeForChromMap(Connection database,  String chrom) {

        if (database == null) {
            throw new IllegalArgumentException("AssemblyHaplotypesPlugin: referenceRangeForChromMap: Must specify database connection.");
        }

        long time = System.nanoTime();

        // Create method name for querying initial ref region and inter-region ref_range_group method ids
        String refLine = CreateGraphUtils.getRefLineName( database);

        // We need a method_name for the ReferenceRange object.  But it will not be used in
        // the assembly processing code.  Here, we want to grab all ranges.

        // Ideally, the methodName would be the method stored in the *_load_data.txt file provided
        // when the reference intervals were loaded.  We don't have that data unless we add additional
        // parameters.  Since this information is only needed for the ReferenceRange object, and that
        // data as used by the assembly code does not make use of the methodName field, we add a dummy value
        String methodName = "referenceIntervals"; // this is a dummy name, will be not used in ReferenceRange in Assembly code

        // Grab all the ranges stored in the reference_range table.
        StringBuilder querySB = new StringBuilder();
        querySB.append("select reference_ranges.ref_range_id, chrom, range_start, range_end from reference_ranges where chrom='");
        querySB.append(chrom);
        querySB.append("'");

        String query = querySB.toString();

        myLogger.info("referenceRangesForChromMap: query statement: " + query);

        ImmutableMap.Builder builder = ImmutableMap.builder();
        try (ResultSet rs = database.createStatement().executeQuery(query)) {

            while (rs.next()) {
                int id = rs.getInt("ref_range_id");
                String chromosome = rs.getString("chrom");
                int start = rs.getInt("range_start");
                int end = rs.getInt("range_end");
                builder.put(id, new ReferenceRange(refLine, Chromosome.instance(chromosome), start, end, id, methodName));
            }

        } catch (Exception exc) {
            myLogger.debug(exc.getMessage(), exc);
            throw new IllegalStateException("AssemblyHaplotypesPlugin: referenceRanges: Problem querying the database: " + exc.getMessage());
        }

        Map result = builder.build();

        myLogger.info("referenceRangeForChromMap: number of reference ranges: " + result.size());
        myLogger.info("referenceRangeForChromMap: time: " + ((System.nanoTime() - time) / 1e9) + " secs.");

        return result;
    }

    /**
     * load initial genotype and method data to the database
     * @param assemblyName
     * @param method
     * @param dbConn
     * @return
     */
    public static Tuple loadInitialAssemblyData(String assemblyName, String method, int clusterSize, Connection dbConn,
                                                                 Map pluginParams, List fastaInfo, boolean isTestMethod) {
        int ploidy = 1;
        boolean is_ref = false;
        int hapNumber = 0;
        String line_data = method + ": Assembly aligned via mummer4 with clusterSize " + clusterSize + "  and --mum parameters";
        boolean genesPhased = true;
        boolean chromsPhased = true;
        float conf = 1;


        // Get PHGdbAccess connection
        PHGDataWriter phg = new PHGdbAccess(dbConn);

        // add the assembly genotype/haplotype info to DB
        GenoHaploData ghd = new GenoHaploData(ploidy,is_ref,assemblyName,line_data, genesPhased, chromsPhased, hapNumber, conf);
        phg.putGenoAndHaploTypeData(ghd);

        // Get the assembly genoid created in call to putGenoAndHaplotypeData()
        // Needed for adding assembly genome_file_data entry
        int genoid = phg.getGenoidFromLine(assemblyName);

        String asm_genome_path = fastaInfo.get(4);
        String asm_file = fastaInfo.get(5);

        // This id needed for the haplotypes table
        int genomeDataFileID = phg.putGenomeFileData(asm_genome_path,asm_file,genoid,DBLoadingUtils.GenomeFileType.FASTA.getValue());

        // Load the gamete_groups and gamete_haplotypes table
        String nameWithHap = assemblyName + "_" + hapNumber; // ref line is always hap0
        List gameteGroupList = new ArrayList();
        gameteGroupList.add(nameWithHap);
        phg.putGameteGroupAndHaplotypes(gameteGroupList);

        // Put the method data - identifies for each assembly how the anchors were created
        myLogger.info("loadInitialAssemblyData: calling putMethod with method name " + method);
        pluginParams.put("notes",line_data);

        DBLoadingUtils.MethodType methodType = DBLoadingUtils.MethodType.ASSEMBLY_HAPLOTYPES;
        if (isTestMethod) {
            methodType = DBLoadingUtils.MethodType.TEST_ASSEMBLY_HAPLOTYPES;
        }

        int methodId = phg.putMethod(method, methodType,pluginParams); // line_data and method_details are the same in this plugin

        // Get gamete_grp_id to return. This is used when loading the assembly haploytpe data
        int gamete_grp_id = phg.getGameteGroupIDFromTaxaList(gameteGroupList);

        // do NOT close phg here - it will drop the connection needed for later processing
        return new Tuple (gamete_grp_id,genomeDataFileID);
    }

    /**
     * Load the assembly haplotype data to the database
     * @param gamete_grp_id
     * @param method
     * @param dbConn
     * @param anchorSequences
     */

    public static void loadAssemblyDataToDB(int gamete_grp_id,  String method, Connection dbConn, Map anchorSequences,
                                            String chromosome, int genomeFileId) {
        Long time = System.nanoTime();

        // Get PHGdbAccess connection
        PHGDataWriter phg = new PHGdbAccess(dbConn);

        int method_id = phg.getMethodIdFromName(method);

        // Load the haplotypes data to haplotypes table
        // THis is called from the original AssemblyHaplotypesMultiThreadPlugin,
        // which loads the genome_file_data table prior to loading haplotypes.
        // Based on this, we send null for asmServerDir and asmFasta - no need to populate them here.
        myLogger.info("loadAssemblyDataToDB: starting putHaplotypesData for gamete_grp_id " + gamete_grp_id + ", chr " + chromosome);
        phg.putHaplotypesData(gamete_grp_id, method_id, anchorSequences, chromosome, genomeFileId, -1);

        // phg connection is closed in createAndLoadAssemblyData()
        myLogger.info(" loadAssemblyDataToDB time to load haplotypes for gamete_grp_id " + gamete_grp_id + ", chr " + chromosome + ": " + (System.nanoTime()-time)/1e9 + " seconds");
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy