All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.pangenome.fastaExtraction.ExtractFastaUtils Maven / Gradle / Ivy

There is a newer version: 1.10
Show newest version
package net.maizegenetics.pangenome.fastaExtraction;

import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;
import htsjdk.variant.variantcontext.VariantContext;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.api.ReferenceRange;

import java.util.List;
import java.util.Map;

/**
 * Class which holds various utilities for extracting fastas
 * Created by zrm22 on 2/8/18.
 */
public class ExtractFastaUtils {

    /**
     * Method to extract out a fasta sequence given a list of variantContexts
     * This will fill in Ns whenever we do not have a variant context record for a given position
     * @param variants
     * @param referenceRange
     * @return
     */
    public static String extractFastaSequence(List variants, ReferenceRange referenceRange) {
        StringBuilder builder = new StringBuilder();
        RangeMap rangeToAlleleMap = createMapping(variants);

        for(int i = referenceRange.start(); i < referenceRange.end() +1; i++) {
            Map.Entry,String> alleleEntry = rangeToAlleleMap.getEntry(Position.of(referenceRange.chromosome(),i));
            if(alleleEntry == null) {
                builder.append("N");
            }
            else {
                String alleleString = alleleEntry.getValue();
                if(alleleString.equals(".")) {
                    builder.append("N");
                }
                else {
                    builder.append(alleleEntry.getValue());
                }
                i = alleleEntry.getKey().upperEndpoint().getPosition(); //Shift up i to the end of the current vcf block.
            }


        }

        return builder.toString();
    }

    /**
     * Simple method to unwrap a variantContext to be a RangeMap of Positions and their allele strings.
     * @param variants
     * @return
     */
    private static RangeMap createMapping(List variants) {
        RangeMap rangeToAlleleMap = TreeRangeMap.create();

        for(VariantContext vc : variants) {
            rangeToAlleleMap.put(Range.closed(Position.of(vc.getContig(),vc.getStart()),
                                                Position.of(vc.getContig(),vc.getEnd())),
                                vc.getGenotype(0).getAllele(0).getBaseString());
        }

        return rangeToAlleleMap;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy