net.maizegenetics.pangenome.fastaExtraction.ExtractFastaUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.fastaExtraction;
import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;
import htsjdk.variant.variantcontext.VariantContext;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.api.ReferenceRange;
import java.util.List;
import java.util.Map;
/**
* Class which holds various utilities for extracting fastas
* Created by zrm22 on 2/8/18.
*/
public class ExtractFastaUtils {
/**
* Method to extract out a fasta sequence given a list of variantContexts
* This will fill in Ns whenever we do not have a variant context record for a given position
* @param variants
* @param referenceRange
* @return
*/
public static String extractFastaSequence(List variants, ReferenceRange referenceRange) {
StringBuilder builder = new StringBuilder();
RangeMap rangeToAlleleMap = createMapping(variants);
for(int i = referenceRange.start(); i < referenceRange.end() +1; i++) {
Map.Entry,String> alleleEntry = rangeToAlleleMap.getEntry(Position.of(referenceRange.chromosome(),i));
if(alleleEntry == null) {
builder.append("N");
}
else {
String alleleString = alleleEntry.getValue();
if(alleleString.equals(".")) {
builder.append("N");
}
else {
builder.append(alleleEntry.getValue());
}
i = alleleEntry.getKey().upperEndpoint().getPosition(); //Shift up i to the end of the current vcf block.
}
}
return builder.toString();
}
/**
* Simple method to unwrap a variantContext to be a RangeMap of Positions and their allele strings.
* @param variants
* @return
*/
private static RangeMap createMapping(List variants) {
RangeMap rangeToAlleleMap = TreeRangeMap.create();
for(VariantContext vc : variants) {
rangeToAlleleMap.put(Range.closed(Position.of(vc.getContig(),vc.getStart()),
Position.of(vc.getContig(),vc.getEnd())),
vc.getGenotype(0).getAllele(0).getBaseString());
}
return rangeToAlleleMap;
}
}