net.maizegenetics.pangenome.processAssemblyGenomes.ResizeRefBlockPlugin Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phg Show documentation
Show all versions of phg Show documentation
PHG - Practical Haplotype Graph
package net.maizegenetics.pangenome.processAssemblyGenomes;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFFileReader;
import net.maizegenetics.dna.map.Chromosome;
import net.maizegenetics.dna.map.GenomeSequence;
import net.maizegenetics.dna.map.GenomeSequenceBuilder;
import net.maizegenetics.dna.map.Position;
import net.maizegenetics.pangenome.hapCalling.HapCallingUtils;
import net.maizegenetics.plugindef.AbstractPlugin;
import net.maizegenetics.plugindef.DataSet;
import net.maizegenetics.plugindef.GeneratePluginCode;
import net.maizegenetics.plugindef.PluginParameter;
import org.apache.log4j.Logger;
import javax.swing.*;
import java.awt.Frame;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
public class ResizeRefBlockPlugin extends AbstractPlugin {
private static final Logger myLogger = Logger.getLogger(ResizeRefBlockPlugin.class);
private PluginParameter myVCFFile = new PluginParameter.Builder("normVCF", null, String.class).guiName("Normalized VCF File").required(true).inFile()
.description("Normalized VCF file which needs to be corrected").build();
private PluginParameter myVCFFileCorrected = new PluginParameter.Builder("correctedVCF", null, String.class).guiName("Output VCF File").required(true).outFile()
.description("Corrected RefBlock VCF file").build();
private PluginParameter myRef = new PluginParameter.Builder("ref",null, String.class).guiName("Reference").required(true).description("Reference fasta File").inFile().build();
private PluginParameter myTaxon = new PluginParameter.Builder("taxon",null, String.class).guiName("Taxon Name").required(true).description("Name to be assigned for the taxon").build();
public ResizeRefBlockPlugin(Frame parentFrame) {
super(parentFrame, false);
}
public ResizeRefBlockPlugin(Frame parentFrame, boolean isInteractive) {
super(parentFrame, isInteractive);
}
@Override
public DataSet processData(DataSet input) {
resizeReferenceBlocks(vCFFile(),vCFFileCorrected());
return null;
}
/**
* Method to resize the Reference blocks contained in a GVCF file.
*
* If the Indel now overlaps a Reference Block, we resize the reference block END annotation to be 1 less than the start of the indel
* If there is a GAP caused by the indel being moved, we resize the next Reference Block to be 1 plus the end of the indel.
* @param inputFile
* @param outputFile
*/
private void resizeReferenceBlocks(String inputFile, String outputFile) {
try(VCFFileReader inputVCFReader = new VCFFileReader(new File(inputFile),false)) {
List variants = inputVCFReader.iterator().stream().collect(Collectors.toList());
GenomeSequence gs = GenomeSequenceBuilder.instance(ref());
List adjustedVariants = new ArrayList<>();
adjustedVariants.add(variants.get(0));
int errorCount = 0;
for(int i = 1; i < variants.size()-1; i++) {
VariantContext currentVC = variants.get(i);
VariantContext prevVC = adjustedVariants.get(adjustedVariants.size()-1);
VariantContext nextVC = variants.get(i+1);
//Check to see if current Variant object is an indel
//if so we may need to resize the reference block
if(currentVC.isIndel()) {
//Check to see if its start site overlaps the previous entry
//If So we need to resize the previous block
//If not the indel must have already been in the leftmost position
if(currentVC.getStart() <= prevVC.getEnd()) {
if(!AssemblyProcessingUtils.isRefBlock(prevVC)) {
myLogger.warn("Overlapping indel with a nonRefBlock \n"+prevVC+"\n"+currentVC);
errorCount++;
continue;
}
//Pull out the previous variant
adjustedVariants.remove(adjustedVariants.size()-1);
if(prevVC.getStart() < currentVC.getStart()-1) {
adjustedVariants.add(createRefRangeVC(prevVC.getSampleNamesOrderedByName().get(0), Position.of(prevVC.getContig(), prevVC.getStart()), Position.of(prevVC.getContig(), currentVC.getStart() - 1), prevVC.getReference()));
}else {
myLogger.warn("Indel completely overlapping previous ref block, deleting previous ref \n"+prevVC+"\n"+currentVC);
errorCount++;
}
}
adjustedVariants.add(currentVC);
//check to see if there is a gap caused by shifting up the indel
if(currentVC.getEnd() < nextVC.getStart()-1) {
adjustedVariants.add(createRefRangeVC(taxon(),
Position.of(currentVC.getContig(),currentVC.getEnd()+1),
Position.of(nextVC.getContig(),nextVC.getEnd()),
Allele.create(gs.genotypeAsString(Chromosome.instance(currentVC.getContig()),currentVC.getEnd()+1),true)));
i++;
}
}
else {
adjustedVariants.add(currentVC);
}
}
myLogger.info("Number of errors:"+errorCount);
HapCallingUtils.writeVariantContextsToVCF(adjustedVariants,outputFile,ref(),taxon());
}
catch(Exception e) {
myLogger.debug("Error resizing the VC Ref Blocks",e);
throw new IllegalStateException("Error resizing the VCF Reference Blocks.",e);
}
}
/**
* Helper method to create a Reference Block VariantContext for a given range
* @param assemblyTaxon
* @param refRangeStart
* @param refRangeEnd
* @param refAllele
* @return
*/
private static VariantContext createRefRangeVC( String assemblyTaxon, Position refRangeStart, Position refRangeEnd, Allele refAllele) {
Genotype gt = new GenotypeBuilder().name(assemblyTaxon).alleles(Arrays.asList(refAllele)).make();
if (refRangeStart.getPosition() > refRangeEnd.getPosition()) {
throw new IllegalStateException("createRefRangeVC - start postion greater than end: start=" +
refRangeStart.getPosition() + " end=" + refRangeEnd.getPosition());
}
VariantContextBuilder vcb = new VariantContextBuilder()
.chr(refRangeStart.getChromosome().getName())
.start(refRangeStart.getPosition())
.stop(refRangeEnd.getPosition())
.attribute("END",refRangeEnd.getPosition())
.alleles(Arrays.asList(refAllele))
.genotypes(gt);
return vcb.make();
}
@Override
public ImageIcon getIcon() {
return null;
}
@Override
public String getButtonName() {
return ("Resize RefBlocks");
}
@Override
public String getToolTipText() {
return ("Resize Reference block errors caused by indel normalization");
}
// The following getters and setters were auto-generated.
// Please use this method to re-generate.
//
// public static void main(String[] args) {
// GeneratePluginCode.generate(ResizeRefBlockPlugin.class);
// }
/**
* Normalized VCF file which needs to be corrected
*
* @return Normalized VCF File
*/
public String vCFFile() {
return myVCFFile.value();
}
/**
* Set Normalized VCF File. Normalized VCF file which
* needs to be corrected
*
* @param value Normalized VCF File
*
* @return this plugin
*/
public ResizeRefBlockPlugin vCFFile(String value) {
myVCFFile = new PluginParameter<>(myVCFFile, value);
return this;
}
/**
* Corrected RefBlock VCF file
*
* @return Output VCF File
*/
public String vCFFileCorrected() {
return myVCFFileCorrected.value();
}
/**
* Set Output VCF File. Corrected RefBlock VCF file
*
* @param value Output VCF File
*
* @return this plugin
*/
public ResizeRefBlockPlugin vCFFileCorrected(String value) {
myVCFFileCorrected = new PluginParameter<>(myVCFFileCorrected, value);
return this;
}
/**
* Reference fasta File
*
* @return Reference
*/
public String ref() {
return myRef.value();
}
/**
* Set Reference. Reference fasta File
*
* @param value Reference
*
* @return this plugin
*/
public ResizeRefBlockPlugin ref(String value) {
myRef = new PluginParameter<>(myRef, value);
return this;
}
/**
* Name to be assigned for the taxon
*
* @return Taxon Name
*/
public String taxon() {
return myTaxon.value();
}
/**
* Set Taxon Name. Name to be assigned for the taxon
*
* @param value Taxon Name
*
* @return this plugin
*/
public ResizeRefBlockPlugin taxon(String value) {
myTaxon = new PluginParameter<>(myTaxon, value);
return this;
}
}