All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.walkers.fasta.FastaReferenceMaker Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.walkers.fasta;

import com.google.common.primitives.Bytes;
import htsjdk.samtools.reference.FastaReferenceWriter;
import htsjdk.samtools.reference.FastaReferenceWriterBuilder;
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.ReferenceWalker;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import picard.cmdline.programgroups.ReferenceProgramGroup;

import java.io.IOException;
import java.nio.file.Path;

/**
 * Create a subset of a FASTA reference sequence
 *
 * 

This tool creates a new reference in FASTA format consisting of only those positions or intervals * provided in the input data set. The output format can be partially controlled using the provided command-line * arguments. Specify intervals with the usual -L argument to output only the reference bases within your intervals. * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a * separate fasta sequence (named numerically in order).

* *

Input

*

* The reference and requested intervals. *

* *

Output

*

* A fasta file representing the requested intervals. Each interval has a description line starting with a greater-than (">") symbol followed by sequence data. * The description begins with the contig name followed by the beginning position on the contig. *

 * For example, the fasta file for contig 1 and intervals 1:3-1:4 and 1:6-1:9
 * >1 1:3
 * AT
 * >1 1:6
 * GGGG
 * 
*

* *

Usage example

*
 * gatk FastaReferenceMaker \
 *   -R reference.fasta \
 *   -O output.fasta \
 *   -L input.intervals
 * 
* */ @DocumentedFeature @CommandLineProgramProperties( summary = "Create snippets of a fasta file by subsetting to an interval list", oneLineSummary = "Create snippets of a fasta file", programGroup = ReferenceProgramGroup.class ) public class FastaReferenceMaker extends ReferenceWalker { @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "Path to write the output fasta to") protected String output; public static final String LINE_WIDTH_LONG_NAME = "line-width"; @Argument(fullName= LINE_WIDTH_LONG_NAME, doc="Maximum length of sequence to write per line", optional=true) public int basesPerLine = FastaReferenceWriter.DEFAULT_BASES_PER_LINE; protected FastaReferenceWriter writer; private int contigCount = 0; private int currentSequenceStartPosition = 0; private SimpleInterval lastPosition = null; private ByteArrayList sequence = new ByteArrayList(10000); @Override public void onTraversalStart() { final Path path = IOUtils.getPath(output); try { writer = new FastaReferenceWriterBuilder() .setFastaFile(path) .setBasesPerLine(basesPerLine) .build(); } catch (IOException e) { throw new UserException.CouldNotCreateOutputFile("Couldn't create " + output + ", encountered exception: " + e.getMessage(), e); } } @Override public void apply(ReferenceContext referenceContext, ReadsContext read, FeatureContext featureContext) { addToReference(referenceContext.getInterval(), referenceContext.getBase()); } protected void addToReference(SimpleInterval interval, byte base) { advancePosition(interval); sequence.add(base); } protected void advancePosition(SimpleInterval interval) { if(lastPosition == null ){ initializeNewSequence(interval); } else if ( !lastPosition.withinDistanceOf(interval, 1)) { finalizeSequence(); initializeNewSequence(interval); } lastPosition = interval; } private void finalizeSequence() { final String description = lastPosition.getContig() + ":" + currentSequenceStartPosition + "-" + lastPosition.getEnd(); try { writer.appendSequence(String.valueOf(contigCount), description, basesPerLine, Bytes.toArray(sequence)); } catch (IOException e) { throw new UserException.CouldNotCreateOutputFile("Failed while writing " + output + ".", e); } } private void initializeNewSequence(SimpleInterval interval) { lastPosition = interval; contigCount++; currentSequenceStartPosition = lastPosition.getStart(); sequence.clear(); } @Override public Object onTraversalSuccess(){ finalizeSequence(); return null; } @Override public void closeTool() { super.closeTool(); try{ if( writer != null ) { writer.close(); } } catch (IllegalStateException e){ //sink this } catch (IOException e) { throw new UserException("Failed to write fasta due to " + e.getMessage(), e); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy