All Downloads are FREE. Search and download functionalities are using the official Maven repository.

picard.vcf.processor.VariantProcessor Maven / Gradle / Ivy

/*
 * The MIT License
 *
 * Copyright (c) 2015 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package picard.vcf.processor;

import htsjdk.samtools.util.IntervalList;
import htsjdk.variant.variantcontext.VariantContext;

import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

/**
 * Describes an object that processes variants and produces a result.
 * 

* A consumer typically builds an instance of this class via {@link Builder}, providing it the appropriate {@link AccumulatorGenerator} and * {@link ResultMerger}, then calls {@link #process()} to obtain the RESULT of the processing. *

* Future work...? * - Make more efficient for the single-thread case. * - A {@link VcfFileSegmentGenerator} that is based on an interval list, so that segments' span a constant-size total-base-count overlap with * the interval list (or something in that vein). * * @author mccowan */ public class VariantProcessor> { /** * Handles {@link VariantContext}s, and accumulates their data in some fashion internally. * A call to {@link #result()} produces an embodiment of the results of this processing (which may or may not be the accumulator itself). * * @author mccowan */ public static interface Accumulator { void accumulate(final VariantContext vc); RESULT result(); } /** * Generates instances of {@link Accumulator}s. * * @author mccowan */ public static interface AccumulatorGenerator, RESULT> { ACCUMULATOR build(); } /** * Takes a collection of results produced by {@link Accumulator#result()} and merges them into a single RESULT. * * @author mccowan */ public static interface ResultMerger { RESULT merge(final Collection resultsToReduce); } final ResultMerger merger; final VariantAccumulatorExecutor executor; VariantProcessor( final ResultMerger merger, final VariantAccumulatorExecutor executor) { this.merger = merger; this.executor = executor; } public RESULT process() { executor.start(); try { executor.awaitCompletion(); } catch (final InterruptedException e) { throw new RuntimeException(e); } final List results = new ArrayList(); for (final ACCUMULATOR a : executor.accumulators()) { results.add(a.result()); } return merger.merge(results); } /** Simple builder of {@link VariantProcessor}s. */ public static class Builder, R> { final AccumulatorGenerator accumulatorGenerator; ResultMerger reducer = null; IntervalList intervals = null; final List inputs = new ArrayList(); int threadCount = 1; Builder(final AccumulatorGenerator accumulatorGenerator) { this.accumulatorGenerator = accumulatorGenerator; } public Builder multithreadingBy(final int threadCount) { if (threadCount < 1) throw new IllegalArgumentException("Multithreading value must exceed 0."); this.threadCount = threadCount; return this; } public Builder withInput(final File... vcfs) { Collections.addAll(inputs, vcfs); return this; } public Builder limitingProcessedRegionsTo(final IntervalList intervals) { if (this.intervals != null) throw new IllegalStateException("Already provided an interval list."); this.intervals = IntervalList.copyOf(intervals); return this; } public Builder combiningResultsBy(final ResultMerger reducer) { if (this.reducer != null) throw new IllegalStateException("Already provided a reducer."); this.reducer = reducer; return this; } public static , R> Builder generatingAccumulatorsBy(final AccumulatorGenerator generator) { return new Builder(generator); } public VariantProcessor build() { if (inputs.isEmpty()) throw new IllegalStateException("You need to provided some inputs before building."); if (reducer == null) throw new IllegalStateException("You must provide a reducer before building."); return new VariantProcessor(reducer, new VariantAccumulatorExecutor.MultiThreadedChunkBased( threadCount, composeVcfIteratorProducerFromBuilderArguments(), accumulatorGenerator )); } private VariantIteratorProducer composeVcfIteratorProducerFromBuilderArguments() { /** * Be careful; if we pick chunkings that are highly granular (e.g., a chunking based on each interval in an exome-like * interval list), it will result in a {@link htsjdk.variant.vcf.VCFFileReader#query(String, int, int)} call * per tiny chunk, which is very non-performant due to some implementations of that method. */ final VariantIteratorProducer ret; if (intervals == null) { ret = VariantIteratorProducer.byHundredMegabaseChunks(inputs); } else { ret = VariantIteratorProducer.byHundredMegabaseChunksWithOnTheFlyFilteringByInterval(inputs, intervals); } return ret; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy