All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.samtools.SamReaderFactory Maven / Gradle / Ivy

/*
 * The MIT License
 *
 * Copyright (c) 2016 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package htsjdk.samtools;

import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.sra.SRAAccession;
import htsjdk.samtools.util.*;
import htsjdk.samtools.util.zip.InflaterFactory;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Path;
import java.util.Collections;
import java.util.EnumSet;
import java.util.function.Function;
import java.util.zip.GZIPInputStream;

/**
 * 

Describes the functionality for producing {@link SamReader}, and offers a handful of static generators.

*
 *     SamReaderFactory.makeDefault().open(new File("/my/bam.bam"));
 * 
*

Example: Configure a factory

*
 *      final {@link SamReaderFactory} factory =
 *          SamReaderFactory.makeDefault()
 *              .enable({@link Option#INCLUDE_SOURCE_IN_RECORDS}, {@link Option#VALIDATE_CRC_CHECKSUMS})
 *              .validationStringency({@link ValidationStringency#SILENT});
 *
 * 
*

Example: Open two bam files from different sources, using different options

*
 *     final {@link SamReaderFactory} factory =
 *          SamReaderFactory.makeDefault()
 *              .enable({@link Option#INCLUDE_SOURCE_IN_RECORDS}, {@link Option#VALIDATE_CRC_CHECKSUMS})
 *              .validationStringency({@link ValidationStringency#SILENT});
 *
 *     // File-based bam
 *     final {@link SamReader} fileReader = factory.open(new File("/my/bam.bam"));
 *
 *     // HTTP-hosted BAM with index from an arbitrary stream
 *     final SeekableStream myBamIndexStream = ...
 *     final {@link SamInputResource} resource =
 *          {@link SamInputResource}.of(new URL("http://example.com/data.bam")).index(myBamIndexStream);
 *     final {@link SamReader} complicatedReader = factory.open(resource);
 * 
* * @author mccowan */ public abstract class SamReaderFactory { private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY; abstract public SamReader open(final File file); /** * Open the specified path (without using any wrappers). * * @param path the SAM or BAM file to open. */ public SamReader open(final Path path) { return open(path, null, null); } /** * Open the specified path, using the specified wrappers for prefetching/caching. * * @param path the SAM or BAM file to open * @param dataWrapper the wrapper for the data (or null for none) * @param indexWrapper the wrapper for the index (or null for none) */ public SamReader open(final Path path, Function dataWrapper, Function indexWrapper) { final SamInputResource r = SamInputResource.of(path, dataWrapper); final Path indexMaybe = SamFiles.findIndex(path); if (indexMaybe != null) r.index(indexMaybe, indexWrapper); return open(r); } abstract public SamReader open(final SamInputResource resource); abstract public ValidationStringency validationStringency(); abstract public CRAMReferenceSource referenceSource(); /** Set this factory's {@link htsjdk.samtools.SAMRecordFactory} to the provided one, then returns itself. */ abstract public SamReaderFactory samRecordFactory(final SAMRecordFactory samRecordFactory); /** * Set this factory's {@link htsjdk.samtools.util.zip.InflaterFactory} to the provided one, then returns itself. * Note: The inflaterFactory provided here is only used for BAM decompression implemented with {@link BAMFileReader}, * it is not used for CRAM or other formats like a gzipped SAM file. */ abstract public SamReaderFactory inflaterFactory(final InflaterFactory inflaterFactory); /** Enables the provided {@link Option}s, then returns itself. */ abstract public SamReaderFactory enable(final Option... options); /** Disables the provided {@link Option}s, then returns itself. */ abstract public SamReaderFactory disable(final Option... 
options); /** Sets a specific Option to a boolean value. * */ abstract public SamReaderFactory setOption(final Option option, boolean value); /** Sets the specified reference sequence * */ abstract public SamReaderFactory referenceSequence(File referenceSequence); /** Sets the specified reference sequence. */ abstract public SamReaderFactory referenceSequence(Path referenceSequence); /** Sets the specified reference sequence * */ abstract public SamReaderFactory referenceSource(CRAMReferenceSource referenceSequence); /** Utility method to open the file get the header and close the file */ abstract public SAMFileHeader getFileHeader(File samFile); /** Utility method to open the file get the header and close the file */ abstract public SAMFileHeader getFileHeader(Path samFile); /** Reapplies any changed options to the reader * */ abstract public void reapplyOptions(SamReader reader); /** Set this factory's {@link ValidationStringency} to the provided one, then returns itself. */ abstract public SamReaderFactory validationStringency(final ValidationStringency validationStringency); /** Set whether readers created by this factory will use asynchronous IO. * If this methods is not called, this flag will default to the value of {@link Defaults#USE_ASYNC_IO_READ_FOR_SAMTOOLS}. * Note that this option may not be applicable to all readers returned from this factory. * Returns the factory itself. 
*/ abstract public SamReaderFactory setUseAsyncIo(final boolean asynchronousIO); private static SamReaderFactoryImpl DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); public static void setDefaultValidationStringency(final ValidationStringency defaultValidationStringency) { SamReaderFactory.defaultValidationStringency = defaultValidationStringency; // The default may have changed, so reset the default SamReader DEFAULT = new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); } /** Creates a copy of the default {@link SamReaderFactory}. */ public static SamReaderFactory makeDefault() { return SamReaderFactoryImpl.copyOf(DEFAULT); } /** * Creates an "empty" factory with no enabled {@link Option}s, {@link ValidationStringency#DEFAULT_STRINGENCY}, * no path wrapper, and {@link htsjdk.samtools.DefaultSAMRecordFactory}. */ public static SamReaderFactory make() { return new SamReaderFactoryImpl(EnumSet.noneOf(Option.class), ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance(), BlockGunzipper.getDefaultInflaterFactory()); } private static class SamReaderFactoryImpl extends SamReaderFactory { private final static Log LOG = Log.getInstance(SamReaderFactory.class); private final EnumSet

* This option increases memory footprint slightly per {@link htsjdk.samtools.SAMRecord}. */ INCLUDE_SOURCE_IN_RECORDS { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); } @Override void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); } @Override void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { underlyingReader.enableFileSource(reader, true); } }, /** * The factory's {@link SamReader}s' {@link SamReader#indexing()}'s calls to {@link SamReader.Indexing#getIndex()} will produce * {@link BAMIndex}es that do some caching in memory instead of reading the index from the disk for each query operation. * * @see SamReader#indexing() * @see htsjdk.samtools.SamReader.Indexing#getIndex() */ CACHE_FILE_BASED_INDEXES { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexCaching(true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } @Override void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexCaching(true); } @Override void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexCaching(true); } }, /** * The factory's {@link SamReader}s' will not use memory mapping for accessing index files (which is used by default). This is * slower but more scalable when accessing large numbers of BAM files sequentially. 
* * @see SamReader#indexing() * @see htsjdk.samtools.SamReader.Indexing#getIndex() */ DONT_MEMORY_MAP_INDEX { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexMemoryMapping(false); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } @Override void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexMemoryMapping(false); } @Override void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { underlyingReader.enableIndexMemoryMapping(false); } }, /** * Eagerly decode {@link htsjdk.samtools.SamReader}'s {@link htsjdk.samtools.SAMRecord}s, which can reduce memory footprint if many * fields are being read per record, or if fields are going to be updated. */ EAGERLY_DECODE { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.setEagerDecode(true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } @Override void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } @Override void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } }, /** * For {@link htsjdk.samtools.SamReader}s backed by block-compressed streams, enable CRC validation of those streams. This is an * expensive operation, but serves to ensure validity of the stream. 
*/ VALIDATE_CRC_CHECKSUMS { @Override void applyTo(final BAMFileReader underlyingReader, final SamReader reader) { underlyingReader.enableCrcChecking(true); } @Override void applyTo(final SAMTextReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } @Override void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } @Override void applyTo(final SRAFileReader underlyingReader, final SamReader reader) { logDebugIgnoringOption(reader, this); } }; public static final EnumSet





© 2015 - 2025 Weber Informatics LLC | Privacy Policy