All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.utils.bwa.BwaMemIndex Maven / Gradle / Ivy

package org.broadinstitute.hellbender.utils.bwa;

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

/**
 * BwaMemIndex manages the mapping of a bwa index image file into (non-Java) memory.
 * It's typically a huge chunk of memory, so you need to manage it as a precious resource.
 *
 * Usage pattern is:
 *   Create a BwaMemIndex for some reference.
 *   Create Aligners as needed to do some aligning -- they're pretty lightweight and thread safe.
 *     (But you may need to manage memory by controlling the number of reads that you align in one chunk.)
 *   Close the BwaMemIndex when you're done aligning.
 *
 * This class doesn't know anything about Spark.  You can use it in a distributed setting if you distribute the index
 * file to each node using the Yarn --files mechanism.  You might find it convenient to manage a singleton instance
 * of a BwaMemIndex on each Java VM when you're running distributed:  check out BwaMemIndexSingleton in GATK.
 *
 * Alternatively, you could use this class directly to run bwa multi-threaded on a single machine,
 * if that's what you want to do.
 */
public final class BwaMemIndex implements AutoCloseable {

    private static final int MAXIMUM_NUMBER_OF_CHARACTER_BEFORE_FIRST_HEADER_IN_FASTA_FILES =  4092;
    private static final char FASTA_HEADER_PREFIX_CHAR = '>';

    public static final List INDEX_FILE_EXTENSIONS =
            Collections.unmodifiableList(Arrays.asList(
                    ".amb", ".ann", ".bwt", ".pac", ".sa" ));

    public static final String IMAGE_FILE_EXTENSION = ".img";

    public static final List FASTA_FILE_EXTENSIONS =
            Collections.unmodifiableList(Arrays.asList(".fasta", ".fa"));

    /**
     * Indexing algorithms supported by Bwa.
     */
    public enum Algorithm {

        /**
         * Chooses the most appropriate algorithm based on characteristic
         * of the reference. For references shorter than 2Gbases it will use {@link #IS}
         * whereas for larger reference it will employ {@link #RB2}.
         */
        AUTO,

        /**
          * Linear-time algorithm for constructing suffix array.
          * It requires 5.37N memory where N is the size of the database.
          * IS is moderately fast, but does not work with database larger than 2GB.
          * IS is the default algorithm due to its simplicity. The current codes for
          * IS algorithm are reimplemented by Yuta Mori.
          *
          * @see "http://bio-bwa.sourceforge.net/bwa.shtml"
          */
        IS,

        /**
         * The Ropebwt2 algorithm.
         *
         * @see "https://arxiv.org/pdf/1406.0426.pdf"
         */
        RB2;

        /**
         * The string name use by the Bwa command line to denote that algorithm.
         * @return never {@code null} and unique across algorithms.
         */
        public String toBwaName() {
            return this.toString().toLowerCase();
        }
    }

    private final String indexImageFile; // stash this for error messages
    private volatile long indexAddress; // address where the index was memory-mapped (for use by C code)
    private final AtomicInteger refCount; // keep track of how many threads are actively aligning
    private final List refContigNames; // the reference dictionary from the index
    private static volatile boolean nativeLibLoaded = false; // whether we've loaded the native library or not

    private static String resolveFastaFileExtension(final String fasta) {
        final Optional extension = FASTA_FILE_EXTENSIONS.stream()
                .filter(fasta::endsWith).findFirst();
        if (!extension.isPresent()) {
            throw new IllegalArgumentException(
                    String.format("the fasta file provided '%s' does not have any of the standard fasta extensions: %s",
                            fasta, FASTA_FILE_EXTENSIONS.stream().collect(Collectors.joining(", "))));
        } else {
            return extension.get();
        }
    }

    /**
     * Create the index image file for a complete set of BWA index files
     * @param indexPrefix the location of the index files.
     * @param imageFile the location of the new index image file.
     *
     * 

* WARNING!: Notice that currently this method is making JNI call that might result in an abrupt process * interruption (e.g. exit or abort system call) and so the control may never be returned. *

* * @throws IllegalArgumentException if {@code indexPrefix} is {@code null} * or it does not look like it points to a complete set of index files. * @throws IllegalArgumentException if {@code imageFile} is {@code null}. */ public static void createIndexImageFromIndexFiles(final String indexPrefix, final String imageFile) { if (indexPrefix == null) { throw new IllegalArgumentException("the index prefix cannot be null"); } else if (imageFile == null) { throw new IllegalArgumentException("the image file cannot be null"); } assertLooksLikeIndexPrefix(indexPrefix); loadNativeLibrary(); createIndexImageFile(indexPrefix, imageFile); } /** * Create the index image file for a fasta file. *

The name of the output index file will be determined by the name of the fasta file; * it substitutes its extension (typically .fasta or .fas) by .img.

* *

* This is equivalent to calling * {@code {@link #createIndexImageFromFastaFile(String,String) createIndexImageFromFastaFile(X, X.replace("\.(fasta)|(fa)$", ".img"))}} *

*

* WARNING!: Notice that currently this method is making JNI call that might result in an abrupt process * interruption (e.g. exit or abort system call) and so the control may never be returned. *

* * @param fasta the location of the fasta reference file. * @throws IllegalArgumentException if {@code fasta} is {@code null} or * does not finish with out of the standard fasta extension names (listed in {@link #FASTA_FILE_EXTENSIONS}). * @throws InvalidFileFormatException if {@code fasta} does not seem to be a fasta formatted regular readable file. * @throws CouldNotCreateIndexImageException if for some reason we could not create the index file. */ public static void createIndexImageFromFastaFile(final String fasta) { if (fasta == null) { throw new IllegalArgumentException("the input fasta file name cannot be null"); } else { final String extension = resolveFastaFileExtension(fasta); final String prefix = fasta.substring(0, fasta.length() - extension.length()); final String imageFile = prefix + IMAGE_FILE_EXTENSION; createIndexImageFromFastaFile(fasta, imageFile); } } /** * Creates the index image file for a reference fasta file. *

* The index will be created using the default algorithm {@link Algorithm#AUTO}. *

*

* Calling this method is equivalent to calling {@link #createIndexImageFromFastaFile(String, String, Algorithm) createIndexImageFromFastaFile(a, b, Algorithm.AUTO)} *

* WARNING!: Notice that currently this method is making JNI call that might result in an abrupt process * interruption (e.g. exit or abort system call) and so the control may never be returned. *

* @param fasta the location of the targeted reference. * @param imageFile the location of the new index image file. * @throws IllegalArgumentException if {@code fasta} is {@code null} * or it does not look like it points to a fasta formatted readable file. * @throws IllegalArgumentException if {@code imageFile} is {@code null}. * @throws InvalidFileFormatException if {@code fasta} does not seem to be * a fasta formatted regular and readable file. * @throws CouldNotCreateIndexImageException if there was a problem creating * the output image. * @throws CouldNotCreateIndexException if there was some problem while creating * the intermediary index file set. */ public static void createIndexImageFromFastaFile(final String fasta, final String imageFile) { createIndexImageFromFastaFile(fasta, imageFile, Algorithm.AUTO); } /** * Creates the index image file for a reference fasta file. *

* WARNING!: Notice that currently this method is making JNI call that might result in an abrupt process * interruption (e.g. exit or abort system call) and so the control may never be returned. *

* @param fasta the location of the targeted reference. * @param imageFile the location of the new index image file. * @param algo the algorithm to use to construct the index (see {@link Algorithm} to see what there is available.). * @throws IllegalArgumentException if {@code fasta} is {@code null} * or it does not look like it points to a fasta formatted readable file. * @throws IllegalArgumentException if {@code imageFile} is {@code null}. * @throws InvalidFileFormatException if {@code fasta} does not seem to be * a fasta formatted regular and readable file. * @throws CouldNotCreateIndexImageException if there was a problem creating * the output image. * @throws CouldNotCreateIndexException if there was some problem while creating * the intermediary index file set. */ public static void createIndexImageFromFastaFile( final String fasta, final String imageFile, final Algorithm algo) { assertLooksLikeFastaFile(fasta); assertCanCreateOrOverwriteImageFile(imageFile); if (algo == null) { throw new IllegalArgumentException("the input algorithm must not be null"); } final File indexPrefix = createTempIndexPrefix(fasta); loadNativeLibrary(); createReferenceIndex(fasta, indexPrefix.getPath(), algo.toBwaName()); createIndexImageFile(indexPrefix.getPath(), imageFile); deleteIndexFiles(indexPrefix); } private static void assertCanCreateOrOverwriteImageFile(final String imageFile) { if (imageFile == null) { throw new IllegalArgumentException("the image file cannot be null"); } else { final File file = new File(imageFile); try { if (!file.createNewFile()) { if (!file.isFile() || !file.canWrite()) { throw new CouldNotCreateIndexImageException(imageFile, "already exists as a non-regular or unwritable file"); } } else { file.delete(); } } catch (final IOException ex) { throw new CouldNotCreateIndexImageException(imageFile, ex.getMessage(), ex); } } } /** * Checks whether the input index prefix seems to point to a complete set * of readable index files. * * @param indexPrefix the target index prefix. * @throws IllegalArgumentException if {@code indexPrefix} is {@code null}. * @throws CouldNotReadIndexException if that is not the case. */ private static void assertLooksLikeIndexPrefix(final String indexPrefix) { if (indexPrefix == null) { throw new IllegalArgumentException("the input index prefix cannot be null"); } INDEX_FILE_EXTENSIONS.stream() .map(ext -> indexPrefix + ext) .forEach(file -> assertNonEmptyReadableIndexFile(indexPrefix, file)); } private static void deleteIndexFiles(final File indexPrefix) { INDEX_FILE_EXTENSIONS.stream() .map(ext -> new File(indexPrefix + ext)) .forEach(File::delete); indexPrefix.delete(); } private static File createTempIndexPrefix(final String fasta) { final File indexPrefix; try { indexPrefix = File.createTempFile("temporal-index",""); } catch (final IOException ex) { throw new CouldNotCreateIndexException(fasta, "no-location","failure to create a temporal file"); } indexPrefix.deleteOnExit(); INDEX_FILE_EXTENSIONS.stream() .map(ext -> new File(indexPrefix + ext)) .forEach(File::deleteOnExit); return indexPrefix; } private static void assertLooksLikeFastaFile(final String fasta) { resolveFastaFileExtension(fasta); if (!nonEmptyReadableFile(fasta)) { throw new CouldNotReadReferenceException(fasta, "input file unreachable or not a file"); } try (final BufferedReader reader = new BufferedReader(new FileReader(fasta))) { int c; int offset = 0; while (offset++ < MAXIMUM_NUMBER_OF_CHARACTER_BEFORE_FIRST_HEADER_IN_FASTA_FILES && (c = reader.read()) != -1) { if (!Character.isSpaceChar(c)) { if (c == FASTA_HEADER_PREFIX_CHAR) { break; } else { throw new InvalidFileFormatException(fasta, "the file provided does not seem to be a fasta file (first non-space character in the first 4K is not '" + FASTA_HEADER_PREFIX_CHAR + "'"); } } } } catch (final IOException ex) { throw new InvalidFileFormatException(fasta, "problems reading the content of the reference fasta file'", ex); } } /** * Loads an index from an image file. *

* You can use other methods to create such * indexes from fasta reference ({@link #createIndexImageFromFastaFile} or their index files * ({@link #createIndexImageFromIndexFiles}). *

*

* WARNING!: Notice that currently this method is making JNI call that might result in an abrupt process * interruption (e.g. exit or abort system call) and so the control may never be returned. *

* * @throws IllegalArgumentException if {@code indexImageFile} is {@code null}. * @throws CouldNotReadImageException if some problem occurred when loading the * image file. */ public BwaMemIndex( final String indexImageFile ) { this.indexImageFile = indexImageFile; loadNativeLibrary(); assertNonEmptyReadableImageFile(indexImageFile); refCount = new AtomicInteger(); indexAddress = openIndex(indexImageFile); if ( indexAddress == 0L ) { throw new CouldNotReadImageException(indexImageFile, "unable to open bwa-mem index"); } ByteBuffer refContigNamesBuf = getRefContigNames(indexAddress); if ( refContigNamesBuf == null ) { throw new CouldNotReadImageException("unable to retrieve reference contig names from bwa-mem index"); } refContigNamesBuf.order(ByteOrder.nativeOrder()).position(0).limit(refContigNamesBuf.capacity()); int nRefContigNames = refContigNamesBuf.getInt(); refContigNames = new ArrayList<>(nRefContigNames); for ( int idx = 0; idx < nRefContigNames; ++idx ) { int nameLen = refContigNamesBuf.getInt(); byte[] nameBytes = new byte[nameLen]; refContigNamesBuf.get(nameBytes); refContigNames.add(new String(nameBytes)); } destroyByteBuffer(refContigNamesBuf); } private void assertNonEmptyReadableImageFile(final String image) { if (!nonEmptyReadableFile(image)) { throw new CouldNotReadImageException(image, "is empty or is not readable"); } } /** true if index has not been closed */ public boolean isOpen() { return indexAddress != 0L; } /** there's someone using the index -- don't allow it to be closed */ public long refIndex() { refCount.incrementAndGet(); if ( indexAddress == 0L ) { throw new IllegalStateException("Index image " + indexImageFile + " has been closed"); } return indexAddress; } /** done using the index -- if ref count has fallen to 0, a call to close can be expected to succeed */ public void deRefIndex() { refCount.decrementAndGet(); } /** * Close the index and release the (non-Java) memory that's been allocated * *

* WARNING!: Notice that currently this method is making JNI call that might result in an abrupt process * interruption (e.g. exit or abort system call) and so the control may never be returned. *

*/ @Override public void close() { long addr = indexAddress; if ( addr != 0L ) { synchronized (BwaMemIndex.class) { addr = indexAddress; if ( addr != 0L ) { if ( refCount.intValue() != 0 ) { throw new IllegalStateException("Index image "+indexImageFile+" can't be closed: it's in use."); } indexAddress = 0L; destroyIndex(addr); } } } } /** retrieve list of contig names in the reference dictionary */ public List getReferenceContigNames() { return refContigNames; } /** returns github GUID for the version of bwa that has been compiled */ public static String getBWAVersion() { loadNativeLibrary(); return getVersion(); } ByteBuffer doAlignment( final ByteBuffer seqs, final ByteBuffer opts ) { final ByteBuffer alignments = createAlignments(seqs, indexAddress, opts); if ( alignments == null ) { throw new IllegalStateException("Unable to get alignments from bwa-mem index "+indexImageFile+": We don't know why."); } return alignments; } private static void assertNonEmptyReadableIndexFile(final String index, final String fileName ) { if ( !nonEmptyReadableFile(fileName) ) throw new CouldNotReadIndexException(index, "Missing bwa index file: "+ fileName); } private static boolean nonEmptyReadableFile( final String fileName ) { if (fileName == null) { throw new IllegalArgumentException("the input file name cannot be null"); } try ( final FileInputStream is = new FileInputStream(fileName) ) { return is.read() != -1; } catch ( final IOException ioe ) { return false; } } private static void loadNativeLibrary() { if ( !nativeLibLoaded ) { synchronized(BwaMemIndex.class) { if ( !nativeLibLoaded ) { final String libNameOverride = System.getProperty("LIBBWA_PATH"); if ( libNameOverride != null ) { System.load(libNameOverride); } else { final String osName = System.getProperty("os.name", "unknown").toUpperCase(); final String osArch = System.getProperty("os.arch"); final String libName; if ( !"x86_64".equals(osArch) && !"amd64".equals(osArch) ) { throw new IllegalStateException( "We have pre-built fermi-lite binaries only for x86_64 and amd64. "+ "Your os.arch is "+osArch+"."+ "Set property LIBBWA_PATH to point to a native library for your architecture."); } if ( osName.startsWith("MAC") ) libName = "/libbwa.Darwin.dylib"; else if ( osName.startsWith("LINUX") ) libName = "/libbwa.Linux.so"; else { throw new IllegalStateException( "We have pre-built fermi-lite binaries only for Linux and Mac. "+ "Your os.name is "+osName+"."+ "Set property LIBBWA_PATH to point to a native library for your operating system."); } try ( final InputStream is = BwaMemIndex.class.getResourceAsStream(libName) ) { if ( is == null ) { throw new IllegalStateException("Can't find resource "+libName); } final File tmpFile = File.createTempFile("libbwa.",".jnilib"); tmpFile.deleteOnExit(); Files.copy(is, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING); System.load(tmpFile.getPath()); } catch (IOException ioe ) { throw new IllegalStateException("Misconfiguration: Unable to load fermi-lite native library "+libName, ioe); } } nativeLibLoaded = true; } } } } private static native boolean createReferenceIndex(String referenceName, String indexPrefix, String algorithmName); private static native boolean createIndexImageFile(String indexPrefix, String imageName ); private static native long openIndex( String indexImageFile ); private static native int destroyIndex( long indexAddress ); static native ByteBuffer createDefaultOptions(); private static native ByteBuffer getRefContigNames( long indexAddress ); private static native ByteBuffer createAlignments( ByteBuffer seqs, long indexAddress, ByteBuffer opts ); static native void destroyByteBuffer( ByteBuffer alignments ); private static native String getVersion(); }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy