All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.engine.spark.datasources.ReferenceMultiSparkSource Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.engine.spark.datasources;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.FileExtensions;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.SerializableFunction;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.reference.ReferenceBases;

import java.io.IOException;
import java.io.Serializable;

/**
 * Wrapper to load a reference sequence from a file stored on HDFS, GCS, or locally.
 *
 * This class needs to subclassed by test code, so it cannot be declared final.
 */
public class ReferenceMultiSparkSource implements ReferenceSparkSource, Serializable {
    private static final long serialVersionUID = 1L;

    private ReferenceSparkSource referenceSource;
    private SerializableFunction referenceWindowFunction;

    @VisibleForTesting
    protected ReferenceMultiSparkSource() {};

    /**
     * @param referencePathSpecifier local path or URL to the reference file
     * @param referenceWindowFunction the custom reference window function used to map reads to desired reference bases
     */
    public ReferenceMultiSparkSource( final GATKPath referencePathSpecifier,
                                      final SerializableFunction referenceWindowFunction) {
        Utils.nonNull(referenceWindowFunction);
        if ( ReferenceTwoBitSparkSource.isTwoBit(referencePathSpecifier)) {
            try {
                referenceSource = new ReferenceTwoBitSparkSource(referencePathSpecifier);
            } catch (IOException e) {
                throw new UserException("Failed to create a ReferenceTwoBitSource object" + e.getMessage());
            }
        } else if (referencePathSpecifier.isFasta()) {
            if (referencePathSpecifier.isHadoopURL()) {
                referenceSource = new ReferenceHadoopSparkSource(referencePathSpecifier);
            } else {
                referenceSource = new ReferenceFileSparkSource(referencePathSpecifier);
            }
        } else {
            throw new UserException.CouldNotReadInputFile("Couldn't read the given reference, reference must be a .fasta or .2bit file.\n" +
                    " Reference provided was: " + referencePathSpecifier);
        }
        this.referenceWindowFunction = referenceWindowFunction;
    }

    static boolean isFasta(final GATKPath referencePathSpecifier) {
        final String referencePathString = referencePathSpecifier.getURI().getPath();
        for (final String ext : FileExtensions.FASTA) {
            if (referencePathString.endsWith(ext)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the custom reference window function used to map reads to desired reference bases
     */
    public SerializableFunction getReferenceWindowFunction() {
        return referenceWindowFunction;
    }

    /**
     * Return reference bases for the given interval.
     * @param interval the interval to return reference bases for
     * @return reference bases for the given interval
     */
    @Override
    public ReferenceBases getReferenceBases(final SimpleInterval interval) throws IOException {
        return referenceSource.getReferenceBases(interval);
    }

    /**
     * Return a sequence dictionary for the reference.
     * @param optReadSequenceDictionaryToMatch - (optional) the sequence dictionary of the reads, we'll match its order if possible.
     * @return sequence dictionary for the reference
     */
    @Override
    public SAMSequenceDictionary getReferenceSequenceDictionary(final SAMSequenceDictionary optReadSequenceDictionaryToMatch) {
        try {
            return referenceSource.getReferenceSequenceDictionary(optReadSequenceDictionaryToMatch);
        }
        catch ( IOException e ) {
            throw new GATKException("Error getting reference sequence dictionary");
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy