All downloads are free. The search and download functionality uses the official Maven repository.

org.broadinstitute.hellbender.engine.spark.datasources.ReferenceWindowFunctions Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.engine.spark.datasources;

import org.broadinstitute.hellbender.utils.SerializableFunction;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.read.GATKRead;

/**
 * A library of reference window functions suitable for passing in to transforms such as AddContextDataToRead.
 * These are functions from {@link GATKRead} to {@link SimpleInterval}, with the output interval representing
 * the bases of reference context that should be retrieved for the input read.
 *
 * <p>Every function here is declared with explicit type arguments ({@code GATKRead} in, {@code SimpleInterval}
 * out) rather than as a raw {@code SerializableFunction}, so that the {@code apply} overrides are type-checked
 * by the compiler and callers do not need unchecked casts.
 */
public class ReferenceWindowFunctions {

    /**
     * A function for requesting only reference bases that directly overlap each read. To maintain the ability to
     * serialize the reference, this needs to be a class rather than a simple lambda function because of errors in
     * serializing lambda functions in Kryo, at least in version 2.21 which we are currently using. Partial support
     * for lambda serialization was added in Kryo 3.0.
     * See https://github.com/broadinstitute/gatk/pull/1489, https://github.com/EsotericSoftware/kryo/issues/215,
     * https://issues.apache.org/jira/browse/SPARK-7708
     */
    public static final SerializableFunction<GATKRead, SimpleInterval> IDENTITY_FUNCTION =
            new SerializableFunction<GATKRead, SimpleInterval>() {
        private static final long serialVersionUID = 1L;

        /**
         * @param input the read for which reference context is requested
         * @return an interval constructed directly from the read, with no extra padding added
         */
        @Override
        public SimpleInterval apply(final GATKRead input) {
            return new SimpleInterval(input);
        }
    };

    /**
     * A function for requesting a fixed number of extra bases of reference context on either side
     * of each read. For example, a "new FixedWindowFunction(3, 5)" would request 3 extra reference bases
     * before each read and 5 extra bases after each read, in addition to the reference bases spanning
     * each read.
     */
    public static final class FixedWindowFunction implements SerializableFunction<GATKRead, SimpleInterval> {
        private static final long serialVersionUID = 1L;

        private final int leadingWindowBases;
        private final int trailingWindowBases;

        /**
         * @param leadingWindowBases number of bases of additional reference context to request before each read's start position
         * @param trailingWindowBases number of bases of additional reference context to request after each read's end position
         */
        public FixedWindowFunction( final int leadingWindowBases, final int trailingWindowBases ) {
            this.leadingWindowBases = leadingWindowBases;
            this.trailingWindowBases = trailingWindowBases;
        }

        /**
         * Computes the padded reference window for a read.
         *
         * @param read the read for which reference context is requested
         * @return an interval on the read's contig running from {@code start - leadingWindowBases}
         *         (never less than 1) to {@code end + trailingWindowBases}
         */
        @Override
        public SimpleInterval apply( final GATKRead read ) {
            // Clamp the window start to 1 so that padding never produces a start position below 1.
            final int windowStart = Math.max(read.getStart() - leadingWindowBases, 1);
            // TODO: truncate interval at contig end (requires a sequence dictionary)
            final int windowEnd = read.getEnd() + trailingWindowBases;
            return new SimpleInterval(read.getContig(), windowStart, windowEnd);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy