// org.broadinstitute.hellbender.engine.FeatureContext -- Maven / Gradle / Ivy (source listing of the newest version)
package org.broadinstitute.hellbender.engine;
import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.Feature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
/**
* Wrapper around FeatureManager that presents Feature data from a particular interval to a client tool
* without improperly exposing engine internals.
*
* The client passes in one or more FeatureInputs that were declared as tool arguments, and gets back a List
* of all Features from those FeatureInputs overlapping the interval spanned by this FeatureContext.
*
* Features returned may optionally be additionally constrained to start at a particular position.
*
* Features are returned strongly-typed based on the type parameter of each FeatureInput requested,
* so a query on a FeatureInput will return VariantContext objects (no casting
* required by tool authors).
*
* Feature sources are lazily queried, so there's no overhead if the client chooses not to examine
* the FeatureContext it's passed.
*
* A FeatureContext may have no backing data source and/or interval. In these cases, queries on it will always
* return empty Lists. You can determine whether there is a backing source of Features via
* {@link #hasBackingDataSource()}, and whether there is an interval via {@link #getInterval}
*
* Note: This class is NOT intended to be extended outside of the testing harness.
*/
@DoNotSubclass
public class FeatureContext {
/**
* FeatureManager containing backing data sources for all discovered Feature arguments.
* Null if there are no sources of Features.
*/
private final FeatureManager featureManager;
/**
* We will return Features overlapping this interval. Null if this context has no known location
* (eg., we are dealing with unmapped data).
*/
private final SimpleInterval interval;
/**
* Creates an empty FeatureContext with no backing data source. All queries on this context will
* return an empty List.
*/
public FeatureContext() {
this((FeatureManager)null, null);
}
/**
* Creates a new FeatureContext given a FeatureManager and a query interval. These may be null if
* no sources of Features are available and/or we don't have a known location on the reference,
* in which case all queries on this context will return an empty List.
*
* @param featureManager FeatureManager containing backing data sources for all discovered Feature arguments. Null if there are no sources of Features.
* @param interval Interval to constrain queries on this FeatureContext. Null if we have no known location.
*/
public FeatureContext(final FeatureManager featureManager, final SimpleInterval interval) {
this.featureManager = featureManager;
this.interval = interval;
}
/**
* Creates a new FeatureContext given a FeatureContext and a query interval. This will reference the FeatureManager
* from the original and use the supplied interval.
*
*/
public FeatureContext (FeatureContext featureContext, SimpleInterval interval){
this(featureContext.featureManager, interval);
}
/**
* Determines whether this FeatureContext has a backing source of Features. A FeatureContext with
* no backing data source will always return an empty List in response to a query.
*
* @return true if this FeatureContext has a backing source of Features, otherwise false
*/
public boolean hasBackingDataSource() {
return featureManager != null;
}
/**
* Gets our query interval (the interval that all Features returned by this FeatureContext overlap).
* Null if this context has no interval.
*
* @return query interval for this FeatureContext (may be null)
*/
public SimpleInterval getInterval() {
return interval;
}
/**
* Gets the header associated with the provided FeatureInput
*
* @param featureDescriptor FeatureInput whose header we want to retrieve
* @param type of Feature in our FeatureInput
* @return header for the provided FeatureInput (null if we have no backing data sources)
*/
public Object getHeader(final FeatureInput featureDescriptor) {
return featureManager != null ? featureManager.getHeader(featureDescriptor) : null;
}
/**
* Gets all Features from the source represented by the provided FeatureInput argument that overlap
* this FeatureContext's query interval. Will return an empty List if this FeatureContext has
* no backing source of Features and/or interval.
*
* Returned Features are not guaranteed to be in any particular order.
*
* @param featureDescriptor FeatureInput argument for which to fetch Features
* @param type of Feature in the data source backing the provided FeatureInput
* @return All Features in the data source backing the provided FeatureInput that overlap
* this FeatureContext's query interval. Empty List if there is no backing data source and/or interval.
*/
public List getValues(final FeatureInput featureDescriptor) {
return getValues(featureDescriptor, interval);
}
/**
* Gets all Features from the source represented by the provided FeatureInput argument that overlap this
* FeatureContext's query interval as expanded by the specified number of leading/trailing bases.
* Returns an empty List if this FeatureContext has no backing source of Features and/or interval.
*
* Returned Features are not guaranteed to be in any particular order.
*
* Note: if windowLeadingBases > 0 and query lookahead caching is enabled for the underlying FeatureDataSource,
* there will be a cache miss on every call, since the current caching scheme prefetches Features after
* the current query interval but not before it (on the assumption that
* the common access pattern involves gradually increasing query intervals).
*
* @param featureDescriptor FeatureInput argument for which to fetch Features
* @param type of Feature in the data source backing the provided FeatureInput
* @param windowLeadingBases Number of extra reference bases to include before the start of our interval. Must be >= 0.
* @param windowTrailingBases Number of extra reference bases to include after the end of our interval. Must be >= 0.
* @return All Features in the data source backing the provided FeatureInput that overlap
* this FeatureContext's query interval as expanded by the specified number of leading/trailing bases.
* Empty List if there is no backing data source and/or interval.
*/
public List getValues(final FeatureInput featureDescriptor, final int windowLeadingBases, final int windowTrailingBases) {
return getValues(featureDescriptor, getQueryInterval(windowLeadingBases, windowTrailingBases));
}
/**
* Gets all Features from the source represented by the provided FeatureInput argument that overlap the given interval.
* Returns an empty List if this FeatureContext has no backing source of Features and/or interval.
*
* Returned Features are not guaranteed to be in any particular order.
*
* Note: if query lookahead caching is enabled for the underlying FeatureDataSource,
* there will be a cache miss on almost every call, since the current caching scheme prefetches Features after
* the current query interval but not before it (on the assumption that
* the common access pattern involves gradually increasing query intervals).
*
* @param featureDescriptor FeatureInput argument for which to fetch Features
* @param type of Feature in the data source backing the provided FeatureInput
* @return All Features in the data source backing the provided FeatureInput that overlap
* this FeatureContext's query interval as expanded by the specified number of leading/trailing bases.
* Empty List if there is no backing data source and/or interval.
*/
public List getValues(final FeatureInput featureDescriptor, final Locatable queryInterval) {
if (featureManager == null || queryInterval == null || featureDescriptor == null) {
return Collections.emptyList();
}
return featureManager.getFeatures(featureDescriptor, queryInterval);
}
/**
* Gets the query interval expanded by the specified number of leading/trailing bases, or null if this context has no interval.
*
* @param windowLeadingBases Number of extra bases to include before the start of our interval. Must be >= 0.
* @param windowTrailingBases Number of extra bases to include after the end of our interval. Must be >= 0.
* @return full expanded window of bases spanned by this context as a SimpleInterval
* (will be null if this context has no interval)
*/
private SimpleInterval getQueryInterval(final int windowLeadingBases, final int windowTrailingBases){
Utils.validateArg(windowLeadingBases >= 0, "Window starts after the current interval");
Utils.validateArg(windowTrailingBases >= 0, "Window ends before the current interval");
if (interval == null) {
return null;
} else if (windowLeadingBases == 0 && windowTrailingBases == 0){
return interval;
}
return new SimpleInterval(interval.getContig(), windowStart(interval, windowLeadingBases), windowStop(interval, windowTrailingBases));
}
/**
* Determines the start of the expanded query window, bounded by 1.
*
* @param locus The locus to expand.
* @param windowLeadingBases number of bases to attempt to expand relative to the locus start (>= 0)
* @return The start of the expanded window.
*/
private int windowStart(final SimpleInterval locus, final int windowLeadingBases) {
return Math.max(locus.getStart() - windowLeadingBases, 1);
}
/**
* Determines the stop of the expanded query window.
*
* @param locus The locus to expand.
* @param windowTrailingBases number of bases to attempt to expand relative to the locus end (>= 0)
* @return The end of the expanded window.
*/
private int windowStop(final SimpleInterval locus, final int windowTrailingBases) {
//Note: queries past the end of contig are handled but overflow
// blows up too late so we change it here to blow up early.
return Math.addExact(locus.getEnd(), windowTrailingBases);//blow up on overflow
}
/**
* Gets all Features from the source represented by the provided FeatureInput argument that overlap
* this FeatureContext's query interval AND that start at the specified start position.
* Will return an empty List if this FeatureContext has no backing source of Features and/or interval.
*
* Returned Features are not guaranteed to be in any particular order.
*
* @param featureDescriptor FeatureInput argument for which to fetch Features
* @param featureStart All returned Features must start at this position, in addition to overlapping this
* FeatureContext's query interval
* @param type of Feature in the data source backing the provided FeatureInput
* @return All Features in the data source backing the provided FeatureInput that overlap
* this FeatureContext's query interval AND that start at the specified start position.
* Empty List if there is no backing data source and/or interval.
*/
public List getValues(final FeatureInput featureDescriptor, final int featureStart) {
if (featureManager == null || interval == null) {
return Collections.emptyList();
}
return subsetToStartPosition(getValues(featureDescriptor), featureStart);
}
/**
* Gets all Features from the sources represented by the provided FeatureInput arguments that overlap
* this FeatureContext's query interval. Will return an empty List if this FeatureContext has no
* backing source of Features and/or interval.
*
* Returned Features are not guaranteed to be in any particular order, or to be globally unique
* across all sources of Features.
*
* @param featureDescriptors FeatureInput arguments for which to fetch Features
* @param type of Feature in the data sources backing the provided FeatureInputs
* @return All Features in the data sources backing the provided FeatureInputs that overlap
* this FeatureContext's query interval. Empty List if there is no backing data source and/or interval.
*/
public List getValues(final Collection> featureDescriptors) {
if (featureManager == null || interval == null || featureDescriptors.isEmpty()) {
return Collections.emptyList();
}
final List features = new ArrayList<>();
for (FeatureInput featureSource : featureDescriptors) {
features.addAll(getValues(featureSource));
}
return features;
}
/**
* Gets all Features from the sources represented by the provided FeatureInput arguments that overlap
* this FeatureContext's query interval, AND that start at the specified start position. Will return
* an empty List if this FeatureContext has no backing source of Features and/or interval.
*
* Returned Features are not guaranteed to be in any particular order, or to be globally unique
* across all sources of Features.
*
* @param featureDescriptors FeatureInput arguments for which to fetch Features
* @param featureStart All returned Features must start at this position, in addition to overlapping this
* FeatureContext's query interval
* @param type of Feature in the data sources backing the provided FeatureInputs
* @return All Features in the data sources backing the provided FeatureInputs that overlap
* this FeatureContext's query interval, AND that start at the specified start position.
* Empty List if there is no backing data source and/or interval.
*/
public List getValues(final Collection> featureDescriptors, final int featureStart) {
if (featureManager == null || interval == null) {
return Collections.emptyList();
}
return subsetToStartPosition(getValues(featureDescriptors), featureStart);
}
/**
* Helper method to subset a list of Features to only those that start at a particular start position
*
* @param features list of Features to subset
* @param start the required start position for returned Features
* @param type of Feature we're dealing with
* @return List of all Features from the features list that start at the specified start position
*/
private List subsetToStartPosition(final Collection features, final int start) {
return features.stream().filter(feat -> feat.getStart() == start).collect(Collectors.toList());
}
/**
* Convenience method to create a new instance for test methods.
* This method should be used for testing only.
*
* @param featureInputsWithType {@link Map} of a {@link FeatureInput} to the output type that must extend {@link Feature}.
* Never {@code null}, but empty list is acceptable.
* @param dummyToolInstanceName A name to use for the "tool". Any string will work here. Never {@code null}.
* @param interval genomic interval for the result. Typically, this would be the interval of the variant. Never {@link null}.
* @param featureQueryLookahead When querying FeatureDataSources, cache this many extra bases of context beyond
* the end of query intervals in anticipation of future queries. Must be >= 0. If uncertain, use zero.
* @param cloudPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions)} If uncertain, use zero.
* @param cloudIndexPrefetchBuffer See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions)} If uncertain, use zero.
* @param reference See {@link FeatureManager#FeatureManager(CommandLineProgram, int, int, int, org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBOptions)} If uncertain, use {@code null}.
*/
@VisibleForTesting
public static FeatureContext createFeatureContextForTesting(final Map, Class extends Feature>> featureInputsWithType, final String dummyToolInstanceName,
final SimpleInterval interval, final int featureQueryLookahead, final int cloudPrefetchBuffer,
final int cloudIndexPrefetchBuffer, final Path reference) {
Utils.nonNull(featureInputsWithType);
Utils.nonNull(dummyToolInstanceName);
Utils.nonNull(interval);
final FeatureManager featureManager = new FeatureManager(featureInputsWithType, dummyToolInstanceName,
featureQueryLookahead, cloudPrefetchBuffer, cloudIndexPrefetchBuffer, reference);
return new FeatureContext(featureManager, interval);
}
}
// Web-page footer (not part of the Java source): © 2015 - 2025 Weber Informatics LLC | Privacy Policy