
// org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKAnnotationPluginDescriptor Maven / Gradle / Ivy
package org.broadinstitute.hellbender.cmdline.GATKPlugin;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.barclay.argparser.CommandLinePluginDescriptor;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.PedigreeAnnotation;
import org.broadinstitute.hellbender.utils.SerializableConsumer;
import org.broadinstitute.hellbender.utils.SerializableFunction;
import org.broadinstitute.hellbender.utils.SerializablePredicate;
import org.broadinstitute.hellbender.tools.walkers.annotator.flow.FlowAnnotatorBase;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.config.ConfigFactory;
import org.broadinstitute.hellbender.utils.config.GATKConfig;
import java.io.Serializable;
import java.lang.reflect.Modifier;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* A plugin descriptor for managing the dynamic discovery of both {@link org.broadinstitute.hellbender.tools.walkers.annotator.InfoFieldAnnotation} and {@link org.broadinstitute.hellbender.tools.walkers.annotator.GenotypeAnnotation} objects
* within the packages defined by the method getPackageNames() (default {@link org.broadinstitute.hellbender.tools.walkers.annotator}).
* Also handles integrating annotation specific arguments from the command line with tool specified defaults.
*
* Unlike {@link GATKReadFilterPluginDescriptor} annotation order is not important and thus argument order is not guaranteed to be
* preserved in all cases, especially when group annotations are involved.
*
* An alternative method for discovering annotations is ClassUtils.knownSubInterfaceSimpleNames(Annotation.class), which can
* be invoked in absence of command line inputs.
*
* NOTE: this class enforces that annotations with required arguments must see their arguments, yet this is not currently tested
* as no such annotations exist in the GATK.
*/
public class GATKAnnotationPluginDescriptor extends CommandLinePluginDescriptor implements Serializable {
private static final long serialVersionUID = 1L;
/**
* At startup, set the plugin package name to the one(s) in the configuration file.
*/
private static final List PLUGIN_PACKAGE_NAMES;
static {
// Get our configuration:
final GATKConfig config = ConfigFactory.getInstance().getGATKConfig();
// Exclude abstract classes and interfaces from the list of discovered codec classes
PLUGIN_PACKAGE_NAMES = Collections.unmodifiableList(config.annotation_packages());
}
private static final Class> PLUGIN_BASE_CLASS = org.broadinstitute.hellbender.tools.walkers.annotator.Annotation.class;
protected transient Logger logger = LogManager.getLogger(this.getClass());
@ArgumentCollection
private final GATKAnnotationArgumentCollection userArgs;
// Map of Annotation (simple) class names to the corresponding discovered plugin instance
private final Map allDiscoveredAnnotations = new HashMap<>();
// Map of Annotation (simple) class names to the corresponding default plugin instance.
// We keep the actual instances in case they have any additional state provided by the tool
// when they were created
private final Map toolDefaultAnnotations = new HashMap<>();
private final Set toolDefaultGroups = new HashSet<>();
// Set of predecessor annotations for which we've seen arguments exist either as a tool default or be supplied by the user
// (eg. InbreedingCoeff if we see "--founderID" on the command line)
private final Set requiredPredecessors = new HashSet<>();
// Map of annotation group name to list of annotations flagged with that group. The key here the simple name of the interface
// that describes the annotation group.
private final Map> discoveredGroups = new HashMap<>();
// Resolved instances calculated from the user input and tool defaults together
private List resolvedInstances;
// Annotation arguments that are shared and thus must be part of the plugin descriptor
@Argument(fullName = "founder-id", shortName = "founder-id", doc="Samples representing the population \"founders\"", optional=true)
private List founderIds;
@Argument(fullName = StandardArgumentDefinitions.PEDIGREE_FILE_LONG_NAME, shortName = StandardArgumentDefinitions.PEDIGREE_FILE_SHORT_NAME, doc="Pedigree file for determining the population \"founders\"", optional=true)
private GATKPath pedigreeFile;
@Argument(fullName = StandardArgumentDefinitions.FLOW_ORDER_FOR_ANNOTATIONS, doc = "flow order used for this annotations. [readGroup:]flowOrder", optional = true)
private List flowOrder;
/**
* @return the class object for the base class of all plugins managed by this descriptor
*/
@Override
public Class> getPluginBaseClass() {
return PLUGIN_BASE_CLASS;
}
/**
* A list of package names which will be searched for plugins managed by the descriptor.
*
* @return Strings of package names over which the plugin descriptor will search for Annotation Classes
*/
@Override
public List getPackageNames() {
return PLUGIN_PACKAGE_NAMES;
}
/**
* Constructor that allows client tools to specify what annotations (optionally with parameters specified) to use as their defaults
* before discovery of user specified annotations.
*
* @param userArgs Argument collection to control the exposure of the command line arguments.
* @param toolDefaultAnnotations Default annotations that may be supplied with arguments
* on the command line. May be null.
* @param toolDefaultGroups List of tool specified default annotation group names. Annotations specified this way
* will be instantiated with default arguments. may be null.
*/
public GATKAnnotationPluginDescriptor(final GATKAnnotationArgumentCollection userArgs, final List toolDefaultAnnotations, final List> toolDefaultGroups) {
this.userArgs = userArgs;
if (null != toolDefaultAnnotations) {
toolDefaultAnnotations.forEach((SerializableConsumer) (f -> {
final Class extends Annotation> annotClass = f.getClass();
// anonymous classes have a 0-length simple name, and thus cannot be accessed or
// controlled by the user via the command line, but they should still be valid
// as default annotations, so use the full name to ensure that their map entries
// don't clobber each other
String className = annotClass.getSimpleName();
if (className.length() == 0) {
className = annotClass.getName();
}
populateAnnotationGroups(className, f);
this.toolDefaultAnnotations.put(className, f);
}));
}
if (null != toolDefaultGroups) {
toolDefaultGroups.forEach((SerializableConsumer>) (a -> {
if (a.isInterface() && a!=Annotation.class) {
this.toolDefaultGroups.add(a.getSimpleName());
} else {
throw new GATKException(String.format("Tool specified annotation group %s is not a valid annotation group, must be an interface extending Annotation", a.getSimpleName()));
}
}));
}
}
/**
* Constructor that allows client tools to specify what annotations (optionally with parameters specified) to use as their defaults
* before discovery of user specified annotations. Defaults to using an empty GATKAnnotationArgumentCollection object.
*
* @param toolDefaultAnnotations Default annotations that may be supplied with arguments
* on the command line. May be null.
* @param toolDefaultGroups List of tool specified default annotation group names. Annotations specified this way
* will be instantiated with default arguments. may be null.
*/
public GATKAnnotationPluginDescriptor(final List toolDefaultAnnotations, final List> toolDefaultGroups) {
this(new DefaultGATKVariantAnnotationArgumentCollection(), toolDefaultAnnotations, toolDefaultGroups);
}
@Override
public boolean includePluginClass(Class> c) {
return !c.getName().equals(this.getPluginBaseClass().getName()) &&
!Modifier.isAbstract(c.getModifiers()) &&
!c.getName().contains("UnitTest$");
}
/**
* Return a display name to identify this plugin to the user
*
* @return A short user-friendly name for this plugin.
*/
@Override
public String getDisplayName()
{
// The value returned by this method is placed into the freemarker property map by the docgen system,
// and must be a valid variable name in the freemarker template language (it cannot be a kebabified
// string with an embedded "-").
return StandardArgumentDefinitions.ANNOTATION_LONG_NAME;
}
/**
* Return an instance of the specified pluggable class. The descriptor should
* instantiate or otherwise obtain (possibly by having been provided an instance
* through the descriptor's constructor) an instance of this plugin class.
* The descriptor should maintain a list of these instances so they can later
* be retrieved by {@link #getResolvedInstances()} ()}.
*
* In addition, this method should recognize and reject any attempt to instantiate
* a second instance of any specific Annotation since the user has no way to disambiguate
* these on the command line).
*
* @param pluggableClass a plugin class discovered by the command line parser that
* was not rejected by {@link #includePluginClass(Class)} ()}
* @return the instantiated object that will be used by the command line parser
* as an argument source
* @throws IllegalAccessException if thrown when calling the {@code pluginClass} constructor
* @throws InstantiationException if thrown when calling the {@code pluginClass} constructor
*/
@Override
@SuppressWarnings("deprecation")
public Annotation createInstanceForPlugin(Class> pluggableClass) throws IllegalAccessException, InstantiationException {
Annotation annot = null;
final String simpleName = pluggableClass.getSimpleName();
if (allDiscoveredAnnotations.containsKey(simpleName)) {
// we found a plugin class with a name that collides with an existing class;
// plugin names must be unique even across packages
throw new IllegalArgumentException(
String.format("A plugin class name collision was detected (%s/%s). " +
"Simple names of plugin classes must be unique across packages.",
pluggableClass.getName(),
allDiscoveredAnnotations.get(simpleName).getClass().getName())
);
} else if (toolDefaultAnnotations.containsKey(simpleName)) {
// an instance of this class was provided by the tool as one of it's default annotations;
// use the default instance as the target for command line argument values
// rather than creating a new one, in case it has state provided by the tool
annot = toolDefaultAnnotations.get(simpleName);
} else {
annot = (Annotation) pluggableClass.newInstance();
}
// Add all annotations to the allDiscoveredAnnotations list, even if the instance came from the
// tool defaults list (we want the actual instances to be shared to preserve state)
allDiscoveredAnnotations.put(simpleName, annot);
populateAnnotationGroups(simpleName, annot);
return annot;
}
// Dynamic discovery of annotation groups
// We must discover annotation groups and store them for each instance so we can resolve group membership
// for command line including of groups based on their simple name.
private void populateAnnotationGroups(final String simpleName, final Annotation annot) {
Queue> interfaces = new LinkedList<>();
Collections.addAll(interfaces, annot.getClass().getInterfaces());
while (!interfaces.isEmpty()) {
Class> inter = interfaces.poll();
// Following with how groups are currently defined and discovered, namely they are interfaces that
// extend Annotation, groups are discovered by interrogating annotations for their interfaces and
// associating the discovered annotations with their defined groups.
// If a duplicate annotation is added, the group will opt to keep the old instantiation around
if ((inter != PLUGIN_BASE_CLASS) && (PLUGIN_BASE_CLASS.isAssignableFrom(inter))) {
Map groupIdentity = (discoveredGroups.containsKey(inter.getSimpleName()) ? discoveredGroups.get(inter.getSimpleName()) : new HashMap<>());
groupIdentity.putIfAbsent(simpleName, annot);
discoveredGroups.put(inter.getSimpleName(), groupIdentity);
Collections.addAll(interfaces, inter.getInterfaces());
}
}
}
/**
* Return the allowed values for annotationNames/disableAnnotations/annotationGroups for use by the help system.
*
* @param longArgName long name of the argument for which help is requested
* @return the set of allowed values for the argument, or null if the argument is not controlled by this descriptor
*/
@Override
public Set getAllowedValuesForDescriptorHelp(String longArgName) {
if (longArgName.equals(StandardArgumentDefinitions.ANNOTATION_LONG_NAME)) {
return allDiscoveredAnnotations.keySet();
}
if (longArgName.equals(StandardArgumentDefinitions.ANNOTATIONS_TO_EXCLUDE_LONG_NAME)) {
Set annotations = toolDefaultGroups.stream().map(
k -> discoveredGroups.get(k).keySet())
.flatMap(Collection::stream).collect(Collectors.toSet());
annotations.addAll(toolDefaultAnnotations.keySet());
return annotations;
}
if (longArgName.equals(StandardArgumentDefinitions.ANNOTATION_GROUP_LONG_NAME)) {
return discoveredGroups.keySet();
}
return null;
}
@Override
public boolean isDependentArgumentAllowed(final Class> predecessorClass) {
// Make sure the predecessor for a dependent argument was either specified on the command line or
// is a tool default, otherwise reject it.
// NOTE: This method is called by the CLP during parsing at the time the depended argument is seen
// on the command line. Even if this check passes at the time this method is called, its possible
// for the user to subsequently disable the required predecessor. That case is caught during final
// validation done by the validateArguments method.
String predecessorName = predecessorClass.getSimpleName();
boolean isAllowed = (userArgs.getUserEnabledAnnotationNames().contains(predecessorName))
|| (toolDefaultAnnotations.get(predecessorName) != null);
if (!isAllowed) {
// Check whether any of the annotations have been added via groups (either tool default or user enabled)
isAllowed = Stream.of(userArgs.getUserEnabledAnnotationGroups(), toolDefaultGroups)
.flatMap((SerializableFunction, Stream>) t -> t.stream())
.anyMatch((SerializablePredicate) (group ->
discoveredGroups.containsKey(group) &&
discoveredGroups.get(group).keySet().stream()
.anyMatch((SerializablePredicate) (s -> s.equals(predecessorName)))));
}
if (isAllowed) {
// Keep track of the ones we allow so we can validate later that they weren't subsequently disabled
requiredPredecessors.add(predecessorName);
}
return isAllowed;
}
/**
* Validate the list of arguments and reduce the list of annotations to those
* actually seen on the command line. This is called by the command line parser
* after all arguments have been parsed. Tries to catch most cases where the user
* provides potentially confusing input.
*/
@Override
public void validateAndResolvePlugins() throws CommandLineException {
// throw if an annotation group is *enabled* more than once by the user
final Set duplicateUserEnabledAnnotationNames = Utils.getDuplicatedItems(userArgs.getUserEnabledAnnotationNames());
if (!duplicateUserEnabledAnnotationNames.isEmpty()) {
throw new CommandLineException.BadArgumentValue(
String.format("The annotation(s) are enabled more than once: %s",
Utils.join(", ", duplicateUserEnabledAnnotationNames)));
}
// throw if an annotation is *disabled* more than once by the user
final Set duplicateDisabledUserAnnotationNames = Utils.getDuplicatedItems(userArgs.getUserDisabledAnnotationNames());
if (!duplicateDisabledUserAnnotationNames.isEmpty()) {
throw new CommandLineException.BadArgumentValue(
String.format("The annotation(s) are disabled more than once: %s",
Utils.join(", ", duplicateDisabledUserAnnotationNames)));
}
// throw if an annotation is both enabled *and* disabled by the user
final Set enabledAndDisabled = new HashSet<>(userArgs.getUserEnabledAnnotationNames());
enabledAndDisabled.retainAll(userArgs.getUserDisabledAnnotationNames());
if (!enabledAndDisabled.isEmpty()) {
final String badAnnotationList = Utils.join(", ", enabledAndDisabled);
throw new CommandLineException(
String.format("The annotation(s): %s are both enabled and disabled", badAnnotationList));
}
// throw if a disabled annotation doesn't exist; warn if it wasn't enabled by the tool in the first place
userArgs.getUserDisabledAnnotationNames().forEach((SerializableConsumer) (s -> {
if (!allDiscoveredAnnotations.containsKey(s)) {
throw new CommandLineException.BadArgumentValue(String.format("Disabled annotation (%s) does not exist", s));
} else if (!toolDefaultAnnotations.containsKey(s)) {
logger.warn(String.format("Disabled annotation (%s) is not enabled by this tool", s));
}
}));
// warn if an annotation is both default and enabled by the user
final Set redundantAnnots = new HashSet<>(toolDefaultAnnotations.keySet());
redundantAnnots.retainAll(userArgs.getUserEnabledAnnotationNames());
redundantAnnots.forEach((SerializableConsumer)
(s -> {
logger.warn(String.format("Redundant enabled annotation (%s) is enabled for this tool by default", s));
}));
// warn if an annotation group is both default and enabled by the user
final Set redundantGroups = new HashSet<>(toolDefaultGroups);
redundantGroups.retainAll(userArgs.getUserEnabledAnnotationGroups());
redundantGroups.forEach((SerializableConsumer)
(s -> {
logger.warn(String.format("Redundant enabled annotation group (%s) is enabled for this tool by default", s));
}));
// Throw if args were specified for an annotation that was also disabled, or that was not enabled by the
// tool by default.
//
// Note that this is also checked during command line argument parsing, but needs to be checked again
// here. Whenever the command line parser sees a dependent argument on the command line, it delegates
// back to the descriptor's isDependentArgumentAllowed method to allow it to validate that the predecessor
// for that dependent argument has been supplied, either by a default annotation, or by an explicitly
// enabled annotation. However, its possible for the user to subsequently try to disable that
// predecessor, which is what we want to catch here.
//
userArgs.getUserDisabledAnnotationNames().forEach(s -> {
if (requiredPredecessors.contains(s)) {
String message = String.format("Values were supplied for (%s) that is also disabled", s);
if (toolDefaultAnnotations.containsKey(s)) {
// NOTE: https://github.com/broadinstitute/barclay/issues/23
// This is a special case to work around the issue where we can't really tell if the
// predecessor was added as a result of a user-provided value, or a default value. The
// CLP doesn't distinguish, so we only warn here for now.
logger.warn(message);
} else {
throw new CommandLineException(message);
}
}
});
// throw if an annotation name was specified that has no corresponding instance
userArgs.getUserEnabledAnnotationNames().forEach((SerializableConsumer) (s -> {
Annotation ta = allDiscoveredAnnotations.get(s);
if (null == ta) {
if (!toolDefaultAnnotations.containsKey(s)) {
throw new CommandLineException("Unrecognized annotation name: " + s);
}
}
}));
// throw if an annotation group was specified that has no corresponding instance
userArgs.getUserEnabledAnnotationGroups().forEach((SerializableConsumer) (s -> {
if (!discoveredGroups.containsKey(s)) {
throw new CommandLineException("Unrecognized annotation group name: " + s);
}
}));
// Populating the tool default annotations with the ones requested by groups
for (String group : toolDefaultGroups ) {
for (Annotation annot : discoveredGroups.get(group).values()) {
toolDefaultAnnotations.put(annot.getClass().getSimpleName(), annot);
}
}
// Populating any discovered pedigree annotations with the pedigree arguments from the command line.
if (((founderIds!=null && !founderIds.isEmpty()) || (pedigreeFile!=null)) && getResolvedInstances().stream()
.filter(PedigreeAnnotation.class::isInstance)
.map(a -> (PedigreeAnnotation) a)
.peek(a -> {
if (!founderIds.isEmpty()) a.setFounderIds(founderIds);
if (pedigreeFile != null) a.setPedigreeFile(pedigreeFile);
a.validateArguments();
})
.count() == 0) {
// Throwing an exception if no pedigree annotations were found
throw new CommandLineException(
String.format(
"Pedigree argument \"%s\" or \"%s\" was specified without a pedigree annotation being requested, (eg: %s))",
StandardArgumentDefinitions.PEDIGREE_FILE_LONG_NAME,
"founder-id",
allDiscoveredAnnotations.values().stream().filter(PedigreeAnnotation.class::isInstance).map(a -> a.getClass().getSimpleName()).collect(Collectors.joining(", "))));
}
//TODO: fix these lambdas to have serializable types
// Populating any discovered flow annotations with the flowOrder arguments from the command line.
if (flowOrder!=null && !flowOrder.isEmpty() && getResolvedInstances().stream()
.filter(FlowAnnotatorBase.class::isInstance)
.map(a -> (FlowAnnotatorBase) a)
.peek(a -> {
a.setFlowOrder(flowOrder);
})
.count() == 0) {
// Throwing an exception if no flow based annotations were found
throw new CommandLineException(
String.format(
"Flow argument \"%s\" was specified without a flow based annotation being requested, (eg: %s))",
StandardArgumentDefinitions.FLOW_ORDER_FOR_ANNOTATIONS,
allDiscoveredAnnotations.values().stream().filter(FlowAnnotatorBase.class::isInstance).map(a -> a.getClass().getSimpleName()).collect(Collectors.joining(", "))));
}
}
/**
* Get the list of default plugins used for this instance of this descriptor. Used for help/doc generation.
*
* NOTE: this method does not account for disabled default annotation and just return ALL default instances.
* The refactored interface in Barclay changes it's contract to allows returning a list with only 'enabled' default
* instances. We'll change the implementation when we integrate the updated interface.
*
* @return A list of Annotation objects that were enabled by the tool by default either by toolDefaultGroups or toolDefaultAnnotations
*/
@Override
public List getDefaultInstances() {
return new ArrayList<>(toolDefaultAnnotations.values());
}
/**
* Merge the default annotations with the users's command line annotation requests, then initialize
* the resulting annotations. Specifically, unless the user disables all tool default annotations it will
* first add all the tool enabled annotations which were not individually blocked by the user and then
* adds in annotations defined by the users specified groups, then individual annotations.
*
* NOTE: calling this method before argument parsing (and thus before {@link #validateAndResolvePlugins}
* has been called) may return a different list than calling it after parsing, because annotations associated
* with pedigree files will not have had their arguments input.
*
* @return An unordered Collection of annotations.
*/
@Override
public List getResolvedInstances() {
if (resolvedInstances == null) {
final SortedSet annotations = new TreeSet<>(Comparator.comparing(
(SerializableFunction) t -> t.getClass().getSimpleName()));
if (!userArgs.getDisableToolDefaultAnnotations()) {
annotations.addAll(toolDefaultAnnotations.values());
}
for (String group : userArgs.getUserEnabledAnnotationGroups()) {
annotations.addAll(discoveredGroups.get(group).values());
}
if (userArgs.getEnableAllAnnotations()) {
annotations.addAll(allDiscoveredAnnotations.values());
} else {
for (String annotation : userArgs.getUserEnabledAnnotationNames()) {
annotations.add(allDiscoveredAnnotations.get(annotation));
}
}
resolvedInstances = annotations.stream().filter(t -> !userArgs.getUserDisabledAnnotationNames().contains(t.getClass().getSimpleName())).collect(Collectors.toList());
}
return resolvedInstances;
}
/**
* Returns a map of the String to Annotations only in the resolved instances.
*
* @return a Map of Strings to Annotations of resolved instances
*/
public Map getResolvedInstancesMap() {
return allDiscoveredAnnotations.entrySet().stream()
.filter(e -> getResolvedInstances().contains(e.getValue()))
.collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
}
/**
* Return the class representing the instance of the plugin specified by {@code pluginName}
*
* @param pluginName Name of the plugin requested
* @return Class object for the plugin instance requested
*/
@Override
public Class> getClassForPluginHelp(final String pluginName) {
return allDiscoveredAnnotations.containsKey(pluginName) ? allDiscoveredAnnotations.get(pluginName).getClass() : null;
}
}