picard.analysis.directed.CollectTargetedMetrics Maven / Gradle / Ivy
package picard.analysis.directed;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import picard.analysis.MetricAccumulationLevel;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.metrics.MultilevelMetrics;
import java.io.File;
import java.util.List;
import java.util.Set;
import static picard.cmdline.StandardOptionDefinitions.MINIMUM_MAPPING_QUALITY_SHORT_NAME;
/**
* Both CollectTargetedPCRMetrics and CollectHsSelection share virtually identical program structures except
* for the name of their targeting mechanisms (e.g. bait set or amplicon set). The shared behavior of these programs
* is encapsulated in CollectTargetedMetrics which is then subclassed by CollectHsMetrics and CollectTargetedPcrMetrics.
*
* This program verifies the input parameters to TargetMetricsCollector and converts all files to
* the format desired by TargetMetricsCollector. Then it instantiates a TargetMetricsCollector and
* collects metric information for all reads in the INPUT sam file.
*/
public abstract class CollectTargetedMetrics> extends CommandLineProgram {
private final Log log = Log.getInstance(getClass());
protected abstract IntervalList getProbeIntervals();
protected abstract String getProbeSetName();
/**
* A factory method for the TargetMetricsCollector to use this time. Examples of TargetMetricsCollector:
* (TargetedPcrMetricsCollector, HsMetricsCalculator)
*
* @return A TargetMetricsCollector to which we will pass SAMRecords
*/
protected abstract COLLECTOR makeCollector(final Set accumulationLevels,
final List samRgRecords,
final ReferenceSequenceFile refFile,
final File perTargetCoverage,
final File perBaseCoverage,
final IntervalList targetIntervals,
final IntervalList probeIntervals,
final String probeSetName,
final int nearProbeDistance);
@Option(shortName = "TI", doc = "An interval list file that contains the locations of the targets.", minElements=1)
public List TARGET_INTERVALS;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "An aligned SAM or BAM file.")
public File INPUT;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output file to write the metrics to.")
public File OUTPUT;
@Option(shortName = "LEVEL", doc = "The level(s) at which to accumulate metrics.")
public Set METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
@Option(optional = true, doc = "An optional file to output per target coverage information to.")
public File PER_TARGET_COVERAGE;
@Option(optional = true, doc = "An optional file to output per base coverage information to. The per-base file contains " +
"one line per target base and can grow very large. It is not recommended for use with large target sets.")
public File PER_BASE_COVERAGE;
@Option(optional = true, doc= "The maximum distance between a read and the nearest probe/bait/amplicon for the read to be " +
"considered 'near probe' and included in percent selected.")
public int NEAR_DISTANCE = TargetedPcrMetricsCollector.NEAR_PROBE_DISTANCE_DEFAULT;
@Option(shortName = MINIMUM_MAPPING_QUALITY_SHORT_NAME, doc = "Minimum mapping quality for a read to contribute coverage.", overridable = true)
public int MINIMUM_MAPPING_QUALITY = 1;
@Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage.", overridable = true)
public int MINIMUM_BASE_QUALITY = 0;
@Option(doc = "True if we are to clip overlapping reads, false otherwise.", optional=true, overridable = true)
public boolean CLIP_OVERLAPPING_READS = false;
@Option(shortName = "covMax", doc = "Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is 200.", optional = true)
public int COVERAGE_CAP = 200;
@Option(doc="Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000.", optional = true)
public int SAMPLE_SIZE=10000;
/**
* Asserts that files are readable and writable and then fires off an
* HsMetricsCalculator instance to do the real work.
*/
protected int doWork() {
for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (PER_TARGET_COVERAGE != null) IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);
// Validate that the targets and baits have the same references as the reads file
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
targetIntervals.getHeader().getSequenceDictionary());
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(),
getProbeIntervals().getHeader().getSequenceDictionary()
);
ReferenceSequenceFile ref = null;
if (REFERENCE_SEQUENCE != null) {
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
SequenceUtil.assertSequenceDictionariesEqual(
reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(),
INPUT, REFERENCE_SEQUENCE
);
}
final COLLECTOR collector = makeCollector(
METRIC_ACCUMULATION_LEVEL,
reader.getFileHeader().getReadGroups(),
ref,
PER_TARGET_COVERAGE,
PER_BASE_COVERAGE,
targetIntervals,
getProbeIntervals(),
getProbeSetName(),
NEAR_DISTANCE
);
final ProgressLogger progress = new ProgressLogger(log);
for (final SAMRecord record : reader) {
collector.acceptRecord(record, null);
progress.record(record);
}
// Write the output file
final MetricsFile metrics = getMetricsFile();
collector.finish();
collector.addAllLevelsToFile(metrics);
metrics.write(OUTPUT);
CloserUtil.close(reader);
return 0;
}
/** Renders a probe name from the provided file, returning {@link java.io.File#getName()} with all extensions stripped. */
static String renderProbeNameFromFile(final File probeIntervalFile) {
final String name = probeIntervalFile.getName();
final int firstPeriodIndex = name.indexOf('.');
if (firstPeriodIndex == -1) {
return name;
} else {
return name.substring(0, firstPeriodIndex);
}
}
protected String[] customCommandLineValidation() {
if (PER_TARGET_COVERAGE != null && (METRIC_ACCUMULATION_LEVEL.size() != 1 ||
METRIC_ACCUMULATION_LEVEL.iterator().next() != MetricAccumulationLevel.ALL_READS)) {
return new String[]{"PER_TARGET_COVERAGE can be specified only when METRIC_ACCUMULATION_LEVEL is set " +
"to ALL_READS."};
}
if (PER_TARGET_COVERAGE != null && REFERENCE_SEQUENCE == null) {
return new String[]{"Must supply REFERENCE_SEQUENCE when supplying PER_TARGET_COVERAGE"};
}
return super.customCommandLineValidation();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy