All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.copynumber.CallCopyRatioSegments Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.copynumber;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.io.FilenameUtils;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup;
import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils;
import org.broadinstitute.hellbender.tools.copynumber.caller.SimpleCopyRatioCaller;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledLegacySegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioSegmentCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledLegacySegment;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.File;
import java.util.stream.Collectors;

/**
 * Calls copy-ratio segments as amplified, deleted, or copy-number neutral.
 *
 * 

* This is a relatively naive caller that takes the modeled-segments output of {@link ModelSegments} and * performs a simple statistical test on the segmented log2 copy ratios to call amplifications and deletions, * given a specified range for determining copy-number neutral segments. This caller is * based on the calling functionality of ReCapSeg. *

* *

* If provided {@link ModelSegments} results that incorporate allele-fraction data, i.e. data with presumably improved segmentation, * the statistical test performed by CallCopyRatioSegments ignores the modeled minor-allele fractions when making calls. *

* *

Input

* *
    *
  • * Copy-ratio-segments .cr.seg file from {@link ModelSegments}. * This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, * a row specifying the column headers contained in {@link CopyRatioSegmentCollection.CopyRatioSegmentTableColumn}, * and the corresponding entry rows. *
  • *
* *

Outputs

* *
    *
  • * Called copy-ratio-segments file. * This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, * a row specifying the column headers contained in {@link CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn}, * and the corresponding entry rows. *
  • *
  • * CBS-formatted .igv.seg file (compatible with IGV). * This is a tab-separated values (TSV) file with CBS-format column headers * (see * http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS) * and the corresponding entry rows that can be plotted using IGV (see * * https://software.broadinstitute.org/software/igv/SEG). * The mean log2 copy ratio is given in the SEGMENT_MEAN column. *
  • *
* *

Usage examples

* *
 *     gatk CallCopyRatioSegments \
 *          -I tumor.cr.seg \
 *          -O tumor.called.seg
 * 
* * @author David Benjamin <[email protected]> * @author Samuel Lee <[email protected]> */ @CommandLineProgramProperties( summary = "Calls copy-ratio segments as amplified, deleted, or copy-number neutral", oneLineSummary = "Calls copy-ratio segments as amplified, deleted, or copy-number neutral", programGroup = CopyNumberProgramGroup.class ) @DocumentedFeature public final class CallCopyRatioSegments extends CommandLineProgram { public static final String NEUTRAL_SEGMENT_COPY_RATIO_LOWER_BOUND_LONG_NAME = "neutral-segment-copy-ratio-lower-bound"; public static final String NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME = "neutral-segment-copy-ratio-upper-bound"; public static final String OUTLIER_NEUTRAL_SEGMENT_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "outlier-neutral-segment-copy-ratio-z-score-threshold"; public static final String CALLING_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME = "calling-copy-ratio-z-score-threshold"; public static final String LEGACY_SEGMENTS_FILE_SUFFIX = ".igv.seg"; @Argument( doc = "Input file containing copy-ratio segments (.cr.seg output of ModelSegments).", fullName = StandardArgumentDefinitions.INPUT_LONG_NAME, shortName = StandardArgumentDefinitions.INPUT_SHORT_NAME ) private File inputCopyRatioSegmentsFile; @Argument( doc = "Output file for called copy-ratio segments.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) private File outputCalledCopyRatioSegmentsFile; @Argument( doc = "Lower bound on non-log2 copy ratio used for determining copy-neutral segments.", fullName = NEUTRAL_SEGMENT_COPY_RATIO_LOWER_BOUND_LONG_NAME, optional = true, minValue = 0. ) private double neutralSegmentCopyRatioLowerBound = 0.9; @Argument( doc = "Upper bound on non-log2 copy ratio used for determining copy-neutral segments.", fullName = NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME, optional = true, minValue = 0. ) private double neutralSegmentCopyRatioUpperBound = 1.1; @Argument( doc = "Threshold on z-score of non-log2 copy ratio used for determining outlier copy-neutral segments. " + "If non-log2 copy ratio z-score is above this threshold for a copy-neutral segment, " + "it is considered an outlier and not used in the calculation of the length-weighted mean and standard deviation " + "used for calling.", fullName = OUTLIER_NEUTRAL_SEGMENT_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME, optional = true, minValue = 0. ) private double outlierNeutralSegmentCopyRatioZScoreThreshold = 2.; @Argument( doc = "Threshold on z-score of non-log2 copy ratio used for calling segments.", fullName = CALLING_COPY_RATIO_Z_SCORE_THRESHOLD_LONG_NAME, optional = true, minValue = 0. ) private double callingCopyRatioZScoreThreshold = 2.; private File outputCalledLegacySegmentsFile; @Override protected Object doWork() { validateArguments(); final CopyRatioSegmentCollection copyRatioSegments = new CopyRatioSegmentCollection(inputCopyRatioSegmentsFile); final CalledCopyRatioSegmentCollection calledCopyRatioSegments = new SimpleCopyRatioCaller(copyRatioSegments, neutralSegmentCopyRatioLowerBound, neutralSegmentCopyRatioUpperBound, outlierNeutralSegmentCopyRatioZScoreThreshold, callingCopyRatioZScoreThreshold) .makeCalls(); logger.info(String.format("Writing called segments to %s...", outputCalledCopyRatioSegmentsFile.getAbsolutePath())); calledCopyRatioSegments.write(outputCalledCopyRatioSegmentsFile); // Write an IGV compatible collection final CalledLegacySegmentCollection legacySegmentCollection = createCalledLegacySegmentCollection(calledCopyRatioSegments); logger.info(String.format("Writing called segments in IGV-compatible format to %s...", outputCalledLegacySegmentsFile.getAbsolutePath())); legacySegmentCollection.write(outputCalledLegacySegmentsFile); logger.info(String.format("%s complete.", getClass().getSimpleName())); return null; } private void validateArguments() { Utils.validateArg(neutralSegmentCopyRatioLowerBound < neutralSegmentCopyRatioUpperBound, String.format("Copy-neutral lower bound (%s) must be less than upper bound (%s).", NEUTRAL_SEGMENT_COPY_RATIO_LOWER_BOUND_LONG_NAME, NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME)); CopyNumberArgumentValidationUtils.validateInputs(inputCopyRatioSegmentsFile); outputCalledLegacySegmentsFile = createCalledLegacySegmentsFile(outputCalledCopyRatioSegmentsFile); CopyNumberArgumentValidationUtils.validateOutputFiles( outputCalledCopyRatioSegmentsFile, outputCalledLegacySegmentsFile); } @VisibleForTesting static File createCalledLegacySegmentsFile(final File calledCopyRatioSegmentsFile) { return new File(FilenameUtils.removeExtension(calledCopyRatioSegmentsFile.getAbsolutePath()) + LEGACY_SEGMENTS_FILE_SUFFIX); } private static CalledLegacySegmentCollection createCalledLegacySegmentCollection(final CalledCopyRatioSegmentCollection segments) { return new CalledLegacySegmentCollection(segments.getMetadata(), segments.getRecords().stream() .map(r -> convert(r, segments.getMetadata().getSampleName())).collect(Collectors.toList())); } private static CalledLegacySegment convert(final CalledCopyRatioSegment segment, final String sampleName) { return new CalledLegacySegment(sampleName, segment.getInterval(), segment.getNumPoints(), segment.getMeanLog2CopyRatio(), segment.getCall()); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy