All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.broadinstitute.hellbender.tools.walkers.contamination.GatherPileupSummaries Maven / Gradle / Ivy

The newest version!
package org.broadinstitute.hellbender.tools.walkers.contamination;


import htsjdk.samtools.SAMSequenceDictionary;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;

import java.io.File;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Command-line tool that merges several pileup summary tables into one output table.
 *
 * <p>Input files that contain no records are dropped, the remaining files are ordered by
 * their first record according to the supplied sequence dictionary, and the ordered files
 * are handed to {@code PileupSummary.writeToFile} to produce the merged output.
 */
@CommandLineProgramProperties(
        summary="Combine output files from GetPileupSummary in the order defined by a sequence dictionary",
        oneLineSummary = "Combine output files from GetPileupSummary in the order defined by a sequence dictionary",
        programGroup = DiagnosticsAndQCProgramGroup.class
)
public class GatherPileupSummaries extends CommandLineProgram {
    /** Sequence dictionary file that defines the contig order used when sorting the inputs. */
    @Argument(fullName = StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, doc = "sequence dictionary file")
    final File sequenceDictionaryFile = null;

    /** Pileup summary tables to merge. */
    @Argument(fullName = StandardArgumentDefinitions.INPUT_SHORT_NAME, doc = "an output of PileupSummaryTable")
    final List<File> input = null;

    /** Destination of the merged table. */
    @Argument(fullName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, doc = "output")
    final File output = null;

    /** Dictionary loaded from {@link #sequenceDictionaryFile} in {@link #onStartup()}. */
    SAMSequenceDictionary sequenceDictionary = null;

    /** Loads the sequence dictionary before any work is done. */
    @Override
    protected void onStartup(){
        sequenceDictionary = ReferenceUtils.loadFastaDictionary(sequenceDictionaryFile);
    }

    /**
     * Filters out empty inputs, sorts the rest in dictionary order and writes the merged table.
     *
     * @return a message reporting how many non-empty inputs were merged
     */
    @Override
    protected Object doWork() {
        final List<File> nonEmptyFiles = removeEmptyFiles(input);
        Collections.sort(nonEmptyFiles, new PileupSummaryFileComparator(sequenceDictionary));
        PileupSummary.writeToFile(nonEmptyFiles, output);
        return String.format("Successfully merged %d samples", nonEmptyFiles.size());
    }

    /**
     * Returns the files from {@code list} whose pileup summary table has at least one record,
     * logging how many files were dropped when any were.
     *
     * @param list candidate pileup summary files
     * @return the files that contain at least one record, in their original relative order
     */
    private List<File> removeEmptyFiles(final List<File> list){
        final List<File> nonEmptyList = list.stream()
                .filter(f -> PileupSummary.readFromFile(f).getRight().size() > 0)
                .collect(Collectors.toList());
        if (nonEmptyList.size() < list.size()){
            logger.info(String.format("Removed %d empty samples", list.size() - nonEmptyList.size()));
        }
        return nonEmptyList;
    }

    /**
     * Compare two PileupSummary files under the assumption that
     *   1. PileupSummaries are already sorted within each file
     *   2. Files do not overlap
     *
     * Only the first record of each file is compared, so both files must be non-empty.
     * Each call to {@link #compare(File, File)} reads both files again.
     */
    private static class PileupSummaryFileComparator implements Comparator<File> {
        final SAMSequenceDictionary sequenceDictionary;

        // Built once and reused, rather than constructing a new record comparator per comparison.
        private final PileupSummary.PileupSummaryComparator recordComparator;

        private PileupSummaryFileComparator(final SAMSequenceDictionary sequenceDictionary){
            this.sequenceDictionary = sequenceDictionary;
            this.recordComparator = new PileupSummary.PileupSummaryComparator(sequenceDictionary);
        }

        /**
         * Orders two files by their first pileup summary record.
         *
         * @param file1 first non-empty pileup summary file
         * @param file2 second non-empty pileup summary file
         * @return the result of comparing the first record of {@code file1} with the first
         *         record of {@code file2}
         */
        @Override
        public int compare(File file1, File file2) {
            final PileupSummary ps1 = PileupSummary.readFromFile(file1).getRight().get(0);
            final PileupSummary ps2 = PileupSummary.readFromFile(file2).getRight().get(0);

            return recordComparator.compare(ps1, ps2);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy