All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.campagnelab.dl.somatic.tools.CombineWithGoldStandard Maven / Gradle / Ivy

The newest version!
package org.campagnelab.dl.somatic.tools;

import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectArrayMap;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.XorShift1024StarRandom;
import org.apache.commons.compress.utils.IOUtils;
import org.campagnelab.dl.framework.tools.arguments.AbstractTool;
import org.campagnelab.dl.somatic.storage.RecordReader;
import org.campagnelab.dl.somatic.storage.RecordWriter;
import org.campagnelab.dl.varanalysis.protobuf.BaseInformationRecords;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;

/**
 * Combine a raw SBI with gold standard annotations to set isMutated flag. TSV format is chromosome\tposition\t[toBases]
 * where toBases is optional and contain the mutated bases (SNP or indel genotype of the mutation).
 * Created by fac2003 on 11/22/16.
 */
public class CombineWithGoldStandard extends AbstractTool {
    @Override
    public CombineWithGoldStandardArguments createArguments() {
        return new CombineWithGoldStandardArguments();
    }

    static private Logger LOG = LoggerFactory.getLogger(CombineWithGoldStandard.class);

    public static void main(String[] args) {
        CombineWithGoldStandard tool = new CombineWithGoldStandard();
        tool.parseArguments(args, "CombineWithGoldStandard", tool.createArguments());
        tool.execute();
    }

    @Override
    public void execute() {
        loadAnnotations(args().annotationFilename);
        RecordWriter outputWriters = null;
        try {
            outputWriters = new RecordWriter(args().outputFilename);
        } catch (IOException e) {
            throw new RuntimeException("Unable to create output file+" + args().outputFilename, e);
        }
        Random rand = new XorShift1024StarRandom();
        try (RecordReader reader = new RecordReader(args().sbiFilename)) {


//set up logger
            ProgressLogger pgRead = new ProgressLogger(LOG);
            pgRead.itemsName = "records";
            pgRead.expectedUpdates = Math.min(reader.getTotalRecords(), reader.getTotalRecords());
            pgRead.displayFreeMemory = true;
            pgRead.start();
            long numWritten = 0;
            long numMutatedWritten = 0;
            for (BaseInformationRecords.BaseInformation record : reader) {
                record = annotate(record);
                double choice = rand.nextDouble();
                if (choice < args().samplingFraction || record.getMutated()) {
                    outputWriters.writeRecord(record);
                    numWritten += 1;
                    if (record.getMutated()) {
                        numMutatedWritten++;
                       /* System.out.printf("%s:%d\n\tnormal: %s\ttumor:  %s %n", record.getReferenceId(), record.getPosition(),
                                record.getSamples(0).getFormattedCounts(),
                                record.getSamples(1).getFormattedCounts());*/
                    }
                }
                pgRead.lightUpdate();
            }
            pgRead.stop();
            System.out.printf("Wrote %d records (of which %d are mutated).", numWritten, numMutatedWritten);
        } catch (IOException e) {
            throw new RuntimeException("Unable to load or write files. Check command line arguments.",e);
        } finally {
            IOUtils.closeQuietly(outputWriters);
        }
    }

    class Mutation {
        String from;
        String to;
        int position;
    }

    Object2ObjectArrayMap> annotations = new Object2ObjectArrayMap<>();

    class Annotation {
        int position;
        String toBase;
    }

    private void loadAnnotations(String annotationFilename) {
        try {
            LineIterator lines = new LineIterator(new FastBufferedReader(new FileReader(annotationFilename)));
            for (MutableString line : lines.allLines()) {
               if (line.startsWith("#")) continue;
                String tokens[] = line.toString().split("\t");
                final String chromosome = tokens[0];
                int position = Integer.parseInt(tokens[1]);
                // convert to zero-based position used by goby/variationanalysis:
                position -= 1;
                Int2ObjectAVLTreeMap perPositionMap = annotations.getOrDefault(chromosome, new Int2ObjectAVLTreeMap<>());                Annotation a = new Annotation();
                a.position = position;
                a.toBase = tokens.length >= 3 ? tokens[2] : null;
                perPositionMap.put(position, a);
                annotations.put(chromosome, perPositionMap);
            }
        } catch (FileNotFoundException e) {
            throw new RuntimeException("Unable to find annotation filename:" + annotationFilename, e);

        }
    }

    private BaseInformationRecords.BaseInformation annotate(BaseInformationRecords.BaseInformation record) {
        if (!isAnnotated(record.getReferenceId(), record.getPosition())) {
            return record;
        }
        final BaseInformationRecords.BaseInformation.Builder builder = record.toBuilder();
        Annotation a=getAnnotation(record.getReferenceId(), record.getPosition());
        if (a.toBase!=null) {
            builder.setMutatedBase(a.toBase);
        }
        return builder.setMutated(true).build();
    }

    private boolean isAnnotated(String referenceId, int position) {
        return annotations.getOrDefault(referenceId, new Int2ObjectAVLTreeMap()).getOrDefault(position, null)!=null;
    }
    private Annotation getAnnotation(String referenceId, int position) {
        return annotations.getOrDefault(referenceId, new Int2ObjectAVLTreeMap()).get(position);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy