All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nmdp.ngs.tools.ValidateInterpretation Maven / Gradle / Ivy

The newest version!
/*

    ngs-tools  Next generation sequencing (NGS/HTS) command line tools.
    Copyright (c) 2014-2015 National Marrow Donor Program (NMDP)

    This library is free software; you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published
    by the Free Software Foundation; either version 3 of the License, or (at
    your option) any later version.

    This library is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
    License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this library;  if not, write to the Free Software Foundation,
    Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA.

    > http://www.gnu.org/licenses/lgpl.html

*/
package org.nmdp.ngs.tools;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import static org.dishevelled.compress.Readers.reader;
import static org.dishevelled.compress.Writers.writer;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;

import java.util.List;

import java.util.concurrent.Callable;

import com.google.common.base.Splitter;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ListMultimap;

import com.google.common.io.CharStreams;
import com.google.common.io.LineProcessor;

import org.dishevelled.commandline.ArgumentList;
import org.dishevelled.commandline.CommandLine;
import org.dishevelled.commandline.CommandLineParseException;
import org.dishevelled.commandline.CommandLineParser;
import org.dishevelled.commandline.Switch;
import org.dishevelled.commandline.Usage;

import org.dishevelled.commandline.argument.FileArgument;
import org.dishevelled.commandline.argument.IntegerArgument;
import org.dishevelled.commandline.argument.StringListArgument;

import org.nmdp.gl.Allele;
import org.nmdp.gl.AlleleList;
import org.nmdp.gl.Genotype;
import org.nmdp.gl.Haplotype;

import org.nmdp.gl.client.GlClient;
import org.nmdp.gl.client.GlClientException;

import org.nmdp.gl.client.local.LocalGlClient;

/**
 * Validate interpretation.
 */
public final class ValidateInterpretation implements Callable {
    private final File observedFile;
    private final File outputFile;
    private final File expectedFile;
    private final int resolution;
    private final List loci;
    private final boolean printSummary;
    private final GlClient glclient;
    static final int DEFAULT_RESOLUTION = 2;
    static final List DEFAULT_LOCI = ImmutableList.of("HLA-A", "HLA-B", "HLA-C", "HLA-DRB1", "HLA-DQB1");
    private static final String USAGE = "ngs-validate-interpretation -e expected.txt -b observed.txt -r 2 -l \"HLA-A,HLA-B\"";


    /**
     * Validate interpretation.
     *
     * @param expectedFile expected file, at least one of expected or observed file must not be null
     * @param observedFile observed file, at least one of expected or observed file must not be null
     * @param outputFile output file, if any
     * @param resolution resolution, must be in the range [1..4]
     * @param loci list of loci to validate, must not be null
     * @param printSummary print summary report
     * @param glclient genotype list client, must not be null
     */
    public ValidateInterpretation(final File expectedFile,
                                  final File observedFile,
                                  final File outputFile,
                                  final int resolution,
                                  final List loci,
                                  final boolean printSummary,
                                  final GlClient glclient) {
        checkNotNull(loci);
        checkNotNull(glclient);
        checkArgument(expectedFile != null || observedFile != null, "at least one of expected or observed file must not be null");
        checkArgument(resolution > 0 && resolution < 5, "resolution must be in the range [1..4]");
        this.observedFile = observedFile;
        this.expectedFile = expectedFile;
        this.outputFile = outputFile;
        this.resolution = resolution;
        this.loci = loci;
        this.printSummary = printSummary;
        this.glclient = glclient;
    }


    @Override
    public Integer call() throws Exception {
        PrintWriter writer = null;

        int passes = 0;
        int failures = 0;
        try {
            writer = writer(outputFile);

            // interpretations keyed by sample
            ListMultimap expected = read(expectedFile);
            ListMultimap observed = read(observedFile);

            // for each sample in observed
            for (String sample : observed.keySet()) {

                // for each interpretation in expected with matching sample
                for (Interpretation e : expected.get(sample)) {
                    Genotype expectedGenotype = asGenotype(e);
                    for (Haplotype expectedHaplotype : expectedGenotype.getHaplotypes()) {
                        for (AlleleList expectedAlleleList : expectedHaplotype.getAlleleLists()) {
                            if (shouldValidate(e, expectedAlleleList)) {

                                // for each pair of expected and observed alleles
                                for (Allele expectedAllele : expectedAlleleList.getAlleles()) {
                                    boolean match = false;
                                    for (Interpretation o : observed.get(sample)) {
                                        AlleleList observedAlleleList = asAlleleList(o);
                                        if (sameLocus(observedAlleleList, expectedAlleleList)) {
                                            for (Allele observedAllele : observedAlleleList.getAlleles()) {
                                                if (matchByField(expectedAllele.getGlstring(), observedAllele.getGlstring()) >= resolution) {
                                                    match = true;
                                                    break;
                                                }
                                            }
                                        }
                                        if (match) {
                                            passes++;
                                        }
                                        else {
                                            failures++;
                                        }
                                    }
                                    if (!printSummary) {
                                        writer.println((match ? "PASS" : "FAIL") + "\t" + sample + "\t" + expectedAllele);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            if (printSummary) {
                writer.println("PASS\t" + passes);
                writer.println("FAIL\t" + failures);
            }

            return 0;
        }
        finally {
            try {
                writer.close();
            }
            catch (Exception e) {
                e.printStackTrace();
                System.exit(1);
            }
        }
    }

    boolean shouldValidate(final Interpretation interpretation, final AlleleList alleleList) {
        checkNotNull(interpretation);
        checkNotNull(alleleList);
        return loci.contains(interpretation.locus()) && interpretation.locus().equals(alleleList.getAlleles().get(0).getLocus().getGlstring());
    }

    /*

      todo: allow for various strategies

      - GL String syntax checking only, use LocalGlClient with strict mode off (necessary if interpretations contain allele codes)
      - re-parse GL Strings under reported version of nomenclature, use LocalGlClient in strict mode with reported IMGT/HLA nomenclature
      - register GL Strings under reported version of nomenclature, use GlClient in strict mode against reported IMGT/HLA nomenclature namespace
      - re-parse GL Strings under latest version of nomenclature, regardless of the reported version, use LocalGlClient in strict mode with latest IMGT/HLA nomenclature
      - register GL Strings under latest version of nomenclature, regardless of the reported version, use GlClient in strict mode against latest IMGT/HLA nomenclature namespace
      - re-parse GL Strings under latest version of nomenclature, using liftover service to generate new GL Strings, then use LocalGlClient in strict mode with latest IMGT/HLA nomenclature
      - register GL Strings under latest version of nomenclature, using liftover service to generate new GL Strings, then use GlClient in strict mode against latest IMGT/HLA nomenclature namespace

     */
    AlleleList asAlleleList(final Interpretation interpretation) throws IOException {
        checkNotNull(interpretation);
        try {
            return glclient.createAlleleList(interpretation.glstring());
        }
        catch (GlClientException e) {
            throw new IOException("could not convert interpretation to allele list, caught " + e.getMessage(), e);
        }
    }

    Genotype asGenotype(final Interpretation interpretation) throws IOException {
        checkNotNull(interpretation);
        try {
            return glclient.createGenotype(interpretation.glstring());
        }
        catch (GlClientException e) {
            throw new IOException("could not convert interpretation to genotype, caught " + e.getMessage(), e);
        }
    }


    static int matchByField(final String allele0, final String allele1) {
        checkNotNull(allele0);
        checkNotNull(allele1);
        List allele0Parts = Splitter.on(":").splitToList(allele0);
        List allele1Parts = Splitter.on(":").splitToList(allele1);
        int smallest = Math.min(allele0Parts.size(), allele1Parts.size());

        for (int i = 0; i < smallest; i++) {
            if (!allele0Parts.get(i).equals(allele1Parts.get(i))) {
                return i;
            }
        }
        return smallest;
    }

    static boolean sameLocus(final AlleleList alleleList0, final AlleleList alleleList1) {
        checkNotNull(alleleList0);
        checkNotNull(alleleList1);
        return (alleleList0.getAlleles().get(0).getLocus().equals(alleleList1.getAlleles().get(0).getLocus()));
    }

    static ListMultimap read(final File file) throws IOException {
        BufferedReader reader = null;
        final ListMultimap interpretations = ArrayListMultimap.create();
        final Interpretation.Builder builder = Interpretation.builder();
        try {
            reader = reader(file);
            CharStreams.readLines(reader, new LineProcessor() {
                    private int count = 0;

                    @Override
                    public boolean processLine(final String line) throws IOException {
                        String[] tokens = line.split("\t");
                        if (tokens.length < 6) {
                            throw new IOException("illegal interpretation format, expected at least 6 columns, found " + tokens.length + "\nline=" + line);
                        }
                        Interpretation interpretation = builder.reset()
                            .withSample(tokens[0])
                            .withLocus(tokens[1])
                            .withGeneFamily(tokens[2])
                            .withAlleleDb(tokens[3])
                            .withAlleleVersion(tokens[4])
                            .withGlstring(tokens[5])
                            .withConsensus(tokens.length > 6 ? tokens[6] : null)
                            .build();

                        interpretations.put(interpretation.sample(), interpretation);
                        count++;
                        return true;
                    }

                    @Override
                    public Void getResult() {
                        return null;
                    }
                });

            return interpretations;
        }
        finally {
            try {
                reader.close();
            }
            catch (Exception e) {
                // ignore
            }
        }
    }

    static final class Interpretation {
        private final String sample;
        private final String locus;
        private final String geneFamily;
        private final String alleleDb;
        private final String alleleVersion;
        private final String glstring;
        private final String consensus;

        private Interpretation(final String sample,
                               final String locus,
                               final String geneFamily,
                               final String alleleDb,
                               final String alleleVersion,
                               final String glstring,
                               final String consensus) {
            this.sample = sample;
            this.locus = locus;
            this.geneFamily = geneFamily;
            this.alleleDb = alleleDb;
            this.alleleVersion = alleleVersion;
            this.glstring = glstring;
            this.consensus = consensus;
        }

        String sample() {
            return sample;
        }

        String locus() {
            return locus;
        }

        String geneFamily() {
            return geneFamily;
        }

        String alleleDb() {
            return alleleDb;
        }

        String alleleVersion() {
            return alleleVersion;
        }

        String glstring() {
            return glstring;
        }

        String consensus() {
            return consensus;
        }

        static Builder builder() {
            return new Builder();
        }

        static final class Builder {
            private String sample;
            private String locus;
            private String geneFamily;
            private String alleleDb;
            private String alleleVersion;
            private String glstring;
            private String consensus;

            private Builder() {
                // empty
            }

            Builder withSample(final String sample) {
                this.sample = sample;
                return this;
            }

            Builder withLocus(final String locus) {
                this.locus = locus;
                return this;
            }

            Builder withGeneFamily(final String geneFamily) {
                this.geneFamily = geneFamily;
                return this;
            }

            Builder withAlleleDb(final String alleleDb) {
                this.alleleDb = alleleDb;
                return this;
            }

            Builder withAlleleVersion(final String alleleVersion) {
                this.alleleVersion = alleleVersion;
                return this;
            }

            Builder withGlstring(final String glstring) {
                this.glstring = glstring;
                return this;
            }

            Builder withConsensus(final String consensus) {
                this.consensus = consensus;
                return this;
            }

            Builder reset() {
                sample = null;
                locus = null;
                geneFamily = null;
                alleleDb = null;
                alleleVersion = null;
                glstring = null;
                consensus = null;
                return this;
            }

            Interpretation build() {
                return new Interpretation(sample, locus, geneFamily, alleleDb, alleleVersion, glstring, consensus);
            }
        }
    }


    /**
     * Main.
     *
     * @param args command line args
     */
    public static void main(final String[] args) {
        Switch about = new Switch("a", "about", "display about message");
        Switch help = new Switch("h", "help", "display help message");

        FileArgument expectedFile = new FileArgument("e", "expected-file", "expected interpretation file, default stdin; at least one of expected or observed file must be provided", false);
        FileArgument observedFile = new FileArgument("b", "observed-file", "observed interpretation file, default stdin; at least one of expected or observed file must be provided", false);
        FileArgument outputFile = new FileArgument("o", "output-file", "output file, default stdout", false);
        IntegerArgument resolution = new IntegerArgument("r", "resolution", "resolution, must be in the range [1..4], default " + DEFAULT_RESOLUTION, false);
        StringListArgument loci = new StringListArgument("l", "loci", "list of loci to validate, default " + DEFAULT_LOCI, false);
        Switch printSummary = new Switch("s", "summary", "print summary");

        ArgumentList arguments = new ArgumentList(about, help, expectedFile, observedFile, outputFile, resolution, loci, printSummary);
        CommandLine commandLine = new CommandLine(args);

        ValidateInterpretation validateInterpretation = null;
        try
        {
            CommandLineParser.parse(commandLine, arguments);
            if (about.wasFound()) {
                About.about(System.out);
                System.exit(0);
            }
            if (help.wasFound()) {
                Usage.usage(USAGE, null, commandLine, arguments, System.out);
                System.exit(0);
            }

            // todo: allow for configuration of glclient
            validateInterpretation = new ValidateInterpretation(expectedFile.getValue(), observedFile.getValue(), outputFile.getValue(), resolution.getValue(DEFAULT_RESOLUTION), loci.getValue(DEFAULT_LOCI), printSummary.wasFound(), LocalGlClient.create());
        }
        catch (CommandLineParseException e) {
            if (about.wasFound()) {
                About.about(System.out);
                System.exit(0);
            }
            if (help.wasFound()) {
                Usage.usage(USAGE, null, commandLine, arguments, System.out);
                System.exit(0);
            }
            Usage.usage(USAGE, e, commandLine, arguments, System.err);
            System.exit(-1);
        }
        try {
            System.exit(validateInterpretation.call());
        }
        catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy