All downloads are free. The search and download functionality uses the official Maven repository.

eqtlmappingpipeline.binaryInteraction.QueryBinaryInteraction Maven / Gradle / Ivy

package eqtlmappingpipeline.binaryInteraction;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import eqtlmappingpipeline.Main;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Objects;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression;
import umcg.genetica.containers.Pair;
import umcg.genetica.io.binInteraction.BinaryInteractionCohort;
import umcg.genetica.io.binInteraction.BinaryInteractionFile;
import umcg.genetica.io.binInteraction.BinaryInteractionFileException;
import umcg.genetica.io.binInteraction.BinaryInteractionQtlZscores;
import umcg.genetica.io.binInteraction.BinaryInteractionQueryResult;
import umcg.genetica.io.binInteraction.BinaryInteractionZscores;
import umcg.genetica.io.binInteraction.gene.BinaryInteractionGene;
import umcg.genetica.io.binInteraction.variant.BinaryInteractionVariant;

/**
 *
 * @author Patrick Deelen
 */
public class QueryBinaryInteraction {

	private static final String VERSION = Main.VERSION;
	private static final DateFormat DATE_TIME_FORMAT = new SimpleDateFormat("dd-MM-yyyy HH:mm:ss");
	private static final Date currentDataTime = new Date();
	private static final Options OPTIONS;

	static {

		OPTIONS = new Options();

		OptionBuilder.withArgName("path");
		OptionBuilder.hasArg();
		OptionBuilder.withDescription("Binary interaction file");
		OptionBuilder.withLongOpt("input");
		OptionBuilder.isRequired();
		OPTIONS.addOption(OptionBuilder.create("i"));

		OptionBuilder.withArgName("path");
		OptionBuilder.hasArg();
		OptionBuilder.withDescription("Output file (optional)");
		OptionBuilder.withLongOpt("output");
		OPTIONS.addOption(OptionBuilder.create("o"));

		OptionBuilder.withArgName("string");
		OptionBuilder.hasArg();
		OptionBuilder.withDescription("Gene name (optional)");
		OptionBuilder.withLongOpt("gene");
		OPTIONS.addOption(OptionBuilder.create("g"));

		OptionBuilder.withArgName("string");
		OptionBuilder.hasArg();
		OptionBuilder.withDescription("Covariate name (optional)");
		OptionBuilder.withLongOpt("covariate");
		OPTIONS.addOption(OptionBuilder.create("c"));

		OptionBuilder.withArgName("string");
		OptionBuilder.hasArg();
		OptionBuilder.withDescription("Variant name (optional)");
		OptionBuilder.withLongOpt("variant");
		OPTIONS.addOption(OptionBuilder.create("v"));

		OptionBuilder.withArgName("path");
		OptionBuilder.hasArg();
		OptionBuilder.withDescription("File with queries. Must have header. All columns are optional, options are gene, variant and covariate. Any combination of headers is possible (optional)");
		OptionBuilder.withLongOpt("queryFile");
		OPTIONS.addOption(OptionBuilder.create("qf"));

		OptionBuilder.withArgName("double");
		OptionBuilder.hasArg();
		OptionBuilder.withDescription("Minimum absolute interaction z-score (not yet implemented)");
		OptionBuilder.withLongOpt("interactionZ");
		OPTIONS.addOption(OptionBuilder.create("iz"));

		OptionBuilder.withDescription("Only output meta z-scores");
		OptionBuilder.withLongOpt("onlyMetaZ");
		OPTIONS.addOption(OptionBuilder.create("oz"));

	}

	public static void main(String[] args) throws UnsupportedEncodingException, IOException, Exception {

		final File inputInteractionFile;
		final File outputFile;
		final String queryGeneName;
		final String queryCovariateName;
		final String queryVariantName;
		final double queryMinAbsInteractionZ;
		final File queryFile;
		final boolean onlyOutputMetaZ;

		try {
			final CommandLine commandLine = new PosixParser().parse(OPTIONS, args, false);

			inputInteractionFile = new File(commandLine.getOptionValue("i"));
			if (commandLine.hasOption("o")) {
				outputFile = new File(commandLine.getOptionValue("o"));
			} else {
				outputFile = null;
			}
			queryGeneName = commandLine.getOptionValue("g");
			queryCovariateName = commandLine.getOptionValue("c");
			queryVariantName = commandLine.getOptionValue("v");

			if (commandLine.hasOption("qf")) {
				queryFile = new File(commandLine.getOptionValue("qf"));
				if (queryGeneName != null || queryVariantName != null || queryCovariateName != null) {
					System.err.println("Cannot combine query file with commandline query arguments");
					System.exit(1);
					return;
				}
			} else {
				queryFile = null;
			}

			if (commandLine.hasOption("iz")) {
				try {
					queryMinAbsInteractionZ = Double.parseDouble(commandLine.getOptionValue("iz"));
				} catch (NumberFormatException ex) {
					System.err.println("Cannot not parse interactionZ as double: " + commandLine.getOptionValue("iz"));
					System.exit(1);
					return;
				}
			} else {
				queryMinAbsInteractionZ = -1;
			}

			onlyOutputMetaZ = commandLine.hasOption("oz");

		} catch (ParseException ex) {
			System.err.println("Invalid command line arguments: ");
			System.err.println(ex.getMessage());
			System.err.println();
			new HelpFormatter().printHelp(" ", OPTIONS);
			System.exit(1);
			return;
		}

		BinaryInteractionFile inputFile = BinaryInteractionFile.load(inputInteractionFile, true);

		final Writer outputWriter;
		if (outputFile != null && !onlyOutputMetaZ) {
			outputWriter = new BufferedWriter(new FileWriter(outputFile));
		} else {
			outputWriter = new OutputStreamWriter(System.out);
		}

		outputWriter.write("# Query result binary interaction file using software version: " + VERSION);
		outputWriter.write('\n');
		outputWriter.write("# Current data and time: " + DATE_TIME_FORMAT.format(currentDataTime));
		outputWriter.write('\n');
		outputWriter.write("# Command line options: ");
		outputWriter.write('\n');
		outputWriter.write("# - Input file: " + inputInteractionFile.getAbsolutePath());
		outputWriter.write('\n');
		if (outputFile != null) {
			outputWriter.write("# - Output file: " + outputFile.getAbsolutePath());
			outputWriter.write('\n');
		}
		if (queryGeneName != null) {
			outputWriter.write("# - Query gene: " + queryGeneName);
			outputWriter.write('\n');
		}
		if (queryCovariateName != null) {
			outputWriter.write("# - Query covariate: " + queryCovariateName);
			outputWriter.write('\n');
		}
		if (queryVariantName != null) {
			outputWriter.write("# - Query variant: " + queryVariantName);
			outputWriter.write('\n');
		}
		if (queryMinAbsInteractionZ > 0) {
			outputWriter.write("# - Query minimum absote interaction z-score: " + queryMinAbsInteractionZ);
			outputWriter.write('\n');
		}
		if (queryFile != null) {
			outputWriter.write("# - Query file: " + queryFile.getAbsolutePath());
			outputWriter.write('\n');
		}
		if (onlyOutputMetaZ) {
			outputWriter.write("# - Only outputing meta z-scores");
			outputWriter.write('\n');
		}
		outputWriter.write("#\n");

		outputWriter.write("# Interaction file meta data: ");
		outputWriter.write('\n');
		outputWriter.write("# - Description: " + inputFile.getFileDescription());
		outputWriter.write('\n');
		outputWriter.write("# - Creation data: " + inputFile.getCreationDataTimeString());
		outputWriter.write('\n');
		outputWriter.write("# - Cohorts: " + inputFile.getCohortCount());
		outputWriter.write('\n');
		for (BinaryInteractionCohort cohort : inputFile.getCohorts()) {
			outputWriter.write("#    * " + cohort.getName() + " (" + cohort.getSampleCount() + ")");
			outputWriter.write('\n');
		}
		outputWriter.write("# - Variants: " + inputFile.getVariantCount());
		outputWriter.write('\n');
		outputWriter.write("# - Genes: " + inputFile.getGeneCount());
		outputWriter.write('\n');
		outputWriter.write("# - Covariats: " + inputFile.getCovariateCount());
		outputWriter.write('\n');
		outputWriter.write("#\n");

		outputWriter.flush();

		final LinkedHashSet interactionQueries;
		final boolean interactionQueriesOnlyCovariates;
		if (queryFile != null) {
			Pair, Boolean> loadRes = loadInteractionQueries(queryFile);
			interactionQueries = loadRes.getLeft();
			interactionQueriesOnlyCovariates = loadRes.getRight();
		} else {
			interactionQueries = null;
			interactionQueriesOnlyCovariates = false;
		}

		CSVWriter tableWriter;
		if (onlyOutputMetaZ) {
			if (outputFile == null) {
				throw new Exception("Use of option --onlyMetaZ only possible in combination with output file");
			}
			tableWriter = new CSVWriter(new BufferedWriter(new FileWriter(outputFile)), '\t', '\0', '\0');
		} else {
			tableWriter = new CSVWriter(outputWriter, '\t', '\0', '\0');
		}

		if (onlyOutputMetaZ && !inputFile.isMetaAnalysis()) {
			throw new Exception("No meta analysis information detected cannot use option: --onlyMetaZ");
		}

		final String[] row;

		if (onlyOutputMetaZ) {
			row = new String[1];
		} else {
			final int columnCount = 7
					+ ((5 + (inputFile.isNormalQtlStored() ? 2 : 0) + (inputFile.isFlippedZscoreStored() ? 1 : 0)) * inputFile.getCohortCount())
					+ (inputFile.isMetaAnalysis() ? (3 + (inputFile.isNormalQtlStored() ? 1 : 0) + (inputFile.isFlippedZscoreStored() ? 1 : 0)) : 0);



			row = new String[columnCount];
			int c = 0;

			row[c++] = "Variant";
			row[c++] = "Gene";
			row[c++] = "Covariate";
			row[c++] = "Variant_chr";
			row[c++] = "Variant_pos";
			row[c++] = "Variant alleles";
			row[c++] = "Assessed_allele";

			for (BinaryInteractionCohort cohort : inputFile.getCohorts()) {

				String cohortName = cohort.getName();

				if (inputFile.isNormalQtlStored()) {
					row[c++] = cohortName + "_QTL_sample_count";
					row[c++] = cohortName + "_QTL_Z-score";
				}

				row[c++] = cohortName + "_interaction_sample_count";
				row[c++] = cohortName + "_interaction_r2";
				row[c++] = cohortName + "_variant_Z-score";
				row[c++] = cohortName + "_covariate_Z-score";
				row[c++] = cohortName + "_interaction_Z-score";

				if (inputFile.isFlippedZscoreStored()) {
					row[c++] = cohortName + "_flipped_interaction_Z-score";
				}

			}

			if (inputFile.isMetaAnalysis()) {
				if (inputFile.isNormalQtlStored()) {
					row[c++] = "Meta_QTL_Z-score";
				}
				row[c++] = "Meta_variant_Z-score";
				row[c++] = "Meta_covariate_Z-score";
				row[c++] = "Meta_interaction_Z-score";
				if (inputFile.isFlippedZscoreStored()) {
					row[c++] = "Meta_flipped_interaction_Z-score";
				}
			}

			tableWriter.writeNext(row);
		}

		if (interactionQueries != null) {
			
			if (interactionQueriesOnlyCovariates) {
				HashSet covariateNames = new HashSet<>(interactionQueries.size());
				for (InteractoinQuery interactionQuery : interactionQueries) {
					covariateNames.add(interactionQuery.getCovariate());
				}
				doQueryCovariates(covariateNames, inputFile, tableWriter, row, onlyOutputMetaZ);
			} else {
				for (InteractoinQuery interactionQuery : interactionQueries) {
					doQuery(interactionQuery.getGene(), interactionQuery.getVariant(), interactionQuery.getCovariate(), inputFile, tableWriter, row, onlyOutputMetaZ);
				}
			}
			
		} else {
			doQuery(queryGeneName, queryVariantName, queryCovariateName, inputFile, tableWriter, row, onlyOutputMetaZ);
		}



		tableWriter.close();
		outputWriter.close();

	}

	@SuppressWarnings({"null", "ConstantConditions"})
	private static void addRow(BinaryInteractionQueryResult queryRestult, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row, boolean onlyOutputMetaZ) throws BinaryInteractionFileException, IOException {

		if (onlyOutputMetaZ) {
			BinaryInteractionZscores zscroresInteraction = queryRestult.getInteractionZscores();
			row[0] = String.valueOf(zscroresInteraction.getZscoreInteractionMeta());
		} else {
			int c = 0;

			row[c++] = queryRestult.getVariantName();
			row[c++] = queryRestult.getGeneName();
			row[c++] = queryRestult.getCovariateName();

			BinaryInteractionVariant variant = inputFile.getVariant(queryRestult.getVariantName());
			row[c++] = variant.getChr();
			row[c++] = String.valueOf(variant.getPos());
			row[c++] = variant.getRefAllele().getAlleleAsString() + '/' + variant.getAltAllele().getAlleleAsString();
			row[c++] = variant.getAltAllele().toString();

			BinaryInteractionQtlZscores zscroresQtl = queryRestult.getQtlZscores();
			BinaryInteractionZscores zscroresInteraction = queryRestult.getInteractionZscores();

			for (int cohortIndex = 0; cohortIndex < inputFile.getCohortCount(); ++cohortIndex) {

				if (inputFile.isNormalQtlStored()) {
					row[c++] = String.valueOf(zscroresQtl.getSampleCounts()[cohortIndex]);
					row[c++] = String.valueOf(zscroresQtl.getZscores()[cohortIndex]);
				}

				row[c++] = String.valueOf(zscroresInteraction.getSamplesInteractionCohort()[cohortIndex]);
				row[c++] = String.valueOf(zscroresInteraction.getrSquaredCohort()[cohortIndex]);
				row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpCohort()[cohortIndex]);
				row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateCohort()[cohortIndex]);
				row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionCohort()[cohortIndex]);

				if (inputFile.isFlippedZscoreStored()) {
					row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedCohort()[cohortIndex]);
				}

			}

			if (inputFile.isMetaAnalysis()) {
				if (inputFile.isNormalQtlStored()) {
					row[c++] = String.valueOf(zscroresQtl.getMetaZscore());
				}
				row[c++] = String.valueOf(zscroresInteraction.getZscoreSnpMeta());
				row[c++] = String.valueOf(zscroresInteraction.getZscoreCovariateMeta());
				row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionMeta());
				if (inputFile.isFlippedZscoreStored()) {
					row[c++] = String.valueOf(zscroresInteraction.getZscoreInteractionFlippedMeta());
				}
			}
		}


		tableWriter.writeNext(row);
	}

	private static void doQuery(final String queryGeneName, final String queryVariantName, final String queryCovariateName, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row, boolean onlyOutputMetaZ) throws IOException, BinaryInteractionFileException {

		if (queryGeneName != null && queryVariantName != null && queryCovariateName != null) {

			addRow(inputFile.readVariantGeneCovariateResults(queryVariantName, queryGeneName, queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ);

		} else if (queryGeneName != null && queryVariantName != null) {

			for (Iterator iterator = inputFile.readVariantGeneResults(queryVariantName, queryGeneName); iterator.hasNext();) {
				addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ);
			}

		} else if (queryVariantName != null) {

			int[] genePointers = inputFile.getVariant(queryVariantName).getGenePointers();
			for (int genePointer : genePointers) {

				BinaryInteractionGene gene = inputFile.getGene(genePointer);
				if (queryCovariateName != null) {

					if (inputFile.containsInteraction(queryVariantName, gene.getName(), queryCovariateName)) {
						addRow(inputFile.readVariantGeneCovariateResults(queryVariantName, gene.getName(), queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ);
					}

				} else {
					for (Iterator iterator = inputFile.readVariantGeneResults(queryVariantName, gene.getName()); iterator.hasNext();) {
						addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ);
					}
				}

			}

		} else if (queryGeneName != null) {

			int[] variantPointers = inputFile.getGene(queryGeneName).getVariantPointers();
			for (int variantPointer : variantPointers) {

				BinaryInteractionVariant variant = inputFile.getVariant(variantPointer);
				if (queryCovariateName != null) {

					if (inputFile.containsInteraction(variant.getName(), queryGeneName, queryCovariateName)) {
						addRow(inputFile.readVariantGeneCovariateResults(variant.getName(), queryGeneName, queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ);
					}

				} else {
					for (Iterator iterator = inputFile.readVariantGeneResults(variant.getName(), queryGeneName); iterator.hasNext();) {
						addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ);
					}
				}


			}

		} else {

			for (BinaryInteractionVariant variant : inputFile.getVariants()) {

				String variantName = variant.getName();

				int[] genePointers = inputFile.getVariant(variantName).getGenePointers();
				for (int genePointer : genePointers) {

					BinaryInteractionGene gene = inputFile.getGene(genePointer);
					if (queryCovariateName != null) {

						if (inputFile.containsInteraction(variantName, gene.getName(), queryCovariateName)) {
							addRow(inputFile.readVariantGeneCovariateResults(variantName, gene.getName(), queryCovariateName), inputFile, tableWriter, row, onlyOutputMetaZ);
						}

					} else {
						for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) {
							addRow(iterator.next(), inputFile, tableWriter, row, onlyOutputMetaZ);
						}
					}

				}

			}



		}
	}

	private static void doQueryCovariates(final HashSet queryCovariateNames, BinaryInteractionFile inputFile, CSVWriter tableWriter, String[] row, boolean onlyOutputMetaZ) throws IOException, BinaryInteractionFileException {

		for (BinaryInteractionVariant variant : inputFile.getVariants()) {

			String variantName = variant.getName();

			int[] genePointers = inputFile.getVariant(variantName).getGenePointers();
			for (int genePointer : genePointers) {

				BinaryInteractionGene gene = inputFile.getGene(genePointer);

				for (Iterator iterator = inputFile.readVariantGeneResults(variantName, gene.getName()); iterator.hasNext();) {
					BinaryInteractionQueryResult next = iterator.next();
					if(queryCovariateNames.contains(next.getCovariateName())){
						addRow(next, inputFile, tableWriter, row, onlyOutputMetaZ);
					}
					
				}

			}

		}

	}

	private static Pair, Boolean> loadInteractionQueries(File queryFile) throws FileNotFoundException, IOException, Exception {

		LinkedHashSet interactionQueries = new LinkedHashSet();
		final CSVReader queryReader = new CSVReader(new FileReader(queryFile), '\t', '\0');

		String[] nextLine = queryReader.readNext();

		int variantCol = -1;
		int geneCol = -1;
		int covariateCol = -1;

		//Parse header
		for (int i = 0; i < nextLine.length; ++i) {
			String headerEntry = nextLine[i].toLowerCase();
			switch (headerEntry) {
				case "variant":
					if (variantCol != -1) {
						throw new Exception("Variant column found twice");
					}
					variantCol = i;
					break;
				case "gene":
					if (geneCol != -1) {
						throw new Exception("Gene column found twice");
					}
					geneCol = i;
					break;
				case "covariate":
					if (covariateCol != -1) {
						throw new Exception("Covariate column found twice");
					}
					covariateCol = i;
					break;

			}

		}

		if (variantCol == -1 && geneCol == -1 && covariateCol == -1) {
			throw new Exception("Did not detect appropiate header in query file");

		}

		while ((nextLine = queryReader.readNext()) != null) {
			String variant = null;
			String gene = null;
			String covariate = null;

			if (variantCol != -1) {
				variant = nextLine[variantCol];
			}
			if (geneCol != -1) {
				gene = nextLine[geneCol];
			}
			if (covariateCol != -1) {
				covariate = nextLine[covariateCol];
			}
			interactionQueries.add(new InteractoinQuery(variant, gene, covariate));
		}
		queryReader.close();

		return new Pair(interactionQueries, variantCol == -1 && geneCol == -1);
	}

	private static class InteractoinQuery {

		private final String variant;
		private final String gene;
		private final String covariate;

		public InteractoinQuery(String variant, String gene, String covariate) {
			this.variant = variant;
			this.gene = gene;
			this.covariate = covariate;
		}

		public String getVariant() {
			return variant;
		}

		public String getGene() {
			return gene;
		}

		public String getCovariate() {
			return covariate;
		}

		@Override
		public int hashCode() {
			int hash = 5;
			hash = 67 * hash + (this.variant != null ? this.variant.hashCode() : 0);
			hash = 67 * hash + (this.gene != null ? this.gene.hashCode() : 0);
			hash = 67 * hash + (this.covariate != null ? this.covariate.hashCode() : 0);
			return hash;
		}

		@Override
		public boolean equals(Object obj) {
			if (obj == null) {
				return false;
			}
			if (getClass() != obj.getClass()) {
				return false;
			}
			final InteractoinQuery other = (InteractoinQuery) obj;
			if ((this.variant == null) ? (other.variant != null) : !this.variant.equals(other.variant)) {
				return false;
			}
			if ((this.gene == null) ? (other.gene != null) : !this.gene.equals(other.gene)) {
				return false;
			}
			if ((this.covariate == null) ? (other.covariate != null) : !this.covariate.equals(other.covariate)) {
				return false;
			}
			return true;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy