All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.charite.compbio.jannovar.cmd.annotate_csv.AnnotateCSVCommand Maven / Gradle / Ivy

package de.charite.compbio.jannovar.cmd.annotate_csv;

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;

import de.charite.compbio.jannovar.JannovarException;
import de.charite.compbio.jannovar.annotation.AllAnnotationListTextGenerator;
import de.charite.compbio.jannovar.annotation.AnnotationException;
import de.charite.compbio.jannovar.annotation.BestAnnotationListTextGenerator;
import de.charite.compbio.jannovar.annotation.VariantAnnotations;
import de.charite.compbio.jannovar.annotation.VariantAnnotationsTextGenerator;
import de.charite.compbio.jannovar.annotation.VariantAnnotator;
import de.charite.compbio.jannovar.annotation.builders.AnnotationBuilderOptions;
import de.charite.compbio.jannovar.cmd.CommandLineParsingException;
import de.charite.compbio.jannovar.cmd.JannovarAnnotationCommand;
import de.charite.compbio.jannovar.hgvs.AminoAcidCode;
import de.charite.compbio.jannovar.reference.GenomePosition;
import de.charite.compbio.jannovar.reference.GenomeVariant;
import de.charite.compbio.jannovar.reference.PositionType;
import de.charite.compbio.jannovar.reference.Strand;
import net.sourceforge.argparse4j.inf.Namespace;

/**
 * Allows the annotation of a CVF file.
 *
 * @author Max Schubach
 */
public class AnnotateCSVCommand extends JannovarAnnotationCommand {

	/** Configuration */
	private JannovarAnnotateCSVOptions options;

	/**
	 * @param argv
	 * @param args
	 * @throws CommandLineParsingException
	 */
	public AnnotateCSVCommand(String argv[], Namespace args) throws CommandLineParsingException {
		this.options = new JannovarAnnotateCSVOptions();
		this.options.setFromArgs(args);
	}

	/**
	 * This function will simply annotate a csv file wehere positions are set
	 *
	 *
	 * @param options
	 *            configuration for the command
	 * @throws AnnotationException
	 *             on problems in the annotation process
	 */
	@Override
	public void run() throws JannovarException {
		System.err.println("Options");
		System.err.println(options.toString());

		System.err.println("Deserializing transcripts...");
		deserializeTranscriptDefinitionFile(options.getDatabaseFilePath());

		final VariantAnnotator annotator = new VariantAnnotator(refDict, chromosomeMap, new AnnotationBuilderOptions());

		try {
			Reader in = new FileReader(options.getCsv());
			final Appendable out = System.out;
			CSVParser parser = options.getFormat().parse(in);

			final CSVPrinter printer = options.getFormat().print(out);

			if (options.isHeader()) {
				List header = new ArrayList<>(parser.getHeaderMap().size() + 2);
				for (Map.Entry entry : parser.getHeaderMap().entrySet()) {
					header.add(entry.getValue(), entry.getKey());
				}
				header.add(parser.getHeaderMap().size(), "HGVS");
				header.add(parser.getHeaderMap().size()+1, "FunctionalClass");

				printer.printRecord(header);
			}

			for (CSVRecord record : parser) {

				// Parse the chromosomal change string into a GenomeChange object.
				String chromosomalChange = getChromosomalChange(record);
				final GenomeVariant genomeChange = parseGenomeChange(chromosomalChange);

				// Construct VariantAnnotator for building the variant annotations.
				VariantAnnotations annoList = null;
				try {
					annoList = annotator.buildAnnotations(genomeChange);
				} catch (Exception e) {
					System.err.println(String.format("[ERROR] Could not annotate variant %s!", chromosomalChange));
					e.printStackTrace(System.err);
					continue;
				}

				for (String string : record) {
					printer.print(string);
				}
				VariantAnnotationsTextGenerator textGenerator;
				if (options.isShowAll())
					textGenerator = new AllAnnotationListTextGenerator(annoList, 0, 1);
				else
					textGenerator = new BestAnnotationListTextGenerator(annoList, 0, 1);

				printer.print(textGenerator.buildHGVSText(options.isUseThreeLetterAminoAcidCode()
						? AminoAcidCode.THREE_LETTER : AminoAcidCode.ONE_LETTER));
				printer.print(annoList.getHighestImpactEffect());
				printer.println();
			}
			parser.close();
			printer.close();
		} catch (IOException e1) {
			e1.printStackTrace();
			throw new JannovarException(e1.getMessage());
		}

	}

	private String getChromosomalChange(CSVRecord record) {
		return record.get(options.getChr()) + ":" + record.get(options.getPos()) + record.get(options.getRef())
				+ ">" + record.get(options.getAlt());
	}

	private GenomeVariant parseGenomeChange(String changeStr) throws JannovarException {
		Pattern pat = Pattern.compile("(chr[0-9MXY]+):([0-9]+)([ACGTN]*)>([ACGTN]*)");
		Matcher match = pat.matcher(changeStr);

		if (!match.matches()) {
			System.err.println("[ERROR] Input string for the chromosomal change " + changeStr
					+ " does not fit the regular expression ... :(");
			System.exit(3);
		}

		int chr = refDict.getContigNameToID().get(match.group(1));
		int pos = Integer.parseInt(match.group(2));
		String ref = match.group(3);
		String alt = match.group(4);

		return new GenomeVariant(new GenomePosition(refDict, Strand.FWD, chr, pos, PositionType.ONE_BASED), ref, alt);
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy