org.snpeff.snpEffect.commandLine.SnpEffCmdEff Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.snpEffect.commandLine;

import java.io.*;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.snpeff.SnpEff;
import org.snpeff.fileIterator.BedFileIterator;
import org.snpeff.fileIterator.VariantFileIterator;
import org.snpeff.fileIterator.VcfFileIterator;
import org.snpeff.filter.VariantEffectFilter;
import org.snpeff.interval.Marker;
import org.snpeff.interval.Markers;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Variant;
import org.snpeff.interval.VariantNonRef;
import org.snpeff.interval.tree.IntervalForest;
import org.snpeff.outputFormatter.BedAnnotationOutputFormatter;
import org.snpeff.outputFormatter.BedOutputFormatter;
import org.snpeff.outputFormatter.OutputFormatter;
import org.snpeff.outputFormatter.VcfOutputFormatter;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.snpEffect.VariantEffect;
import org.snpeff.snpEffect.VariantEffect.EffectImpact;
import org.snpeff.snpEffect.VariantEffects;
import org.snpeff.snpEffect.VcfAnnotator;
import org.snpeff.stats.CountByType;
import org.snpeff.stats.VariantEffectStats;
import org.snpeff.stats.VariantStats;
import org.snpeff.stats.VcfStats;
import org.snpeff.util.Gpr;
import org.snpeff.util.Timer;
import org.snpeff.util.Tuple;
import org.snpeff.vcf.EffFormatVersion;
import org.snpeff.vcf.Pedigree;
import org.snpeff.vcf.VcfEntry;

import freemarker.template.Configuration;
import freemarker.template.DefaultObjectWrapper;
import freemarker.template.Template;
import freemarker.template.TemplateException;

/**
 * Command line program: Predict variant effects
 *
 * @author Pablo Cingolani
 */
public class SnpEffCmdEff extends SnpEff implements VcfAnnotator {

	public static final String SUMMARY_TEMPLATE = "snpEff_summary.ftl"; // Summary template file name
	public static final String SUMMARY_CSV_TEMPLATE = "snpEff_csv_summary.ftl"; // Summary template file name
	public static final String SUMMARY_GENES_TEMPLATE = "snpEff_genes.ftl"; // Genes template file name

	public static final String DEFAULT_SUMMARY_HTML_FILE = "snpEff_summary.html";
	public static final String DEFAULT_SUMMARY_CSV_FILE = "snpEff_summary.csv";
	public static final String DEFAULT_SUMMARY_GENES_FILE = "snpEff_genes.txt";

	public static final int SHOW_EVERY = 10 * 1000;

	boolean anyCancerSample;
	boolean cancer = false; // Perform cancer comparisons
	boolean chromoPlots = true; // Create mutations by chromosome plots?
	boolean createSummaryCsv = false;
	boolean createSummaryHtml = true;
	boolean lossOfFunction = true; // Create loss of function LOF tag?
	boolean useGeneId = false; // Use gene ID instead of gene name (VCF output)
	boolean useLocalTemplate = false; // Use template from 'local' file instead of 'jar' (this is only used for
										// development and debugging)
	boolean useOicr = false; // Use OICR tag
	boolean useSequenceOntology = true; // Use Sequence Ontology terms
	int totalErrs = 0;
	int countVcfEntries = 0;
	long countInputLines = 0;
	long countVariants = 0;
	long countEffects = 0;
	String cancerSamples = null;
	String chrStr = "";
	String inputFile = ""; // Input file
	String fastaProt = null;
	String summaryFileCsv; // HTML Summary file name
	String summaryFileHtml; // CSV Summary file name
	String summaryGenesFile; // Gene table file
	InputFormat inputFormat = InputFormat.VCF; // Format use in input files
	OutputFormat outputFormat = OutputFormat.VCF; // Output format
	VariantEffectFilter variantEffectResutFilter; // Filter prediction results
	ArrayList filterIntervalFiles;// Files used for filter intervals
	ArrayList inputFiles;
	IntervalForest filterIntervals; // Filter only variants that match these intervals
	VariantStats variantStats;
	VariantEffectStats variantEffectStats;
	SnpEffectPredictor snpEffectPredictor;
	VcfStats vcfStats;
	List vcfEntriesDebug = null; // Use for debugging or testing (in some test-cases)
	EffFormatVersion formatVersion = EffFormatVersion.DEFAULT_FORMAT_VERSION;
	Pedigree pedigree;
	CountByType errByType, warnByType;
	OutputFormatter outputFormatter = null;
	Timer annotateTimer;

	public SnpEffCmdEff() {
		super();
		chrStr = ""; // Default: Don't show 'chr' before chromosome
		inputFile = ""; // variant input file
		variantEffectResutFilter = new VariantEffectFilter(); // Filter prediction results
		filterIntervalFiles = new ArrayList<>(); // Files used for filter intervals
		summaryFileHtml = DEFAULT_SUMMARY_HTML_FILE;
		summaryFileCsv = DEFAULT_SUMMARY_CSV_FILE;
		summaryGenesFile = DEFAULT_SUMMARY_GENES_FILE;
	}

	public void setOutputWriter(BufferedWriter writer) {
		this.outputFormatter.setOutputWriter(writer);
	}

	@Override
	public boolean addHeaders(VcfFileIterator vcfFile) {
		// This is done by VcfOutputFormatter, so there is nothing to do here.
		return false;
	}

	/**
	 * Annotate: Calculate the effect of variants and show results
	 */
	public boolean annotate(String inputFile, String outputFile) {
		// Initialize
		annotateInit(outputFile);
		VcfFileIterator vcf = null;

		// Iterate over input files
		switch (inputFormat) {
		case VCF:
			// FIXME: Need to fix multithreding mode
			// vcf = (multiThreaded ? annotateVcfMulti(inputFile, outputFormatter) :
			// :annotateVcf(inputFile));
			vcf = annotateVcf(inputFile);
			break;

		case BED:
			annotateBed(inputFile, outputFormatter);
			break;

		default:
			throw new RuntimeException("Cannot create variant file iterator on input format '" + inputFormat + "'");
		}
		outputFormatter.close();

		// Create reports and finish up
		boolean err = annotateFinish(vcf);

		return !err;
	}

	/**
	 * Annotate a VCF entry
	 */
	@Override
	public boolean annotate(VcfEntry vcfEntry) {
		boolean printed = false;
		boolean filteredOut = false;

		try {
			countInputLines++;
			countVcfEntries++;

			// Find if there is a pedigree and if it has any 'derived' entry
			if (cancer) {
				VcfFileIterator vcfFile = vcfEntry.getVcfFileIterator();
                if (vcfFile.isHeadeSection()) {
					pedigree = readPedigree(vcfFile);
					anyCancerSample = pedigree.anyDerived();
				}
			}

			// Sample vcf entry
			if (createSummaryHtml || createSummaryCsv)
				vcfStats.sample(vcfEntry);

			// Skip if there are filter intervals and they are not matched
			if ((filterIntervals != null) && (filterIntervals.query(vcfEntry).isEmpty())) {
				filteredOut = true;
				return false;
			}

			// Create new 'section'
			outputFormatter.startSection(vcfEntry);

			// ---
			// Analyze all changes in this VCF entry
			// Note, this is the standard analysis.
			// Next section deals with cancer: Somatic vs Germline comparisons
			// ---
			boolean impactLowOrHigher = false; // Does this entry have an impact (other than MODIFIER)?
			List variants = vcfEntry.variants();
			for (Variant variant : variants) {
				// Show progress
				showProgress();

				// Annotate variant
				impactLowOrHigher |= annotateVariant(variant);
			}

			// Perform cancer annotations
			if (anyCancerSample && impactLowOrHigher)
				annotateVariantCancer(variants, vcfEntry);

			// Finish up this section
			outputFormatter.printSection(vcfEntry);

			printed = true;
		} catch (Throwable t) {
			totalErrs++;
			error(t, "Error while processing VCF entry :\n\t" + vcfEntry + "\n"
					+ t);
		} finally {
			if (!printed && !filteredOut)
				outputFormatter.printSection(vcfEntry);
		}

		return true;
	}

	/**
	 * Iterate on all inputs and calculate effects. Note: This is used for all input
	 * formats except VCF, which has a different iteration modality
	 */
	void annotateBed(String inputFile, OutputFormatter outputFormatter) {
		SnpEffectPredictor snpEffectPredictor = config.getSnpEffectPredictor();

		// Create an input file iterator
		VariantFileIterator variantFileIterator = new BedFileIterator(inputFile, config.getGenome());

		// ---
		// Iterate over input file
		// ---
		for (Variant variant : variantFileIterator) {
			try {
				countInputLines++;

				countVariants++;
				if (verbose && (countVariants % SHOW_EVERY == 0))
					Timer.showStdErr("\t" + countVariants + " variants");

				// Does it pass the filter? => Analyze

				// Skip if there are filter intervals and they are not matched
				if ((filterIntervals != null) && (filterIntervals.stab(variant).size() <= 0))
					continue;

				// Perform basic statistics about this variant
				if (createSummaryHtml || createSummaryCsv)
					variantStats.sample(variant);

				// Calculate effects
				VariantEffects variantEffects = snpEffectPredictor.variantEffect(variant);

				// Create new 'section'
				outputFormatter.startSection(variant);

				// Show results
				for (VariantEffect variantEffect : variantEffects) {
					variantEffectStats.sample(variantEffect); // Perform basic statistics about this result
					outputFormatter.add(variantEffect);
					countEffects++;
				}

				// Finish up this section
				outputFormatter.printSection(variant);
			} catch (Throwable t) {
				totalErrs++;
				error(t, "Error while processing variant (line " + variantFileIterator.getLineNum() + ") :\n\t"
						+ variant + "\n" + t);
			}
		}

		// Close file iterator (not really needed, but just in case)
		variantFileIterator.close();
	}

	/**
	 * Finish annotations and create reports
	 */
	@Override
	public boolean annotateFinish(VcfFileIterator vcfFile) {
		boolean ok = true;

		if (vcfFile != null)
			vcfFile.close();

		// Creates a summary output file
		if (createSummaryCsv) {
			if (verbose)
				Timer.showStdErr("Creating summary file: " + summaryFileCsv);
			ok &= summary(SUMMARY_CSV_TEMPLATE, summaryFileCsv, true);
		}
		if (createSummaryHtml) {
			if (verbose)
				Timer.showStdErr("Creating summary file: " + summaryFileHtml);
			ok &= summary(SUMMARY_TEMPLATE, summaryFileHtml, false);
		}

		// Creates genes output file
		if (createSummaryHtml || createSummaryCsv) {
			if (verbose)
				Timer.showStdErr("Creating genes file: " + summaryGenesFile);
			ok &= summary(SUMMARY_GENES_TEMPLATE, summaryGenesFile, true);
		}

		if (totalErrs > 0)
			System.err.println(totalErrs + " errors.");
		return !ok;
	}

	/**
	 * Calculate the effect of variants and show results
	 */
	public void annotateInit(String outputFile) {
		snpEffectPredictor = config.getSnpEffectPredictor();

		// Reset all counters
		totalErrs = 0;
		countInputLines = countVariants = countEffects = 0; // = countVariantsFilteredOut = 0;
		anyCancerSample = false;
		pedigree = null;
		errByType = new CountByType();
		warnByType = new CountByType();
		countVcfEntries = 0;
		annotateTimer = new Timer();

		// Create 'stats' objects
		variantStats = new VariantStats(config.getGenome());
		variantEffectStats = new VariantEffectStats(config.getGenome());
		variantEffectStats.setUseSequenceOntology(useSequenceOntology);
		vcfStats = new VcfStats();

		if (fastaProt != null) {
			if ((new File(fastaProt)).delete() && verbose) {
				Timer.showStdErr("Deleted protein fasta output file '" + fastaProt + "'");
			}
		}

		// ---
		// Create output formatter
		// ---
		outputFormatter = null;
		switch (outputFormat) {
		case VCF:
			VcfOutputFormatter vof = new VcfOutputFormatter(vcfEntriesDebug);
			vof.setFormatVersion(formatVersion);
			vof.setLossOfFunction(lossOfFunction);
			vof.setConfig(config);
			outputFormatter = vof;
			break;
		case GATK:
			outputFormatter = new VcfOutputFormatter(vcfEntriesDebug);
			((VcfOutputFormatter) outputFormatter).setGatk(true);
			break;
		case BED:
			outputFormatter = new BedOutputFormatter();
			break;
		case BEDANN:
			outputFormatter = new BedAnnotationOutputFormatter();
			break;
		default:
			throw new RuntimeException("Unknown output format '" + outputFormat + "'");
		}

		outputFormatter.setVersion(VERSION_AUTHOR);
		outputFormatter.setCommandLineStr(commandLineStr(false));
		outputFormatter.setVariantEffectResutFilter(variantEffectResutFilter);
		outputFormatter.setSupressOutput(suppressOutput);
		outputFormatter.setChrStr(chrStr);
		outputFormatter.setUseSequenceOntology(useSequenceOntology);
		outputFormatter.setUseOicr(useOicr);
		outputFormatter.setUseHgvs(hgvs);
		outputFormatter.setUseGeneId(useGeneId);
		outputFormatter.setOutputFile(outputFile);
	}

	@Override
	public boolean annotateInit(VcfFileIterator vcfFile) {
		if (inputFormat != InputFormat.VCF || outputFormat != OutputFormat.VCF)
			throw new RuntimeException();
		annotateInit((String) null);
		return false;
	}

	/**
	 * Annotate a single variant
	 * 
	 * @param variant
	 * @return true if there is any impact 'Low' or higher
	 */
	boolean annotateVariant(Variant variant) {
		// Calculate effects: By default do not annotate non-variant sites
		if (!variant.isVariant())
			return false;

		boolean impactModerateOrHigh = false; // Does this entry have a 'MODERATE' or 'HIGH' impact?
		boolean impactLowOrHigher = false; // Does this entry have an impact (other than MODIFIER)?

		// Perform basic statistics about this variant
		if (createSummaryHtml || createSummaryCsv)
			variantStats.sample(variant);

		VariantEffects variantEffects = snpEffectPredictor.variantEffect(variant);

		// Create new 'section'
		outputFormatter.startSection(variant);

		// Show results
		for (VariantEffect variantEffect : variantEffects) {
			if (createSummaryHtml || createSummaryCsv)
				variantEffectStats.sample(variantEffect); // Perform basic statistics about this result

			// Any errors or warnings?
			if (variantEffect.hasError())
				errByType.inc(variantEffect.getError());
			if (variantEffect.hasWarning())
				warnByType.inc(variantEffect.getWarning());

			// Does this entry have an impact (other than MODIFIER)?
			impactLowOrHigher |= (variantEffect.getEffectImpact() != EffectImpact.MODIFIER);
			impactModerateOrHigh |= (variantEffect.getEffectImpact() == EffectImpact.MODERATE)
					|| (variantEffect.getEffectImpact() == EffectImpact.HIGH);

			outputFormatter.add(variantEffect);
			countEffects++;
		}

		// Finish up this section
		outputFormatter.printSection(variant);

		// Output protein changes to FASTA file
		if (fastaProt != null && impactModerateOrHigh)
			proteinAltSequence(variant, variantEffects);

		return impactLowOrHigher;
	}

	/**
	 * Compare two genotypes
	 */
	void annotateVariantCancer(List variants, int altGtNum, int refGtNum) {
		VariantNonRef varNonRef = variantCancer(variants, altGtNum, refGtNum);

		// No net variation? Skip
		if (!varNonRef.isVariant())
			return;

		// Calculate effects
		VariantEffects variantEffects = snpEffectPredictor.variantEffect(varNonRef);

		// Create new 'section'
		outputFormatter.startSection(varNonRef);

		// Show results (note, we don't add these to the statistics)
		for (VariantEffect variantEffect : variantEffects)
			outputFormatter.add(variantEffect);

		// Finish up this section
		outputFormatter.printSection(varNonRef);
	}

	/**
	 * Do we analyze cancer samples? Here we deal with Somatic vs Germline
	 * comparisons
	 */
	void annotateVariantCancer(List variants, VcfEntry vcfEntry) {
		if (!shouldAnnotateVariantCancer(variants, vcfEntry))
			return;

		// Calculate all required comparisons
		Set> comparisons = pedigree.compareCancerGenotypes(vcfEntry);

		// Analyze each comparison
		for (Tuple comp : comparisons) {
			// We have to compare comp.first vs comp.second
			int altGtNum = comp.first; // comp.first is 'derived' (our new ALT)
			int refGtNum = comp.second; // comp.second is 'original' (our new REF)
			annotateVariantCancer(variants, altGtNum, refGtNum);
		}
	}

	/**
	 * Iterate on all inputs (VCF) and calculate effects. Note: This is used only on
	 * input format VCF, which has a different iteration modality
	 */

	VcfFileIterator annotateVcf(String inputFile) {
		return annotateVcf(new VcfFileIterator(inputFile, genome));
	}

	public VcfFileIterator annotateVcf(VcfFileIterator vcfFile) {
		// Open VCF file
		vcfFile.setDebug(debug);

		// Iterate over VCF entries
		for (VcfEntry vcfEntry : vcfFile)
			annotate(vcfEntry);

		// Empty file? Show at least the header
		if (countVcfEntries == 0)
			outputFormatter.print(vcfFile.getVcfHeader().toString());

		// Show errors and warnings
		if (verbose) {
			if (!errByType.isEmpty())
				System.err.println(
						"\nERRORS: Some errors were detected\nError type\tNumber of errors\n" + errByType + "\n");
			if (!warnByType.isEmpty())
				System.err.println("\nWARNINGS: Some warning were detected\nWarning type\tNumber of warnings\n"
						+ warnByType + "\n");
		}

		return vcfFile;
	}

	// /**
	// * Multi-threaded iteration on VCF inputs and calculates effects.
	// * Note: This is used only on input format VCF, which has a different
	// iteration modality
	// */
	// VcfFileIterator annotateVcfMulti(String inputFile, final OutputFormatter
	// outputFormatter) {
	// if (verbose) Timer.showStdErr("Running multi-threaded mode (numThreads=" +
	// numWorkers + ").");
	//
	// outputFormatter.setShowHeader(false); // Master process takes care of the
	// header (instead of outputFormatter). Otherwise you get the header printed one
	// time per worker.
	//
	// // We need final variables for the inner class
	// final SnpEffectPredictor snpEffectPredictor = config.getSnpEffectPredictor();
	// final VcfOutputFormatter vcfOutForm = (VcfOutputFormatter) outputFormatter;
	// final SnpEffCmdEff snpEffCmdEff = this;
	//
	// VcfFileIterator vcf = new VcfFileIterator(inputFile, config.getGenome());
	//
	// // Master factory
	// Props props = new Props(new UntypedActorFactory() {
	//
	// private static final long serialVersionUID = 1L;
	//
	// @Override
	// public Actor create() {
	// MasterEff master = new MasterEff(numWorkers, snpEffCmdEff,
	// snpEffectPredictor, outputFormatter, filterIntervals);
	// master.setAddHeader(vcfOutForm.getNewHeaderLines().toArray(new String[0]));
	// return master;
	// }
	// });
	//
	// // Create and run queue
	// int batchSize = 10;
	// VcfWorkQueue vcfWorkQueue = new VcfWorkQueue(inputFile, config, batchSize,
	// -1, props);
	// vcfWorkQueue.run(true);
	//
	// return vcf;
	// }

	public VariantEffectStats getChangeEffectResutStats() {
		return variantEffectStats;
	}

	public int getTotalErrs() {
		return totalErrs;
	}

	public VariantStats getvariantStats() {
		return variantStats;
	}

	/**
	 * Create a suitable output file name
	 */
	String outputFile(String inputFile) {
		// Remove GZ extension
		String base = Gpr.baseName(inputFile, ".gz");

		// Remove extension according to input format
		switch (inputFormat) {
		case VCF:
			base = Gpr.baseName(inputFile, ".vcf");
			break;
		case BED:
			base = Gpr.baseName(inputFile, ".bed");
			break;
		default:
			throw new RuntimeException("Unimplemented option for input file type " + inputFormat);
		}

		String outputFile = Gpr.dirName(inputFile) + "/" + base + ".eff";

		// Add extension according to output format
		switch (outputFormat) {
		case BED:
		case BEDANN:
			outputFile += ".bed";
			break;
		case VCF:
		case GATK:
			outputFile += ".vcf";
			break;
		default:
			throw new RuntimeException("Unimplemented option for output file type " + outputFormat);
		}

		// Create summary file names
		if (createSummaryCsv)
			summaryFileCsv = Gpr.dirName(inputFile) + "/" + base + "_summary.csv";
		if (createSummaryHtml)
			summaryFileHtml = Gpr.dirName(inputFile) + "/" + base + "_summary.html";
		summaryGenesFile = Gpr.dirName(inputFile) + "/" + base + "_genes.txt";

		return outputFile;
	}

	/**
	 * Parse command line arguments
	 */
	@Override
	public void parseArgs(String[] args) {
		if (args == null)
			return;

		boolean isFileList = false;
		this.args = args;

		for (int i = 0; i < args.length; i++) {
			String arg = args[i];

			// Is it a command line option?
			// Note: Generic options (such as config, verbose, debug, quiet, etc.) are
			// parsed by SnpEff class
			// ---
			if (isOpt(arg)) {
				if (arg.equalsIgnoreCase("-fileList"))
					isFileList = true;
				else {
					arg = arg.toLowerCase();

					switch (arg) {
					// ---
					// Output options
					// ---
					case "-chr":
						chrStr = args[++i];
						break;

					case "-csvstats":
						createSummaryCsv = true; // Create a CSV formatted summary file.
						if ((i + 1) < args.length) {
							summaryFileCsv = args[++i];
							String base = Gpr.baseName(summaryFileCsv, ".csv");
							String dir = Gpr.dirName(summaryFileCsv);
							summaryGenesFile = (dir != null ? dir + "/" : "") + base + ".genes.txt";
						} else
							usage("Missing parameter: CSV stats file name ");
						break;

					case "-fastaprot":
						if ((i + 1) < args.length)
							fastaProt = args[++i]; // Output protein sequences in fasta files
						else
							usage("Missing -cancerSamples argument");
						break;

					case "-nochromoplots":
						chromoPlots = false;
						break;

					case "-nostats":
						createSummaryHtml = createSummaryCsv = false;
						break;

					case "-o": // Output format
						if ((i + 1) < args.length) {
							String outFor = args[++i].toUpperCase();

							// if (outFor.equals("TXT")) outputFormat = OutputFormat.TXT;
							if (outFor.equals("VCF"))
								outputFormat = OutputFormat.VCF;
							else if (outFor.equals("GATK")) {
								outputFormat = OutputFormat.GATK;
								useSequenceOntology = false;
								hgvs = false;
								nextProt = false;
								motif = false;

								// GATK doesn't support SPLICE_REGION at the moment.
								// Set parameters to zero so that splcie regions are not created.
								spliceRegionExonSize = spliceRegionIntronMin = spliceRegionIntronMax = 0;
							} else if (outFor.equals("BED")) {
								outputFormat = OutputFormat.BED;
								lossOfFunction = false;
							} else if (outFor.equals("BEDANN")) {
								outputFormat = OutputFormat.BEDANN;
								lossOfFunction = false;
							} else if (outFor.equals("TXT"))
								usage("Output format 'TXT' has been deprecated. Please use 'VCF' instead.\nYou can extract VCF fields to a TXT file using 'SnpSift extractFields' (http://snpeff.sourceforge.net/SnpSift.html#Extract).");
							else
								usage("Unknown output file format '" + outFor + "'");
						}
						break;

					case "-s":
					case "-stats":
					case "-htmlstats":
						createSummaryHtml = true;
						if ((i + 1) < args.length) {
							summaryFileHtml = args[++i];
							String base = Gpr.baseName(summaryFileHtml, ".html");
							String dir = Gpr.dirName(summaryFileHtml);
							summaryGenesFile = (dir != null ? dir + "/" : "") + base + ".genes.txt";
						} else
							usage("Missing parameter: HTML stats file name ");
						break;

					case "-uselocaltemplate": // Undocumented option (only used for development & debugging)
						useLocalTemplate = true;
						break;

					// ---
					// Annotation options
					// ---
					case "-cancer":
						cancer = true; // Perform cancer comparisons
						break;

					case "-cancersamples":
						if ((i + 1) < args.length)
							cancerSamples = args[++i]; // Read cancer samples from TXT files
						else
							usage("Missing -cancerSamples argument");
						break;

					case "-classic":
						useSequenceOntology = false;
						formatVersion = EffFormatVersion.FORMAT_EFF_4;
						hgvs = hgvsForce;
						break;

					case "-formateff":
						formatVersion = EffFormatVersion.FORMAT_EFF_4;
						break;

					case "-geneid":
						useGeneId = true; // Use gene ID instead of gene name
						break;

					case "-lof":
						lossOfFunction = true; // Add LOF tag
						break;

					case "-nohgvs":
						hgvs = false; // Do not use HGVS notation
						hgvsShift = false;
						break;

					case "-noshifthgvs":
					case "-no_shift_hgvs":
						hgvsShift = false;
						break;

					case "-nolof":
						lossOfFunction = false; // Do not add LOF tag
						break;

					case "-oicr":
						useOicr = true; // Use OICR tag
						break;

					case "-sequenceontology":
						useSequenceOntology = true; // Use SO temrs
						break;

					// ---
					// Input options
					// ---
					case "-fi":
					case "-filterinterval":
						if ((i + 1) < args.length)
							filterIntervalFiles.add(args[++i]);
						else
							usage("Option '-fi' without config filter_interval_file argument");
						break;

					case "-i":
						// Input format
						if ((i + 1) < args.length) {
							String inFor = args[++i].toUpperCase();

							if (inFor.equals("VCF")) {
								inputFormat = InputFormat.VCF;
								outputFormat = OutputFormat.VCF;
							} else if (inFor.equals("BED")) {
								inputFormat = InputFormat.BED;
								outputFormat = OutputFormat.BED;
								lossOfFunction = false;
							} else if (inFor.equals("TXT"))
								usage("Input format 'TXT' has been deprecated. Please use 'VCF' instead.");
							else
								usage("Unknown input file format '" + inFor + "'");
						} else
							usage("Missing input format in command line option '-i'");
						break;

					// ---
					// Filters
					// ---
					case "-no-downstream":
						variantEffectResutFilter.add(EffectType.DOWNSTREAM);
						break;

					case "-no-upstream":
						variantEffectResutFilter.add(EffectType.UPSTREAM);
						break;

					case "-no-intergenic":
						variantEffectResutFilter.add(EffectType.INTERGENIC);
						break;

					case "-no-intron":
						variantEffectResutFilter.add(EffectType.INTRON);
						break;

					case "-no-utr":
						variantEffectResutFilter.add(EffectType.UTR_3_PRIME);
						variantEffectResutFilter.add(EffectType.UTR_3_DELETED);
						variantEffectResutFilter.add(EffectType.UTR_5_PRIME);
						variantEffectResutFilter.add(EffectType.UTR_5_DELETED);
						break;

					case "-no":
						String filterOut = "";
						if ((i + 1) < args.length)
							filterOut = args[++i];

						String filterOutArray[] = filterOut.split(",");
						for (String filterStr : filterOutArray) {
							if (filterStr.equalsIgnoreCase("utr")) {
								variantEffectResutFilter.add(EffectType.UTR_3_PRIME);
								variantEffectResutFilter.add(EffectType.UTR_3_DELETED);
								variantEffectResutFilter.add(EffectType.UTR_5_PRIME);
								variantEffectResutFilter.add(EffectType.UTR_5_DELETED);
							} else if (filterStr.equalsIgnoreCase("None"))
								; // OK, nothing to do
							else
								variantEffectResutFilter.add(EffectType.valueOf(filterStr.toUpperCase()));
						}
						break;

					default:
						usage("Unknown option '" + arg + "'");
					}
				}
			} else if (genomeVer.isEmpty())
				genomeVer = arg;
			else if (inputFile.isEmpty())
				inputFile = arg;
			else
				usage("Unknown parameter '" + arg + "'");
		}

		// ---
		// Sanity checks
		// ---

		// Check: Do we have all required parameters?
		if (genomeVer == null || genomeVer.isEmpty())
			usage("Missing genomer_version parameter");

		// Check input file
		if (inputFile.isEmpty())
			inputFile = "-"; // Use STDIN as default
		else if (!Gpr.canRead(inputFile))
			usage("Cannot read input file '" + inputFile + "'");

		// Read input files from file list?
		if (isFileList) {
			inputFiles = new ArrayList<>();
			for (String file : readFile(inputFile).split("\n"))
				inputFiles.add(file);
		}

		// Sanity checks for VCF output format
		boolean isOutVcf = (outputFormat == OutputFormat.VCF) || (outputFormat == OutputFormat.GATK);
		if (isOutVcf && (inputFormat != InputFormat.VCF))
			usage("Output in VCF format is only supported when the input is also in VCF format");
		if (!isOutVcf && lossOfFunction)
			usage("Loss of function annotation is only supported when when output is in VCF format");
		if (!isOutVcf && cancer)
			usage("Canccer annotation is only supported when when output is in VCF format");

		// Sanity check for multi-threaded version
		if (multiThreaded) {
			createSummaryHtml = false; // This is implied ( '-t' => '-noStats' )
			createSummaryCsv = false;
		}
		if (multiThreaded && cancer)
			usage("Cancer analysis is currently not supported in multi-threaded mode.");
		if (multiThreaded && !isOutVcf)
			usage("Multi-threaded option is only supported when when output is in VCF format");
		if (multiThreaded && (createSummaryHtml || createSummaryCsv))
			usage("Multi-threaded option should be used with 'noStats'.");
	}

	/**
	 * Append ALT protein sequence to 'fastaProt' file
	 */
	void proteinAltSequence(Variant var, VariantEffects variantEffects) {
		Set doneTr = new HashSet<>();
		for (VariantEffect varEff : variantEffects) {
			Transcript tr = varEff.getTranscript();
			if (tr == null || doneTr.contains(tr))
				continue;

			// Calculate sequence after applying variant
			Transcript trAlt = tr.apply(var);

			// Build fasta entries and append to file
			StringBuilder sb = new StringBuilder();
			sb.append(">" + tr.getId() + " Ref\n" + tr.protein() + "\n");
			sb.append(">" + tr.getId() + " Variant " //
					+ var.getChromosomeName() //
					+ ":" + (var.getStart() + 1) //
					+ "-" + (var.getEnd() + 1) //
					+ " Ref:" + var.getReference() //
					+ " Alt:" + var.getAlt() //
					+ " HGVS.p:" + varEff.getHgvsProt() //
					+ "\n" //
					+ trAlt.protein() + "\n" //
			);
			Gpr.toFile(fastaProt, sb, true);

			doneTr.add(tr);
		}
	}

	/**
	 * Read a file after checking for some common error conditions
	 */
	String readFile(String fileName) {
		File file = new File(fileName);
		if (!file.exists())
			fatalError("No such file '" + fileName + "'");
		if (!file.canRead())
			fatalError("Cannot open file '" + fileName + "'");
		return Gpr.readFile(fileName);
	}

	/**
	 * Read a filter custom interval file
	 */
	int readFilterIntFile(String intFile) {
		Markers markers = loadMarkers(intFile);

		if (filterIntervals == null)
			filterIntervals = new IntervalForest();
		for (Marker filterInterval : markers)
			filterIntervals.add(filterInterval);

		return markers.size();
	}

	/**
	 * Read pedigree either from VCF header or from cancerSample file
	 */
	Pedigree readPedigree(VcfFileIterator vcfFile) {
		if (cancerSamples != null)
			return new Pedigree(vcfFile, cancerSamples);
		return new Pedigree(vcfFile);
	}

	@Override
	public HashMap reportValues() {
		HashMap report = super.reportValues();
		if (variantStats != null)
			report.put("variants", variantStats.getCount() + "");
		return report;
	}

	/**
	 * Run according to command line options
	 */
	@Override
	public boolean run() {
		return run(false) != null;
	}

	/**
	 * Run according to command line options
	 */
	public List run(boolean createList) {
		// ---
		// Prepare to run
		// ---

		// Nothing to filter out => don't waste time
		if (!variantEffectResutFilter.anythingSet())
			variantEffectResutFilter = null;

		filterIntervals = null;

		loadConfig(); // Read config file
		loadDb(); // Load database

		// Check if we can open the input file (no need to check if it is STDIN)
		if (!Gpr.canRead(inputFile))
			usage("Cannot open input file '" + inputFile + "'");

		// Read filter interval files
		for (String filterIntFile : filterIntervalFiles) {
			if (filterIntervals == null)
				filterIntervals = new IntervalForest();
			if (verbose)
				Timer.showStdErr("Reading filter interval file '" + filterIntFile + "'");
			int count = readFilterIntFile(filterIntFile);
			if (verbose)
				Timer.showStdErr("done (" + count + " intervals loaded). ");
		}

		// Build interval forest for filter (if any)
		if (filterIntervals != null) {
			if (verbose)
				Timer.showStdErr("Building filter interval forest");
			filterIntervals.build();
			if (verbose)
				Timer.showStdErr("done.");
		}

		// Store VCF results in a list?
		if (createList)
			vcfEntriesDebug = new ArrayList<>();

		// Predict
		boolean ok = true;
		if (verbose)
			Timer.showStdErr("Predicting variants");
		if (inputFiles == null) {
			// Single input file, output to STDOUT (typical usage)
			ok = annotate(inputFile, null);
		} else {
			// Multiple input and output files
			for (String inputFile : inputFiles) {
				String outputFile = outputFile(inputFile);
				if (verbose)
					Timer.showStdErr("Analyzing file" //
							+ "\n\tInput         : '" + inputFile + "'" //
							+ "\n\tOutput        : '" + outputFile + "'" //
							+ (createSummaryHtml ? "\n\tSummary (HTML): '" + summaryFileHtml + "'" : "") //
							+ (createSummaryCsv ? "\n\tSummary (CSV) : '" + summaryFileCsv + "'" : "") //
					);
				ok &= annotate(inputFile, outputFile);
			}
		}
		if (verbose)
			Timer.showStdErr("done.");

		if (!ok)
			return null;
		if (vcfEntriesDebug == null)
			return new ArrayList<>();
		return vcfEntriesDebug;
	}

	public void setFormatVersion(EffFormatVersion formatVersion) {
		this.formatVersion = formatVersion;
	}

	/**
	 * Should we annotate cancer variants?
	 */
	boolean shouldAnnotateVariantCancer(List variants, VcfEntry vcfEntry) {
		if (vcfEntry.isMultiallelic())
			return true;
		// Bi-allelic are analyzed only if there are "back to REF" mutation
		return pedigree.anyBackToRef(vcfEntry);
	}

	/**
	 * Show annotation progress
	 */
	void showProgress() {
		countVariants++;
		if (verbose && (countVariants % SHOW_EVERY == 0)) {
			int millisec = ((int) annotateTimer.elapsed());
			int secs = millisec / 1000;
			if (secs > 0) {
				int varsPerSec = (int) (countVariants * 1000.0 / millisec);
				Timer.showStdErr("\t" + countVariants + " variants (" + varsPerSec + " variants per second), "
						+ countVcfEntries + " VCF entries");
			}
		}
	}

	/**
	 * Creates a summary output file (using freeMarker and a template)
	 */
	boolean summary(String templateFile, String outputFile, boolean noCommas) {
		try {
			// Configure FreeMaker
			Configuration cfg = new Configuration();

			// Specify the data source where the template files come from
			if (useLocalTemplate)
				cfg.setDirectoryForTemplateLoading(new File("./templates/")); // Use local 'template' directory
			else
				cfg.setClassForTemplateLoading(SnpEffCmdEff.class, "/"); // Use current directory in JAR file

			cfg.setObjectWrapper(new DefaultObjectWrapper()); // Specify how templates will see the data-model. This is
																// an advanced topic...
			cfg.setLocale(java.util.Locale.US);
			if (noCommas)
				cfg.setNumberFormat("0.######");

			// Create the root hash (where data objects are)
			HashMap root = summaryCreateHash();

			// Get the template
			Template temp = cfg.getTemplate(templateFile);

			// Process the template
			Writer out = new OutputStreamWriter(new FileOutputStream(new File(outputFile)));
			temp.process(root, out);
			out.flush();
			out.close();
		} catch (IOException e) {
			error(e, "Error creating summary: " + e.getMessage());
			return false;
		} catch (TemplateException e) {
			error(e, "Error creating summary: " + e.getMessage());
			return false;
		}

		return true;
	}

	/**
	 * Create a hash with all variables needed for creating summary pages
	 */
	HashMap summaryCreateHash() {
		// Create the root hash (where data objects are)
		HashMap root = new HashMap<>();
		root.put("args", commandLineStr(createSummaryCsv ? false : true));
		root.put("changeStats", variantEffectStats);
		root.put("chromoPlots", chromoPlots);
		root.put("countEffects", countEffects);
		root.put("countInputLines", countInputLines);
		root.put("countVariants", countVariants);
		root.put("date", String.format("%1$TY-%1$Tm-%1$Td %1$TH:%1$TM", new Date()));
		root.put("genesFile", Gpr.baseName(summaryGenesFile, ""));
		root.put("genome", config.getGenome());
		root.put("genomeVersion", genomeVer);
		root.put("variantEffectResutFilter", variantEffectResutFilter);
		root.put("variantStats", variantStats);
		root.put("snpEffectPredictor", config.getSnpEffectPredictor());
		root.put("vcfStats", vcfStats);
		root.put("version", SnpEff.VERSION); // Version used

		return root;
	}

	/**
	 * Show 'usage;' message and exit with an error code '-1'
	 * 
	 * @param message
	 */
	@Override
	public void usage(String message) {
		if (message != null) {
			System.err.println("Error        :\t" + message);
			System.err.println("Command line :\t" + commandLineStr(false) + "\n");
		}

		System.err.println("snpEff version " + VERSION);
		System.err.println("Usage: snpEff [eff] [options] genome_version [input_file]");
		System.err.println("\n");
		System.err.println("\tvariants_file                   : Default is STDIN");
		System.err.println("\n");
		System.err.println("\nOptions:");
		System.err.println(
				"\t-chr                    : Prepend 'string' to chromosome name (e.g. 'chr1' instead of '1'). Only on TXT output.");
		System.err.println(
				"\t-classic                        : Use old style annotations instead of Sequence Ontology and Hgvs.");
		System.err.println("\t-csvStats                 : Create CSV summary file.");
		System.err.println(
				"\t-download                       : Download reference genome if not available. Default: " + download);
		System.err.println("\t-i                      : Input format [ vcf, bed ]. Default: VCF.");
		System.err.println("\t-fileList                       : Input actually contains a list of files to process.");
		System.err
				.println("\t-o                      : Ouput format [ vcf, gatk, bed, bedAnn ]. Default: VCF.");
		System.err.println("\t-s , -stats, -htmlStats         : Create HTML summary file.  Default is '"
				+ DEFAULT_SUMMARY_HTML_FILE + "'");
		System.err.println("\t-noStats                        : Do not create stats (summary) file");
		System.err.println("\nResults filter options:");
		System.err.println(
				"\t-fi , -filterInterval     : Only analyze changes that intersect with the intervals specified in this file (you may use this option many times)");
		System.err.println("\t-no-downstream                  : Do not show DOWNSTREAM changes");
		System.err.println("\t-no-intergenic                  : Do not show INTERGENIC changes");
		System.err.println("\t-no-intron                      : Do not show INTRON changes");
		System.err.println("\t-no-upstream                    : Do not show UPSTREAM changes");
		System.err.println("\t-no-utr                         : Do not show 5_PRIME_UTR or 3_PRIME_UTR changes");
		System.err.println(
				"\t-no                 : Do not show 'EffectType'. This option can be used several times.");
		System.err.println("\nAnnotations options:");
		System.err.println(
				"\t-cancer                         : Perform 'cancer' comparisons (Somatic vs Germline). Default: "
						+ cancer);
		System.err.println(
				"\t-cancerSamples            : Two column TXT file defining 'oringinal \\t derived' samples.");
		System.err.println(
				"\t-fastaProt                : Create an output file containing the resulting protein sequences.");
		System.err.println(
				"\t-formatEff                      : Use 'EFF' field compatible with older versions (instead of 'ANN').");
		System.err
				.println("\t-geneId                         : Use gene ID instead of gene name (VCF output). Default: "
						+ useGeneId);
		System.err.println(
				"\t-hgvs                           : Use HGVS annotations for amino acid sub-field. Default: " + hgvs);
		System.err.println("\t-hgvsOld                        : Use old HGVS notation. Default: " + hgvsOld);
		System.err.println(
				"\t-hgvs1LetterAa                  : Use one letter Amino acid codes in HGVS notation. Default: "
						+ hgvsOneLetterAa);
		System.err.println(
				"\t-hgvsTrId                       : Use transcript ID in HGVS notation. Default: " + hgvsTrId);
		System.err.println(
				"\t-lof                            : Add loss of function (LOF) and Nonsense mediated decay (NMD) tags.");
		System.err.println("\t-noHgvs                         : Do not add HGVS annotations.");
		System.err.println("\t-noLof                          : Do not add LOF and NMD annotations.");
		System.err.println(
				"\t-noShiftHgvs                    : Do not shift variants according to HGVS notation (most 3prime end).");
		System.err.println("\t-oicr                           : Add OICR tag in VCF file. Default: " + useOicr);
		System.err.println(
				"\t-sequenceOntology               : Use Sequence Ontology terms. Default: " + useSequenceOntology);

		usageGenericAndDb();

		System.exit(-1);
	}

	/**
	 * Create a cancer variant using alt and ref genotypes
	 */
	VariantNonRef variantCancer(List variants, int altGtNum, int refGtNum) {
		// Is this a "back to reference" variant?
		// Example:
		// Germline: 'A' -> 'T'
		// Somatic: 'T' -> 'A' (i.e. reverses the gerline mutation to the genome
		// reference)
		if (altGtNum == 0) {
			Variant variantRef = variants.get(refGtNum - 1); // After applying this variant, we get the new 'reference'
			Variant variantAlt = variantRef.reverse(); // This our new 'variant'. The effect of this variant is "back to
														// reference"
			return new VariantNonRef(variantAlt, variantRef);
		}

		Variant variantRef = variants.get(refGtNum - 1); // After applying this variant, we get the new 'reference'
		Variant variantAlt = variants.get(altGtNum - 1); // This our new 'variant'
		return new VariantNonRef(variantAlt, variantRef);
	}

}