
org.snpeff.snpEffect.testCases.integration.CompareToVep Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.snpEffect.testCases.integration;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import org.junit.Assert;
import org.snpeff.SnpEff;
import org.snpeff.fileIterator.VcfFileIterator;
import org.snpeff.interval.Transcript;
import org.snpeff.snpEffect.EffectType;
import org.snpeff.snpEffect.commandLine.SnpEffCmdEff;
import org.snpeff.vcf.EffFormatVersion;
import org.snpeff.vcf.VcfConsequence;
import org.snpeff.vcf.VcfConsequenceHeader;
import org.snpeff.vcf.VcfEffect;
import org.snpeff.vcf.VcfEntry;
/**
* Compare our results to ENSEML's Variant Effect predictor's output
*
* @author pcingola
*/
public class CompareToVep {
boolean debug = false;
boolean verbose = false;
boolean compareEffect = true;
boolean compareHgvsDna = false;
boolean compareHgvsProt = false;
boolean onlyProtein = false;
boolean shiftHgvs = false;
boolean strict = false;
boolean throwException = true;
String genomeName;
String addArgs[];
VcfConsequenceHeader vcfCsqHeader;
SnpEffCmdEff cmdEff;
int countHgvsDna, countHgvsProt, countEff;
public CompareToVep(String genomeName, boolean verbose) {
this.genomeName = genomeName;
this.verbose = verbose;
}
public CompareToVep(String genomeName, String addArgs[], boolean verbose) {
this.genomeName = genomeName;
this.addArgs = addArgs;
this.verbose = verbose;
}
/**
* Create command line arguments
*/
String[] args(String genomeName, String vcf) {
List args = new LinkedList();
if (addArgs != null) {
for (String arg : addArgs)
args.add(arg);
}
args.add("-noStats");
args.add("-noLog");
if (strict) args.add("-strict");
if (onlyProtein) args.add("-onlyProtein");
if (!shiftHgvs) args.add("-noShiftHgvs");
args.add("-formatEff");
args.add(genomeName);
args.add(vcf);
return args.toArray(new String[0]);
}
boolean canCompare(VcfEffect eff, VcfConsequence csq) {
if (compareHgvsDna) {
// These do not produce HGSV notation, so we cannot compare them
if (eff.getEffectType() == EffectType.DOWNSTREAM || eff.getEffectType() == EffectType.UPSTREAM) return false;
}
return true;
}
public boolean checkComapred() {
if (compareHgvsDna || compareHgvsProt) return (countHgvsDna + countHgvsProt) > 0;
return countEff > 0;
}
/**
* Compare two lists of results
*/
boolean compare(List effs, List csqs) {
HashSet trIds = new HashSet();
for (VcfEffect eff : effs)
trIds.add(eff.getTranscriptId());
// All transcripts have to match (at least one effect)
for (String trId : trIds) {
boolean match = compare(effs, csqs, trId);
if (!match) return false;
}
return true;
}
/**
* Compare two lists of results, focusing only on transcript 'trId'
*/
boolean compare(List effs, List csqs, String trId) {
if (trId == null) return true;
boolean ok = false;
if (verbose) {
Transcript tr = cmdEff.getConfig().getSnpEffectPredictor().getTranscript(trId);
if (tr != null) {
System.out.println("\n\t\tTranscript : '" + tr.getId() + "'");
System.out.println("\t\tStrand : '" + (tr.isStrandPlus() ? "+" : "-") + "'");
System.out.println("\t\tCDS : '" + tr.cds() + "'");
System.out.println("\t\tProtein : '" + tr.protein() + "'");
} else System.out.println("Transcript " + trId + " not found.");
}
// At least one effect has to match for this transcript
for (VcfEffect eff : effs)
if (trId.equals(eff.getTranscriptId())) {
boolean match = compare(eff, csqs);
if (verbose) {
String matched = match ? "OK" : "NO";
System.out.println("\t\t\t" + matched + " Match");
}
ok |= match;
}
return ok;
}
/**
* Compare two SO terms, return true if they match
*/
boolean compare(String effSo, String csqSo) {
if (effSo.equals(csqSo)) return true;
if (effSo.equals("inframe_deletion") && csqSo.equals("feature_truncation")) return true;
if (effSo.equals("conservative_inframe_insertion") && csqSo.equals("inframe_insertion")) return true;
if (effSo.equals("disruptive_inframe_insertion") && csqSo.equals("inframe_insertion")) return true;
if (effSo.equals("conservative_inframe_deletion") && csqSo.equals("inframe_deletion")) return true;
if (effSo.equals("disruptive_inframe_deletion") && csqSo.equals("inframe_deletion")) return true;
if (effSo.equals("conservative_inframe_deletion") && csqSo.equals("feature_truncation")) return true;
if (effSo.equals("disruptive_inframe_deletion") && csqSo.equals("feature_truncation")) return true;
if (effSo.equals("synonymous_variant") && csqSo.equals("coding_sequence_variant")) return true;
if (effSo.equals("non_coding_transcript_exon_variant") && csqSo.equals("NMD_transcript_variant")) return true;
return false;
}
/**
* Compare a single SnpEff results to a list of CSQs (ENSEMBL's VEP results)
* @return true if 'eff' matches any CSQ
*/
boolean compare(VcfEffect eff, List csqs) {
String effStr = eff.getEffectsStrSo();
boolean foundTranscript = false;
// Split all effects
for (String et : effStr.split("\\" + EffFormatVersion.EFFECT_TYPE_SEPARATOR_OLD)) {
if (verbose) System.out.println("\t\t" + et + "\t" + eff.getTranscriptId());
// Match all consequences
for (VcfConsequence csq : csqs) {
// Check in same transcript
if (csq.getFeature().equals(eff.getTranscriptId())) {
if (canCompare(eff, csq)) {
foundTranscript = true;
if (compare(eff, csq)) return true;
}
}
}
}
return !foundTranscript; // If transcript was not found, then no match is expected
}
/**
* Comparison type dispatcher
*/
boolean compare(VcfEffect eff, VcfConsequence csq) {
if (compareEffect) return compareEffect(eff, csq);
if (compareHgvsDna || compareHgvsProt) return compareHgvs(eff, csq);
throw new RuntimeException("Nothing to compare!");
}
/**
* Compare a single effect to CSQ
*/
boolean compareEffect(VcfEffect eff, VcfConsequence csq) {
String effectTypes[] = eff.getEffectTypesStr().split("\\" + EffFormatVersion.EFFECT_TYPE_SEPARATOR_OLD);
String consecuences[] = csq.getConsequence().split("&");
for (String et : effectTypes) {
for (String cons : consecuences) {
if (compare(et, cons)) {
countEff++;
if (verbose) System.out.println("\t\t\tOK :" + eff.getTranscriptId() + "\t" + et + "\t" + cons);
return true;
}
if (verbose) System.out.println("\t\t\t " + eff.getTranscriptId() + "\t" + et + "\t" + cons);
}
}
return false;
}
/**
* Compare a single effect to CSQ
*/
boolean compareHgvs(VcfEffect eff, VcfConsequence csq) {
if (verbose) {
String effHgsvDna = eff.getHgvsDna();
String effHgsvProt = eff.getHgvsProt();
System.out.println("\t\t\teff : " + eff.getEffectTypesStr() //
+ "\n\t\t\tcsq : " + csq.getConsequence() //
+ "\n\t\t\ttrId : " + eff.getTranscriptId() + "\t" + csq.getFeature() //
+ "\n\t\t\thgsv.c : '" + effHgsvDna + "'\t'" + csq.getHgvsDna() + "'\t" + (compareHgvsDna(eff, csq) ? "OK" : "BAD") //
+ "\n\t\t\thgsv.p : '" + effHgsvProt + "'\t'" + csq.getHgvsProt() + "'\t" + (compareHgvsProt(eff, csq) ? "OK" : "BAD") //
+ "\n" //
);
}
return (!compareHgvsDna || compareHgvsDna(eff, csq)) //
&& //
(!compareHgvsProt || compareHgvsProt(eff, csq));
}
/**
* Compare HGSV DNA
*/
boolean compareHgvsDna(VcfEffect eff, VcfConsequence csq) {
String effHgsv = eff.getHgvsDna();
String csqHgvs = csq.getHgvsDna();
if (csqHgvs.isEmpty() && (effHgsv == null || effHgsv.isEmpty())) return true;
if (!csqHgvs.isEmpty() && (effHgsv == null || effHgsv.isEmpty())) return false;
if (csqHgvs.isEmpty() && (effHgsv != null || !effHgsv.isEmpty())) return false;
csqHgvs = csqHgvs.substring(csqHgvs.indexOf(':') + 1);
boolean eq = csqHgvs.equals(effHgsv);
if (eq) countHgvsDna++;
return eq;
}
/**
* Compare HGSV Protein
*/
boolean compareHgvsProt(VcfEffect eff, VcfConsequence csq) {
String effHgsv = eff.getHgvsProt();
String csqHgvs = csq.getHgvsProt();
if (csqHgvs.isEmpty() && (effHgsv == null || effHgsv.isEmpty())) return true;
if (!csqHgvs.isEmpty() && (effHgsv == null || effHgsv.isEmpty())) return false;
if (csqHgvs.isEmpty() && (effHgsv != null || !effHgsv.isEmpty())) return false;
// This seems to be a bug in ENSEMBL's VEP
// E.g.: 'ENST00000241356.4:c.945G>A(p.%3D)'
if (csqHgvs.endsWith("(p.%3D)")) return true;
csqHgvs = csqHgvs.substring(csqHgvs.indexOf(':') + 1);
boolean eq = csqHgvs.equals(effHgsv);
// We use short frame-shift description, whereas CSQ uses long terminology
if (csqHgvs.indexOf("fs") > 0 && effHgsv.endsWith("fs")) {
String effHgsvFs = effHgsv.substring(0, effHgsv.length() - 2); // Remove trailing 'fs'
String csqHgsvFs = csqHgvs.substring(0, csqHgvs.indexOf("fs") - 3); // Remove last part, including last AA before 'fs' (3 leter code)
eq = csqHgsvFs.equals(effHgsvFs);
}
if (eq) countHgvsProt++;
return eq;
}
/**
* Compare all VCF entries
*/
public void compareVep(String vcf) {
if (verbose) System.out.println(this.getClass().getSimpleName() + ": Compare VEP, genome " + genomeName + ", file " + vcf);
parseCsqHeader(vcf);
// Run and compare VCF entries
List vcfEnties = runSnpEff(args(genomeName, vcf));
for (VcfEntry ve : vcfEnties) {
List csqs = VcfConsequence.parse(vcfCsqHeader, ve);
List effs = ve.getVcfEffects();
if (verbose) {
System.out.println(ve);
System.out.println("\tCSQ:");
for (VcfConsequence csq : csqs)
System.out.println("\t\t" + csq);
System.out.println("\tEFF:");
for (VcfEffect eff : effs)
System.out.println("\t\t" + eff);
System.out.println("\tCompare:");
}
Assert.assertTrue("EFF and CSQ do not match", compare(effs, csqs));
}
}
void parseCsqHeader(String vcfFileName) {
VcfFileIterator vcf = new VcfFileIterator(vcfFileName);
vcf.next();
vcfCsqHeader = new VcfConsequenceHeader(vcf);
}
/**
* Run SnpEFf and return a list of results
*/
List runSnpEff(String[] args) {
SnpEff cmd = new SnpEff(args);
cmdEff = (SnpEffCmdEff) cmd.cmd();
cmdEff.setVerbose(verbose);
cmdEff.setSupressOutput(!verbose);
cmdEff.setDebug(debug);
cmdEff.getConfig();
List vcfEnties = cmdEff.run(true);
return vcfEnties;
}
public void setCompareHgvs() {
compareEffect = false;
compareHgvsDna = compareHgvsProt = true;
}
public void setCompareHgvsDna(boolean compareHgvsDna) {
this.compareHgvsDna = compareHgvsDna;
}
public void setCompareHgvsProt(boolean compareHgvsProt) {
this.compareHgvsProt = compareHgvsProt;
}
public void setOnlyProtein(boolean onlyProtein) {
this.onlyProtein = onlyProtein;
}
public void setShiftHgvs(boolean shiftHgvs) {
this.shiftHgvs = shiftHgvs;
}
public void setStrict(boolean strict) {
this.strict = strict;
}
@Override
public String toString() {
if (compareHgvsDna || compareHgvsProt) return "HGVS DNA OK: " + countHgvsDna + "\tHGVS protein OK: " + countHgvsProt;
return "Effects OK: " + countEff;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy