
org.snpeff.snpEffect.commandLine.SnpEffCmdLen Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.snpEffect.commandLine;
import org.snpeff.SnpEff;
import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Exon;
import org.snpeff.interval.Gene;
import org.snpeff.interval.Intron;
import org.snpeff.interval.SpliceSite;
import org.snpeff.interval.SpliceSiteAcceptor;
import org.snpeff.interval.SpliceSiteDonor;
import org.snpeff.interval.Transcript;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.stats.ReadsOnMarkersModel;
import org.snpeff.util.Gpr;
import org.snpeff.util.Timer;
/**
* Calculate the maximum interval length by type, for all markers in a genome
*
*
* @author pcingola
*/
public class SnpEffCmdLen extends SnpEff {
int readLength, numIterations, numReads;
SnpEffectPredictor snpEffectPredictor;
ReadsOnMarkersModel readsOnMarkersModel;
public SnpEffCmdLen() {
super();
}
/**
* Calculate effective length for all genes
*/
void effectiveCodingLength() {
if (verbose) Timer.showStdErr("Calclating gene effective coding lengths");
System.out.println("gene.name\tgene.id\tchr\tstart\tend\teffective.length\tmax.cds.length");
for (Chromosome chr : snpEffectPredictor.getGenome()) {
if (verbose) Timer.showStdErr("Effective coding lengths for chromosome " + chr.getId());
for (Gene gene : snpEffectPredictor.getGenome().getGenes()) {
if (gene.getChromosomeName().equals(chr.getId()) && gene.isProteinCoding()) {
int efflen = effectiveCodingLength(gene);
int maxcds = maxcds(gene);
System.out.println(gene.getGeneName() //
+ "\t" + gene.getId() //
+ "\t" + gene.getChromosomeName() //
+ "\t" + (gene.getStart() + 1) //
+ "\t" + (gene.getEnd() + 1) //
+ "\t" + efflen //
+ "\t" + maxcds //
);
// Sanity check
if (maxcds > efflen) throw new RuntimeException("CDS length is greter then effective length. This should never happen!");
}
}
}
}
/**
* Calculate effective length for a gene
*/
int effectiveCodingLength(Gene gene) {
// Initialize
byte coding[] = new byte[gene.size()];
for (int i = 0; i < coding.length; i++)
coding[i] = 0;
// Mark all 'used' bases
for (Transcript tr : gene) {
// Ignore non-protein coding
if (tr.isProteinCoding()) {
for (Exon ex : tr) {
// Mark all bases in exon as 'used'
for (int i = ex.getStart(); i <= ex.getEnd(); i++)
coding[i - gene.getStart()] = 1;
// Mark bases in SpliceSiteAcceptor/Donnor as 'used'
for (SpliceSite ss : ex.getSpliceSites()) {
if (ss instanceof SpliceSiteAcceptor || ss instanceof SpliceSiteDonor) {
for (int i = ss.getStart(); i <= ss.getEnd(); i++)
coding[i - gene.getStart()] = 1;
}
}
}
// Mark bases in SpliceSites as 'used'
for (Intron intr : tr.introns()) {
for (SpliceSite ss : intr.getSpliceSites()) {
if (ss instanceof SpliceSiteAcceptor || ss instanceof SpliceSiteDonor) {
for (int i = ss.getStart(); i <= ss.getEnd(); i++)
coding[i - gene.getStart()] = 1;
}
}
}
}
}
// Count all used bases
int efflen = 0;
for (int i = 0; i < coding.length; i++)
if (coding[i] > 0) efflen++;
return efflen;
}
int maxcds(Gene gene) {
int max = 0;
for (Transcript tr : gene)
if (tr.isProteinCoding()) max = Math.max(max, tr.cds().length());
return max;
}
@Override
public void parseArgs(String[] args) {
this.args = args;
for (int i = 0; i < args.length; i++) {
// Argument starts with '-'?
if (args[i].equals("-r")) {
if ((i + 1) < args.length) readLength = Gpr.parseIntSafe(args[++i]);
else usage("Missing value for parameter '-r'");
} else if (args[i].equals("-iter")) {
if ((i + 1) < args.length) numIterations = Gpr.parseIntSafe(args[++i]);
else usage("Missing value for parameter '-iter'");
} else if (args[i].equals("-reads")) {
if ((i + 1) < args.length) numReads = Gpr.parseIntSafe(args[++i]);
else usage("Missing value for parameter '-reads'");
} else if (args[i].equals("-r")) {
if ((i + 1) < args.length) readLength = Gpr.parseIntSafe(args[++i]);
else usage("Missing value for parameter '-r'");
} else if (genomeVer.isEmpty()) genomeVer = args[i];
else usage("Unknown parameter '" + args[i] + "'");
}
// Check: Do we have all required parameters?
if (genomeVer.isEmpty()) usage("Missing genomer_version parameter");
if (readLength < 0) usage("Read length should be a non-negative number");
}
/**
* Run
* @return
*/
@Override
public boolean run() {
// Initialize
loadConfig();
loadDb();
if (verbose) Timer.showStdErr("Building interval forest");
snpEffectPredictor.buildForest();
//---
// Count lengths
//---
effectiveCodingLength();
readsOnMarkersModel = new ReadsOnMarkersModel(snpEffectPredictor);
readsOnMarkersModel.setVerbose(verbose);
if (verbose) Timer.showStdErr("Counting bases");
readsOnMarkersModel.run(); // Count
if (!quiet) System.out.println(readsOnMarkersModel);
// Perform some random sampling
if ((numIterations > 0) && (readLength > 0)) readsOnMarkersModel.randomSampling(numIterations, readLength, numReads);
return true;
}
/**
* Show usage and exit
*/
@Override
public void usage(String message) {
if (message != null) System.err.println("Error: " + message + "\n");
System.err.println("snpEff version " + SnpEff.VERSION);
System.err.println("Usage: snpEff len [options] genome_version");
System.err.println("Options:");
System.err.println("\t-r : Assume a read size of 'num' bases.");
System.err.println("\t-iter : Perform 'num' iterations of random sampling.");
System.err.println("\t-reads : Each random sampling iteration has 'num' reads.");
System.exit(-1);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy