
org.snpeff.geneSets.algorithm.EnrichmentAlgorithm Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.geneSets.algorithm;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.apfloat.Apfloat;
import org.snpeff.geneSets.GeneSet;
import org.snpeff.geneSets.GeneSets;
import org.snpeff.geneSets.GeneSetsRanked;
import org.snpeff.geneSets.Result;
/**
* A generic enrichment algorithm for selecting gene-sets from a collection of gene-sets
*
* @author pcingola
*/
public abstract class EnrichmentAlgorithm {
public enum EnrichmentAlgorithmType {
FISHER_GREEDY, RANKSUM_GREEDY, FISHER, RANKSUM, LEADING_EDGE_FRACTION, NONE;
/**
* Is the algorithm base on "interesting / not-interesting" binary clasification?
* @return
*/
public boolean isBinary() {
return (this == FISHER) || (this == FISHER_GREEDY);
}
/**
* Is this a greedy algorithm?
* @return
*/
public boolean isGreedy() {
return (this == FISHER_GREEDY) || (this == RANKSUM_GREEDY);
}
/**
* Does the algorithm need a rank statistic?
* @return
*/
public boolean isRank() {
return (this == RANKSUM) || (this == RANKSUM_GREEDY);
}
}
public static final int HTML_TD_GENES_MAX_LEN = 40;
public static final String HTML_BG_COLOR[] = { "dddddd", "eeeeee" };
public static final String HTML_BG_COLOR_TITLE = "cccccc";
boolean debug = false;
boolean verbose = false;
boolean htmlTable = false;
int minGeneSetSize = 0;
int maxGeneSetSize = Integer.MAX_VALUE;
int numberToSelect;
double maxPValue = Double.NaN;
double maxPvalueAdjusted = 0.05;
StringBuilder output = new StringBuilder();
GeneSets geneSets;
Set filterOutputGeneSets;
public static long PRINT_SOMETHING_TIME = 5000; // Print something every X milliseconds
public EnrichmentAlgorithm(GeneSets geneSets, int numberToSelect) {
this.geneSets = geneSets;
this.numberToSelect = numberToSelect;
}
public double getMaxPvalueAdjusted() {
return maxPvalueAdjusted;
}
public StringBuilder getOutput() {
return output;
}
/**
* Showld we show this result or should the output be filtered?
* @param result
* @return
*/
protected boolean isShow(Result result) {
// Filter by pValue
if (Double.isNaN(maxPValue)) {
// Use adjusted p-value
if (result.getPvalueAdjusted() >= maxPvalueAdjusted) return false;
} else {
// Use un-adjusted p-value
if (result.getPvalue().doubleValue() >= maxPValue) return false;
}
// Filter by Gene set name (show only if it is in this list)
if (filterOutputGeneSets != null) {
String gsName = result.getLatestGeneSet().getName();
boolean show = filterOutputGeneSets.contains(gsName);
if (verbose) System.err.println("\tFilter output list. Show geneSet '" + gsName + "' : " + show);
return show;
}
// OK
return true;
}
protected void print(String str) {
System.out.println(str);
output.append(str + "\n");
}
/**
* Print after all genesets are shown
*/
void printEnd() {
if (htmlTable) print("");
}
/**
* Show a result
* @param it : Iteration or Rank
* @param result
*/
void printResult(int it, Result result) {
if ((result != null) && (result.getLatestGeneSet() != null)) {
if (isShow(result)) {
GeneSet geneSet = result.getLatestGeneSet();
//---
// Get genes information
//---
StringBuffer interestingGenes = new StringBuffer();
ArrayList genesList = new ArrayList();
genesList.addAll(geneSet.getInterestingGenes());
Collections.sort(genesList);
for (String gene : genesList) {
interestingGenes.append(gene);
// Show score (value) and rank information?
if (geneSets.hasValue(gene) || geneSets.isRanked()) {
interestingGenes.append("[");
// Append value
if (geneSets.hasValue(gene)) interestingGenes.append(geneSets.getValue(gene));
// Append rank
if (geneSets.isRanked()) {
GeneSetsRanked geneSetsRanked = ((GeneSetsRanked) geneSets);
interestingGenes.append("/" + geneSetsRanked.getRank(gene));
}
interestingGenes.append("]");
}
interestingGenes.append(" ");
}
//---
// Print output
//---
if (htmlTable) {
// Show as HTML table
String descr = geneSet.getDescription();
String name = geneSet.getName();
if (descr.startsWith("http://")) {
name = "" + name + "";
descr = "link";
}
String intGenes = interestingGenes.toString();
if (interestingGenes.length() > HTML_TD_GENES_MAX_LEN) intGenes = "";
String bgcolor = HTML_BG_COLOR[it % 2];
print("\t " + it //
+ " \t" + String.format("%.2e", result.getPvalue().doubleValue()) //
+ " \t" + String.format("%.2e", result.getPvalueAdjusted()) //
+ " \t" + name //
+ " \t" + geneSet.size() //
+ " \t" + descr //
+ " \t" + intGenes //
+ " \t" + (geneSets.isRanked() ? geneSet.rankSum() : 0) //
+ " \t ");
} else {
// Show as "normal" TXT
print(it //
+ "\t" + result.getPvalue() //
+ "\t" + result.getPvalueAdjusted() //
+ "\t" + geneSet.getName() //
+ "\t" + geneSet.size() //
+ "\t" + geneSet.getDescription() //
+ "\t" + result.getGeneSets() //
+ "\t" + interestingGenes //
+ "\t" + (geneSets.isRanked() ? geneSet.rankSum() : 0) //
);
}
}
} // else print("\t" + it + "\tNULL");
}
/**
* Print before all genesets are shown
*/
void printTitle() {
if (htmlTable) print(" Rank \tp-value \tp-value adj \tLatest result \tSize \tDescription \tInteresting genes \t Score ");
else if (verbose) print("Iteration\tp-value\tp-value adj\tLatest result\tSize\tDescription\tResult\tInteresting genes");
}
/**
* Calculate the pValue for a given geneSet
* @param geneSetList
* @return
*/
abstract Apfloat pValue(GeneSet geneSet);
/**
* Create a new gene set using all gene sets and calculate pValue
* @param geneSetList
* @return
*/
Apfloat pValue(List geneSetList) {
GeneSet newGeneSet = new GeneSet(geneSetList, geneSets);
return pValue(newGeneSet);
}
/**
* Select the 'best' gene sets
* @return
*/
public Result select() {
Result best = new Result();
//---
// Calculate pValues for each gene set matching our criteria
//---
List results = new ArrayList();
for (GeneSet geneSet : geneSets) {
if ((geneSet.getGeneCount() > 0) // This term is empty? => skip it
&& (geneSet.getGeneCount() >= minGeneSetSize) // Use gene sets bigger than minGeneSetSize
&& (geneSet.getGeneCount() <= maxGeneSetSize) // Use gene sets smaller than maxGeneSetSize
) {
// Calculate pValue
Apfloat pValue = pValue(geneSet);
Result result = new Result(geneSet, pValue, 0); // We'll update the geneSetCount later
results.add(result);
}
}
// Update the geneSetCount
for (Result res : results)
res.setGeneSetCountLast(results.size());
//---
// Show results
//---
if (htmlTable || verbose) {
printTitle();
// Sort by pValue
Collections.sort(results);
// Show them
int rank = 1;
for (Result r : results)
printResult(rank++, r);
printEnd();
}
return best;
}
public void setDebug(boolean debug) {
this.debug = debug;
}
public void setFilterOutputGeneSets(Set filterOutputGeneSets) {
this.filterOutputGeneSets = filterOutputGeneSets;
}
public void setHtmlTable(boolean htmlTable) {
this.htmlTable = htmlTable;
}
public void setMaxGeneSetSize(int maxGeneSetSize) {
this.maxGeneSetSize = maxGeneSetSize;
}
public void setMaxPValue(double maxPValue) {
this.maxPValue = maxPValue;
}
public void setMaxPvalueAdjusted(double maxPvalueAdjusted) {
this.maxPvalueAdjusted = maxPvalueAdjusted;
}
public void setMinGeneSetSize(int minGeneSetSize) {
this.minGeneSetSize = minGeneSetSize;
}
public void setVerbose(boolean verbose) {
this.verbose = verbose;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy