
org.snpeff.stats.VariantStats Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.stats;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Genome;
import org.snpeff.interval.Variant;
import org.snpeff.interval.Variant.VariantType;
/**
 * Variant statistics.
 *
 * Accumulates, one {@link Variant} at a time (see {@link #sample(Variant)}):
 * total / non-variant / non-empty-ID counters, a count per variant type,
 * a count per base change for SNPs, an InDel length histogram and
 * per-chromosome position statistics.
 */
public class VariantStats implements SamplingStats<Variant> {

    /** Separator placed between the old and the new item when building a change key */
    public static final String CHANGE_SEPARATOR = "\t";

    static final char[] bases = { 'A', 'C', 'G', 'T' };

    Genome genome;
    IntStats indelLen; // InDel length statistics
    HashMap<String, ChrPosStats> chrPosStatsbyName; // Position statistics, keyed by chromosome name
    long countVariants = 0; // Samples for which isVariant() was true
    long countNonVariants; // Samples for which isVariant() was false
    long countNonEmptyId; // Variants having a non-null, non-empty ID
    CountByType countByChangeType, baseChangesCount;

    public VariantStats(Genome genome) {
        this.genome = genome;
        indelLen = new IntStats();
        chrPosStatsbyName = new HashMap<String, ChrPosStats>();
        countByChangeType = new CountByType();
        baseChangesCount = new CountByType();
    }

    /**
     * How to code an 'item' change (e.g. codon change, AA change, etc.):
     * old item, {@link #CHANGE_SEPARATOR}, new item.
     */
    private String changeKey(String oldItem, String newItem) {
        return oldItem + CHANGE_SEPARATOR + newItem;
    }

    /**
     * Update the per-chromosome position statistics using the variant's start position.
     * Variants whose chromosome name is not found in the genome are not counted here.
     */
    void chromoStats(Variant variant) {
        String chrName = variant.getChromosomeName();
        ChrPosStats chrPosStats = chrPosStatsbyName.get(chrName);

        // No stats? => Create a new one (only if the genome knows this chromosome)
        if (chrPosStats == null) {
            Chromosome chr = genome.getChromosome(chrName);
            if (chr != null) {
                chrPosStats = new ChrPosStats(chrName, chr.size());
                chrPosStatsbyName.put(chrName, chrPosStats);
            }
        }

        // Perform stats
        if (chrPosStats != null) {
            chrPosStats.sample(variant.getStart());
        }
    }

    /**
     * Bases used for the base change table.
     * @return A fresh copy of the bases array, so callers cannot modify the shared one
     */
    public char[] getBases() {
        return bases.clone();
    }

    /**
     * Background color used for base change table
     * @param oldBase Original base
     * @param newBase New base
     * @return Whatever {@code getColorHtml} of the base change counter yields for this change
     */
    public String getBasesChangesColor(String oldBase, String newBase) {
        return baseChangesCount.getColorHtml(changeKey(oldBase, newBase));
    }

    /**
     * Count stored for a base change
     * @param oldBase Original base
     * @param newBase New base
     * @return The value held by the base change counter for this change
     */
    public long getBasesChangesCount(String oldBase, String newBase) {
        return baseChangesCount.get(changeKey(oldBase, newBase));
    }

    /**
     * All variant types
     * @return Every value of {@link VariantType}
     */
    public VariantType[] getChangeType() {
        return VariantType.values();
    }

    /**
     * Number of variant types
     * @return Number of values in {@link VariantType}
     */
    public int getChangeTypeLength() {
        return VariantType.values().length;
    }

    /**
     * Chromosome length
     * @param chromoName Chromosome name
     * @return Chromosome size, or 0 if the genome has no chromosome with that name
     */
    public int getChromosomeLength(String chromoName) {
        Chromosome chr = genome.getChromosome(chromoName);
        if (chr != null) {
            return chr.size();
        }
        return 0;
    }

    /**
     * A list of chromosomes that had at least one change
     * Note: Chromosomes are sorted (using their natural order) before their IDs are collected.
     *
     * @return IDs of the chromosomes having position statistics
     */
    public List<String> getChromosomeNamesEffective() {
        // Add all chromosomes to the list and sort them
        List<Chromosome> chrsEffective = new ArrayList<Chromosome>(chrPosStatsbyName.size());
        for (String chrName : chrPosStatsbyName.keySet()) {
            Chromosome chr = genome.getChromosome(chrName);
            // Skip names the genome cannot resolve: a null entry would break the sort
            if (chr != null) {
                chrsEffective.add(chr);
            }
        }
        Collections.sort(chrsEffective);

        // Create a list of chromosome names
        List<String> chrNames = new ArrayList<String>(chrsEffective.size());
        for (Chromosome chr : chrsEffective) {
            chrNames.add(chr.getId());
        }
        return chrNames;
    }

    /**
     * Position statistics for a chromosome
     * @param chrName Chromosome name
     * @return The statistics, or null if no variant was recorded for that chromosome
     */
    public ChrPosStats getChrPosStats(String chrName) {
        return chrPosStatsbyName.get(chrName);
    }

    /**
     * Histogram plot of variant positions in a chromosome
     * @param chrName Chromosome name
     * @return The plot string, or an empty string if no variant was recorded for that chromosome
     */
    public String getChrPosStatsChartUrl(String chrName) {
        ChrPosStats chrPosStats = chrPosStatsbyName.get(chrName);
        if (chrPosStats == null) {
            return "";
        }
        return chrPosStats.toStringHistoPlot("Variants histogram: " + chrName, "Position", "Variants");
    }

    /**
     * Total number of variants
     * @return Number of samples that were real variants
     */
    public long getCount() {
        return countVariants;
    }

    /**
     * Number of variants by type
     * @return The counter keyed by variant type name
     */
    public CountByType getCountByChangeType() {
        return countByChangeType;
    }

    /**
     * Number of changes by chromosome
     * @param chromoName Chromosome name
     * @return Total from that chromosome's position statistics, or 0 if there are none
     */
    public int getCountByChromosome(String chromoName) {
        ChrPosStats chrStats = chrPosStatsbyName.get(chromoName);
        if (chrStats == null) {
            return 0;
        }
        return chrStats.getTotal();
    }

    /**
     * Number of variants having a non-empty ID
     */
    public long getCountNonEmptyId() {
        return countNonEmptyId;
    }

    /**
     * Number of samples that were not real variants
     */
    public long getCountNonVariants() {
        return countNonVariants;
    }

    /**
     * Genome length
     * @return Length reported by the genome
     */
    public long getGenomeLen() {
        return genome.length();
    }

    /**
     * Genome effective length: The sum of length of every chromosome that had a change
     * (e.g. If there was no SNP in chromosome Y, then it doesn't count in the effective length)
     *
     * @return Sum of the sizes of all chromosomes having position statistics
     */
    public long getGenomeLenEffective() {
        long len = 0;
        for (String chrName : chrPosStatsbyName.keySet()) {
            Chromosome ch = genome.getChromosome(chrName);
            if (ch != null) {
                len += ch.size();
            }
        }
        return len;
    }

    /**
     * InDel length statistics
     */
    public IntStats getIndelLen() {
        return indelLen;
    }

    /**
     * Plot of the InDel length histogram
     */
    public String getIndelLenHistoUrl() {
        return indelLen.toStringPlot("Insertion deletion length histogram", "Length", true);
    }

    /**
     * Ratio of known variants (the one with a non-empty ID) and total variants
     * @return known / total, or 0 if there are no variants
     */
    public double getKnownRatio() {
        double tot = countVariants;
        double known = countNonEmptyId;
        return tot > 0 ? known / tot : 0;
    }

    /**
     * Rate of change
     * @return Effective genome length divided by the number of variants (integer division),
     *         or 0 if there are no variants
     */
    public long getRateOfChange() {
        return countVariants > 0 ? getGenomeLenEffective() / countVariants : 0;
    }

    /**
     * Rate of change by chromosome
     * @param chromoName Chromosome name
     * @return Chromosome length divided by the number of changes in it (integer division),
     *         or 0 if there are no changes
     */
    public int getRateOfChangeByChromosome(String chromoName) {
        int len = getChromosomeLength(chromoName);
        int count = getCountByChromosome(chromoName);
        return count > 0 ? len / count : 0;
    }

    /**
     * @return true if at least one real variant has been sampled
     */
    @Override
    public boolean hasData() {
        return countVariants != 0;
    }

    /**
     * Perform stats on an InDel
     */
    void indelSample(Variant variant) {
        // InDel length histogram: deletions are recorded as negative lengths.
        // NOTE(review): the length is taken as ALT length minus one; this presumably assumes
        // ALT carries one extra leading character - confirm against Variant's representation.
        int len = (variant.isDel() ? -1 : 1) * (variant.getAlt().length() - 1);
        indelLen.sample(len);
    }

    /**
     * Use this sample to perform statistics
     */
    @Override
    public void sample(Variant variant) {
        // Not a real change => Only count it
        if (!variant.isVariant()) {
            countNonVariants++;
            return;
        }

        countVariants++;

        // Count non-empty IDs
        String id = variant.getId();
        if ((id != null) && !id.isEmpty()) {
            countNonEmptyId++;
        }

        // Count by change type
        countByChangeType.inc(variant.getVariantType().toString());

        // SNP stats or InDel stats (other types have no specific stats)
        if (variant.isSnp()) {
            snpSample(variant);
        } else if (variant.isInDel()) {
            indelSample(variant);
        }

        // Coverage by chromosome (hot spot) stats
        chromoStats(variant);
    }

    /**
     * Perform stats on a SNP
     */
    void snpSample(Variant variant) {
        // Some case might be the same base (e.g. heterozygous SNP change "A => W", where 'W' means 'A' or 'T')
        baseChangesCount.inc(changeKey(variant.getReference(), variant.getAlt()));
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy