
org.snpeff.coverage.CountReads Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SnpEff Show documentation
Show all versions of SnpEff Show documentation
Variant annotation and effect prediction package.
The newest version!
package org.snpeff.coverage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import org.snpeff.fileIterator.BedFileIterator;
import org.snpeff.fileIterator.VcfFileIterator;
import org.snpeff.interval.Chromosome;
import org.snpeff.interval.Genome;
import org.snpeff.interval.Marker;
import org.snpeff.interval.Markers;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.Variant;
import org.snpeff.snpEffect.SnpEffectPredictor;
import org.snpeff.stats.CountByKey;
import org.snpeff.stats.CountByType;
import org.snpeff.stats.CoverageByType;
import org.snpeff.stats.PosStats;
import org.snpeff.util.Gpr;
import org.snpeff.util.Timer;
import org.snpeff.vcf.VcfEntry;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
/**
* Count how many reads map (from many SAM/BAM files) onto markers
*
* @author pcingola
*/
public class CountReads {
public static int SHOW_EVERY = 10000;
boolean verbose = false; // Be verbose
int countTotalReads;
int readLengthCount;
int countExceptions = 0;
long readLengthSum;
String fileName;
Genome genome;
SnpEffectPredictor snpEffectPredictor;
CountByType countTypes;
CountByKey countReads;
CountByKey countBases;
MarkerTypes markerTypes;
CoverageByType coverageByType;
ArrayList coverageByExons;
public CountReads(String fileName, SnpEffectPredictor snpEffectPredictor) {
this.fileName = fileName;
if (snpEffectPredictor != null)
this.snpEffectPredictor = snpEffectPredictor;
else
this.snpEffectPredictor = new SnpEffectPredictor(new Genome());
markerTypes = new MarkerTypes();
coverageByExons = new ArrayList();
}
public void addMarkerType(Marker marker, String type) {
markerTypes.addType(marker, type);
}
/**
* Create a collection of all markers
*
* @return
*/
Collection allMarkers() {
return countReads.keySet();
}
/**
* Count markers from a file
*/
public void count() {
genome = snpEffectPredictor.getGenome();
readLengthSum = 0;
readLengthCount = 0;
// Iterate over all BAM/SAM files
try {
if (verbose)
Timer.showStdErr("Reading file '" + fileName + "'");
countReads = new CountByKey();
countBases = new CountByKey();
countTypes = new CountByType();
coverageByType = new CoverageByType();
countFile(fileName);
} catch (Exception e) {
e.printStackTrace();
}
if (verbose) {
System.err.println("");
Timer.showStdErr("Finished reding file " + fileName + "\n\tTotal reads: " + countTotalReads);
}
if (verbose)
Timer.showStdErr("Done.");
}
/**
* Count all markers from a BED file
*/
void countBedFile(String fileName) {
// Open file
int readNum = 1;
for (Variant read : new BedFileIterator(fileName)) {
try {
readLengthCount++;
readLengthSum += read.size();
countMarker(fileName, read);
if (verbose)
Gpr.showMark(readNum, SHOW_EVERY);
readNum++;
} catch (Exception e) {
countExceptions++;
if (countExceptions < 10)
e.printStackTrace();
else if (countExceptions == 10)
System.err.println("Not showing more exceptions!");
}
}
}
/**
* Count all markers from a SAM/BAM file
*/
void countFile(String fileName) {
String fl = fileName.toLowerCase();
if (fl.endsWith(".bam") || fl.endsWith(".sam"))
countSamFile(fileName);
else if (fl.endsWith(".vcf") || fl.endsWith(".vcf.gz"))
countVcfFile(fileName);
else if (fl.endsWith(".bed") || fl.endsWith(".bed.gz"))
countBedFile(fileName);
else
throw new RuntimeException("Unrecognized file extention. Supported types: BAM, SAM, BED, VCF.");
}
/**
* Count one marker
*/
void countMarker(String fileName, Marker read) {
// Find all intersects
Markers regions = snpEffectPredictor.queryDeep(read);
// Count total reads
countTotalReads++;
// Count each marker
HashSet done = new HashSet();
for (Marker m : regions) {
// Make sure we count only once each marker
if (!done.contains(m)) {
done.add(m);
countReads.inc(m); // Count reads
countBases.inc(m, m.intersectSize(read)); // Count number bases that intersect
// Count by marker type (make sure we only count once per read)
String type = markerTypes.getType(m);
String subtype = markerTypes.getSubType(m);
String typeRank = markerTypes.getTypeRank(m);
countTypes.inc(type); // Count reads
// Coverage by type
PosStats posStats = coverageByType.getOrCreate(type);
posStats.sample(read, m);
// Coverage by number of exons
coverageByExons(read, m, typeRank);
// Count sub-type if any
if (subtype != null) {
countTypes.inc(subtype); // Count reads
posStats = coverageByType.getOrCreate(subtype);
posStats.sample(read, m);
}
}
}
}
/**
* Count how many of each marker type are there
*
* @return
*/
CountByType countMarkerTypes(Collection markersToCount) {
CountByType countByMarkerType = new CountByType();
for (Marker marker : markersToCount) {
String type = markerTypes.getType(marker);
String subtype = markerTypes.getSubType(marker);
countByMarkerType.inc(type);
if (subtype != null)
countByMarkerType.inc(subtype);
}
return countByMarkerType;
}
/**
* Count all markers from a SAM/BAM file
*/
void countSamFile(String fileName) {
// Open file
int readNum = 1;
// SAMFileReader sam = new SAMFileReader(new File(fileName));
// sam.setValidationStringency(ValidationStringency.SILENT);
SamReader sam = SamReaderFactory.makeDefault().open(new File(fileName));
for (SAMRecord samRecord : sam) {
try {
if (!samRecord.getReadUnmappedFlag()) { // Mapped?
Chromosome chr = genome.getOrCreateChromosome(samRecord.getReferenceName());
if (chr != null) {
// Create a marker from read
Marker read = new Marker(chr, samRecord.getAlignmentStart(), samRecord.getAlignmentEnd(), false,
"");
readLengthCount++;
readLengthSum += read.size();
countMarker(fileName, read);
}
}
if (verbose)
Gpr.showMark(readNum, SHOW_EVERY);
readNum++;
} catch (Exception e) {
countExceptions++;
if (countExceptions < 10)
e.printStackTrace();
else if (countExceptions == 10)
System.err.println("Not showing more exceptions!");
}
}
try {
sam.close();
} catch (IOException e) {
// TODO Auto-generated catch block
throw new RuntimeException("Error closing SAM/BAM file '" + fileName + "'", e);
}
}
/**
* Count all markers from a VCF file
*/
void countVcfFile(String fileName) {
// Open file
int readNum = 1;
for (VcfEntry read : new VcfFileIterator(fileName)) {
try {
readLengthCount++;
readLengthSum += read.size();
countMarker(fileName, read);
if (verbose)
Gpr.showMark(readNum, SHOW_EVERY);
readNum++;
} catch (Exception e) {
countExceptions++;
if (countExceptions < 10)
e.printStackTrace();
else if (countExceptions == 10)
System.err.println("Not showing more exceptions!");
}
}
}
/**
* Coverage by number of exons in a transcript
*
* @param m
* @param typeRank
*/
void coverageByExons(Marker read, Marker m, String typeRank) {
// Find corresponfing transcript
Transcript tr = (Transcript) m.findParent(Transcript.class);
if (tr == null)
return;
// Number of exons
int exons = tr.numChilds();
// Do we need to add new counters?
if (coverageByExons.size() <= exons) {
for (int i = coverageByExons.size(); i <= exons; i++)
coverageByExons.add(new CoverageByType());
}
// Perform stats
CoverageByType cbe = coverageByExons.get(exons);
PosStats posStats = cbe.getOrCreate(typeRank);
posStats.sample(read, m);
}
public CountByKey getCountBases() {
return countBases;
}
public int getCountExceptions() {
return countExceptions;
}
public CountByKey getCountReads() {
return countReads;
}
public int getCountTotalReads() {
return countTotalReads;
}
public CountByType getCountTypes() {
return countTypes;
}
public ArrayList getCoverageByExons() {
return coverageByExons;
}
public CoverageByType getCoverageByType() {
return coverageByType;
}
public MarkerTypes getMarkerTypes() {
return markerTypes;
}
/**
* Average read length
*
* @return
*/
public int getReadLengthAvg() {
if (readLengthCount <= 0)
return 0;
double rl = ((double) readLengthSum) / readLengthCount;
return (int) Math.round(rl);
}
public int getReadLengthCount() {
return readLengthCount;
}
public long getReadLengthSum() {
return readLengthSum;
}
/**
* Initialize
*
* @param snpEffectPredictor
*/
void init(SnpEffectPredictor snpEffectPredictor) {
}
public void setMarkerTypes(MarkerTypes markerTypes) {
this.markerTypes = markerTypes;
}
public void setVerbose(boolean verbose) {
this.verbose = verbose;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy