
org.opencb.biodata.tools.alignment.BamManager Maven / Gradle / Ivy
/*
*
*
*/
package org.opencb.biodata.tools.alignment;
import ga4gh.Reads;
import htsjdk.samtools.*;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.Log;
import org.ga4gh.models.ReadAlignment;
import org.opencb.biodata.models.alignment.RegionCoverage;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.tools.alignment.coverage.SamRecordRegionCoverageCalculator;
import org.opencb.biodata.tools.alignment.filters.AlignmentFilters;
import org.opencb.biodata.tools.alignment.iterators.BamIterator;
import org.opencb.biodata.tools.alignment.iterators.SAMRecordToAvroReadAlignmentBamIterator;
import org.opencb.biodata.tools.alignment.iterators.SAMRecordToProtoReadAlignmentBamIterator;
import org.opencb.biodata.tools.alignment.iterators.SamRecordBamIterator;
import org.opencb.biodata.tools.alignment.stats.AlignmentGlobalStats;
import org.opencb.biodata.tools.alignment.stats.SamRecordAlignmentGlobalStatsCalculator;
import org.opencb.commons.utils.FileUtils;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
/**
* Created by imedina on 14/09/15.
*/
public class BamManager {
private Path input;
private SamReader samReader;
private static final int DEFAULT_MAX_NUM_RECORDS = 50000;
public BamManager() {
}
public BamManager(Path input) throws IOException {
FileUtils.checkFile(input);
this.input = input;
SamReaderFactory srf = SamReaderFactory.make();
srf.validationStringency(ValidationStringency.LENIENT);
this.samReader = srf.open(SamInputResource.of(input.toFile()));
}
/**
* Creates a index file for the BAM or CRAM input file.
* @return The path of the index file.
* @throws IOException
*/
public Path createIndex() throws IOException {
Path indexPath = input.getParent().resolve(input.getFileName().toString() + ".bai");
return createIndex(indexPath);
}
/**
* Creates a BAM/CRAM index file.
* @param outputIndex The index created.
* @return
* @throws IOException
*/
public Path createIndex(Path outputIndex) throws IOException {
FileUtils.checkDirectory(outputIndex.toAbsolutePath().getParent(), true);
SamReaderFactory srf = SamReaderFactory.make().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS);
srf.validationStringency(ValidationStringency.LENIENT);
try (SamReader reader = srf.open(SamInputResource.of(input.toFile()))) {
// Files need to be sorted by coordinates to create the index
SAMFileHeader.SortOrder sortOrder = reader.getFileHeader().getSortOrder();
if (!sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
throw new IOException("Expected sorted file. File '" + input.toString()
+ "' is not sorted by coordinates (" + sortOrder.name() + ")");
}
if (reader.type().equals(SamReader.Type.BAM_TYPE)) {
BAMIndexer.createIndex(reader, outputIndex.toFile(), Log.getInstance(BamManager.class));
} else {
if (reader.type().equals(SamReader.Type.CRAM_TYPE)) {
// TODO This really needs to be tested!
SeekableStream streamFor = SeekableStreamFactory.getInstance().getStreamFor(input.toString());
CRAMBAIIndexer.createIndex(streamFor, outputIndex.toFile(), Log.getInstance(BamManager.class),
ValidationStringency.DEFAULT_STRINGENCY);
} else {
throw new IOException("This is not a BAM or CRAM file. SAM files cannot be indexed");
}
}
}
return outputIndex;
}
/**
* This method aims to provide a very simple, safe and quick way of accessing to a small fragment of the BAM/CRAM file.
* This must not be used in production for reading big data files. It returns a maximum of 10,000 SAM records.
*
* @param region @return
* @throws IOException
*/
public List query(Region region) throws Exception {
return query(region, null, new AlignmentOptions(), SAMRecord.class);
}
public List query(Region region, AlignmentOptions options) throws Exception {
return query(region, null, options, SAMRecord.class);
}
public List query(Region region, AlignmentFilters filters, AlignmentOptions options) throws Exception {
return query(region, filters, options, SAMRecord.class);
}
// public List query() throws Exception {
// return query(null, null, new AlignmentOptions(), SAMRecord.class);
// }
public List query(AlignmentFilters filters) throws Exception {
return query(null, filters, null, SAMRecord.class);
}
public List query(AlignmentFilters filters, AlignmentOptions options) throws Exception {
return query(null, filters, options, SAMRecord.class);
}
public List query(AlignmentFilters filters, AlignmentOptions options, Class clazz) throws Exception {
return query(null, filters, options, clazz);
}
public List query(Region region, AlignmentFilters filters, AlignmentOptions alignmentOptions, Class clazz) throws Exception {
if (alignmentOptions == null) {
alignmentOptions = new AlignmentOptions();
}
// Number of returned records, if not set then DEFAULT_MAX_NUM_RECORDS is returned
int maxNumberRecords = DEFAULT_MAX_NUM_RECORDS;
if (alignmentOptions.getLimit() > 0) { // && alignmentOptions.getLimit() <= DEFAULT_MAX_NUM_RECORDS
maxNumberRecords = alignmentOptions.getLimit();
}
List results = new ArrayList<>(maxNumberRecords);
BamIterator bamIterator = (region != null)
? iterator(region, filters, alignmentOptions, clazz)
: iterator(filters, alignmentOptions, clazz);
while (bamIterator.hasNext() && results.size() < maxNumberRecords) {
results.add(bamIterator.next());
}
bamIterator.close();
return results;
}
/**
* This method aims to provide a very simple, safe and quick way of iterating BAM/CRAM files.
*
*/
public BamIterator iterator() {
return iterator(null, new AlignmentOptions(), SAMRecord.class);
}
public BamIterator iterator(AlignmentOptions options) {
return iterator(null, options, SAMRecord.class);
}
public BamIterator iterator(AlignmentFilters filters, AlignmentOptions options) {
return iterator(filters, options, SAMRecord.class);
}
public BamIterator iterator(AlignmentFilters filters, AlignmentOptions alignmentOptions, Class clazz) {
if (alignmentOptions == null) {
alignmentOptions = new AlignmentOptions();
}
SAMRecordIterator samRecordIterator = samReader.iterator();
return getAlignmentIterator(filters, alignmentOptions.isBinQualities(), clazz, samRecordIterator);
}
public BamIterator iterator(Region region) {
return iterator(region, null, new AlignmentOptions(), SAMRecord.class);
}
public BamIterator iterator(Region region, AlignmentOptions options) {
return iterator(region, null, options, SAMRecord.class);
}
public BamIterator iterator(Region region, AlignmentFilters filters, AlignmentOptions options) {
return iterator(region, filters, options, SAMRecord.class);
}
public BamIterator iterator(Region region, AlignmentFilters filters, AlignmentOptions alignmentOptions, Class clazz) {
if (alignmentOptions == null) {
alignmentOptions = new AlignmentOptions();
}
SAMRecordIterator samRecordIterator =
samReader.query(region.getChromosome(), region.getStart(), region.getEnd(), alignmentOptions.isContained());
return getAlignmentIterator(filters, alignmentOptions.isBinQualities(), clazz, samRecordIterator);
}
public AlignmentGlobalStats stats() throws Exception {
return calculateGlobalStats(iterator());
// AlignmentGlobalStats alignmentGlobalStats = new AlignmentGlobalStats();
// SamRecordAlignmentGlobalStatsCalculator calculator = new SamRecordAlignmentGlobalStatsCalculator();
// try (BamIterator iterator = iterator()) {
// while (iterator.hasNext()) {
// AlignmentGlobalStats computed = calculator.compute(iterator.next());
// calculator.update(computed, alignmentGlobalStats);
// }
// } catch (Exception e) {
// e.printStackTrace();
// }
// return alignmentGlobalStats;
}
public AlignmentGlobalStats stats(Region region, AlignmentFilters filters, AlignmentOptions options) throws Exception {
return calculateGlobalStats(iterator(region, filters, options));
// AlignmentGlobalStats alignmentGlobalStats = new AlignmentGlobalStats();
// SamRecordAlignmentGlobalStatsCalculator calculator = new SamRecordAlignmentGlobalStatsCalculator();
// try (BamIterator iterator = iterator(region, options, filters)) {
// while (iterator.hasNext()) {
// AlignmentGlobalStats computed = calculator.compute(iterator.next());
// calculator.update(computed, alignmentGlobalStats);
// }
// } catch (Exception e) {
// e.printStackTrace();
// }
// return alignmentGlobalStats;
}
private AlignmentGlobalStats calculateGlobalStats(BamIterator iterator) throws Exception {
AlignmentGlobalStats alignmentGlobalStats = new AlignmentGlobalStats();
SamRecordAlignmentGlobalStatsCalculator calculator = new SamRecordAlignmentGlobalStatsCalculator();
while (iterator.hasNext()) {
AlignmentGlobalStats computed = calculator.compute(iterator.next());
calculator.update(computed, alignmentGlobalStats);
}
iterator.close();
return alignmentGlobalStats;
}
public RegionCoverage coverage(Region region, AlignmentFilters filters, AlignmentOptions options) {
RegionCoverage regionCoverage = new RegionCoverage(region);
if (options == null) {
options = new AlignmentOptions();
}
SamRecordRegionCoverageCalculator calculator = new SamRecordRegionCoverageCalculator(options.getMinBaseQuality());
try (BamIterator iterator = iterator(region, filters, options)) {
while (iterator.hasNext()) {
SAMRecord next = iterator.next();
if (!next.getReadUnmappedFlag()) {
calculator.update(next, regionCoverage);
}
}
} catch (Exception e) {
e.printStackTrace();
}
return regionCoverage;
}
private BamIterator getAlignmentIterator(AlignmentFilters filters, boolean binQualities, Class clazz,
SAMRecordIterator samRecordIterator) {
if (ReadAlignment.class == clazz) { // AVRO
return (BamIterator) new SAMRecordToAvroReadAlignmentBamIterator(samRecordIterator, filters, binQualities);
} else if (Reads.ReadAlignment.class == clazz) { // PROTOCOL BUFFER
return (BamIterator) new SAMRecordToProtoReadAlignmentBamIterator(samRecordIterator, filters, binQualities);
} else if (SAMRecord.class == clazz) {
return (BamIterator) new SamRecordBamIterator(samRecordIterator, filters);
} else {
throw new IllegalArgumentException("Unknown alignment class " + clazz);
}
}
public void close() throws IOException {
if (samReader != null) {
samReader.close();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy