// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download/modify it as often as you want.
/*
* The MIT License
*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.illumina.parser.readers;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;
import picard.PicardException;
import picard.illumina.parser.BclData;
import picard.illumina.parser.TileIndex;
import picard.util.UnsignedTypeUtil;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
import java.util.zip.GZIPInputStream;
/**
* BCL Files are base call and quality score binary files containing a (base,quality) pair for successive clusters.
* The file is structured as followed:
* Bytes 1-4 : unsigned int numClusters
* Bytes 5-numClusters + 5 : 1 byte base/quality score
*
* The base/quality scores are organized as follows (with one exception, SEE BELOW):
* The right 2 most bits (these are the LEAST significant bits) indicate the base, where
* A=00(0x00), C=01(0x01), G=10(0x02), and T=11(0x03)
*
* The remaining bytes compose the quality score which is an unsigned int.
*
* EXCEPTION: If a byte is entirely 0 (e.g. byteRead == 0) then it is a no call, the base
* becomes '.' and the Quality becomes 2, the default illumina masking value
*
* (E.g. if we get a value in binary of 10001011 it gets transformed as follows:
*
* Value read: 10001011(0x8B)
*
* Quality Base
*
* 100010 11
* 00100010 0x03
* 0x22 T
* 34 T
*
* So the output base/quality will be a (T/34)
*/
public class BclReader extends BaseBclReader implements CloseableIterator {
private static final int HEADER_SIZE = 4;
protected BclData queue = null;
public BclReader(final List bclsForOneTile, final int[] outputLengths,
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) {
super(outputLengths, bclQualityEvaluationStrategy);
try {
final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 0; i < cycles; ++i) {
final File bclFile = bclsForOneTile.get(i);
if (bclFile == null) {
close();
throw new RuntimeIOException(String.format("Could not find BCL file for cycle %d", i));
}
final String filePath = bclFile.getName();
final boolean isGzip = filePath.endsWith(".gz");
final boolean isBgzf = filePath.endsWith(".bgzf");
final InputStream stream = open(bclFile, seekable, isGzip, isBgzf);
final int read = stream.read(byteBuffer.array());
if (read != HEADER_SIZE) {
close();
throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
}
numClustersPerCycle[i] = byteBuffer.getInt();
if (!isBgzf && !isGzip) {
assertProperFileStructure(bclFile, numClustersPerCycle[i], stream);
}
this.streams[i] = stream;
this.streamFiles[i] = bclFile;
byteBuffer.clear();
}
} catch (final IOException ioe) {
throw new RuntimeIOException(ioe);
}
}
public static boolean isGzipped(final File file) {
return file.getAbsolutePath().endsWith(".gz");
}
public static boolean isBlockGzipped(final File file) {
return file.getAbsolutePath().endsWith(".bgzf");
}
public static long getNumberOfClusters(final File file) {
InputStream stream = null;
try {
if (isBlockGzipped(file)) stream = new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file));
else if (isGzipped(file)) stream = new GZIPInputStream(IOUtil.maybeBufferInputStream(new FileInputStream(file)));
else stream = IOUtil.maybeBufferInputStream(new FileInputStream(file));
return getNumberOfClusters(file.getAbsolutePath(), stream);
} catch (final IOException ioe) {
throw new PicardException("Could not open file " + file.getAbsolutePath() + " to get its cluster count: " + ioe.getMessage(), ioe);
} finally {
CloserUtil.close(stream);
}
}
private static long getNumberOfClusters(final String filePath, final InputStream inputStream) {
final byte[] header = new byte[HEADER_SIZE];
try {
final int headerBytesRead = inputStream.read(header);
if (headerBytesRead != HEADER_SIZE) {
throw new PicardException("Malformed file, expected header of size " + HEADER_SIZE + " but received " + headerBytesRead);
}
} catch (final IOException ioe) {
throw new PicardException("Unable to read header for file (" + filePath + ")", ioe);
}
final ByteBuffer headerBuf = ByteBuffer.wrap(header);
headerBuf.order(ByteOrder.LITTLE_ENDIAN);
return UnsignedTypeUtil.uIntToLong(headerBuf.getInt());
}
public BclReader(final File bclFile, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) {
super(new int[]{1}, bclQualityEvaluationStrategy);
try {
final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE);
final String filePath = bclFile.getName();
final boolean isGzip = filePath.endsWith(".gz");
final boolean isBgzf = filePath.endsWith(".bgzf");
final InputStream stream = open(bclFile, seekable, isGzip, isBgzf);
final int read = stream.read(byteBuffer.array());
if (read != HEADER_SIZE) {
throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile()));
}
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
this.numClustersPerCycle[0] = byteBuffer.getInt();
if (!isBgzf && !isGzip) {
assertProperFileStructure(bclFile, this.getNumClusters(), stream);
}
this.streams[0] = stream;
this.streamFiles[0] = bclFile;
} catch (final IOException ioe) {
throw new PicardException("IOException opening file " + bclFile.getAbsoluteFile(), ioe);
}
}
void assertProperFileStructure(final File file, final int numClusters, final InputStream stream) {
final long elementsInFile = file.length() - HEADER_SIZE;
if (numClusters != elementsInFile) {
CloserUtil.close(stream);
throw new PicardException("Expected " + numClusters + " in file " + file.getAbsolutePath() + " but found " + elementsInFile);
}
}
public void close() {
for (final InputStream stream : this.streams) {
CloserUtil.close(stream);
}
}
@Override
public boolean hasNext() {
if (queue == null) {
advance();
}
return queue != null;
}
protected void assertProperFileStructure(final File file) {
final long elementsInFile = file.length() - HEADER_SIZE;
if (getNumClusters() != elementsInFile) {
throw new PicardException("Expected " + getNumClusters() + " in file " + file.getAbsolutePath() + " but found " + elementsInFile);
}
}
public BclData next() {
if (queue == null) {
advance();
}
final BclData data = queue;
queue = null;
return data;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
void advance() {
int totalCycleCount = 0;
final BclData data = new BclData(outputLengths);
for (int read = 0; read < outputLengths.length; read++) {
for (int cycle = 0; cycle < outputLengths[read]; ++cycle) {
try {
final int readByte;
try {
readByte = this.streams[totalCycleCount].read();
} catch (IOException e) {
// when logging the error, increment cycle by 1, since totalCycleCount is zero-indexed but Illumina directories are 1-indexed.
throw new IOException(String.format("Error while reading from BCL file for cycle %d. Offending file on disk is %s",
(totalCycleCount+1), this.streamFiles[totalCycleCount].getAbsolutePath()), e);
}
if (readByte == -1) {
queue = null;
return;
}
decodeBasecall(data, read, cycle, readByte);
totalCycleCount++;
} catch (final IOException ioe) {
throw new RuntimeIOException(ioe);
}
}
}
this.queue = data;
}
public static BclReader makeSeekable(final List files, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final int[] outputLengths) {
return new BclReader(files, outputLengths, bclQualityEvaluationStrategy, true);
}
public int seek(final List files, final TileIndex tileIndex, final int currentTile) {
int count = 0;
int numClustersInTile = 0;
for (final InputStream inputStream : streams) {
final TileIndex.TileIndexRecord tileIndexRecord = tileIndex.findTile(currentTile);
final BclIndexReader bclIndexReader = new BclIndexReader(files.get(count));
final long virtualFilePointer = bclIndexReader.get(tileIndexRecord.getZeroBasedTileNumber());
if (!(inputStream instanceof BlockCompressedInputStream)) {
throw new UnsupportedOperationException("Seeking only allowed on bzgf");
} else {
try {
if (tileIndex.getNumTiles() != bclIndexReader.getNumTiles()) {
throw new PicardException(String.format("%s.getNumTiles(%d) != %s.getNumTiles(%d)",
tileIndex.getFile().getAbsolutePath(), tileIndex.getNumTiles(), bclIndexReader.getBciFile().getAbsolutePath(), bclIndexReader.getNumTiles()));
}
((BlockCompressedInputStream) inputStream).seek(virtualFilePointer);
numClustersInTile = tileIndexRecord.getNumClustersInTile();
} catch (final IOException e) {
throw new PicardException("Problem seeking to " + virtualFilePointer, e);
}
}
count++;
}
return numClustersInTile;
}
}