All Downloads are FREE. Search and download functionalities are using the official Maven repository.

picard.illumina.parser.ClusterIntensityFileReader Maven / Gradle / Ivy

/*
 * The MIT License
 *
 * Copyright (c) 2012 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package picard.illumina.parser;

import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.util.UnsignedTypeUtil;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Arrays;

/**
 * Read a .cnf (binary noise) or .cif (binary intensity) file.  A file in this format contains
 * 1 or more cycles of data for a set of clusters, with 4 values per cycle, one for each channel.
 * A file can store its values in either a byte or a short per value, but the API treats them all as shorts.
 * This class does not distinguish between CIF and CNF files.
 *
 * @author [email protected]
 */
class ClusterIntensityFileReader {

    private static final byte[] IDENTIFIER = StringUtil.stringToBytes("CIF");
    private static final byte FILE_VERSION = 1;
    private static final int HEADER_SIZE = 13;
    private static final int NUM_CHANNELS = IntensityChannel.values().length;

    // Just for error reporting
    private final File file;

    /**
     * The entire file is mmapped
     */
    private final MappedByteBuffer buf;
    private final ClusterIntensityFileHeader header;

    // Precomputed for speed, I hope.
    private final int cycleSize;
    private final int channelSize;

    public static class ClusterIntensityFileHeader {
        public final int elementSize;
        public final int firstCycle;
        public final int numCycles;
        public final int numClusters;

        public ClusterIntensityFileHeader(final byte[] headerBytes, final File file) {
            if(headerBytes.length < HEADER_SIZE) {
                throw new PicardException("Bytes past to header constructor are too short excpected(" + HEADER_SIZE + ") received (" + headerBytes.length);
            }

            ByteBuffer buf = ByteBuffer.allocate(headerBytes.length); //for doing some byte conversions
            buf.order(ByteOrder.LITTLE_ENDIAN);
            buf.put(headerBytes);
            buf.position(0);

            final byte[] identifierBuf = new byte[IDENTIFIER.length];
            buf.get(identifierBuf);
            if (!Arrays.equals(identifierBuf, IDENTIFIER)) {
                throw new PicardException("Cluster intensity file " + file + " contains unexpected header: " +
                        StringUtil.bytesToString(identifierBuf));
            }
            final byte fileVersion = buf.get();
            if (fileVersion != FILE_VERSION) {
                throw new PicardException("Cluster intensity file " + file + " contains unexpected version: " + fileVersion);
            }
            elementSize = buf.get();
            if (elementSize < 1 || elementSize > 2) {
                throw new PicardException("Cluster intensity file " + file + " contains unexpected element size: " + elementSize);
            }
            // convert these to unsigned
            firstCycle = UnsignedTypeUtil.uShortToInt(buf.getShort());
            numCycles = UnsignedTypeUtil.uShortToInt(buf.getShort());
            if (numCycles == 0) {
                throw new PicardException("Cluster intensity file " + file + " has zero cycles.");
            }
            numClusters = buf.getInt();
            if (numClusters < 0) {
                // It is possible for there to be no clusters in a tile.
                throw new PicardException("Cluster intensity file " + file + " has negative number of clusters: " +numClusters);
            }
        }
    }

    /**
     * Prepare to parse a CIF or CNF file.
     * @param file The file to be parsed.
     */
    public ClusterIntensityFileReader(final File file) {
        try {
            this.file = file;
            final FileInputStream is = new FileInputStream(this.file);
            final FileChannel channel = is.getChannel();
            final long fileSize = channel.size();
            buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize);
            buf.order(ByteOrder.LITTLE_ENDIAN);
            CloserUtil.close(channel);
            CloserUtil.close(is);
            final byte [] headerBytes = new byte[HEADER_SIZE];
            buf.get(headerBytes);
            this.header = new ClusterIntensityFileHeader(headerBytes, this.file);
        } catch (IOException e) {
            throw new PicardException("IOException opening cluster intensity file " + file, e);
        }
        cycleSize = NUM_CHANNELS * header.numClusters * header.elementSize;
        channelSize = header.numClusters * header.elementSize;
    }

    /**
     * Get the value for the given args.  Value is returned as a signed short regardless of whether storage is
     * in bytes or shorts.
     * @param cluster 0-based cluster number.
     * @param channel Which channel is desired.
     * @param cycle Absolute cycle number.  E.g. if the first cycle in the file is N, then the first value that can
     * be fetched is cycle=N
     * @return Intensity or noise (depending on whether this is a CIF or CNF file).
     */
    public short getValue(final int cluster, final IntensityChannel channel, final int cycle) {
        if (cycle < header.firstCycle || cycle >= header.firstCycle + header.numCycles) {
            throw new IllegalArgumentException("Requested cycle (" + cycle + ") number out of range.  First cycle=" +
                    header.firstCycle + "; numCycles=" + header.numCycles);
        }
        if (cluster < 0 || cluster >= header.numClusters) {
            throw new IllegalArgumentException("Requested cluster (" + cluster + ") number out of range. numClustersInTile=" + header.numClusters);
        }
        final int relativeCycle = cycle - header.firstCycle;
        final int position = HEADER_SIZE + relativeCycle * cycleSize + channel.ordinal() * channelSize + cluster * header.elementSize;
        buf.position(position);
        if (header.elementSize == 1) {
            return buf.get();
        } else {
            return buf.getShort();
        }
    }

    public File getFile() {
        return file;
    }

    /**
     * @return The first (one-based) cycle stored in this file.
     */
    public int getFirstCycle() {
        return header.firstCycle;
    }

    /**
     * @return Number of clusters stored in this file.
     */
    public int getNumClusters() {
        return header.numClusters;
    }

    /**
     * @return Number of cycles stored in this file.
     */
    public int getNumCycles() {
        return header.numCycles;
    }

    /**
     * @return the size of one intensity value for one channel in this file.
     */
    public int getElementSize() {
        return header.elementSize;
    }

    public static ClusterIntensityFileHeader readHeaders(final File intensityFile) {
        FileInputStream reader = null;
        byte [] headerBytes = new byte[HEADER_SIZE];
        int bytesRead = 0;
        try {
            reader = new FileInputStream(intensityFile);
            bytesRead = reader.read(headerBytes);
        } catch(FileNotFoundException fnfExc) {
            throw new PicardException("Error opening intensity file (" + intensityFile.getAbsolutePath() +")", fnfExc);
        } catch(IOException ioExc) {
            throw new PicardException("Error reading values from header for intensity file (" + intensityFile.getAbsolutePath() + ")", ioExc);
        } finally {
            CloserUtil.close(reader);
        }

        if(bytesRead != HEADER_SIZE)
            throw new PicardException("Error reading intensity file header, too few bytes read, expected( " + HEADER_SIZE + ") read(" + bytesRead + ")");

        return new ClusterIntensityFileHeader(headerBytes, intensityFile);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy