org.broad.igv.bbfile.BigBedDataBlock

/*
 * Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard.  All Rights Reserved.
 *
 * This software is licensed under the terms of the GNU Lesser General Public License (LGPL),
 * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php.
 *
 * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR
 * WARRANTIES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING,
 * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER
 * OR NOT DISCOVERABLE.  IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE
 * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES
 * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
 * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER
 * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT
 * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
 */

package org.broad.igv.bbfile;

import org.apache.log4j.Logger;
import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.SeekableStream;

import java.util.ArrayList;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.DataInputStream;
import java.util.HashMap;


/*
*   Container class for reading and storing a block of bed data items.
*
*   Author: martind
*   Date: Jan 26, 2010
* */
public class BigBedDataBlock {

    private static Logger log = Logger.getLogger(BigBedDataBlock.class);

    // Bed data block access variables   - for reading in bed records from a file
    private SeekableStream fis;  // file input stream handle
    private long fileOffset;       // Bed data block file offset
    private long dataBlockSize;     // byte size for data block specified in the R+ leaf
    private boolean isLowToHigh;   // if true, data is low to high byte order; else high to low

    // defines the bigBed/bigWig source chromosomes
    private HashMap<Integer, String> chromosomeMap;  // map of chromosome IDs to corresponding names
    private RPTreeLeafNodeItem leafHitItem;   // R+ tree leaf item containing data block location

    // Provides uncompressed byte stream data reader
    private byte[] bedBuffer;  // buffer containing leaf block data uncompressed
    private int remDataSize;   // number of unread data bytes
    private long dataSizeRead;     // number of bytes read from the decompressed bedBuffer

    // byte stream readers
    private LittleEndianInputStream lbdis;    // low to high byte stream reader
    private DataInputStream dis;       // high to low byte stream reader

    // Bed data extraction members
    private ArrayList<BedFeature> bedFeatureList; // list of BedFeature items extracted from this block
    private int nItemsSelected;    // number of Bed features selected from this section

    /*
    *   Constructor for Bed data block reader.
    *
    *   Parameters:
    *       fis - file input stream handle
    *       leafItem - R+ tree leaf item containing chromosome region and file data location
    *       chromIDTree - B+ chromosome index tree returns chromosome ID's for names
    *       isLowToHigh - byte order is low to high if true; else high to low
    *       uncompressBufSize - byte size for decompression buffer; else 0 for uncompressed
    * */
    public BigBedDataBlock(SeekableStream fis, RPTreeLeafNodeItem leafHitItem,
            HashMap<Integer, String> chromosomeMap, boolean isLowToHigh, int uncompressBufSize){
        this.fis = fis;
        this.leafHitItem = leafHitItem;
        this.chromosomeMap = chromosomeMap;
        this.isLowToHigh = isLowToHigh;

        dataBlockSize = this.leafHitItem.geDataSize();
        byte[] buffer = new byte[(int) dataBlockSize];

        fileOffset = this.leafHitItem.getDataOffset();

        // read Bed data block into a buffer
        try {
            fis.seek(fileOffset);
            fis.readFully(buffer);

            // decompress if necessary - the buffer size is 0 for uncompressed data
            // Note:  BBFile Table C specifies a decompression buffer size
            if(uncompressBufSize > 0)
              bedBuffer = BBCompressionUtils.decompress(buffer, uncompressBufSize);
            else
              bedBuffer = buffer;    // use uncompressed read buffer directly

        } catch (IOException ex) {
            String error = String.format("Error reading Bed data for leaf item at file offset %d", fileOffset);
            log.error(error, ex);
            throw new RuntimeException(error, ex);
        }

        // wrap the bed buffer as an input stream
        if(this.isLowToHigh)
            lbdis = new LittleEndianInputStream(new ByteArrayInputStream(bedBuffer));
        else
            dis = new DataInputStream(new ByteArrayInputStream(bedBuffer));

        // initialize unread data size
        remDataSize = bedBuffer.length;

        // use methods getBedData or getNextFeature to extract block data
    }
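
    /*
    *   Typical usage (a sketch, not taken from this source): a caller that has already
    *   located this block's leaf item via the R+ tree search, and built the chromosome
    *   ID -> name map from the B+ tree, reads the block roughly as follows. The variable
    *   names below (fis, leafItem, chromosomeMap, selectionRegion, ...) are illustrative only.
    *
    *       BigBedDataBlock block = new BigBedDataBlock(fis, leafItem, chromosomeMap,
    *               isLowToHigh, uncompressBufSize);
    *       ArrayList<BedFeature> features = block.getBedData(selectionRegion, contained);
    *       for (BedFeature feature : features)
    *           feature.print();
    * */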

    /*
    *   Method returns all Bed features within the decompressed block buffer
    *
    *   Parameters:
    *       selectionRegion - chromosome region for selecting Bed features
    *       contained - indicates selected data must be contained in selection region
    *           if true, else may intersect selection region
    *
    *   Returns:
    *      Bed feature items in the data block
    *
    *   Note: The remaining unread byte count for the data block is used to determine
    *   when reading is complete, since the number of records in the block is not stored.
    * */
    public ArrayList<BedFeature> getBedData(RPChromosomeRegion selectionRegion,
                                            boolean contained) {
        int itemNumber = 0;
        int chromID, chromStart, chromEnd;
        String restOfFields;
        int itemHitValue;

        // chromID + chromStart + chromEnd + rest 0 byte
        // 0 byte for "restOfFields" is always present for bed data
        int minItemSize = 3 * 4 + 1;
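
        // For reference, each record follows BBFile Table I of the BigBed spec
        // (restated here; field widths are the spec values):
        //     uint32  chromId     - 4 bytes, chromosome ID assigned by the B+ tree
        //     uint32  chromStart  - 4 bytes, 0-based start position
        //     uint32  chromEnd    - 4 bytes, end position (exclusive)
        //     char[]  rest        - remaining tab-separated bed fields, terminated by
        //                           the 0 byte counted in minItemSize above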

        // allocate the bed feature array list
        bedFeatureList = new ArrayList<BedFeature>();

        // check if all leaf items are selection hits
        RPChromosomeRegion itemRegion = new RPChromosomeRegion( leafHitItem.getChromosomeBounds());
        int leafHitValue = itemRegion.compareRegions(selectionRegion);
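        // (compareRegions convention, as inferred from its use in this class: 0 means the
        // region is fully contained in selectionRegion, +/-1 means partial overlap, and
        // +/-2 means the regions are disjoint)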
        
        try {
            for(int index = 0; remDataSize > 0; ++index) {
                itemNumber = index + 1;

                // read in BigBed item fields - BBFile Table I
                if(isLowToHigh){
                    chromID = lbdis.readInt();
                    chromStart= lbdis.readInt();
                    chromEnd = lbdis.readInt();
                    restOfFields = lbdis.readString();
                }
                else{
                    chromID = dis.readInt();
                    chromStart= dis.readInt();
                    chromEnd = dis.readInt();
                    restOfFields = dis.readUTF();
                }

                if(leafHitValue == 0) {     // contained leaf region items always added
                    String chromosome = chromosomeMap.get(chromID);
                    BedFeature bbItem = new BedFeature(itemNumber, chromosome,
                         chromStart, chromEnd, restOfFields);
                    bedFeatureList.add(bbItem);
                }
                else {                      // test for hit
                    itemRegion = new RPChromosomeRegion(chromID, chromStart, chromID, chromEnd);
                    itemHitValue = itemRegion.compareRegions(selectionRegion);

                    // abs(itemHitValue) == 1 for intersection; itemHitValue == 0 for contained
                    if((!contained && Math.abs(itemHitValue) < 2) ||
                            itemHitValue == 0) {
                        // add bed feature to item selection list
                        String chromosome = chromosomeMap.get(chromID);
                        BedFeature bbItem = new BedFeature(itemNumber, chromosome,
                             chromStart, chromEnd, restOfFields);
                        bedFeatureList.add(bbItem);
                    }
                }

                // compute data block remainder from size of item read
                // todo: check that restOfFields.length() does not also include 0 byte terminator
                remDataSize -= minItemSize + restOfFields.length();
            }

        } catch (IOException ex) {
            log.error("Read error for Bed data item " + itemNumber, ex);

            // accept this as an end of block condition unless no items were read
            if(itemNumber == 1)
                throw new RuntimeException("Read error for Bed data item " + itemNumber, ex);
        }

        return bedFeatureList;
    }

    public void print() {

        log.debug("BigBed data for " + bedFeatureList.size() + " items");

        for(int index = 0; index < bedFeatureList.size(); ++index) {
            // BigBed data items print themselves
            bedFeatureList.get(index).print();
        }
    }
}



