All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.projectglow.plink.BedFileIterator.scala Maven / Gradle / Ivy

/*
 * Copyright 2019 The Glow Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.projectglow.plink

import com.google.common.io.LittleEndianDataInputStream
import org.apache.hadoop.fs.FSDataInputStream

/**
 * Iterates over the genotype blocks of a BED file, yielding each block as a raw byte array. The
 * iterator assumes that the input streams are currently positioned at the beginning of a genotype
 * block.
 *
 * BED standard: https://www.cog-genomics.org/plink/1.9/formats#bed
 *
 * This class does not currently support the entire BED standard. Limitations:
 * - Only variant-major BEDs are supported.
 *
 * Buffer reuse: every call to `next()` fills and returns the same backing array (`byteArray`).
 * Callers that need to retain a block beyond the following `next()` call must copy it first.
 *
 * Resource handling: `underlyingStream` is closed as soon as `hasNext` observes that all blocks
 * have been consumed, and also when reading a block fails with an `IOException`.
 *
 * @param stream Data stream that records are read from. Must be little-endian.
 * @param underlyingStream Hadoop input stream that underlies the little-endian data stream. Only
 *                         used for cleaning up when there are no genotype blocks left (or when a
 *                         read fails).
 * @param numBlocks The number of genotype blocks to be read. `hasNext` will return `false` once
 *                  we've read `numBlocks` blocks.
 * @param blockSize The size of a block in bytes; equal to `ceil(numSamples / 4)`
 */
class BedFileIterator(
    stream: LittleEndianDataInputStream,
    underlyingStream: FSDataInputStream,
    numBlocks: Int,
    blockSize: Int)
    extends Iterator[Array[Byte]] {

  /** Number of blocks handed out (or attempted) so far. */
  var blockIdx = 0

  /** Reusable buffer that receives each block; the same instance is returned by every `next()`. */
  val byteArray: Array[Byte] = new Array[Byte](blockSize)

  /**
   * Reports whether another genotype block remains.
   *
   * As a side effect, the underlying Hadoop stream is closed once the iterator is exhausted.
   *
   * @return `true` while fewer than `numBlocks` blocks have been read
   */
  def hasNext(): Boolean = {
    val ret = blockIdx < numBlocks
    if (!ret) {
      cleanup()
    }
    ret
  }

  /**
   * Reads the next genotype block into the shared buffer.
   *
   * @return the shared `byteArray`, now holding the bytes of the block just read
   * @throws NoSuchElementException if all `numBlocks` blocks have already been consumed
   * @throws java.io.IOException if the block cannot be read in full; the underlying stream is
   *                             closed before the exception is rethrown
   */
  def next(): Array[Byte] = {
    // Guard against reading past this iterator's range or from an already-closed stream.
    if (!hasNext()) {
      throw new NoSuchElementException(
        s"No genotype blocks left: already read $blockIdx of $numBlocks blocks")
    }
    blockIdx += 1
    try {
      stream.readFully(byteArray)
    } catch {
      case e: java.io.IOException =>
        // Do not leak the Hadoop stream on a truncated or unreadable file. A failure while
        // closing is attached to the original error instead of masking it.
        try {
          cleanup()
        } catch {
          case closeError: java.io.IOException => e.addSuppressed(closeError)
        }
        throw e
    }
    byteArray
  }

  /** Closes the underlying Hadoop stream; may be invoked more than once. */
  private def cleanup(): Unit = {
    underlyingStream.close()
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy