All downloads are free. The search and download functionality uses the official Maven repository.

io.projectglow.bgen.BgenHeaderWriter.scala Maven / Gradle / Ivy

/*
 * Copyright 2019 The Glow Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.projectglow.bgen

import com.google.common.io.LittleEndianDataOutputStream

/**
 * Writes the leading portion of a BGEN file: the 4-byte offset of the first variant data block,
 * the header block, and (when every sample has an ID) the sample identifier block.
 *
 * @param ledos       little-endian stream that receives the bytes
 * @param numVariants number of variants recorded in the header block
 * @param sampleIds   one entry per sample; the sample identifier block is written only when this
 *                    sequence is non-empty and every entry is defined
 */
private[bgen] class BgenHeaderWriter(
    ledos: LittleEndianDataOutputStream,
    numVariants: Long,
    sampleIds: Seq[Option[String]]) {

  import java.nio.charset.StandardCharsets

  import BgenRecordWriter._

  // 4 (L_H) + 4 (variants) + 4 (samples) + 4 (magic) + 0 (free data) + 4 (flags)
  private val HEADER_BLOCK_LENGTH = 20
  private val COMPRESSION_TYPE = 1 // zlib
  private val LAYOUT_TYPE = 2
  private val LAYOUT_OFFSET = 2
  private val SAMPLE_OFFSET = 31

  // Sample IDs are only stored when there is at least one sample and none of the IDs is missing.
  private val hasSampleIds: Boolean = sampleIds.nonEmpty && sampleIds.forall(_.isDefined)

  // Length in bytes of the sample identifier block (L_SI), or 0 when the block is omitted.
  private val sampleIdBlockLength: Int = {
    if (hasSampleIds) {
      // Count encoded UTF-8 bytes rather than String.length (UTF-16 code units): the two differ
      // for non-ASCII IDs, and an undersized length corrupts both L_SI and the variant offset.
      // NOTE(review): assumes writeUTF8String emits the UTF-8 encoding of the ID -- confirm.
      val encodedIdBytes = sampleIds.flatten.map(_.getBytes(StandardCharsets.UTF_8).length).sum
      // 4 bytes (block length) + 4 bytes (sample count) + a 2-byte length prefix per sample ID
      8 + 2 * sampleIds.length + encodedIdBytes
    } else {
      0
    }
  }

  /**
   *   Bit  | Value | Description
   * =======+=======+============================================
   *   0-1  |   1   | SNP blocks compressed using zlib
   * ------------------------------------------------------------
   *   2-5  |   2   | SNP blocks laid out according to Layout 2
   * ------------------------------------------------------------
   *   31   |  0/1  | 1 if sample IDs stored in file, 0 otherwise
   * ------------------------------------------------------------
   */
  private def writeFlags(): Unit = {
    val hasSampleIdsFlag = if (hasSampleIds) 1 else 0
    // The three fields occupy disjoint bit ranges, so they are combined with bitwise OR.
    val flags = COMPRESSION_TYPE |
      (LAYOUT_TYPE << LAYOUT_OFFSET) |
      (hasSampleIdsFlag << SAMPLE_OFFSET)
    ledos.writeInt(flags)
  }

  /**
   * # Bytes | Description
   * ========+=================================
   *    4    | Length of the header block = L_H
   * ------------------------------------------
   *    4    | Number of variants
   * ------------------------------------------
   *    4    | Number of samples
   * ------------------------------------------
   *    4    | Magic numbers (b, g, e, n)
   * ------------------------------------------
   *    0    | Free data
   * ------------------------------------------
   *    4    | Flags
   * ------------------------------------------
   */
  private def writeHeaderBlock(): Unit = {
    writeUnsignedInt(HEADER_BLOCK_LENGTH, ledos)
    // NOTE(review): toInt silently truncates counts that do not fit in 32 bits -- confirm that
    // callers never exceed the field width.
    writeUnsignedInt(numVariants.toInt, ledos)
    writeUnsignedInt(sampleIds.length, ledos)
    // Write magic numbers
    ledos.writeByte('b')
    ledos.writeByte('g')
    ledos.writeByte('e')
    ledos.writeByte('n')
    writeFlags()
  }

  /**
   * # Bytes | Description
   * ========+==================================
   *    4    | Length of bytes in block = L_(SI)
   * -------------------------------------------
   *    4    | Number of samples = N
   * -------------------------------------------
   *    2    | Length of sample ID 1
   * -------------------------------------------
   *  L_(s1) | Sample ID 1
   * -------------------------------------------
   *    2    | Length of sample ID 2
   * -------------------------------------------
   *  L_(s2) | Sample ID 2
   * -------------------------------------------
   *   ...   | ...
   * -------------------------------------------
   *    2    | Length of sample ID N
   * -------------------------------------------
   *  L_(sN) | Sample ID N
   * -------------------------------------------
   */
  private def maybeWriteSampleIdBlock(): Unit = {
    if (hasSampleIds) {
      writeUnsignedInt(sampleIdBlockLength, ledos)
      writeUnsignedInt(sampleIds.length, ledos)
      // hasSampleIds guarantees every entry is defined, so flatten drops nothing.
      // The `false` argument presumably selects the 2-byte length prefix -- verify against
      // BgenRecordWriter.writeUTF8String.
      sampleIds.flatten.foreach { sampleId =>
        writeUTF8String(sampleId, false, ledos)
      }
    }
  }

  /**
   * Writes the offset, the header block and, if present, the sample identifier block.
   *
   * # Bytes | Description
   * ========+=========================================================
   *    4    | Offset of the first byte of the first variant data block
   * ------------------------------------------------------------------
   *   L_H   | Header block
   * ------------------------------------------------------------------
   *  L_(SI) | Sample identifier block (if present)
   * ------------------------------------------------------------------
   */
  def writeHeader(): Unit = {
    // The offset value written here is L_H + L_(SI); it does not include these 4 bytes.
    writeUnsignedInt(HEADER_BLOCK_LENGTH + sampleIdBlockLength, ledos)
    writeHeaderBlock()
    maybeWriteSampleIdBlock()
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy