org.hammerlab.genomics.readsets.io.InputConfig.scala Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of readsets_2.10 Show documentation

readsets

The newest version!

package org.hammerlab.genomics.readsets.io

import org.hammerlab.genomics.loci.parsing.{All, ParsedLoci}

/**
 * Configuring how/which reads are loaded can be an important optimization.
 *
 * Most of these fields are commonly used filters. For boolean fields, setting a field to true will result in filtering
 * on that field. The result is the intersection of the filters (i.e. reads must satisfy ALL filters).
 *
 * @param overlapsLociOpt if set, include only mapped reads that overlap the given loci
 * @param nonDuplicate include only reads that do not have the duplicate bit set
 * @param passedVendorQualityChecks include only reads that do not have the failedVendorQualityChecks bit set
 * @param isPaired include only reads are paired-end reads
 * @param minAlignmentQualityOpt Minimum Phred-scaled alignment score for a read
 * @param maxSplitSizeOpt Maximum on-disk size to allow Hadoop splits to be; useful to set below the on-disk block-size
 *                        for BAMs where the reads compress extra-well, resulting in unwieldy numbers of reads per
 *                        fixed-disk-size partition.
 */
case class InputConfig(overlapsLociOpt: Option[ParsedLoci],
                       nonDuplicate: Boolean,
                       passedVendorQualityChecks: Boolean,
                       isPaired: Boolean,
                       minAlignmentQualityOpt: Option[Int],
                       maxSplitSizeOpt: Option[Long]) {
  def loci = overlapsLociOpt.getOrElse(All)
}

object InputConfig {
  val empty =
    InputConfig(
      overlapsLociOpt = None,
      nonDuplicate = false,
      passedVendorQualityChecks = false,
      isPaired = false,
      minAlignmentQualityOpt = None,
      maxSplitSizeOpt = None
    )
}