
com.fulcrumgenomics.vcf.api.VcfSource.scala Maven / Gradle / Ivy
The newest version!
/*
* The MIT License
*
* Copyright (c) 2019 Fulcrum Genomics LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package com.fulcrumgenomics.vcf.api
import java.io.Closeable
import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.commons.collection.SelfClosingIterator
import com.fulcrumgenomics.commons.io.PathUtil
import htsjdk.samtools.util.CloseableIterator
import htsjdk.tribble.AbstractFeatureReader
import htsjdk.variant.bcf2.BCF2Codec
import htsjdk.variant.variantcontext.VariantContext
import htsjdk.variant.vcf.{VCFCodec, VCFFileReader, VCFHeader}
/**
* Provides a reader over a source of VCF-like data that could be a VCF file or a BCF file. Has facilities
* for iterating over the entire stream of variants as well as querying by genomic location if an
* index is present.
*
* @param reader the underlying HTSJDK [[AbstractFeatureReader]] from which the header and variants are read
* @param headerTransformer a method to transform the header after being read in.
*/
class VcfSource private(private val reader: AbstractFeatureReader[VariantContext, _],
                        private val headerTransformer: VcfHeader => VcfHeader) extends View[Variant] with Closeable {

  /**
    * The header associated with the VCF being read. The reader's header is cast to an HTSJDK [[VCFHeader]],
    * converted to the scala API's [[VcfHeader]], and then passed through `headerTransformer`.
    */
  val header: VcfHeader = headerTransformer(VcfConversions.toScalaHeader(reader.getHeader.asInstanceOf[VCFHeader]))

  /**
    * The type of iterator returned by both the [[iterator]] method as well as the [[query()]] method. Note that
    * [[SelfClosingIterator]] both self-closes when it hits the end of the iterator, _and_ extends
    * [[com.fulcrumgenomics.commons.collection.BetterBufferedIterator]].
    */
  type VariantIterator = SelfClosingIterator[Variant]

  /**
    * Wraps an iterator provided by HTSJDK into a SelfClosingIterator that transforms VariantContexts into Variants.
    *
    * @param it the HTSJDK iterator to wrap; it is closed when the returned iterator is closed
    * @return an iterator of [[Variant]]s converted using this source's (transformed) [[header]]
    */
  private def wrap(it: CloseableIterator[VariantContext]): VariantIterator = {
    new SelfClosingIterator(
      iter   = it.map(vc => VcfConversions.toScalaVariant(vc, header)),
      closer = () => it.close())
  }

  /**
    * Returns an iterator over the entire stream of variants. The returned iterator may be closed by invoking
    * `close()` on it, and will automatically close itself when exhausted. Only a single iterator at a time
    * is supported per [[VcfSource]], including iterators returned from [[query()]].
    */
  override def iterator: VariantIterator = wrap(reader.iterator())

  /** True if the VCF is sorted and indexed such that queries can be executed, false otherwise. */
  def isQueryable: Boolean = this.reader.isQueryable

  /**
    * Returns an iterator over variants overlapping the specified genomic region.
    *
    * The returned iterator may be closed by invoking `close()` on it, and will automatically close itself
    * when exhausted. Only a single iterator at a time is supported per [[com.fulcrumgenomics.vcf.api.VcfSource]],
    * including iterators returned from [[iterator]].
    *
    * @param chrom the name of the chromosome/contig to query
    * @param start the start of the query region, passed through unchanged to the underlying reader
    * @param end the end of the query region, passed through unchanged to the underlying reader
    * @return a closeable, self-closing iterator over the matching variants
    */
  def query(chrom: String, start: Int, end: Int): VariantIterator = wrap(reader.query(chrom, start, end))

  /** Closes the underlying reader. */
  override def close(): Unit = this.reader.safelyClose()

  /** Required for 2.12 compat. */
  protected def underlying: VcfSource = this
}
object VcfSource {
  /**
    * Manufactures a variant source for reading from the specified path. The index, if one exists, will be
    * auto-discovered based on the path to the VCF. The header is used as read, without transformation.
    *
    * @param path the path to a VCF, gzipped VCF or BCF file
    * @return a [[VcfSource]] for reading from the path given
    */
  def apply(path: PathToVcf): VcfSource = {
    this.apply(path=path, headerTransformer=identity)
  }

  /**
    * Manufactures a variant source for reading from the specified path. The index, if one exists, will be
    * auto-discovered based on the path to the VCF.
    *
    * If constructing the source fails after the underlying reader has been opened (e.g. because
    * `headerTransformer` throws), the reader is closed before the exception is re-thrown.
    *
    * @param path the path to a VCF, gzipped VCF or BCF file
    * @param headerTransformer a method to transform the header after being read in
    * @return a [[VcfSource]] for reading from the path given
    */
  def apply(path: PathToVcf, headerTransformer: VcfHeader => VcfHeader): VcfSource = {
    // A ".bcf" extension selects the BCF codec; everything else is read with the VCF codec.
    val codec = if (PathUtil.extensionOf(path).contains(".bcf")) {
      new BCF2Codec
    }
    else {
      val c = new VCFCodec
      c.disableOnTheFlyModifications()
      c
    }

    // The final argument is false so that an index is not required in order to open the file.
    val reader = AbstractFeatureReader.getFeatureReader(path.toUri.toString, codec, false)

    // The VcfSource constructor reads and transforms the header, which may throw; make sure the
    // already-opened reader is not leaked in that case.
    try {
      new VcfSource(reader, headerTransformer=headerTransformer)
    }
    catch {
      case scala.util.control.NonFatal(ex) =>
        reader.safelyClose()
        throw ex
    }
  }

  /**
    * Return the only sample in the VCF source otherwise raise an exception.
    *
    * @param source the source whose header is inspected
    * @return the name of the single sample in the source's header
    * @throws IllegalArgumentException if the header contains zero samples or more than one sample
    */
  def onlySample(source: VcfSource): String = {
    source.header.samples.toList match {
      case head :: Nil => head
      case _           => throw new IllegalArgumentException(s"Source is not single-sample. Found samples: ${source.header.samples.mkString(", ")}")
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy