All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fulcrumgenomics.fastq.FastqSource.scala Maven / Gradle / Ivy

The newest version!
/*
 * The MIT License
 *
 * Copyright (c) 2016 Fulcrum Genomics LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package com.fulcrumgenomics.fastq

import java.io._

import com.fulcrumgenomics.util.Io
import com.fulcrumgenomics.commons.CommonsDef.{PathToFastq, yieldAndThen}

import scala.io.Source

/**
  * Provides factory methods for creating a `FastqSource` from multiple types.
  */
object FastqSource {
  /** Creates a new fastq source from a sequence of lines. */
  def apply(lines: Iterable[String]): FastqSource = new FastqSource(lines.iterator)

  /** Creates a new fastq source from an iterator of lines. */
  def apply(lines: Iterator[String]): FastqSource = new FastqSource(lines)

  /** Creates a new fastq source from an input stream.
    *
    * The stream is wrapped in a `scala.io.Source` which is handed to the returned fastq source,
    * so that calling `close()` on the returned object also closes `stream`.
    */
  def apply(stream: InputStream): FastqSource = {
    val source = Source.fromInputStream(stream)
    new FastqSource(source.getLines(), Some(source))
  }

  /** Creates a new fastq source from a source; closing the fastq source closes `source`. */
  def apply(source: Source): FastqSource = new FastqSource(source.getLines(), Some(source))

  /** Creates a new fastq source from a File. */
  def apply(file: File): FastqSource = apply(path=file.toPath)

  /** Creates a new fastq source from a Path. */
  def apply(path: PathToFastq): FastqSource = apply(Io.readLines(path))

  /** Returns an iterator over multiple fastq files that ensures:
    *   1. Either all sources or no sources have more records
    *   2. That the next records from each of the sources have the same read name
    *
    * Both conditions are checked on each call to `next()`; a violation of either is reported
    * via `require`, i.e. as an `IllegalArgumentException`.
    *
    * @param sources a Seq of one or more FastqSource objects
    * @return an Iterator that returns a Seq of FastqRecord, one per source
    */
  def zipped(sources: Seq[FastqSource]): Iterator[Seq[FastqRecord]] = new Iterator[Seq[FastqRecord]] {
    require(sources.nonEmpty, "No sources provided")

    /** True while at least one of the sources still has a record. */
    def hasNext: Boolean = sources.exists(_.hasNext)

    /** Returns the next record from every source, in the same order as `sources`. */
    def next(): Seq[FastqRecord] = {
      if (!this.hasNext) throw new NoSuchElementException("Calling next() when hasNext() is false.")
      // hasNext guarantees at least one source has a record, so the sources are only in sync if
      // every one of them does.  Comparing against sources.head.hasNext would wrongly pass when
      // the first source is exhausted while a later one is not.
      require(sources.forall(_.hasNext), "Sources are out of sync.")
      val records = sources.map(_.next())
      // Check that the FASTQ records all have the same name
      require(records.forall(_.name == records.head.name), "Fastqs are out of sync, found read names: " + records.map(_.name).mkString(", "))
      records
    }
  }
}


/**
  * Reads fastq records from any text based source via a reader. Ensures that lines come in
  * the expected groupings of four lines with the correct headers, and that bases and qualities
  * are of the same length.  The underlying reader is closed automatically when EOF is reached.
  */
class FastqSource private(val lines: Iterator[String],
                          private[this] val source: Option[Closeable] = None)
  extends Iterator[FastqRecord] with Closeable {

  /** The record that the next call to `next()` will return, or None once the input is exhausted. */
  private var nextRecord: Option[FastqRecord] = fetchNextRecord()

  /** True if calling `next()` will yield another record, false otherwise. */
  override def hasNext: Boolean = this.nextRecord.isDefined

  /** Returns the next record if available, or throws a `NoSuchElementException` if none is available. */
  override def next(): FastqRecord = {
    val current = this.nextRecord.getOrElse(
      throw new NoSuchElementException("next() called on a FastqSource with no more records.")
    )
    yieldAndThen(current) { this.nextRecord = fetchNextRecord() }
  }

  /** Short hand to throw an illegal state exception. */
  private def illegal(msg: String): Nothing = throw new IllegalStateException(msg)

  /** Closes the underlying source, if one was provided at construction. */
  private def closeSource(): Unit = this.source.foreach(_.close())

  /** Attempts to read the next four lines from the input and parse them into a record.
    *
    * Returns None (after closing the underlying source) when no lines remain.  Throws an
    * `IllegalStateException` if the headers do not start with `@` and `+` respectively, if the
    * bases and qualities differ in length, or if fewer than four lines remain.
    */
  private def fetchNextRecord() : Option[FastqRecord] = {
    this.lines.take(4).toList match {
      case Nil =>
        // EOF: release the underlying source now.  A later explicit close() will call close on
        // it again, which relies on the source's close being safe to repeat.
        closeSource()
        None
      case header :: seq :: qheader :: quals :: Nil =>
        if (!header.startsWith("@"))    illegal(s"Fastq sequence header must start with @: ${header}")
        if (!qheader.startsWith("+"))   illegal(s"Fastq quality header must start with +: ${qheader}")
        if (seq.length != quals.length) illegal(s"Sequence and qualities not same length for record: ${header}")

        // Split the header at the first whitespace: the leading '@' is dropped, everything up to
        // the whitespace is the full name and everything after it is the comment.
        val (fullName, comment) = header.indexWhere(_.isWhitespace) match {
          case -1 => (header.drop(1), None)
          case i  => (header.substring(1, i), Some(header.substring(i+1)))
        }

        // A trailing "/<digit>" on the full name is a read number and is not part of the name.
        val hasReadNumber = fullName.length >= 2 && fullName(fullName.length - 2) == '/' && fullName.last.isDigit
        val (name, readNumber) =
          if (hasReadNumber) (fullName.dropRight(2), Some(fullName.last.asDigit))
          else (fullName, None)

        Some(FastqRecord(
          name       = name,
          bases      = seq,
          quals      = quals,
          comment    = comment,
          readNumber = readNumber
        ))
      case header :: _ =>
        // Between one and three lines were left: the input ends part way through a record.
        illegal(s"Fastq source terminated mid-record at ${header}")
    }
  }

  /** Closes the underlying reader; only necessary if EOF hasn't been reached. */
  override def close(): Unit = closeSource()
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy