
com.fulcrumgenomics.testing.SamBuilder.scala Maven / Gradle / Ivy
The newest version!
/*
* The MIT License
*
* Copyright (c) 2017 Fulcrum Genomics LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package com.fulcrumgenomics.testing
import com.fulcrumgenomics.FgBioDef._
import com.fulcrumgenomics.alignment.Cigar
import com.fulcrumgenomics.bam.api.SamRecord.{MissingBases, MissingQuals}
import com.fulcrumgenomics.bam.api.{SamOrder, SamRecord, SamSource, SamWriter}
import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata}
import com.fulcrumgenomics.testing.SamBuilder._
import htsjdk.samtools.SamPairUtil.PairOrientation
import htsjdk.samtools._
import java.nio.file.Files
import java.text.DecimalFormat
import java.util
import java.util.concurrent.atomic.AtomicLong
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
object SamBuilder {
sealed trait Strand {
val isNegative: Boolean
override def toString(): String = this.getClass.getSimpleName.replaceFirst("[$].*$", "")
}
object Plus extends Strand { val isNegative = false }
object Minus extends Strand { val isNegative = true }
val DefaultSampleName: String = "Sample"
val DefaultReadGroupId: String = "A"
}
/** Class to create sets of [[com.fulcrumgenomics.bam.api.SamRecord]]s for testing. */
class SamBuilder(val readLength: Int=100,
val baseQuality: Int=30,
val sampleName: String = DefaultSampleName,
val sort: Option[SamOrder] = None,
val readGroupId: Option[String] = None,
sd: Option[SequenceDictionary] = None,
seed: Int = 42
) extends Iterable[SamRecord] {
import com.fulcrumgenomics.fasta.Converters._
// Setup the header, sequence dictionary and read group
val header = new SAMFileHeader()
sort.getOrElse(SamOrder.Unsorted).applyTo(header)
{ // Build the default dictionary
val dict: SequenceDictionary = sd.getOrElse {
val seqs = (Range.inclusive(1, 22) ++ Seq("X", "Y", "M")).map { chr =>
SequenceMetadata(name="chr" + chr, length=200e6.toInt)
}
SequenceDictionary(seqs:_*)
}
header.setSequenceDictionary(dict.asSam)
}
/** Shorter accessor for the sequence dictionary. */
lazy val dict: SequenceDictionary = header.getSequenceDictionary.fromSam
val rg = new SAMReadGroupRecord(readGroupId.getOrElse(DefaultReadGroupId))
rg.setSample(sampleName)
header.addReadGroup(rg)
private val random = new Random(seed)
private val bases = "ACGT".toCharArray
private val counter = new AtomicLong(0)
private val format = new DecimalFormat("0000")
/** The collection of records being accumulated. */
private val records = ArrayBuffer[SamRecord]()
/** The default string of qualities to use. */
protected val defaultQuals: String = (33 + baseQuality).toChar.toString * readLength
/** Generate a sequential name. */
protected def nextName: String = format.format(counter.getAndIncrement())
/** Generate a random sequence of bases of length readLength. */
protected def randomBases: String = {
val chs = new Array[Char](readLength)
forloop (from=0, until=readLength) { i =>
chs(i) = bases(random.nextInt(bases.length))
}
new String(chs)
}
/** Adds a pair of reads to the file. */
def addPair(name: String = nextName,
bases1 : String = randomBases,
bases2: String = randomBases,
quals1: String = defaultQuals,
quals2: String = defaultQuals,
contig: Int = 0,
contig2: Option[Int] = None,
start1: Int = SAMRecord.NO_ALIGNMENT_START,
start2: Int = SAMRecord.NO_ALIGNMENT_START,
unmapped1: Boolean = false,
unmapped2: Boolean = false,
cigar1: String = s"${readLength}M",
cigar2: String = s"${readLength}M",
mapq1: Int = 60,
mapq2: Int = 60,
strand1: Strand = Plus,
strand2: Strand = Minus,
attrs: Map[String,Any] = Map.empty
): Seq[SamRecord] = {
val cig1 = Cigar(cigar1)
val cig2 = Cigar(cigar2)
require(bases1 == MissingBases || quals1 == MissingQuals || bases1.length == quals1.length, "bases1 and quals1 were different lengths.")
require(bases2 == MissingBases || quals2 == MissingQuals || bases2.length == quals2.length, "bases2 and quals2 were different lengths.")
require(unmapped1 || bases1 == MissingBases || bases1.length == cig1.lengthOnQuery, "bases1 doesn't agree with cigar on length.")
require(unmapped2 || bases2 == MissingBases || bases2.length == cig2.lengthOnQuery, "bases2 doesn't agree with cigar on length.")
require(unmapped1 || quals1 == MissingQuals || quals1.length == cig1.lengthOnQuery, "quals1 doesn't agree with cigar on length.")
require(unmapped2 || quals2 == MissingQuals || quals2.length == cig2.lengthOnQuery, "quals2 doesn't agree with cigar on length.")
val r1 = SamRecord(header)
r1.pf = true
r1.name = name
r1.bases = bases1
r1.quals = quals1
r1.refIndex = if (start1 == SAMRecord.NO_ALIGNMENT_START) SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX else contig
r1.start = start1
r1.positiveStrand = strand1 == Plus
r1.paired = true
r1.firstOfPair = true
r1.unmapped = unmapped1
if (!unmapped1) {
r1.cigar = cig1
r1.mapq = mapq1
}
r1("RG") = this.rg.getId
attrs.foreach { case (key, value) => r1(key) = value }
val r2 = SamRecord(header)
r2.pf = true
r2.name = name
r2.bases = bases2
r2.quals = quals2
r2.refIndex = if (start2 == SAMRecord.NO_ALIGNMENT_START) SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX else contig2.getOrElse(contig)
r2.start = start2
r2.positiveStrand = strand2 == Plus
r2.paired = true
r2.secondOfPair = true
r2.unmapped = unmapped2
if (!unmapped2) {
r2.cigar = cig2
r2.mapq = mapq2
}
r2("RG") = this.rg.getId
attrs.foreach { case (key, value) => r2(key) = value }
val orientations = util.Arrays.asList(PairOrientation.values(): _*)
SamPairUtil.setProperPairAndMateInfo(r1.asSam, r2.asSam, orientations, true)
val recs = Seq(r1, r2)
this.records ++= recs
recs
}
/** Adds a non-paired read. Returns an Option for convenience of calling map/foreach etc. */
def addFrag(name: String = nextName,
bases: String = randomBases,
quals: String = defaultQuals,
contig: Int = 0,
start: Int = SAMRecord.NO_ALIGNMENT_START,
unmapped: Boolean = false,
cigar: String = s"${readLength}M",
mapq: Int = 60,
strand: Strand = Plus,
attrs: Map[String,Any] = Map.empty) : Option[SamRecord] = {
val cig = Cigar.fromSam(cigar)
require(bases.length == quals.length, "bases and quals must be same length.")
require((bases == MissingBases && quals == MissingQuals) || (bases != MissingBases && quals != MissingQuals),
s"Bases and quals must both either be missing or not missing: bases=$bases quals=$quals")
require(unmapped || bases == SamRecord.MissingBases || bases.length == cig.lengthOnQuery, "bases and cigar disagree on length")
val r1 = SamRecord(header)
r1.pf = true
r1.name = name
r1.bases = bases
r1.quals = quals
r1.refIndex = if (start == SAMRecord.NO_ALIGNMENT_START) SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX else contig
r1.start = start
r1.positiveStrand = strand == Plus
r1.unmapped = unmapped
if (!unmapped) {
r1.cigar = cig
r1.mapq = mapq
}
r1("RG") = this.rg.getId
attrs.foreach { case (key, value) => r1(key) = value }
this.records += r1
Some(r1)
}
/** Returns an iterator over the records that have been built. */
override def iterator: Iterator[SamRecord] = this.sort match {
case None => this.records.iterator
case Some(so) if so == SamOrder.Unsorted || so == SamOrder.Unknown => this.records.iterator
case Some(so) => this.records.toIndexedSeq.sortBy(so.sortkey).iterator
}
/** Returns the number of records that have been built. */
override def size: Int = this.records.size
/** Adds all the records from another builder to this one. */
def ++= (that: Iterable[SamRecord]) : Unit = this.records ++= that
/** Writes the contents of the record set to the provided file path. */
def write(path: PathToBam) : Unit = {
val writer = SamWriter(path, header, sort=sort)
writer ++= this.records
writer.close()
}
/** Writes the contents to a temporary file that will be deleted when the JVM exits. */
def toTempFile(deleteOnExit: Boolean = true): PathToBam = {
val path = Files.createTempFile("SamRecordSet.", ".bam")
if (deleteOnExit) path.toFile.deleteOnExit()
write(path)
path
}
/** Creates a SamReader over the records stored in a temporary file. */
def toSource: SamSource = {
val bam = toTempFile()
SamSource(bam)
}
/** Clears the records built by this builder. */
def clear(): Unit = this.records.clear()
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy