![JAR search and dependency download from the Maven repository](/logo.png)
com.google.cloud.genomics.dataflow.readers.bam.BAMShard Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of google-genomics-dataflow Show documentation
Show all versions of google-genomics-dataflow Show documentation
Google Genomics Dataflow pipelines.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.genomics.dataflow.readers.bam;
import com.google.cloud.genomics.utils.Contig;
import com.google.common.collect.Lists;
import htsjdk.samtools.BAMFileIndexImpl;
import htsjdk.samtools.Chunk;
import htsjdk.samtools.SAMFileSpanImpl;
import java.io.Serializable;
import java.util.List;
/**
 * A shard of BAM data that is created during sharding and later used to drive the reading.
 *
 * <p>During shard generation a shard is built iteratively, being extended bin by bin
 * (see {@link #addBin}). At the end of the process the shard is finalized
 * (see {@link #finalize(BAMFileIndexImpl, long)}), which produces the
 * {@link SAMFileSpanImpl} holding all the chunks we want to read.
 *
 * <p>Instances are mutable and expose their state through public fields.
 */
public class BAMShard implements Serializable {
  /** The file value supplied to the constructor (presumably the path or URI of the BAM file). */
  public String file;

  /**
   * File span built from {@link #chunks}, or the span supplied to the constructor.
   * It is {@code null} for an incrementally built shard until the first call to
   * {@link #addBin} or {@link #finalize(BAMFileIndexImpl, long)}.
   */
  public SAMFileSpanImpl span;

  /**
   * Reference name and locus range of this shard. For an incrementally built shard the
   * end is -1 until {@link #addBin} or {@link #finalize(BAMFileIndexImpl, long)} sets it.
   */
  public Contig contig;

  /** Chunks collected so far; {@code null} for shards created with a known file span. */
  public List<Chunk> chunks;

  /** Cached result of {@link #approximateSizeInBytes()}; negative means "not computed yet". */
  public long cachedSizeInBytes = -1;

  /**
   * Begins a new shard with an empty chunk list and a starting locus.
   *
   * @param file value stored in {@link #file}
   * @param referenceName name of the reference this shard lies on
   * @param firstLocus first locus covered by the shard; the end locus starts out as -1
   */
  public BAMShard(String file, String referenceName, long firstLocus) {
    this.file = file;
    this.contig = new Contig(referenceName, firstLocus, -1);
    this.chunks = Lists.newLinkedList();
    this.span = null;
  }

  /**
   * Creates a shard with a known file span.
   *
   * <p>Such a shard is not expected to be extended: it has no chunk list, so
   * {@link #addBin} rejects it. This constructor is used for "degenerate" shards like
   * unmapped reads, or all reads in cases where we don't have an index.
   *
   * <p>NOTE(review): {@link #finalize(BAMFileIndexImpl, long)} performs no such check and
   * would replace the supplied span; confirm whether it should reject these shards too.
   *
   * @param file value stored in {@link #file}
   * @param span the file span to read
   * @param contig reference name and locus range to associate with the shard
   */
  public BAMShard(String file, SAMFileSpanImpl span, Contig contig) {
    this.file = file;
    this.span = span;
    this.contig = contig;
    this.chunks = null;
  }

  /**
   * Appends the chunks from another bin to the chunk list, moves the end position to
   * {@code lastLocus} and rebuilds the file span.
   *
   * @param chunksToAdd chunks of the bin being added
   * @param lastLocus new end locus of the shard
   * @throws IllegalStateException if this shard was created with a known file span and
   *     therefore has no chunk list to extend
   */
  public void addBin(List<Chunk> chunksToAdd, long lastLocus) {
    // Check explicitly (not with `assert`, which is disabled by default) and before any
    // field is modified, so a rejected call leaves the shard untouched.
    if (chunks == null) {
      throw new IllegalStateException(
          "Cannot add a bin to a shard created with a fixed file span: " + file);
    }
    contig = new Contig(contig.referenceName, contig.start, lastLocus);
    chunks.addAll(chunksToAdd);
    updateSpan();
  }

  /**
   * Generates the final list of chunks, now that the exact bounding loci of this shard
   * are known: the end position is set to {@code lastLocus}, the chunk list is replaced
   * by the chunks the index reports as overlapping the shard's locus range, and the file
   * span is rebuilt from them.
   *
   * <p>This is an overload that merely shares its name with {@link Object#finalize()}.
   *
   * @param index index queried for the chunks overlapping the shard's locus range
   * @param lastLocus final end locus of the shard
   * @return this shard, to allow call chaining
   */
  public BAMShard finalize(BAMFileIndexImpl index, long lastLocus) {
    contig = new Contig(contig.referenceName, contig.start, lastLocus);
    // The index lookup takes int positions, so the long loci are narrowed here;
    // values outside the int range would silently wrap.
    this.chunks = index.getChunksOverlapping(contig.referenceName,
        (int) contig.start, (int) contig.end);
    updateSpan();
    return this;
  }

  /**
   * Rebuilds the file span from the current chunk list, passed through
   * {@code Chunk.optimizeChunkList}, and invalidates the cached byte size.
   */
  private void updateSpan() {
    // NOTE(review): the second argument is the shard's start locus; confirm that this is
    // the value optimizeChunkList expects for that parameter.
    span = new SAMFileSpanImpl(Chunk.optimizeChunkList(chunks, this.contig.start));
    cachedSizeInBytes = -1;
  }

  /**
   * Returns the number of loci between the start and the end of the shard.
   *
   * @return {@code end - start}, or 0 while the end locus is not positive (e.g. still -1)
   */
  public long sizeInLoci() {
    return contig.end > 0 ? contig.end - contig.start : 0;
  }

  /**
   * Returns the approximate size of the file span in bytes. The value is computed on the
   * first call and cached until the span is rebuilt.
   *
   * <p>The span must already be set; on an incrementally built shard with no bins added
   * yet it is still {@code null} and this call fails.
   *
   * @return the value reported by the span's {@code approximateSizeInBytes()}
   */
  public long approximateSizeInBytes() {
    if (cachedSizeInBytes < 0) {
      cachedSizeInBytes = span.approximateSizeInBytes();
    }
    return cachedSizeInBytes;
  }

  /**
   * Describes the shard by its file, contig and locus size, plus the span size when that
   * has already been computed.
   */
  @Override
  public String toString() {
    String str = file + ": " + contig.toString() + ", locus size = " + sizeInLoci();
    if (cachedSizeInBytes >= 0) {
      // Only include the span size if it is already cached: computing it here would
      // mutate the object from within toString().
      str += ", span size = " + cachedSizeInBytes;
    }
    return str;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy