htsjdk.samtools.cram.CRAIIndexMerger Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of htsjdk Show documentation

A Java API for high-throughput sequencing data (HTS) formats

There is a newer version: 4.1.3

package htsjdk.samtools.cram;

import htsjdk.samtools.IndexMerger;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Merges CRAM index files for (headerless) parts of a CRAM file into a single index file.
 *
 * <p>A partitioned CRAM is a directory containing the following files:
 * <ol>
 *     <li>A file named <i>header</i> containing all header bytes (CRAM header and CRAM container
 *     containing the BAM header).</li>
 *     <li>Zero or more files named <i>part-00000</i>, <i>part-00001</i>, ... etc, containing CRAM
 *     containers.</li>
 *     <li>A file named <i>terminator</i> containing a CRAM end-of-file marker container.</li>
 * </ol>
 *
 * <p>If an index is required, a CRAM index can be generated for each (headerless) part file. These
 * files should be named <i>.part-00000.crai</i>, <i>.part-00001.crai</i>, ... etc. Note the leading
 * <i>.</i> to make the files hidden.
 *
 * <p>This format has the following properties:
 * <ul>
 *     <li>Parts and their indexes may be written in parallel, since one part file can be written
 *     independently of the others.</li>
 *     <li>A CRAM file can be created from a partitioned CRAM file by merging all the non-hidden
 *     files (<i>header</i>, <i>part-00000</i>, <i>part-00001</i>, ..., <i>terminator</i>).</li>
 *     <li>A CRAM index can be created from a partitioned CRAM file by merging all of the hidden
 *     files with a <i>.crai</i> suffix. Note that this is not a simple file concatenation
 *     operation: the container start offset of every entry has to be shifted by the number of
 *     bytes that precede its part in the merged file, which is what this class does.</li>
 * </ul>
 */
public final class CRAIIndexMerger extends IndexMerger {

  /** GZIP-compressing sink that every shifted index entry is written to. */
  private final GZIPOutputStream compressedOut;

  /** Amount added to the container start offset of entries from the next part to be processed. */
  private long offset;

  /**
   * Creates a merger that writes a GZIP-compressed merged index to {@code out}.
   *
   * @param out the stream that receives the merged index; it is wrapped in a buffered GZIP stream
   *     and is closed by {@link #finish(long)}
   * @param headerLength the initial shift applied to the entries of the first part
   * @throws IOException if the GZIP stream cannot be created on top of {@code out}
   */
  public CRAIIndexMerger(final OutputStream out, final long headerLength) throws IOException {
    super(out, headerLength);
    this.compressedOut = new GZIPOutputStream(new BufferedOutputStream(out));
    this.offset = headerLength;
  }

  /**
   * Writes every entry of a part's index to the merged index, with its container start offset
   * shifted by the running offset, then advances the running offset by {@code partLength}.
   *
   * @param index the index of the part being merged
   * @param partLength the amount by which the running offset grows after this part
   */
  @Override
  public void processIndex(CRAIIndex index, long partLength) {
    index.getCRAIEntries()
        .forEach(entry -> shift(entry, offset).writeToStream(compressedOut));
    // Entries of the following part start after everything processed so far.
    offset += partLength;
  }

  /**
   * Returns a copy of {@code entry} whose container start byte offset is increased by
   * {@code offset}; all other fields are carried over unchanged.
   */
  private static CRAIEntry shift(CRAIEntry entry, long offset) {
    return new CRAIEntry(
        entry.getSequenceId(),
        entry.getAlignmentStart(),
        entry.getAlignmentSpan(),
        entry.getContainerStartByteOffset() + offset,
        entry.getSliceByteOffsetFromCompressionHeaderStart(),
        entry.getSliceByteSize());
  }

  /**
   * Flushes and closes the merged index stream, which also closes the stream passed to the
   * constructor.
   *
   * @param dataFileLength not used by this implementation
   * @throws IOException if flushing or closing the stream fails
   */
  @Override
  public void finish(long dataFileLength) throws IOException {
    // try-with-resources guarantees close() still runs when flush() throws, so a failed flush
    // cannot leave the compressed stream (and the stream it wraps) open.
    try (final GZIPOutputStream stream = compressedOut) {
      stream.flush();
    }
  }
}