All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.samtools.BinaryBAMShardIndexWriter Maven / Gradle / Ivy

package htsjdk.samtools;

import htsjdk.samtools.util.BinaryCodec;

import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

/**
 * Binary BAM index writer intended for sharded output, adapted from the HTSJDK BinaryBAMIndexWriter.
 * See https://github.com/samtools/htsjdk/blob/master/src/java/htsjdk/samtools/BinaryBAMIndexWriter.java
 *
 * <p>Unlike the writer it was adapted from, the index header (magic bytes followed by the
 * reference count) is only written when the writer is constructed with a positive reference
 * count, so that only one shard (normally the first) emits it.
 */
public class BinaryBAMShardIndexWriter implements BAMIndexWriter {
  private final BinaryCodec codec;

  /**
   * Creates a writer on top of the given stream and optionally writes the index header.
   *
   * @param nRef Number of reference sequences. If zero (or a negative value) is passed then the
   *     header is not written. This is useful in sharded writing as we only want the header
   *     written for the first shard.
   * @param output BAM index output stream. This stream will be closed when
   *     {@link #close()} is called on this writer.
   * @throws SAMException if the codec cannot be created or the header cannot be written
   */
  public BinaryBAMShardIndexWriter(final int nRef, final OutputStream output) {
    try {
      codec = new BinaryCodec(output);
      if (nRef > 0) {
        writeHeader(nRef);
      }
    } catch (final Exception e) {
      throw new SAMException("Exception opening output stream", e);
    }
  }

  /**
   * Writes the index content of one reference as binary output: the bins, the optional metadata
   * pseudo-bin, and then the linear index. A {@code null} content, or content without any
   * non-null bins, is written as an empty reference (zero bins, zero intervals).
   *
   * @param content index content of a single reference; may be {@code null}
   * @throws SAMException if flushing the underlying stream fails
   */
  @Override
  public void writeReference(final BAMIndexContent content) {
    if (content == null) {
      writeNullContent();
      return;
    }

    final BAMIndexContent.BinList bins = content.getBins();
    final int size = bins == null ? 0 : content.getNumberOfNonNullBins();
    if (size == 0) {
      writeNullContent();
      return;
    }

    writeBins(bins, size, content.getMetaData());
    writeLinearIndex(content.getLinearIndex());

    try {
      codec.getOutputStream().flush();
    } catch (final IOException e) {
      throw new SAMException("IOException in BinaryBAMIndexWriter reference " + content.getReferenceSequence(), e);
    }
  }

  /**
   * Writes out the count of records without coordinates.
   *
   * @param count number of records without coordinates; {@code null} is written as zero
   */
  @Override
  public void writeNoCoordinateRecordCount(final Long count) {
    codec.writeLong(count == null ? 0 : count);
  }

  /**
   * Closes the underlying codec.
   */
  @Override
  public void close() {
    codec.close();
  }

  /**
   * Writes the bin count followed by every regular bin and, when present, the metadata pseudo-bin.
   *
   * @param bins bins of the reference; note, bins will always be sorted
   * @param size number of non-null regular bins
   * @param metaData metadata of the reference, or {@code null} if there is none
   */
  private void writeBins(final BAMIndexContent.BinList bins, final int size, final BAMIndexMetaData metaData) {
    // The metadata pseudo-bin is counted as one additional bin when it is present.
    codec.writeInt(metaData == null ? size : size + 1);

    for (final Bin bin : bins) {
      // A bin numbered MAX_BINS carries metadata; it is emitted by writeChunkMetaData instead.
      if (bin.getBinNumber() != GenomicIndexUtil.MAX_BINS) {
        writeBin(bin);
      }
    }

    if (metaData != null) {
      writeChunkMetaData(metaData);
    }
  }

  /**
   * Writes the linear index: the interval count followed by exactly that many 64-bit entries.
   * Positions before the index start are written as zero so that the number of values always
   * matches the declared count, even when the entry array is absent.
   *
   * @param linearIndex linear index of the reference, or {@code null} if there is none
   */
  private void writeLinearIndex(final LinearIndex linearIndex) {
    final long[] entries = linearIndex == null ? null : linearIndex.getIndexEntries();
    final int indexStart = linearIndex == null ? 0 : linearIndex.getIndexStart();
    final int entryCount = entries == null ? 0 : entries.length;

    codec.writeInt(indexStart + entryCount);

    // since indexStart is usually 0, this is usually a no-op
    for (int i = 0; i < indexStart; i++) {
      codec.writeLong(0);
    }
    if (entries != null) {
      for (final long entry : entries) {
        codec.writeLong(entry);
      }
    }
  }

  /**
   * Writes one regular bin: its number, its chunk count, and the start/end of each chunk.
   *
   * @param bin bin to write; its number must be below {@code GenomicIndexUtil.MAX_BINS}
   * @throws SAMException if the bin number is not below {@code GenomicIndexUtil.MAX_BINS}
   */
  private void writeBin(final Bin bin) {
    final int binNumber = bin.getBinNumber();
    if (binNumber >= GenomicIndexUtil.MAX_BINS) {
      throw new SAMException("Unexpected bin number when writing bam index " + binNumber);
    }

    codec.writeInt(binNumber);

    final List<Chunk> chunkList = bin.getChunkList();
    if (chunkList == null) {
      codec.writeInt(0);
      return;
    }

    codec.writeInt(chunkList.size());
    for (final Chunk chunk : chunkList) {
      codec.writeLong(chunk.getChunkStart());
      codec.writeLong(chunk.getChunkEnd());
    }
  }

  /**
   * Writes the metadata as a pseudo-bin numbered {@code GenomicIndexUtil.MAX_BINS} holding two
   * chunks: (first offset, last offset) and (aligned record count, unaligned record count).
   *
   * @param metaData information describing numAligned records, numUnAligned, etc
   */
  private void writeChunkMetaData(final BAMIndexMetaData metaData) {
    codec.writeInt(GenomicIndexUtil.MAX_BINS);
    final int nChunk = 2;
    codec.writeInt(nChunk);
    codec.writeLong(metaData.getFirstOffset());
    codec.writeLong(metaData.getLastOffset());
    codec.writeLong(metaData.getAlignedRecordCount());
    codec.writeLong(metaData.getUnalignedRecordCount());
  }

  /**
   * Writes the index header: the magic bytes followed by the number of references.
   *
   * @param nRef number of reference sequences
   */
  private void writeHeader(final int nRef) {
    codec.writeBytes(BAMFileConstants.BAM_INDEX_MAGIC);
    codec.writeInt(nRef);
  }

  /**
   * Writes an empty reference entry.
   */
  private void writeNullContent() {
    // A single zero long covers both 32-bit counts at once: 0 bins, 0 intv.
    codec.writeLong(0);
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy