All downloads are FREE. Search and download functionality uses the official Maven repository.

org.seqdoop.hadoop_bam.util.SAMOutputPreparer Maven / Gradle / Ivy

Go to download

A Java library for the manipulation of files in common bioinformatics formats using the Hadoop MapReduce framework.

There is a newer version: 7.10.0
Show newest version
// Copyright (c) 2012 Aalto University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.

// File created: 2012-07-26 14:36:03

package org.seqdoop.hadoop_bam.util;

import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.List;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.util.BlockCompressedOutputStream;

import org.seqdoop.hadoop_bam.SAMFormat;

public class SAMOutputPreparer {
	// Scratch buffer for encoding little-endian 32-bit integers. It is
	// shared per-instance, so instances are NOT safe for concurrent use.
	private final ByteBuffer buf;

	public SAMOutputPreparer() {
		// Enough room for a 32-bit integer.
		buf = ByteBuffer.wrap(new byte[4]);
		buf.order(ByteOrder.LITTLE_ENDIAN);
	}

	/** The four magic bytes that begin every BAM file: "BAM\1". */
	public static final byte[] BAM_MAGIC = {'B','A','M', 1};

	/** Prepares the given output stream for writing of SAMRecords in the given
	 * format. This includes writing the given SAM header and, in the case of
	 * BAM or CRAM, writing some further metadata as well as compressing everything
	 * written. Returns a new stream to replace the original: it will do the
	 * appropriate compression for BAM/CRAM files.
	 *
	 * @param out    the raw underlying output stream
	 * @param format one of SAM, BAM, or CRAM
	 * @param header the header to write at the start of the output
	 * @return the stream that records should subsequently be written to
	 * @throws IOException              if writing the header data fails
	 * @throws IllegalArgumentException if the format is not recognized
	 */
	public OutputStream prepareForRecords(
			OutputStream out, final SAMFormat format,
			final SAMFileHeader header)
		throws IOException {

		switch (format) {
			case SAM:
			case BAM:
				// SAM and BAM share the text-header path; for BAM the helper
				// additionally wraps the stream in BGZF compression and
				// writes the binary preamble.
				out = prepareSAMOrBAMStream(out, format, header);
				break;
			case CRAM:
				out = prepareCRAMStream(out, header);
				break;
			default:
				throw new IllegalArgumentException
					("Unsupported SAM file format, must be one of SAM, BAM or CRAM");
		}

		// Important for BAM: if the caller doesn't want to use the new stream
		// for some reason, the BlockCompressedOutputStream's buffer would never
		// be flushed.
		out.flush();
		return out;
	}

	/** Writes the CRAM file header for the given SAM header. CRAM does its
	 * own compression internally, so the original stream is returned. */
	private OutputStream prepareCRAMStream(
			OutputStream out, final SAMFileHeader header) throws IOException
	{
		CramIO.writeHeader(CramVersions.DEFAULT_CRAM_VERSION, out, header, null);
		return out;
	}

	/** Writes the SAM text header and, for BAM, the binary preamble:
	 * the "BAM\1" magic, the header-text length, and the reference-sequence
	 * dictionary. For BAM, the returned stream is a new
	 * BlockCompressedOutputStream wrapping the original; for SAM it is the
	 * original stream. */
	private OutputStream prepareSAMOrBAMStream(
			OutputStream out, final SAMFormat format,
			final SAMFileHeader header) throws IOException
	{
		final StringWriter sw = new StringWriter();
		new SAMTextHeaderCodec().encode(sw, header);

		// Encode explicitly as UTF-8: relying on the platform default
		// charset could corrupt non-ASCII header content, and the BAM
		// l_text field below must be a byte count, not a char count.
		final byte[] text = sw.toString().getBytes(StandardCharsets.UTF_8);

		if (format == SAMFormat.BAM) {
			out = new BlockCompressedOutputStream(out, null);
			out.write(BAM_MAGIC);
			writeInt32(out, text.length);
		}

		// Don't flush yet: in the case of BAM, we might be able to cram more
		// things into the current gzip block still.
		out.write(text);

		if (format == SAMFormat.BAM) {
			final List<SAMSequenceRecord> refs =
				header.getSequenceDictionary().getSequences();

			writeInt32(out, refs.size());

			// Each reference entry: l_name (byte length including the NUL
			// terminator), the NUL-terminated name, then l_ref.
			for (final SAMSequenceRecord ref : refs) {
				final byte[] name =
					ref.getSequenceName().getBytes(StandardCharsets.UTF_8);
				writeInt32(out, name.length + 1);
				out.write(name);
				out.write(0);
				writeInt32(out, ref.getSequenceLength());
			}
		}

		return out;
	}

	/** Writes n to out as a little-endian 32-bit integer. */
	private void writeInt32(final OutputStream out, final int n)
		throws IOException
	{
		buf.putInt(0, n);
		out.write(buf.array());
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy