org.seqdoop.hadoop_bam.util.SAMOutputPreparer
A Java library for the manipulation of files in common bioinformatics formats using the Hadoop MapReduce framework.
// Copyright (c) 2012 Aalto University
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// File created: 2012-07-26 14:36:03
package org.seqdoop.hadoop_bam.util;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import org.seqdoop.hadoop_bam.SAMFormat;
public class SAMOutputPreparer {
private ByteBuffer buf;
public SAMOutputPreparer() {
// Enough room for a 32-bit integer.
buf = ByteBuffer.wrap(new byte[4]);
buf.order(ByteOrder.LITTLE_ENDIAN);
}
public static final byte[] BAM_MAGIC = {'B','A','M', 1};
/** Prepares the given output stream for the writing of SAMRecords in the
 * given format. This includes writing the given SAM header and, in the case
 * of BAM or CRAM, writing some further metadata. Returns a stream to replace
 * the original: for BAM it is a new stream that applies the appropriate
 * block compression; for SAM and CRAM the original stream is returned.
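 *
 * A minimal usage sketch (the destination stream and the header variable
 * here are hypothetical):
 * <pre>{@code
 * SAMFileHeader header = ...;                          // hypothetical header
 * OutputStream raw = new FileOutputStream("out.bam");  // hypothetical sink
 * OutputStream out =
 *     new SAMOutputPreparer().prepareForRecords(raw, SAMFormat.BAM, header);
 * // write encoded SAMRecords to 'out', then close it to flush the BGZF blocks
 * }</pre>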
*/
public OutputStream prepareForRecords(
OutputStream out, final SAMFormat format,
final SAMFileHeader header)
throws IOException {
    switch (format) {
        case SAM:
        case BAM:
            out = prepareSAMOrBAMStream(out, format, header);
            break;
        case CRAM:
            out = prepareCRAMStream(out, format, header);
            break;
        default:
            throw new IllegalArgumentException(
                "Unsupported SAM file format, must be one of SAM, BAM or CRAM");
    }
// Important for BAM: if the caller doesn't want to use the new stream
// for some reason, the BlockCompressedOutputStream's buffer would never
// be flushed.
out.flush();
return out;
}
private OutputStream prepareCRAMStream(
OutputStream out, final SAMFormat format,
final SAMFileHeader header) throws IOException
{
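    // Per htsjdk, CramIO.writeHeader emits the CRAM file definition (magic
    // bytes, version and file id) followed by the SAM header, so nothing
    // further is written here; CRAM container compression is handled later
    // by the CRAM record writer rather than by this stream.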
CramIO.writeHeader(CramVersions.DEFAULT_CRAM_VERSION, out, header, null);
return out;
}
private OutputStream prepareSAMOrBAMStream(
OutputStream out, final SAMFormat format,
final SAMFileHeader header) throws IOException
{
final StringWriter sw = new StringWriter();
new SAMTextHeaderCodec().encode(sw, header);
final String text = sw.toString();
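    // For BAM, wrap the stream in a BGZF compressor and emit the binary
    // header laid out in the BAM specification: the "BAM\1" magic,
    // l_text (int32), the SAM-style header text, n_ref (int32), and for
    // each reference sequence l_name (int32), the NUL-terminated name and
    // l_ref (int32); all integers are little-endian.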
if (format == SAMFormat.BAM) {
out = new BlockCompressedOutputStream(out, null);
out.write(BAM_MAGIC);
writeInt32(out, text.length());
}
writeString(out, text);
if (format == SAMFormat.BAM) {
        final List<SAMSequenceRecord> refs =
            header.getSequenceDictionary().getSequences();
writeInt32(out, refs.size());
for (final SAMSequenceRecord ref : refs) {
final String name = ref.getSequenceName();
writeInt32(out, name.length() + 1);
writeString(out, name);
out.write(0);
writeInt32(out, ref.getSequenceLength());
}
}
return out;
}
private static void writeString(final OutputStream out, final String s)
throws IOException
{
    // Don't flush the underlying stream yet, only the writer: in the case of
    // BAM, we might be able to cram more things into the gzip block still.
    // Use an explicit charset rather than the platform default; SAM header
    // text is ASCII, so the character counts written as lengths by the
    // caller match the encoded byte counts.
    final OutputStreamWriter w = new OutputStreamWriter(
        new FilterOutputStream(out) { @Override public void flush() {} },
        StandardCharsets.UTF_8);
w.write(s);
w.flush();
}
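// Writes n as a 4-byte little-endian integer (the byte order configured
// on 'buf' in the constructor), as the BAM binary layout requires.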
private void writeInt32(final OutputStream out, int n) throws IOException {
buf.putInt(0, n);
out.write(buf.array());
}
}