All downloads are free. Search and download functionality uses the official Maven repository.

org.seqdoop.hadoop_bam.util.BGZFCodec Maven / Gradle / Ivy

Go to download

A Java library for the manipulation of files in common bioinformatics formats using the Hadoop MapReduce framework.

There is a newer version: 7.10.0
Show newest version
package org.seqdoop.hadoop_bam.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.io.compress.SplitCompressionInputStream;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

/**
 * A Hadoop {@link CompressionCodec} for the
 * BGZF compression format,
 * which reads and writes files with a .bgz suffix. There is no standard
 * suffix for BGZF-compressed files, and in fact .gz is commonly used, in
 * which case {@link BGZFEnhancedGzipCodec} should be used instead of this class.
 * 

* To use BGZFCodec, set it on the configuration object as follows. *

* {@code * conf.set("io.compression.codecs", BGZFCodec.class.getCanonicalName()) * } * @see BGZFEnhancedGzipCodec */ public class BGZFCodec extends GzipCodec implements SplittableCompressionCodec { public static final String DEFAULT_EXTENSION = ".bgz"; @Override public CompressionOutputStream createOutputStream(OutputStream out) throws IOException { return new BGZFCompressionOutputStream(out); } // compressors are not used, so ignore/return null @Override public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { return createOutputStream(out); // compressors are not used, so ignore } @Override public Class getCompressorType() { return null; // compressors are not used, so return null } @Override public Compressor createCompressor() { return null; // compressors are not used, so return null } @Override public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException { BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn); long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end); ((Seekable)seekableIn).seek(adjustedStart); return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end); } // fall back to GzipCodec for input streams without a start position @Override public String getDefaultExtension() { return DEFAULT_EXTENSION; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy