/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.io.file.tfile;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;

import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.fs.CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;
/**
 * Compression-related utilities for TFile.
 */
public final class Compression {
  static final Logger LOG = LoggerFactory.getLogger(Compression.class);

  /**
   * Prevent the instantiation of this class.
   */
  private Compression() {
    // nothing
  }
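  /**
   * A FilterOutputStream that treats each flush() as an end-of-block marker:
   * it finishes the wrapped CompressionOutputStream, flushes it, and then
   * resets its state so subsequent writes start a fresh compressed block.
   * The array variant of write() is forwarded directly to the wrapped stream
   * to avoid FilterOutputStream's default one-byte-at-a-time copy.
   */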
  static class FinishOnFlushCompressionStream extends FilterOutputStream {
    public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
      super(cout);
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      out.write(b, off, len);
    }

    @Override
    public void flush() throws IOException {
      CompressionOutputStream cout = (CompressionOutputStream) out;
      cout.finish();
      cout.flush();
      cout.resetState();
    }
  }
  /**
   * Compression algorithms.
   */
  public enum Algorithm {
    LZO(TFile.COMPRESSION_LZO) {
      private transient boolean checked = false;
      private transient ClassNotFoundException cnf;
      private transient boolean reinitCodecInTests;
      private static final String defaultClazz =
          "org.apache.hadoop.io.compress.LzoCodec";
      private transient String clazz;
      private transient CompressionCodec codec = null;

      private String getLzoCodecClass() {
        String extClazzConf = conf.get(CONF_LZO_CLASS);
        String extClazz = (extClazzConf != null) ?
            extClazzConf : System.getProperty(CONF_LZO_CLASS);
        return (extClazz != null) ? extClazz : defaultClazz;
      }

      @Override
      public synchronized boolean isSupported() {
        if (!checked || reinitCodecInTests) {
          checked = true;
          reinitCodecInTests = conf.getBoolean("test.reload.lzo.codec", false);
          clazz = getLzoCodecClass();
          try {
            LOG.info("Trying to load Lzo codec class: " + clazz);
            codec =
                (CompressionCodec) ReflectionUtils.newInstance(Class
                    .forName(clazz), conf);
          } catch (ClassNotFoundException e) {
            cnf = e;
          }
        }
        return codec != null;
      }

      @Override
      CompressionCodec getCodec() throws IOException {
        if (!isSupported()) {
          throw new IOException(String.format(
              "LZO codec %s=%s could not be loaded", CONF_LZO_CLASS, clazz),
              cnf);
        }
        return codec;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (!isSupported()) {
          throw new IOException(
              "LZO codec class not specified. Did you forget to set property "
                  + CONF_LZO_CLASS + "?");
        }
        InputStream bis1 = null;
        if (downStreamBufferSize > 0) {
          bis1 = new BufferedInputStream(downStream, downStreamBufferSize);
        } else {
          bis1 = downStream;
        }
        conf.setInt(IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY,
            IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT);
        CompressionInputStream cis =
            codec.createInputStream(bis1, decompressor);
        BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
        return bis2;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor compressor,
          int downStreamBufferSize) throws IOException {
        if (!isSupported()) {
          throw new IOException(
              "LZO codec class not specified. Did you forget to set property "
                  + CONF_LZO_CLASS + "?");
        }
        OutputStream bos1 = null;
        if (downStreamBufferSize > 0) {
          bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
        } else {
          bos1 = downStream;
        }
        conf.setInt(IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY,
            IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT);
        CompressionOutputStream cos =
            codec.createOutputStream(bos1, compressor);
        BufferedOutputStream bos2 =
            new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
                DATA_OBUF_SIZE);
        return bos2;
      }
    },
    GZ(TFile.COMPRESSION_GZ) {
      private transient DefaultCodec codec;

      @Override
      CompressionCodec getCodec() {
        if (codec == null) {
          codec = new DefaultCodec();
          codec.setConf(conf);
        }
        return codec;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        // Set the internal buffer size to read from down stream.
        if (downStreamBufferSize > 0) {
          codec.getConf().setInt(IO_FILE_BUFFER_SIZE_KEY, downStreamBufferSize);
        }
        CompressionInputStream cis =
            codec.createInputStream(downStream, decompressor);
        BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
        return bis2;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor compressor,
          int downStreamBufferSize) throws IOException {
        OutputStream bos1 = null;
        if (downStreamBufferSize > 0) {
          bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
        } else {
          bos1 = downStream;
        }
        codec.getConf().setInt(IO_FILE_BUFFER_SIZE_KEY, 32 * 1024);
        CompressionOutputStream cos =
            codec.createOutputStream(bos1, compressor);
        BufferedOutputStream bos2 =
            new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
                DATA_OBUF_SIZE);
        return bos2;
      }

      @Override
      public boolean isSupported() {
        return true;
      }
    },
    NONE(TFile.COMPRESSION_NONE) {
      @Override
      CompressionCodec getCodec() {
        return null;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedInputStream(downStream, downStreamBufferSize);
        }
        return downStream;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor compressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedOutputStream(downStream, downStreamBufferSize);
        }
        return downStream;
      }

      @Override
      public boolean isSupported() {
        return true;
      }
    };
    // We require that all compression-related settings be configured
    // statically in the Configuration object.
    protected static final Configuration conf = new Configuration();
    private final String compressName;
    // Data input buffer size to absorb small reads from the application.
    private static final int DATA_IBUF_SIZE = 1 * 1024;
    // Data output buffer size to absorb small writes from the application.
    private static final int DATA_OBUF_SIZE = 4 * 1024;
    public static final String CONF_LZO_CLASS =
        "io.compression.codec.lzo.class";
    Algorithm(String name) {
      this.compressName = name;
    }

    abstract CompressionCodec getCodec() throws IOException;

    public abstract InputStream createDecompressionStream(
        InputStream downStream, Decompressor decompressor,
        int downStreamBufferSize) throws IOException;

    public abstract OutputStream createCompressionStream(
        OutputStream downStream, Compressor compressor,
        int downStreamBufferSize) throws IOException;

    public abstract boolean isSupported();
    public Compressor getCompressor() throws IOException {
      CompressionCodec codec = getCodec();
      if (codec != null) {
        Compressor compressor = CodecPool.getCompressor(codec);
        if (compressor != null) {
          if (compressor.finished()) {
            // Somebody returned the compressor to the CodecPool but is still
            // using it.
            LOG.warn("Compressor obtained from CodecPool already finished()");
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Got a compressor: " + compressor.hashCode());
            }
          }
          /*
           * The following statement is necessary to get around bugs in 0.18
           * where a compressor is referenced after it has been returned to the
           * codec pool.
           */
          compressor.reset();
        }
        return compressor;
      }
      return null;
    }
    public void returnCompressor(Compressor compressor) {
      if (compressor != null) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Returned a compressor: " + compressor.hashCode());
        }
        CodecPool.returnCompressor(compressor);
      }
    }
    public Decompressor getDecompressor() throws IOException {
      CompressionCodec codec = getCodec();
      if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (decompressor != null) {
          if (decompressor.finished()) {
            // Somebody returned the decompressor to the CodecPool but is
            // still using it.
            LOG.warn("Decompressor obtained from CodecPool already finished()");
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Got a decompressor: " + decompressor.hashCode());
            }
          }
          /*
           * The following statement is necessary to get around bugs in 0.18
           * where a decompressor is referenced after it has been returned to
           * the codec pool.
           */
          decompressor.reset();
        }
        return decompressor;
      }
      return null;
    }
    public void returnDecompressor(Decompressor decompressor) {
      if (decompressor != null) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Returned a decompressor: " + decompressor.hashCode());
        }
        CodecPool.returnDecompressor(decompressor);
      }
    }

    public String getName() {
      return compressName;
    }
  }
  public static Algorithm getCompressionAlgorithmByName(String compressName) {
    Algorithm[] algos = Algorithm.class.getEnumConstants();
    for (Algorithm a : algos) {
      if (a.getName().equals(compressName)) {
        return a;
      }
    }
    throw new IllegalArgumentException(
        "Unsupported compression algorithm name: " + compressName);
  }
  static String[] getSupportedAlgorithms() {
    Algorithm[] algos = Algorithm.class.getEnumConstants();
    ArrayList<String> ret = new ArrayList<String>();
    for (Algorithm a : algos) {
      if (a.isSupported()) {
        ret.add(a.getName());
      }
    }
    return ret.toArray(new String[ret.size()]);
  }
}