All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.io.compress.Compression Maven / Gradle / Ivy

There is a newer version: 2.2.4_1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.hbase.io.compress;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.DoNotPool;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Compression related stuff.
 * Copied from hadoop-3315 tfile.
 */
@InterfaceAudience.Private
public final class Compression {
  private static final Logger LOG = LoggerFactory.getLogger(Compression.class);

  /**
   * Prevent the instantiation of class.
   */
  private Compression() {
    super();
  }

  static class FinishOnFlushCompressionStream extends FilterOutputStream {
    public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
      super(cout);
    }

    @Override
    public void write(byte b[], int off, int len) throws IOException {
      out.write(b, off, len);
    }

    @Override
    public void flush() throws IOException {
      CompressionOutputStream cout = (CompressionOutputStream) out;
      cout.finish();
      cout.flush();
      cout.resetState();
    }
  }

  /**
   * Returns the classloader to load the Codec class from.
   */
  private static ClassLoader getClassLoaderForCodec() {
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    if (cl == null) {
      cl = Compression.class.getClassLoader();
    }
    if (cl == null) {
      cl = ClassLoader.getSystemClassLoader();
    }
    if (cl == null) {
      throw new RuntimeException("A ClassLoader to load the Codec could not be determined");
    }
    return cl;
  }

  /**
   * Compression algorithms. The ordinal of these cannot change or else you
   * risk breaking all existing HFiles out there.  Even the ones that are
   * not compressed! (They use the NONE algorithm)
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
      value="SE_TRANSIENT_FIELD_NOT_RESTORED",
      justification="We are not serializing so doesn't apply (not sure why transient though)")
  @InterfaceAudience.Public
  public static enum Algorithm {
    LZO("lzo") {
      // Use base type to avoid compile-time dependencies.
      private volatile transient CompressionCodec lzoCodec;
      private final transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (lzoCodec == null) {
          synchronized (lock) {
            if (lzoCodec == null) {
              lzoCodec = buildCodec(conf);
            }
          }
        }
        return lzoCodec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class externalCodec =
              getClassLoaderForCodec().loadClass("com.hadoop.compression.lzo.LzoCodec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
              new Configuration(conf));
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    },
    GZ("gz") {
      private volatile transient GzipCodec codec;
      private final transient Object lock = new Object();

      @Override
      DefaultCodec getCodec(Configuration conf) {
        if (codec == null) {
          synchronized (lock) {
            if (codec == null) {
              codec = buildCodec(conf);
            }
          }
        }

        return codec;
      }

      private GzipCodec buildCodec(Configuration conf) {
        GzipCodec gzcodec = new ReusableStreamGzipCodec();
        gzcodec.setConf(new Configuration(conf));
        return gzcodec;
      }
    },

    NONE("none") {
      @Override
      DefaultCodec getCodec(Configuration conf) {
        return null;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedInputStream(downStream, downStreamBufferSize);
        }
        return downStream;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor compressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedOutputStream(downStream, downStreamBufferSize);
        }

        return downStream;
      }
    },
    SNAPPY("snappy") {
      // Use base type to avoid compile-time dependencies.
      private volatile transient CompressionCodec snappyCodec;
      private final transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (snappyCodec == null) {
          synchronized (lock) {
            if (snappyCodec == null) {
              snappyCodec = buildCodec(conf);
            }
          }
        }
        return snappyCodec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class externalCodec =
              getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.SnappyCodec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    },
    LZ4("lz4") {
      // Use base type to avoid compile-time dependencies.
      private volatile transient CompressionCodec lz4Codec;
      private final transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (lz4Codec == null) {
          synchronized (lock) {
            if (lz4Codec == null) {
              lz4Codec = buildCodec(conf);
            }
          }
        }
        return lz4Codec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class externalCodec =
              getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.Lz4Codec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    },
    BZIP2("bzip2") {
      // Use base type to avoid compile-time dependencies.
      private volatile transient CompressionCodec bzipCodec;
      private final transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (bzipCodec == null) {
          synchronized (lock) {
            if (bzipCodec == null) {
              bzipCodec = buildCodec(conf);
            }
          }
        }
        return bzipCodec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class externalCodec =
              getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.BZip2Codec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    },
    ZSTD("zstd") {
      // Use base type to avoid compile-time dependencies.
      private volatile transient CompressionCodec zStandardCodec;
      private final transient Object lock = new Object();

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (zStandardCodec == null) {
          synchronized (lock) {
            if (zStandardCodec == null) {
              zStandardCodec = buildCodec(conf);
            }
          }
        }
        return zStandardCodec;
      }

      private CompressionCodec buildCodec(Configuration conf) {
        try {
          Class externalCodec =
              getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.ZStandardCodec");
          return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
    };

    private final Configuration conf;
    private final String compressName;
    /** data input buffer size to absorb small reads from application. */
    private static final int DATA_IBUF_SIZE = 1 * 1024;
    /** data output buffer size to absorb small writes from application. */
    private static final int DATA_OBUF_SIZE = 4 * 1024;

    Algorithm(String name) {
      this.conf = new Configuration();
      this.conf.setBoolean("io.native.lib.available", true);
      this.compressName = name;
    }

    abstract CompressionCodec getCodec(Configuration conf);

    public InputStream createDecompressionStream(
        InputStream downStream, Decompressor decompressor,
        int downStreamBufferSize) throws IOException {
      CompressionCodec codec = getCodec(conf);
      // Set the internal buffer size to read from down stream.
      if (downStreamBufferSize > 0) {
        ((Configurable)codec).getConf().setInt("io.file.buffer.size",
            downStreamBufferSize);
      }
      CompressionInputStream cis =
          codec.createInputStream(downStream, decompressor);
      BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
      return bis2;

    }

    public OutputStream createCompressionStream(
        OutputStream downStream, Compressor compressor, int downStreamBufferSize)
        throws IOException {
      OutputStream bos1 = null;
      if (downStreamBufferSize > 0) {
        bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
      }
      else {
        bos1 = downStream;
      }
      CompressionOutputStream cos =
          createPlainCompressionStream(bos1, compressor);
      BufferedOutputStream bos2 =
          new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
              DATA_OBUF_SIZE);
      return bos2;
    }

    /**
     * Creates a compression stream without any additional wrapping into
     * buffering streams.
     */
    public CompressionOutputStream createPlainCompressionStream(
        OutputStream downStream, Compressor compressor) throws IOException {
      CompressionCodec codec = getCodec(conf);
      ((Configurable)codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
      return codec.createOutputStream(downStream, compressor);
    }

    public Compressor getCompressor() {
      CompressionCodec codec = getCodec(conf);
      if (codec != null) {
        Compressor compressor = CodecPool.getCompressor(codec);
        if (LOG.isTraceEnabled()) LOG.trace("Retrieved compressor " + compressor + " from pool.");
        if (compressor != null) {
          if (compressor.finished()) {
            // Somebody returns the compressor to CodecPool but is still using it.
            LOG.warn("Compressor obtained from CodecPool is already finished()");
          }
          compressor.reset();
        }
        return compressor;
      }
      return null;
    }

    public void returnCompressor(Compressor compressor) {
      if (compressor != null) {
        if (LOG.isTraceEnabled()) LOG.trace("Returning compressor " + compressor + " to pool.");
        CodecPool.returnCompressor(compressor);
      }
    }

    public Decompressor getDecompressor() {
      CompressionCodec codec = getCodec(conf);
      if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (LOG.isTraceEnabled()) LOG.trace("Retrieved decompressor " + decompressor + " from pool.");
        if (decompressor != null) {
          if (decompressor.finished()) {
            // Somebody returns the decompressor to CodecPool but is still using it.
            LOG.warn("Deompressor obtained from CodecPool is already finished()");
          }
          decompressor.reset();
        }
        return decompressor;
      }

      return null;
    }

    public void returnDecompressor(Decompressor decompressor) {
      if (decompressor != null) {
        if (LOG.isTraceEnabled()) LOG.trace("Returning decompressor " + decompressor + " to pool.");
        CodecPool.returnDecompressor(decompressor);
        if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
          if (LOG.isTraceEnabled()) LOG.trace("Ending decompressor " + decompressor);
          decompressor.end();
        }
      }
    }

    public String getName() {
      return compressName;
    }
  }

  public static Algorithm getCompressionAlgorithmByName(String compressName) {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    for (Algorithm a : algos) {
      if (a.getName().equals(compressName)) {
        return a;
      }
    }

    throw new IllegalArgumentException("Unsupported compression algorithm name: " + compressName);
  }

  /**
   * Get names of supported compression algorithms.
   *
   * @return Array of strings, each represents a supported compression
   * algorithm. Currently, the following compression algorithms are supported.
   */
  public static String[] getSupportedAlgorithms() {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    String[] ret = new String[algos.length];
    int i = 0;
    for (Algorithm a : algos) {
      ret[i++] = a.getName();
    }

    return ret;
  }

  /**
   * Decompresses data from the given stream using the configured compression
   * algorithm. It will throw an exception if the dest buffer does not have
   * enough space to hold the decompressed data.
   *
   * @param dest
   *          the output bytes buffer
   * @param destOffset
   *          start writing position of the output buffer
   * @param bufferedBoundedStream
   *          a stream to read compressed data from, bounded to the exact amount
   *          of compressed data
   * @param compressedSize
   *          compressed data size, header not included
   * @param uncompressedSize
   *          uncompressed data size, header not included
   * @param compressAlgo
   *          compression algorithm used
   * @throws IOException
   */
  public static void decompress(byte[] dest, int destOffset,
      InputStream bufferedBoundedStream, int compressedSize,
      int uncompressedSize, Compression.Algorithm compressAlgo)
      throws IOException {

    if (dest.length - destOffset < uncompressedSize) {
      throw new IllegalArgumentException(
          "Output buffer does not have enough space to hold "
              + uncompressedSize + " decompressed bytes, available: "
              + (dest.length - destOffset));
    }

    Decompressor decompressor = null;
    try {
      decompressor = compressAlgo.getDecompressor();
      InputStream is = compressAlgo.createDecompressionStream(
          bufferedBoundedStream, decompressor, 0);

      IOUtils.readFully(is, dest, destOffset, uncompressedSize);
      is.close();
    } finally {
      if (decompressor != null) {
        compressAlgo.returnDecompressor(decompressor);
      }
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy