All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.io.file.tfile.Compression Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in org.apache.hadoop.shaded.com.liance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org.apache.hadoop.shaded.org.licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.file.tfile;

import java.org.apache.hadoop.shaded.io.BufferedInputStream;
import java.org.apache.hadoop.shaded.io.BufferedOutputStream;
import java.org.apache.hadoop.shaded.io.FilterOutputStream;
import java.org.apache.hadoop.shaded.io.IOException;
import java.org.apache.hadoop.shaded.io.InputStream;
import java.org.apache.hadoop.shaded.io.OutputStream;
import java.util.ArrayList;

import org.apache.hadoop.shaded.org.apache.hadoop.thirdparty.org.apache.hadoop.shaded.com.google.org.apache.hadoop.shaded.com.on.annotations.VisibleForTesting;
import org.apache.hadoop.shaded.org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.CodecPool;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.CompressionCodec;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.CompressionInputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.CompressionOutputStream;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.Compressor;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.Decompressor;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.DefaultCodec;
import org.apache.hadoop.shaded.org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.shaded.org.slf4j.Logger;
import org.apache.hadoop.shaded.org.slf4j.LoggerFactory;

import static org.apache.hadoop.shaded.org.apache.hadoop.fs.CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT;
import static org.apache.hadoop.shaded.org.apache.hadoop.fs.CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY;
import static org.apache.hadoop.shaded.org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY;

/**
 * Compression related stuff.
 */
public final class Compression {
  static final Logger LOG = LoggerFactory.getLogger(Compression.class);

  /**
   * Prevent the instantiation of class.
   */
  private Compression() {
    // nothing
  }

  static class FinishOnFlushCompressionStream extends FilterOutputStream {
    public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
      super(cout);
    }

    @Override
    public void write(byte b[], int off, int len) throws IOException {
      out.write(b, off, len);
    }

    @Override
    public void flush() throws IOException {
      CompressionOutputStream cout = (CompressionOutputStream) out;
      cout.finish();
      cout.flush();
      cout.resetState();
    }
  }

  /**
   * Compression algorithms.
   */
  public enum Algorithm {
    LZO(TFile.COMPRESSION_LZO) {
      private transient boolean checked = false;
      private transient ClassNotFoundException cnf;
      private transient boolean reinitCodecInTests;
      private static final String defaultClazz =
          "org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ress.LzoCodec";
      private transient String clazz;
      private transient CompressionCodec codec = null;

      private String getLzoCodecClass() {
        String extClazzConf = conf.get(CONF_LZO_CLASS);
        String extClazz = (extClazzConf != null) ?
            extClazzConf : System.getProperty(CONF_LZO_CLASS);
        return (extClazz != null) ? extClazz : defaultClazz;
      }

      @Override
      public synchronized boolean isSupported() {
        if (!checked || reinitCodecInTests) {
          checked = true;
          reinitCodecInTests = conf.getBoolean("test.reload.lzo.codec", false);
          clazz = getLzoCodecClass();
          try {
            LOG.info("Trying to load Lzo codec class: " + clazz);
            codec =
                (CompressionCodec) ReflectionUtils.newInstance(Class
                    .forName(clazz), conf);
          } catch (ClassNotFoundException e) {
            cnf = e;
          }
        }
        return codec != null;
      }

      @Override
      CompressionCodec getCodec() throws IOException {
        if (!isSupported()) {
          throw new IOException(String.format(
              "LZO codec %s=%s could not be loaded", CONF_LZO_CLASS, clazz),
                  cnf);
        }

        return codec;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (!isSupported()) {
          throw new IOException(
              "LZO codec class not specified. Did you forget to set property "
                  + CONF_LZO_CLASS + "?");
        }
        InputStream bis1 = null;
        if (downStreamBufferSize > 0) {
          bis1 = new BufferedInputStream(downStream, downStreamBufferSize);
        } else {
          bis1 = downStream;
        }
        conf.setInt(IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY,
            IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT);
        CompressionInputStream cis =
            codec.createInputStream(bis1, decompressor);
        BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
        return bis2;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor org.apache.hadoop.shaded.com.ressor,
          int downStreamBufferSize) throws IOException {
        if (!isSupported()) {
          throw new IOException(
              "LZO codec class not specified. Did you forget to set property "
                  + CONF_LZO_CLASS + "?");
        }
        OutputStream bos1 = null;
        if (downStreamBufferSize > 0) {
          bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
        } else {
          bos1 = downStream;
        }
        conf.setInt(IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_KEY,
            IO_COMPRESSION_CODEC_LZO_BUFFERSIZE_DEFAULT);
        CompressionOutputStream cos =
            codec.createOutputStream(bos1, org.apache.hadoop.shaded.com.ressor);
        BufferedOutputStream bos2 =
            new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
                DATA_OBUF_SIZE);
        return bos2;
      }
    },

    GZ(TFile.COMPRESSION_GZ) {
      private transient DefaultCodec codec;

      @Override
      CompressionCodec getCodec() {
        if (codec == null) {
          codec = new DefaultCodec();
          codec.setConf(conf);
        }

        return codec;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        // Set the internal buffer size to read from down stream.
        if (downStreamBufferSize > 0) {
          codec.getConf().setInt(IO_FILE_BUFFER_SIZE_KEY, downStreamBufferSize);
        }
        CompressionInputStream cis =
            codec.createInputStream(downStream, decompressor);
        BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
        return bis2;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor org.apache.hadoop.shaded.com.ressor,
          int downStreamBufferSize) throws IOException {
        OutputStream bos1 = null;
        if (downStreamBufferSize > 0) {
          bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
        } else {
          bos1 = downStream;
        }
        codec.getConf().setInt(IO_FILE_BUFFER_SIZE_KEY, 32 * 1024);
        CompressionOutputStream cos =
            codec.createOutputStream(bos1, org.apache.hadoop.shaded.com.ressor);
        BufferedOutputStream bos2 =
            new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
                DATA_OBUF_SIZE);
        return bos2;
      }

      @Override
      public boolean isSupported() {
        return true;
      }
    },

    NONE(TFile.COMPRESSION_NONE) {
      @Override
      CompressionCodec getCodec() {
        return null;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedInputStream(downStream, downStreamBufferSize);
        }
        return downStream;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor org.apache.hadoop.shaded.com.ressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedOutputStream(downStream, downStreamBufferSize);
        }

        return downStream;
      }

      @Override
      public boolean isSupported() {
        return true;
      }
    };

    // We require that all org.apache.hadoop.shaded.com.ression related settings are configured
    // statically in the Configuration object.
    protected static final Configuration conf = new Configuration();
    private final String org.apache.hadoop.shaded.com.ressName;
    // data input buffer size to absorb small reads from application.
    private static final int DATA_IBUF_SIZE = 1 * 1024;
    // data output buffer size to absorb small writes from application.
    private static final int DATA_OBUF_SIZE = 4 * 1024;
    public static final String CONF_LZO_CLASS =
        "org.apache.hadoop.shaded.io.org.apache.hadoop.shaded.com.ression.codec.lzo.class";

    Algorithm(String name) {
      this.org.apache.hadoop.shaded.com.ressName = name;
    }

    abstract CompressionCodec getCodec() throws IOException;

    public abstract InputStream createDecompressionStream(
        InputStream downStream, Decompressor decompressor,
        int downStreamBufferSize) throws IOException;

    public abstract OutputStream createCompressionStream(
        OutputStream downStream, Compressor org.apache.hadoop.shaded.com.ressor, int downStreamBufferSize)
        throws IOException;

    public abstract boolean isSupported();

    public Compressor getCompressor() throws IOException {
      CompressionCodec codec = getCodec();
      if (codec != null) {
        Compressor org.apache.hadoop.shaded.com.ressor = CodecPool.getCompressor(codec);
        if (org.apache.hadoop.shaded.com.ressor != null) {
          if (org.apache.hadoop.shaded.com.ressor.finished()) {
            // Somebody returns the org.apache.hadoop.shaded.com.ressor to CodecPool but is still using
            // it.
            LOG.warn("Compressor obtained from CodecPool already finished()");
          } else {
            if(LOG.isDebugEnabled()) {
              LOG.debug("Got a org.apache.hadoop.shaded.com.ressor: " + org.apache.hadoop.shaded.com.ressor.hashCode());
            }
          }
          /**
           * Following statement is necessary to get around bugs in 0.18 where a
           * org.apache.hadoop.shaded.com.ressor is referenced after returned back to the codec pool.
           */
          org.apache.hadoop.shaded.com.ressor.reset();
        }
        return org.apache.hadoop.shaded.com.ressor;
      }
      return null;
    }

    public void returnCompressor(Compressor org.apache.hadoop.shaded.com.ressor) {
      if (org.apache.hadoop.shaded.com.ressor != null) {
        if(LOG.isDebugEnabled()) {
          LOG.debug("Return a org.apache.hadoop.shaded.com.ressor: " + org.apache.hadoop.shaded.com.ressor.hashCode());
        }
        CodecPool.returnCompressor(org.apache.hadoop.shaded.com.ressor);
      }
    }

    public Decompressor getDecompressor() throws IOException {
      CompressionCodec codec = getCodec();
      if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (decompressor != null) {
          if (decompressor.finished()) {
            // Somebody returns the decompressor to CodecPool but is still using
            // it.
            LOG.warn("Deompressor obtained from CodecPool already finished()");
          } else {
            if(LOG.isDebugEnabled()) {
              LOG.debug("Got a decompressor: " + decompressor.hashCode());
            }
          }
          /**
           * Following statement is necessary to get around bugs in 0.18 where a
           * decompressor is referenced after returned back to the codec pool.
           */
          decompressor.reset();
        }
        return decompressor;
      }

      return null;
    }

    public void returnDecompressor(Decompressor decompressor) {
      if (decompressor != null) {
        if(LOG.isDebugEnabled()) {
          LOG.debug("Returned a decompressor: " + decompressor.hashCode());
        }
        CodecPool.returnDecompressor(decompressor);
      }
    }

    public String getName() {
      return org.apache.hadoop.shaded.com.ressName;
    }
  }

  public static Algorithm getCompressionAlgorithmByName(String org.apache.hadoop.shaded.com.ressName) {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    for (Algorithm a : algos) {
      if (a.getName().equals(org.apache.hadoop.shaded.com.ressName)) {
        return a;
      }
    }

    throw new IllegalArgumentException(
        "Unsupported org.apache.hadoop.shaded.com.ression algorithm name: " + org.apache.hadoop.shaded.com.ressName);
  }

  static String[] getSupportedAlgorithms() {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    ArrayList ret = new ArrayList();
    for (Algorithm a : algos) {
      if (a.isSupported()) {
        ret.add(a.getName());
      }
    }
    return ret.toArray(new String[ret.size()]);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy