/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.util.compression;

import static java.util.stream.Collectors.toList;

import alluxio.util.executor.ExecutorServiceFactories;
import alluxio.util.executor.ExecutorServiceUtils;
import alluxio.util.io.FileUtils;

import org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator;
import org.apache.commons.compress.archivers.zip.Zip64Mode;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.input.NullInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Enumeration;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;

/**
 * Utility methods for working with parallel zip archives.
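 *
 * <p>Compression fans file entries out to a thread pool through the commons-compress
 * {@link ParallelScatterZipCreator}; decompression submits one task per archive entry
 * to an {@link java.util.concurrent.ExecutorCompletionService}.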
 */
public class ParallelZipUtils {
  private static final Logger LOG = LoggerFactory.getLogger(ParallelZipUtils.class);

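  /**
   * Supplies a fresh temp-file backed {@link ScatterGatherBackingStore} for each
   * compression worker; entries are deflated into these stores before being
   * gathered into the final archive.
   */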
  private static class BasicBackingStoreSupplier implements ScatterGatherBackingStoreSupplier {
    final AtomicInteger mStoreNum = new AtomicInteger(0);

    @Override
    public ScatterGatherBackingStore get() throws IOException {
      final File tempFile = File.createTempFile("zipUtilsParallelScatter", "n"
          + mStoreNum.incrementAndGet());
      return new FileBasedScatterGatherBackingStore(tempFile);
    }
  }

  /**
   * Creates a zipped archive from the given path in parallel, streaming the data
   * to the given output stream.
   *
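   * <p>A minimal usage sketch; the paths, pool size, and compression level below are
   * illustrative values, not defaults of this class:
   * <pre>{@code
   * Path dir = Paths.get("/tmp/data");
   * try (OutputStream out = Files.newOutputStream(Paths.get("/tmp/backup.zip"))) {
   *   ParallelZipUtils.compress(dir, out, 4, Deflater.DEFAULT_COMPRESSION);
   * }
   * }</pre>
   *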
   * @param dirPath the directory to compress
   * @param outputStream the output stream to write the zipped data to
   * @param poolSize the number of threads used to compress in parallel
   * @param compressionLevel the compression level, as understood by {@link java.util.zip.Deflater}
   * @throws IOException if an I/O error occurs while creating the archive
   * @throws InterruptedException if the current thread is interrupted during compression
   */
  public static void compress(Path dirPath, OutputStream outputStream, int poolSize,
        int compressionLevel)
      throws IOException, InterruptedException {
    LOG.info("compress in parallel for path {}", dirPath);
    ExecutorService executor = ExecutorServiceFactories.fixedThreadPool(
        "parallel-zip-compress-pool", poolSize).create();

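    // Each worker thread deflates entries into its own temp-file backing store;
    // writeTo() below gathers the per-thread stores into a single archive.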
    ParallelScatterZipCreator parallelScatterZipCreator = new ParallelScatterZipCreator(executor,
        new BasicBackingStoreSupplier(), compressionLevel);
    ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputStream);
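    // Always emit Zip64 extensions so the archive and its entries may exceed 4 GiB.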
    zipArchiveOutputStream.setUseZip64(Zip64Mode.Always);

    try {
      try (final Stream<Path> stream = Files.walk(dirPath)) {
        for (Path subPath : stream.collect(toList())) {
          if (Thread.interrupted()) {
            throw new InterruptedException();
          }

          File file = subPath.toFile();

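          // The supplier is invoked lazily on a worker thread; a file that vanished
          // since the directory walk is replaced with an empty stream instead of failing.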
          final InputStreamSupplier inputStreamSupplier = () -> {
            try {
              if (file.exists() && file.isFile()) {
                return new FileInputStream(file);
              } else {
                return new NullInputStream(0);
              }
            } catch (FileNotFoundException e) {
              LOG.warn("Can't find file when parallel zip, path = {}", subPath);
              return new NullInputStream(0);
            }
          };

          String entryName = dirPath.relativize(subPath).toString();
          if (file.isDirectory()) {
            // Zip entry names always use "/"; a trailing slash marks a directory entry.
            entryName += "/";
          }

          ZipArchiveEntry zipArchiveEntry = new ZipArchiveEntry(entryName);
          zipArchiveEntry.setMethod(ZipArchiveEntry.DEFLATED);

          parallelScatterZipCreator.addArchiveEntry(zipArchiveEntry, inputStreamSupplier);
        }
      }

      parallelScatterZipCreator.writeTo(zipArchiveOutputStream);
      zipArchiveOutputStream.finish();
      zipArchiveOutputStream.flush();
    } catch (ExecutionException e) {
      LOG.error("Parallel compress rocksdb failed", e);
      throw new IOException(e);
    } finally {
      if (!executor.isTerminated()) {
        LOG.info("ParallelScatterZipCreator failed to shut down the thread pool, cleaning up now.");
        ExecutorServiceUtils.shutdownAndAwaitTermination(executor);
      }
    }

    LOG.info("Completed parallel compression for path {}, statistics: {}",
        dirPath, parallelScatterZipCreator.getStatisticsMessage().toString());
  }

  /**
   * Reads a zipped archive from the given backup path in parallel and extracts its
   * contents into the given directory.
   *
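   * <p>A minimal usage sketch; the paths and pool size are illustrative:
   * <pre>{@code
   * ParallelZipUtils.decompress(Paths.get("/tmp/restored"), "/tmp/backup.zip", 4);
   * }</pre>
   *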
   * @param dirPath the directory to write the decompressed contents to
   * @param backupPath the path of the zip archive to read from
   * @param poolSize the number of threads used to decompress in parallel
   * @throws IOException if reading the archive or writing its contents fails
   */
  public static void decompress(Path dirPath, String backupPath, int poolSize) throws IOException {
    LOG.info("decompress in parallel from path {} to {}", backupPath, dirPath);
    ExecutorService executor = ExecutorServiceFactories.fixedThreadPool(
        "parallel-zip-decompress-pool", poolSize).create();
    CompletionService<Boolean> completionService = new ExecutorCompletionService<>(executor);

    try (ZipFile zipFile = new ZipFile(backupPath)) {
      Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
      int taskCount = 0;

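      // Submit one extraction task per archive entry so entries are unzipped concurrently.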
      while (entries.hasMoreElements()) {
        taskCount += 1;
        ZipArchiveEntry entry = entries.nextElement();
        completionService.submit(() -> {
          unzipEntry(zipFile, entry, dirPath);
          return true;
        });
      }

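      // Wait for every submitted task; a failed extraction surfaces as an ExecutionException.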
      for (int i = 0; i < taskCount; i++) {
        completionService.take().get();
      }
    } catch (ExecutionException e) {
      LOG.error("Parallel decompress rocksdb fail", e);
      FileUtils.deletePathRecursively(dirPath.toString());
      throw new IOException(e);
    } catch (InterruptedException e) {
      LOG.info("Parallel decompress rocksdb interrupted");
      FileUtils.deletePathRecursively(dirPath.toString());
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } finally {
      ExecutorServiceUtils.shutdownAndAwaitTermination(executor);
    }
  }

  /**
   * Unzips a single entry from the given {@link ZipFile} into the target directory.
   *
   * @param zipFile the open zip file to read the entry from
   * @param entry the entry to extract
   * @param dirPath the directory to extract the entry into
   * @throws Exception if the entry cannot be read or written
   */
  private static void unzipEntry(ZipFile zipFile, ZipArchiveEntry entry, Path dirPath)
      throws Exception {
    File outputFile = new File(dirPath.toFile(), entry.getName());
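    // Ensure parent directories exist before extracting the entry.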
    outputFile.getParentFile().mkdirs();

    if (entry.isDirectory()) {
      outputFile.mkdir();
    } else {
      try (InputStream inputStream = zipFile.getInputStream(entry);
           FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) {
        IOUtils.copy(inputStream, fileOutputStream);
      }
    }
  }

  private ParallelZipUtils() {} // Utils class
}