All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator Maven / Gradle / Ivy

Go to download

Apache Commons Compress software defines an API for working with compression and archive formats. These include: bzip2, gzip, pack200, lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.

There is a newer version: 62
Show newest version
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
package org.apache.commons.compress.archivers.zip;

import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;

import java.io.File;
import java.io.IOException;
import java.util.Deque;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.Deflater;

import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest;

/**
 * Creates a zip in parallel by using multiple threadlocal {@link ScatterZipOutputStream} instances.
 * 

* Note that until 1.18, this class generally made no guarantees about the order of things written to * the output file. Things that needed to come in a specific order (manifests, directories) * had to be handled by the client of this class, usually by writing these things to the * {@link ZipArchiveOutputStream} before calling {@link #writeTo writeTo} on this class.

*

* The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of * memory model consistency, this will be shut down by this class prior to completion. *

* @since 1.10 */ public class ParallelScatterZipCreator { private final Deque streams = new ConcurrentLinkedDeque<>(); private final ExecutorService es; private final ScatterGatherBackingStoreSupplier backingStoreSupplier; private final Deque> futures = new ConcurrentLinkedDeque<>(); private final long startedAt = System.currentTimeMillis(); private long compressionDoneAt = 0; private long scatterDoneAt; private static class DefaultBackingStoreSupplier implements ScatterGatherBackingStoreSupplier { final AtomicInteger storeNum = new AtomicInteger(0); @Override public ScatterGatherBackingStore get() throws IOException { final File tempFile = File.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet()); return new FileBasedScatterGatherBackingStore(tempFile); } } private ScatterZipOutputStream createDeferred(final ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier) throws IOException { final ScatterGatherBackingStore bs = scatterGatherBackingStoreSupplier.get(); // lifecycle is bound to the ScatterZipOutputStream returned final StreamCompressor sc = StreamCompressor.create(Deflater.DEFAULT_COMPRESSION, bs); //NOSONAR return new ScatterZipOutputStream(bs, sc); } private final ThreadLocal tlScatterStreams = new ThreadLocal() { @Override protected ScatterZipOutputStream initialValue() { try { final ScatterZipOutputStream scatterStream = createDeferred(backingStoreSupplier); streams.add(scatterStream); return scatterStream; } catch (final IOException e) { throw new RuntimeException(e); //NOSONAR } } }; /** * Create a ParallelScatterZipCreator with default threads, which is set to the number of available * processors, as defined by {@link java.lang.Runtime#availableProcessors} */ public ParallelScatterZipCreator() { this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors())); } /** * Create a ParallelScatterZipCreator * * @param executorService The executorService to use for parallel scheduling. 
For technical reasons, * this will be shut down by this class. */ public ParallelScatterZipCreator(final ExecutorService executorService) { this(executorService, new DefaultBackingStoreSupplier()); } /** * Create a ParallelScatterZipCreator * * @param executorService The executorService to use. For technical reasons, this will be shut down * by this class. * @param backingStoreSupplier The supplier of backing store which shall be used */ public ParallelScatterZipCreator(final ExecutorService executorService, final ScatterGatherBackingStoreSupplier backingStoreSupplier) { this.backingStoreSupplier = backingStoreSupplier; es = executorService; } /** * Adds an archive entry to this archive. *

* This method is expected to be called from a single client thread *

* * @param zipArchiveEntry The entry to add. * @param source The source input stream supplier */ public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) { submitStreamAwareCallable(createCallable(zipArchiveEntry, source)); } /** * Adds an archive entry to this archive. *

* This method is expected to be called from a single client thread *

* * @param zipArchiveEntryRequestSupplier Should supply the entry to be added. * @since 1.13 */ public void addArchiveEntry(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) { submitStreamAwareCallable(createCallable(zipArchiveEntryRequestSupplier)); } /** * Submit a callable for compression. * * @see ParallelScatterZipCreator#createCallable for details of if/when to use this. * * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller. */ public final void submit(final Callable callable) { submitStreamAwareCallable(new Callable() { @Override public ScatterZipOutputStream call() throws Exception { callable.call(); return tlScatterStreams.get(); } }); } /** * Submit a callable for compression. * * @see ParallelScatterZipCreator#createCallable for details of if/when to use this. * * @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller. * @since 1.19 */ public final void submitStreamAwareCallable(final Callable callable) { futures.add(es.submit(callable)); } /** * Create a callable that will compress the given archive entry. * *

This method is expected to be called from a single client thread.

* * Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and {@link #submitStreamAwareCallable submitStreamAwareCallable}. * The most common use case for using {@link #createCallable createCallable} and {@link #submitStreamAwareCallable submitStreamAwareCallable} from a * client is if you want to wrap the callable in something that can be prioritized by the supplied * {@link ExecutorService}, for instance to process large or slow files first. * Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client. * * @param zipArchiveEntry The entry to add. * @param source The source input stream supplier * @return A callable that should subsequently passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The * value of this callable is not used, but any exceptions happening inside the compression * will be propagated through the callable. */ public final Callable createCallable(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) { final int method = zipArchiveEntry.getMethod(); if (method == ZipMethod.UNKNOWN_CODE) { throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry); } final ZipArchiveEntryRequest zipArchiveEntryRequest = createZipArchiveEntryRequest(zipArchiveEntry, source); return new Callable() { @Override public ScatterZipOutputStream call() throws Exception { ScatterZipOutputStream scatterStream = tlScatterStreams.get(); scatterStream.addArchiveEntry(zipArchiveEntryRequest); return scatterStream; } }; } /** * Create a callable that will compress archive entry supplied by {@link ZipArchiveEntryRequestSupplier}. * *

This method is expected to be called from a single client thread.

* * The same as {@link #createCallable(ZipArchiveEntry, InputStreamSupplier)}, but the archive entry * to be added is supplied by a {@link ZipArchiveEntryRequestSupplier}. * * @see #createCallable(ZipArchiveEntry, InputStreamSupplier) * * @param zipArchiveEntryRequestSupplier Should supply the entry to be added. * @return A callable that should subsequently passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The * value of this callable is not used, but any exceptions happening inside the compression * will be propagated through the callable. * @since 1.13 */ public final Callable createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) { return new Callable() { @Override public ScatterZipOutputStream call() throws Exception { ScatterZipOutputStream scatterStream = tlScatterStreams.get(); scatterStream.addArchiveEntry(zipArchiveEntryRequestSupplier.get()); return scatterStream; } }; } /** * Write the contents this to the target {@link ZipArchiveOutputStream}. *

* It may be beneficial to write things like directories and manifest files to the targetStream * before calling this method. *

* *

Calling this method will shut down the {@link ExecutorService} used by this class. If any of the {@link * Callable}s {@link #submitStreamAwareCallable submit}ted to this instance throws an exception, the archive can not be created properly and * this method will throw an exception.

* * @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams * @throws IOException If writing fails * @throws InterruptedException If we get interrupted * @throws ExecutionException If something happens in the parallel execution */ public void writeTo(final ZipArchiveOutputStream targetStream) throws IOException, InterruptedException, ExecutionException { try { // Make sure we catch any exceptions from parallel phase try { for (final Future future : futures) { future.get(); } } finally { es.shutdown(); } es.awaitTermination(1000 * 60L, TimeUnit.SECONDS); // == Infinity. We really *must* wait for this to complete // It is important that all threads terminate before we go on, ensure happens-before relationship compressionDoneAt = System.currentTimeMillis(); for (final Future future : futures) { ScatterZipOutputStream scatterStream = future.get(); scatterStream.zipEntryWriter().writeNextZipEntry(targetStream); } for (final ScatterZipOutputStream scatterStream : streams) { scatterStream.close(); } scatterDoneAt = System.currentTimeMillis(); } finally { closeAll(); } } /** * Returns a message describing the overall statistics of the compression run * * @return A string */ public ScatterStatistics getStatisticsMessage() { return new ScatterStatistics(compressionDoneAt - startedAt, scatterDoneAt - compressionDoneAt); } private void closeAll() { for (final ScatterZipOutputStream scatterStream : streams) { try { scatterStream.close(); } catch (IOException ex) { //NOSONAR // no way to properly log this } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy