// org.apache.commons.compress.archivers.zip.ParallelScatterZipCreator (Maven / Gradle / Ivy)
// Show all versions of commons-compress. Show documentation.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.commons.compress.archivers.zip;
import org.apache.commons.compress.parallel.FileBasedScatterGatherBackingStore;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.compress.parallel.ScatterGatherBackingStore;
import org.apache.commons.compress.parallel.ScatterGatherBackingStoreSupplier;
import java.io.File;
import java.io.IOException;
import java.util.Deque;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.Deflater;
import static org.apache.commons.compress.archivers.zip.ZipArchiveEntryRequest.createZipArchiveEntryRequest;
/**
* Creates a zip in parallel by using multiple threadlocal {@link ScatterZipOutputStream} instances.
*
* Note that until 1.18, this class generally made no guarantees about the order of things written to
* the output file. Things that needed to come in a specific order (manifests, directories)
* had to be handled by the client of this class, usually by writing these things to the
* {@link ZipArchiveOutputStream} before calling {@link #writeTo writeTo} on this class.
*
* The client can supply an {@link java.util.concurrent.ExecutorService}, but for reasons of
* memory model consistency, this will be shut down by this class prior to completion.
*
* @since 1.10
*/
public class ParallelScatterZipCreator {
private final Deque streams = new ConcurrentLinkedDeque<>();
private final ExecutorService es;
private final ScatterGatherBackingStoreSupplier backingStoreSupplier;
private final Deque> futures = new ConcurrentLinkedDeque<>();
private final long startedAt = System.currentTimeMillis();
private long compressionDoneAt = 0;
private long scatterDoneAt;
private static class DefaultBackingStoreSupplier implements ScatterGatherBackingStoreSupplier {
final AtomicInteger storeNum = new AtomicInteger(0);
@Override
public ScatterGatherBackingStore get() throws IOException {
final File tempFile = File.createTempFile("parallelscatter", "n" + storeNum.incrementAndGet());
return new FileBasedScatterGatherBackingStore(tempFile);
}
}
private ScatterZipOutputStream createDeferred(final ScatterGatherBackingStoreSupplier scatterGatherBackingStoreSupplier)
throws IOException {
final ScatterGatherBackingStore bs = scatterGatherBackingStoreSupplier.get();
// lifecycle is bound to the ScatterZipOutputStream returned
final StreamCompressor sc = StreamCompressor.create(Deflater.DEFAULT_COMPRESSION, bs); //NOSONAR
return new ScatterZipOutputStream(bs, sc);
}
private final ThreadLocal tlScatterStreams = new ThreadLocal() {
@Override
protected ScatterZipOutputStream initialValue() {
try {
final ScatterZipOutputStream scatterStream = createDeferred(backingStoreSupplier);
streams.add(scatterStream);
return scatterStream;
} catch (final IOException e) {
throw new RuntimeException(e); //NOSONAR
}
}
};
/**
* Create a ParallelScatterZipCreator with default threads, which is set to the number of available
* processors, as defined by {@link java.lang.Runtime#availableProcessors}
*/
public ParallelScatterZipCreator() {
this(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()));
}
/**
* Create a ParallelScatterZipCreator
*
* @param executorService The executorService to use for parallel scheduling. For technical reasons,
* this will be shut down by this class.
*/
public ParallelScatterZipCreator(final ExecutorService executorService) {
this(executorService, new DefaultBackingStoreSupplier());
}
/**
* Create a ParallelScatterZipCreator
*
* @param executorService The executorService to use. For technical reasons, this will be shut down
* by this class.
* @param backingStoreSupplier The supplier of backing store which shall be used
*/
public ParallelScatterZipCreator(final ExecutorService executorService,
final ScatterGatherBackingStoreSupplier backingStoreSupplier) {
this.backingStoreSupplier = backingStoreSupplier;
es = executorService;
}
/**
* Adds an archive entry to this archive.
*
* This method is expected to be called from a single client thread
*
*
* @param zipArchiveEntry The entry to add.
* @param source The source input stream supplier
*/
public void addArchiveEntry(final ZipArchiveEntry zipArchiveEntry, final InputStreamSupplier source) {
submitStreamAwareCallable(createCallable(zipArchiveEntry, source));
}
/**
* Adds an archive entry to this archive.
*
* This method is expected to be called from a single client thread
*
*
* @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
* @since 1.13
*/
public void addArchiveEntry(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
submitStreamAwareCallable(createCallable(zipArchiveEntryRequestSupplier));
}
/**
* Submit a callable for compression.
*
* @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
*
* @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
*/
public final void submit(final Callable extends Object> callable) {
submitStreamAwareCallable(new Callable() {
@Override
public ScatterZipOutputStream call() throws Exception {
callable.call();
return tlScatterStreams.get();
}
});
}
/**
* Submit a callable for compression.
*
* @see ParallelScatterZipCreator#createCallable for details of if/when to use this.
*
* @param callable The callable to run, created by {@link #createCallable createCallable}, possibly wrapped by caller.
* @since 1.19
*/
public final void submitStreamAwareCallable(final Callable extends ScatterZipOutputStream> callable) {
futures.add(es.submit(callable));
}
/**
* Create a callable that will compress the given archive entry.
*
* This method is expected to be called from a single client thread.
*
* Consider using {@link #addArchiveEntry addArchiveEntry}, which wraps this method and {@link #submitStreamAwareCallable submitStreamAwareCallable}.
* The most common use case for using {@link #createCallable createCallable} and {@link #submitStreamAwareCallable submitStreamAwareCallable} from a
* client is if you want to wrap the callable in something that can be prioritized by the supplied
* {@link ExecutorService}, for instance to process large or slow files first.
* Since the creation of the {@link ExecutorService} is handled by the client, all of this is up to the client.
*
* @param zipArchiveEntry The entry to add.
* @param source The source input stream supplier
* @return A callable that should subsequently passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The
* value of this callable is not used, but any exceptions happening inside the compression
* will be propagated through the callable.
*/
public final Callable createCallable(final ZipArchiveEntry zipArchiveEntry,
final InputStreamSupplier source) {
final int method = zipArchiveEntry.getMethod();
if (method == ZipMethod.UNKNOWN_CODE) {
throw new IllegalArgumentException("Method must be set on zipArchiveEntry: " + zipArchiveEntry);
}
final ZipArchiveEntryRequest zipArchiveEntryRequest = createZipArchiveEntryRequest(zipArchiveEntry, source);
return new Callable() {
@Override
public ScatterZipOutputStream call() throws Exception {
ScatterZipOutputStream scatterStream = tlScatterStreams.get();
scatterStream.addArchiveEntry(zipArchiveEntryRequest);
return scatterStream;
}
};
}
/**
* Create a callable that will compress archive entry supplied by {@link ZipArchiveEntryRequestSupplier}.
*
* This method is expected to be called from a single client thread.
*
* The same as {@link #createCallable(ZipArchiveEntry, InputStreamSupplier)}, but the archive entry
* to be added is supplied by a {@link ZipArchiveEntryRequestSupplier}.
*
* @see #createCallable(ZipArchiveEntry, InputStreamSupplier)
*
* @param zipArchiveEntryRequestSupplier Should supply the entry to be added.
* @return A callable that should subsequently passed to #submitStreamAwareCallable, possibly in a wrapped/adapted from. The
* value of this callable is not used, but any exceptions happening inside the compression
* will be propagated through the callable.
* @since 1.13
*/
public final Callable createCallable(final ZipArchiveEntryRequestSupplier zipArchiveEntryRequestSupplier) {
return new Callable() {
@Override
public ScatterZipOutputStream call() throws Exception {
ScatterZipOutputStream scatterStream = tlScatterStreams.get();
scatterStream.addArchiveEntry(zipArchiveEntryRequestSupplier.get());
return scatterStream;
}
};
}
/**
* Write the contents this to the target {@link ZipArchiveOutputStream}.
*
* It may be beneficial to write things like directories and manifest files to the targetStream
* before calling this method.
*
*
* Calling this method will shut down the {@link ExecutorService} used by this class. If any of the {@link
* Callable}s {@link #submitStreamAwareCallable submit}ted to this instance throws an exception, the archive can not be created properly and
* this method will throw an exception.
*
* @param targetStream The {@link ZipArchiveOutputStream} to receive the contents of the scatter streams
* @throws IOException If writing fails
* @throws InterruptedException If we get interrupted
* @throws ExecutionException If something happens in the parallel execution
*/
public void writeTo(final ZipArchiveOutputStream targetStream)
throws IOException, InterruptedException, ExecutionException {
try {
// Make sure we catch any exceptions from parallel phase
try {
for (final Future> future : futures) {
future.get();
}
} finally {
es.shutdown();
}
es.awaitTermination(1000 * 60L, TimeUnit.SECONDS); // == Infinity. We really *must* wait for this to complete
// It is important that all threads terminate before we go on, ensure happens-before relationship
compressionDoneAt = System.currentTimeMillis();
for (final Future extends ScatterZipOutputStream> future : futures) {
ScatterZipOutputStream scatterStream = future.get();
scatterStream.zipEntryWriter().writeNextZipEntry(targetStream);
}
for (final ScatterZipOutputStream scatterStream : streams) {
scatterStream.close();
}
scatterDoneAt = System.currentTimeMillis();
} finally {
closeAll();
}
}
/**
* Returns a message describing the overall statistics of the compression run
*
* @return A string
*/
public ScatterStatistics getStatisticsMessage() {
return new ScatterStatistics(compressionDoneAt - startedAt, scatterDoneAt - compressionDoneAt);
}
private void closeAll() {
for (final ScatterZipOutputStream scatterStream : streams) {
try {
scatterStream.close();
} catch (IOException ex) { //NOSONAR
// no way to properly log this
}
}
}
}