com.google.cloud.dataflow.sdk.util.ZipFiles Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of google-cloud-dataflow-java-sdk-all Show documentation
Show all versions of google-cloud-dataflow-java-sdk-all Show documentation
The Google Cloud Dataflow Java SDK provides a simple, Java-based
interface for processing data of virtually any size using Google Cloud
resources. This artifact includes the entire Dataflow Java SDK.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.util;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Iterators;
import com.google.common.io.ByteSource;
import com.google.common.io.CharSource;
import com.google.common.io.Closer;
import com.google.common.io.Files;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Iterator;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
/**
 * Functions for zipping a directory (including a subdirectory) into a ZIP-file
 * or unzipping it again.
 *
 * <p>This is a static utility class and cannot be instantiated.
 */
public final class ZipFiles {
  private ZipFiles() {}

  /**
   * Returns a new {@link ByteSource} for reading the contents of the given
   * entry in the given zip file.
   *
   * @param file the zip file containing the entry; must not be null
   * @param entry the entry whose contents are read; must not be null
   */
  static ByteSource asByteSource(ZipFile file, ZipEntry entry) {
    return new ZipEntryByteSource(file, entry);
  }

  /**
   * Returns a new {@link CharSource} for reading the contents of the given
   * entry in the given zip file as text using the given charset.
   *
   * @param file the zip file containing the entry; must not be null
   * @param entry the entry whose contents are read; must not be null
   * @param charset the charset used to decode the entry's bytes
   */
  static CharSource asCharSource(
      ZipFile file, ZipEntry entry, Charset charset) {
    return asByteSource(file, entry).asCharSource(charset);
  }

  /** A {@link ByteSource} whose stream is the content of one entry of an open {@link ZipFile}. */
  private static final class ZipEntryByteSource extends ByteSource {
    private final ZipFile file;
    private final ZipEntry entry;

    ZipEntryByteSource(ZipFile file, ZipEntry entry) {
      this.file = checkNotNull(file);
      this.entry = checkNotNull(entry);
    }

    /** Opens a fresh stream over the entry's contents by delegating to the zip file. */
    @Override
    public InputStream openStream() throws IOException {
      return file.getInputStream(entry);
    }

    // TODO: implement size() to try calling entry.getSize()?

    @Override
    public String toString() {
      return "ZipFiles.asByteSource(" + file + ", " + entry + ")";
    }
  }

  /**
   * Returns a {@link FluentIterable} of all the entries in the given zip file.
   *
   * <p>Each call to {@code iterator()} on the result asks the zip file for a fresh enumeration
   * of its entries.
   *
   * @param file the zip file to enumerate; must not be null
   */
  // unmodifiable Iterator<? extends ZipEntry> can be safely cast
  // to Iterator<ZipEntry>
  @SuppressWarnings("unchecked")
  static FluentIterable<ZipEntry> entries(final ZipFile file) {
    checkNotNull(file);
    return new FluentIterable<ZipEntry>() {
      @Override
      public Iterator<ZipEntry> iterator() {
        return (Iterator<ZipEntry>) Iterators.forEnumeration(file.entries());
      }
    };
  }

  /**
   * Unzips the zip file specified by the path and creates the directory structure inside
   * the target directory. Refuses to unzip files that refer to a parent directory, for security
   * reasons.
   *
   * @param zipFile the source zip-file to unzip
   * @param targetDirectory the directory to unzip to. If the zip-file contains
   *     any subdirectories, they will be created within our target directory.
   * @throws IOException the unzipping failed, e.g. because the output was not writable, the {@code
   *     zipFile} was not readable, or contains an illegal entry (contains "..", pointing outside
   *     the target directory)
   * @throws IllegalArgumentException the target directory is not a valid directory (e.g. does not
   *     exist, or is a file instead of a directory)
   */
  static void unzipFile(
      File zipFile,
      File targetDirectory) throws IOException {
    checkNotNull(zipFile);
    checkNotNull(targetDirectory);
    checkArgument(
        targetDirectory.isDirectory(),
        "%s is not a valid directory",
        targetDirectory.getAbsolutePath());
    final ZipFile zipFileObj = new ZipFile(zipFile);
    try {
      for (ZipEntry entry : entries(zipFileObj)) {
        // Reject entries that would escape the target directory before touching the disk.
        checkName(entry.getName());
        File targetFile = new File(targetDirectory, entry.getName());
        if (entry.isDirectory()) {
          ensureDirectory(targetFile);
        } else {
          // getParentFile() may be null for a degenerate entry name; in that case there is
          // nothing to create and the copy below reports the failure as an IOException.
          File parentFile = targetFile.getParentFile();
          if (parentFile != null) {
            ensureDirectory(parentFile);
          }
          // Write the file to the destination.
          asByteSource(zipFileObj, entry).copyTo(Files.asByteSink(targetFile));
        }
      }
    } finally {
      zipFileObj.close();
    }
  }

  /**
   * Makes sure {@code directory} exists as a directory, creating it and any missing parents.
   *
   * @throws IOException if the directory does not exist and could not be created
   */
  private static void ensureDirectory(File directory) throws IOException {
    if (!directory.isDirectory() && !directory.mkdirs()) {
      throw new IOException(
          "Failed to create directory: " + directory.getAbsolutePath());
    }
  }

  /**
   * Checks that the given entry name is legal for unzipping: if it contains
   * ".." as a name element, it could cause the entry to be unzipped outside
   * the directory we're unzipping to.
   *
   * @throws IOException if the name is illegal
   */
  private static void checkName(String name) throws IOException {
    // First just check whether the entry name string contains "..".
    // This should weed out the vast majority of entries, which will not
    // contain "..".
    if (name.contains("..")) {
      // If the string does contain "..", break it down into its actual name
      // elements to ensure it actually contains ".." as a name, not just a
      // name like "foo..bar" or even "foo..", which should be fine.
      File file = new File(name);
      while (file != null) {
        if (file.getName().equals("..")) {
          throw new IOException("Cannot unzip file containing an entry with "
              + "\"..\" in the name: " + name);
        }
        file = file.getParentFile();
      }
    }
  }

  /**
   * Zips an entire directory specified by the path.
   *
   * @param sourceDirectory the directory to read from. This directory and all
   *     subdirectories will be added to the zip-file. The path within the zip
   *     file is relative to the directory given as parameter, not absolute.
   * @param zipFile the zip-file to write to.
   * @throws IOException the zipping failed, e.g. because the input was not
   *     readable.
   * @throws IllegalArgumentException if {@code sourceDirectory} is not a directory or
   *     {@code zipFile} already exists (existing files are never overwritten)
   */
  static void zipDirectory(
      File sourceDirectory,
      File zipFile) throws IOException {
    checkNotNull(sourceDirectory);
    checkNotNull(zipFile);
    checkArgument(
        sourceDirectory.isDirectory(),
        "%s is not a valid directory",
        sourceDirectory.getAbsolutePath());
    checkArgument(
        !zipFile.exists(),
        "%s does already exist, files are not being overwritten",
        zipFile.getAbsolutePath());
    // The Closer closes the file stream on every path and keeps the primary exception if
    // closing fails as well.
    Closer closer = Closer.create();
    try {
      OutputStream outputStream = closer.register(new BufferedOutputStream(
          new FileOutputStream(zipFile)));
      zipDirectory(sourceDirectory, outputStream);
    } catch (Throwable t) {
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
  }

  /**
   * Zips an entire directory specified by the path. Entries are written in sorted order at
   * every level, so the same directory tree always produces the same sequence of entries.
   *
   * @param sourceDirectory the directory to read from. This directory and all
   *     subdirectories will be added to the zip-file. The path within the zip
   *     file is relative to the directory given as parameter, not absolute.
   * @param outputStream the stream to write the zip-file to. This method does not close
   *     outputStream.
   * @throws IOException the zipping failed, e.g. because the input was not
   *     readable.
   * @throws IllegalArgumentException if {@code sourceDirectory} is not a directory
   */
  static void zipDirectory(
      File sourceDirectory,
      OutputStream outputStream) throws IOException {
    checkNotNull(sourceDirectory);
    checkNotNull(outputStream);
    checkArgument(
        sourceDirectory.isDirectory(),
        "%s is not a valid directory",
        sourceDirectory.getAbsolutePath());
    ZipOutputStream zos = new ZipOutputStream(outputStream);
    for (File file : listSortedChildren(sourceDirectory)) {
      zipDirectoryInternal(file, "", zos);
    }
    // finish() completes the archive without closing the caller's stream.
    zos.finish();
  }

  /**
   * Lists the children of {@code directory} in sorted order.
   *
   * @throws IOException if the children cannot be listed, which {@link File#listFiles()}
   *     signals by returning {@code null}
   */
  private static File[] listSortedChildren(File directory) throws IOException {
    File[] children = directory.listFiles();
    if (children == null) {
      throw new IOException(
          "Failed to list contents of directory: " + directory.getAbsolutePath());
    }
    Arrays.sort(children);
    return children;
  }

  /**
   * Private helper function for zipping files. This one goes recursively
   * through the input directory and all of its subdirectories and adds the
   * single zip entries.
   *
   * @param inputFile the file or directory to be added to the zip file
   * @param directoryName the string-representation of the parent directory
   *     name. Might be an empty name, or a name containing multiple directory
   *     names separated by "/". The directory name must be a valid name
   *     according to the file system limitations. The directory name should be
   *     empty or should end in "/".
   * @param zos the zipstream to write to
   * @throws IOException the zipping failed, e.g. because the output was not
   *     writeable, or a directory could not be listed.
   */
  private static void zipDirectoryInternal(
      File inputFile,
      String directoryName,
      ZipOutputStream zos) throws IOException {
    String entryName = directoryName + inputFile.getName();
    if (inputFile.isDirectory()) {
      entryName += "/";

      // We are hitting a sub-directory. Recursively add children to zip in deterministic,
      // sorted order.
      File[] childFiles = listSortedChildren(inputFile);
      if (childFiles.length > 0) {
        // loop through the directory content, and zip the files
        for (File file : childFiles) {
          zipDirectoryInternal(file, entryName, zos);
        }

        // Since this directory has children, exit now without creating a zipentry specific to
        // this directory. The entry for a non-empty directory is incompatible with certain
        // implementations of unzip.
        return;
      }
    }

    // Put the zip-entry for this file or empty directory into the zipoutputstream.
    ZipEntry entry = new ZipEntry(entryName);
    entry.setTime(inputFile.lastModified());
    zos.putNextEntry(entry);
    // Copy file contents into zipoutput stream.
    if (inputFile.isFile()) {
      Files.asByteSource(inputFile).copyTo(zos);
    }
    // Close the entry explicitly so a write failure is reported for the entry it belongs to.
    zos.closeEntry();
  }
}