// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// org.apache.gobblin.util.io.StreamUtils Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gobblin.util.io;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;

import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.compress.utils.IOUtils;

import org.apache.commons.lang.StringUtils;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

import org.apache.gobblin.configuration.ConfigurationKeys;


/**
 * Utility class of input/output stream helpers.
 */
public class StreamUtils {

  /**
   * Convert an instance of {@link InputStream} to a {@link FSDataInputStream} that is {@link Seekable} and
   * {@link PositionedReadable}.
   *
   * @see SeekableFSInputStream
   *
   */
  public static FSDataInputStream convertStream(InputStream in) throws IOException {
    return new FSDataInputStream(new SeekableFSInputStream(in));
  }

  /**
   * Copies an {@link InputStream} to and {@link OutputStream} using {@link Channels}.
   *
   * 

* Note: The method does not close the {@link InputStream} and {@link OutputStream}. However, the * {@link ReadableByteChannel} and {@link WritableByteChannel}s are closed *

* * @return Total bytes copied */ public static long copy(InputStream is, OutputStream os) throws IOException { return new StreamCopier(is, os).copy(); } /** * Copies a {@link ReadableByteChannel} to a {@link WritableByteChannel}. *

* Note: The {@link ReadableByteChannel} and {@link WritableByteChannel}s are NOT closed by the method *

* * @return Total bytes copied */ public static long copy(ReadableByteChannel inputChannel, WritableByteChannel outputChannel) throws IOException { return new StreamCopier(inputChannel, outputChannel).copy(); } /** * Creates a tar gzip file using a given {@link Path} as input and a given {@link Path} as a destination. If the given * input is a file then only that file will be added to tarball. If it is a directory then the entire directory will * be recursively put into the tarball. * * @param fs the {@link FileSystem} the input exists, and the the output should be written to. * @param sourcePath the {@link Path} of the input files, this can either be a file or a directory. * @param destPath the {@link Path} that tarball should be written to. */ public static void tar(FileSystem fs, Path sourcePath, Path destPath) throws IOException { tar(fs, fs, sourcePath, destPath); } /** * Similiar to {@link #tar(FileSystem, Path, Path)} except the source and destination {@link FileSystem} can be different. * * @see #tar(FileSystem, Path, Path) */ public static void tar(FileSystem sourceFs, FileSystem destFs, Path sourcePath, Path destPath) throws IOException { try (FSDataOutputStream fsDataOutputStream = destFs.create(destPath); TarArchiveOutputStream tarArchiveOutputStream = new TarArchiveOutputStream( new GzipCompressorOutputStream(fsDataOutputStream), ConfigurationKeys.DEFAULT_CHARSET_ENCODING.name())) { FileStatus fileStatus = sourceFs.getFileStatus(sourcePath); if (sourceFs.isDirectory(sourcePath)) { dirToTarArchiveOutputStreamRecursive(fileStatus, sourceFs, Optional. absent(), tarArchiveOutputStream); } else { try (FSDataInputStream fsDataInputStream = sourceFs.open(sourcePath)) { fileToTarArchiveOutputStream(fileStatus, fsDataInputStream, new Path(sourcePath.getName()), tarArchiveOutputStream); } } } } /** * Helper method for {@link #tar(FileSystem, FileSystem, Path, Path)} that recursively adds a directory to a given * {@link TarArchiveOutputStream}. 
*/ private static void dirToTarArchiveOutputStreamRecursive(FileStatus dirFileStatus, FileSystem fs, Optional destDir, TarArchiveOutputStream tarArchiveOutputStream) throws IOException { Preconditions.checkState(fs.isDirectory(dirFileStatus.getPath())); Path dir = destDir.isPresent() ? new Path(destDir.get(), dirFileStatus.getPath().getName()) : new Path(dirFileStatus.getPath().getName()); dirToTarArchiveOutputStream(dir, tarArchiveOutputStream); for (FileStatus childFileStatus : fs.listStatus(dirFileStatus.getPath())) { Path childFile = new Path(dir, childFileStatus.getPath().getName()); if (fs.isDirectory(childFileStatus.getPath())) { dirToTarArchiveOutputStreamRecursive(childFileStatus, fs, Optional.of(childFile), tarArchiveOutputStream); } else { try (FSDataInputStream fsDataInputStream = fs.open(childFileStatus.getPath())) { fileToTarArchiveOutputStream(childFileStatus, fsDataInputStream, childFile, tarArchiveOutputStream); } } } } /** * Helper method for {@link #tar(FileSystem, FileSystem, Path, Path)} that adds a directory entry to a given * {@link TarArchiveOutputStream}. */ private static void dirToTarArchiveOutputStream(Path destDir, TarArchiveOutputStream tarArchiveOutputStream) throws IOException { TarArchiveEntry tarArchiveEntry = new TarArchiveEntry(formatPathToDir(destDir)); tarArchiveEntry.setModTime(System.currentTimeMillis()); tarArchiveOutputStream.putArchiveEntry(tarArchiveEntry); tarArchiveOutputStream.closeArchiveEntry(); } /** * Helper method for {@link #tar(FileSystem, FileSystem, Path, Path)} that adds a file entry to a given * {@link TarArchiveOutputStream} and copies the contents of the file to the new entry. 
*/ private static void fileToTarArchiveOutputStream(FileStatus fileStatus, FSDataInputStream fsDataInputStream, Path destFile, TarArchiveOutputStream tarArchiveOutputStream) throws IOException { TarArchiveEntry tarArchiveEntry = new TarArchiveEntry(formatPathToFile(destFile)); tarArchiveEntry.setSize(fileStatus.getLen()); tarArchiveEntry.setModTime(System.currentTimeMillis()); tarArchiveOutputStream.putArchiveEntry(tarArchiveEntry); try { IOUtils.copy(fsDataInputStream, tarArchiveOutputStream); } finally { tarArchiveOutputStream.closeArchiveEntry(); } } /** * Convert a {@link Path} to a {@link String} and make sure it is properly formatted to be recognized as a directory * by {@link TarArchiveEntry}. */ private static String formatPathToDir(Path path) { return path.toString().endsWith(Path.SEPARATOR) ? path.toString() : path.toString() + Path.SEPARATOR; } /** * Convert a {@link Path} to a {@link String} and make sure it is properly formatted to be recognized as a file * by {@link TarArchiveEntry}. */ private static String formatPathToFile(Path path) { return StringUtils.removeEnd(path.toString(), Path.SEPARATOR); } /* * Determines if a byte array is compressed. The java.util.zip GZip * implementation does not expose the GZip header so it is difficult to determine * if a string is compressed. * Copied from Helix GZipCompressionUtil * @param bytes an array of bytes * @return true if the array is compressed or false otherwise */ public static boolean isCompressed(byte[] bytes) { if ((bytes == null) || (bytes.length < 2)) { return false; } else { return ((bytes[0] == (byte) (GZIPInputStream.GZIP_MAGIC)) && (bytes[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8))); } } /** * Reads the full contents of a ByteBuffer and writes them to an OutputStream. The ByteBuffer is * consumed by this operation; eg in.remaining() will be 0 after it completes successfully. 
* @param in ByteBuffer to write into the OutputStream * @param out Destination stream * @throws IOException If there is an error writing into the OutputStream */ public static void byteBufferToOutputStream(ByteBuffer in, OutputStream out) throws IOException { final int BUF_SIZE = 8192; if (in.hasArray()) { out.write(in.array(), in.arrayOffset() + in.position(), in.remaining()); } else { final byte[] b = new byte[Math.min(in.remaining(), BUF_SIZE)]; while (in.remaining() > 0) { int bytesToRead = Math.min(in.remaining(), BUF_SIZE); in.get(b, 0, bytesToRead); out.write(b, 0, bytesToRead); } } } }




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy