org.apache.druid.java.util.common.FileUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
A module that contains everything required to understand Druid segments
There is a newer version: 31.0.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.java.util.common;

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.io.ByteSource;
import io.netty.util.SuppressForbidden;
import org.apache.commons.io.IOUtils;
import org.apache.druid.data.input.impl.prefetch.ObjectOpenFunction;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.file.AccessDeniedException;
import java.nio.file.FileSystemException;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.UUID;

public class FileUtils
{
  /**
   * Outcome reported by {@link #linkOrCopy(File, File)}: tells the caller which of the two strategies actually
   * produced the destination file.
   */
  public enum LinkOrCopyResult
  {
    /**
     * The destination was created as a hard link to the source.
     */
    LINK,

    /**
     * The destination was created by copying the contents of the source.
     */
    COPY
  }

  /**
   * Predicate that is true for any {@link Exception} and false for every other {@link Throwable}.
   *
   * Useful for retry functionality that doesn't want to stop Throwables, but does want to retry on Exceptions.
   */
  public static final Predicate<Throwable> IS_EXCEPTION = input -> input instanceof Exception;

  /**
   * Copy input byte source to outFile, retrying on recoverable failures. The copy (including the retry loop) is
   * delegated to {@code StreamUtils.retryCopy}, writing through a Guava byte sink opened on {@code outFile}.
   *
   * @param byteSource  Supplier for an input stream that is to be copied. The resulting stream is closed each iteration
   * @param outFile     Where the file should be written to.
   * @param shouldRetry Predicate indicating if an error is recoverable and should be retried.
   * @param maxAttempts The maximum number of assumed recoverable attempts to try before completely failing.
   *
   * @return a {@link FileCopyResult} that contains {@code outFile} and its size as measured after the copy
   *
   * @throws RuntimeException wrapping the inner exception on failure.
   */
  public static FileCopyResult retryCopy(
      final ByteSource byteSource,
      final File outFile,
      final Predicate<Throwable> shouldRetry,
      final int maxAttempts
  )
  {
    try {
      StreamUtils.retryCopy(
          byteSource,
          com.google.common.io.Files.asByteSink(outFile),
          shouldRetry,
          maxAttempts
      );
      return new FileCopyResult(outFile);
    }
    catch (Exception e) {
      // Callers get a single unchecked failure type; the original exception is kept as the cause.
      throw new RuntimeException(e);
    }
  }

  /**
   * Keeps results of a file copy, including children and total size of the resultant files.
   * This class is NOT thread safe.
   * Child size is eagerly calculated and any modifications to the file after the child is added are not accounted for.
   * As such, this result should be considered immutable, even though it has no way to force that property on the files.
   */
  public static class FileCopyResult
  {
    private final Collection<File> files = new ArrayList<>();
    private long size = 0L;

    /**
     * @return an immutable snapshot of the files recorded so far; later additions are not reflected in it
     */
    public Collection<File> getFiles()
    {
      return ImmutableList.copyOf(files);
    }

    /**
     * Only works for immutable children contents: the value is the sum of {@link File#length()} taken at the moment
     * each file was added.
     *
     * @return total size in bytes of all recorded files
     */
    public long size()
    {
      return size;
    }

    /**
     * @param files files to record; a null array is treated as empty
     */
    public FileCopyResult(File... files)
    {
      this(files == null ? ImmutableList.<File>of() : Arrays.asList(files));
    }

    /**
     * @param files files to record; null or empty records nothing
     */
    public FileCopyResult(Collection<File> files)
    {
      this.addSizedFiles(files);
    }

    /**
     * Records the given files and adds their current lengths to the running total. The lengths are summed before
     * anything is recorded, so the file list and the size are updated together.
     *
     * @param files files to record; null or empty is a no-op
     */
    protected void addSizedFiles(Collection<File> files)
    {
      if (files == null || files.isEmpty()) {
        return;
      }
      long addedBytes = 0L;
      for (File file : files) {
        addedBytes += file.length();
      }
      this.files.addAll(files);
      this.size += addedBytes;
    }

    /**
     * @param files files to record; null or empty is a no-op
     */
    public void addFiles(Collection<File> files)
    {
      this.addSizedFiles(files);
    }

    /**
     * @param file single file to record
     */
    public void addFile(File file)
    {
      this.addFiles(ImmutableList.of(file));
    }
  }

  /**
   * Maps a whole file into memory in read-only mode, as per
   * {@link FileChannel#map(FileChannel.MapMode, long, long)}.
   *
   * <p>The mapping starts at offset 0 and spans the length the file has at the time of the call.
   *
   * <p>This only works for files of at most {@link Integer#MAX_VALUE} bytes.
   *
   * <p>Similar to {@link com.google.common.io.Files#map(File)}, but returns a {@link MappedByteBufferHandler}, which
   * makes it easier to unmap the buffer with the try-with-resources pattern:
   * <pre>{@code
   * try (MappedByteBufferHandler fileMappingHandler = FileUtils.map(file)) {
   *   ByteBuffer fileMapping = fileMappingHandler.get();
   *   // use mapped buffer
   * }}</pre>
   *
   * @param file the file to map
   *
   * @return a {@link MappedByteBufferHandler}, wrapping a read-only buffer reflecting {@code file}
   *
   * @throws FileNotFoundException    if the {@code file} does not exist
   * @throws IOException              if an I/O error occurs
   * @throws IllegalArgumentException if length is greater than {@link Integer#MAX_VALUE}
   * @see FileChannel#map(FileChannel.MapMode, long, long)
   */
  public static MappedByteBufferHandler map(File file) throws IOException
  {
    final long fileLength = file.length();
    return map(file, 0L, fileLength);
  }

  /**
   * Maps a region of a file into memory in read-only mode, as per
   * {@link FileChannel#map(FileChannel.MapMode, long, long)}. The file is opened for reading only for the duration of
   * this call and is closed again before the method returns.
   *
   * @param file   the file to map
   * @param offset starting offset for the mmap
   * @param length length for the mmap
   *
   * @return a {@link MappedByteBufferHandler}, wrapping a read-only buffer reflecting {@code file}
   *
   * @throws FileNotFoundException    if the {@code file} does not exist
   * @throws IOException              if an I/O error occurs
   * @throws IllegalArgumentException if length is greater than {@link Integer#MAX_VALUE}
   * @see FileChannel#map(FileChannel.MapMode, long, long)
   */
  public static MappedByteBufferHandler map(File file, long offset, long length) throws IOException
  {
    // Reject oversized regions up front, before the file is even opened.
    if (length > Integer.MAX_VALUE) {
      throw new IAE("Cannot map region larger than %,d bytes", Integer.MAX_VALUE);
    }

    try (
        final RandomAccessFile raf = new RandomAccessFile(file, "r");
        final FileChannel fileChannel = raf.getChannel()
    ) {
      return new MappedByteBufferHandler(fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, length));
    }
  }

  /**
   * Maps a region of an already-open file into memory in read-only mode, as per
   * {@link FileChannel#map(FileChannel.MapMode, long, long)}.
   *
   * @param randomAccessFile the file to map. The file will not be closed.
   * @param offset           starting offset for the mmap
   * @param length           length for the mmap
   *
   * @return a {@link MappedByteBufferHandler}, wrapping a read-only buffer reflecting {@code randomAccessFile}
   *
   * @throws IOException              if an I/O error occurs
   * @throws IllegalArgumentException if length is greater than {@link Integer#MAX_VALUE}
   * @see FileChannel#map(FileChannel.MapMode, long, long)
   */
  public static MappedByteBufferHandler map(
      RandomAccessFile randomAccessFile,
      long offset,
      long length
  ) throws IOException
  {
    if (length > Integer.MAX_VALUE) {
      throw new IAE("Cannot map region larger than %,d bytes", Integer.MAX_VALUE);
    }

    // The channel belongs to the caller's RandomAccessFile, so it is deliberately left open here.
    final MappedByteBuffer region =
        randomAccessFile.getChannel().map(FileChannel.MapMode.READ_ONLY, offset, length);
    return new MappedByteBufferHandler(region);
  }

  /**
   * Write to a file atomically, by first writing to a temporary file in the same directory and then moving it to
   * the target location. The temporary directory passed along is {@code file.getParentFile()}.
   * More docs at {@link FileUtils#writeAtomically(File, File, OutputStreamConsumer)}.
   *
   * @param file target file
   * @param f    callback that writes the file contents to the stream it is given
   * @param <T>  type of the value produced by {@code f}
   *
   * @return whatever {@code f} returned
   *
   * @throws IOException if writing, syncing or moving the file fails
   */
  public static <T> T writeAtomically(final File file, OutputStreamConsumer<T> f) throws IOException
  {
    return writeAtomically(file, file.getParentFile(), f);
  }

  /**
   * Write to a file atomically, by first writing to a temporary file in given tmpDir directory and then moving it to
   * the target location. This function attempts to clean up its temporary files when possible, but they may stick
   * around (for example, if the JVM crashes partway through executing the function). In any case, the target file
   * should be unharmed.
   *
   * The OutputStream passed to the consumer is uncloseable; calling close on it will do nothing. This is to ensure
   * that the stream stays open so we can fsync it here before closing. Hopefully, this doesn't cause any problems
   * for callers.
   *
   * This method is not just thread-safe, but is also safe to use from multiple processes on the same machine.
   *
   * @param file   target file; an existing file at this location is replaced by the move
   * @param tmpDir directory in which the temporary file is created
   * @param f      callback that writes the file contents to the stream it is given
   * @param <T>    type of the value produced by {@code f}
   *
   * @return whatever {@code f} returned
   *
   * @throws IOException if writing, syncing or moving the file fails
   */
  public static <T> T writeAtomically(final File file, final File tmpDir, OutputStreamConsumer<T> f) throws IOException
  {
    // The random UUID suffix keeps concurrent writers of the same target from colliding on the temporary file.
    final File tmpFile = new File(tmpDir, StringUtils.format(".%s.%s", file.getName(), UUID.randomUUID()));

    // Closing "deleter" removes the temporary file if it is still there, i.e. if anything failed before the move.
    //noinspection unused
    try (final Closeable deleter = () -> Files.deleteIfExists(tmpFile.toPath())) {
      final T retVal;

      try (
          final FileChannel fileChannel = FileChannel.open(
              tmpFile.toPath(),
              StandardOpenOption.WRITE,
              StandardOpenOption.CREATE_NEW
          );
          final OutputStream out = Channels.newOutputStream(fileChannel)
      ) {
        // Pass f an uncloseable stream so we can fsync before closing.
        retVal = f.apply(uncloseable(out));

        // fsync to avoid write-then-rename-then-crash causing empty files on some filesystems.
        // Must do this before "out" or "fileChannel" is closed. No need to flush "out" first, since
        // Channels.newOutputStream is unbuffered.
        // See also https://github.com/apache/druid/pull/5187#pullrequestreview-85188984
        fileChannel.force(true);
      }

      // No exception thrown; do the move.
      Files.move(
          tmpFile.toPath(),
          file.toPath(),
          StandardCopyOption.ATOMIC_MOVE,
          StandardCopyOption.REPLACE_EXISTING
      );

      // fsync the directory entry to ensure the new file will be visible after a crash.
      // Resolve the parent through the absolute path: File#getParentFile() returns null for a path without a
      // parent component (e.g. new File("x")), which previously caused a NullPointerException after the move had
      // already happened.
      final Path parentDirectory = file.toPath().toAbsolutePath().getParent();
      if (parentDirectory != null) {
        try (final FileChannel directory = FileChannel.open(parentDirectory, StandardOpenOption.READ)) {
          directory.force(true);
        }
      }

      return retVal;
    }
  }

  /**
   * Wraps {@code out} in a stream that forwards writes to it but turns {@code close()} into a no-op, so the
   * underlying stream stays open (and is neither flushed nor closed) when the wrapper is closed.
   */
  private static OutputStream uncloseable(final OutputStream out)
  {
    final class NonClosingOutputStream extends FilterOutputStream
    {
      NonClosingOutputStream(final OutputStream delegate)
      {
        super(delegate);
      }

      // FilterOutputStream's own bulk write issues one single-byte write per element, which is terribly
      // inefficient; hand the whole range to the wrapped stream in one call instead.
      @Override
      public void write(byte[] b, int off, int len) throws IOException
      {
        this.out.write(b, off, len);
      }

      @Override
      public void close()
      {
        // Intentionally empty: the wrapped stream must remain open.
      }
    }

    return new NonClosingOutputStream(out);
  }

  /**
   * Copies data from the InputStream opened with objectOpenFunction to the given file.
   * This method is supposed to be used for copying large files.
   * The output file is deleted automatically if copy fails.
   *
   * Delegates to {@link #copyLarge(InputStreamSupplier, File, byte[], Predicate, int, String)}, opening the stream
   * anew through {@code objectOpenFunction} each time the supplier is asked for one.
   *
   * @param object             object to open
   * @param objectOpenFunction function to open the given object
   * @param outFile            file to write data
   * @param fetchBuffer        a buffer to copy data from the input stream to the file
   * @param retryCondition     condition which should be satisfied for retry
   * @param numTries           max number of retries
   * @param messageOnRetry     log message on retry
   * @param <T>                type of the object being opened
   *
   * @return the number of bytes copied
   *
   * @throws IOException wrapping the underlying failure if the copy does not succeed
   */
  public static <T> long copyLarge(
      T object,
      ObjectOpenFunction<T> objectOpenFunction,
      File outFile,
      byte[] fetchBuffer,
      Predicate<Throwable> retryCondition,
      int numTries,
      String messageOnRetry
  ) throws IOException
  {
    return copyLarge(
        () -> objectOpenFunction.open(object),
        outFile,
        fetchBuffer,
        retryCondition,
        numTries,
        messageOnRetry
    );
  }

  /**
   * Copy a potentially large amount of data from an input source to a file.
   *
   * The copy runs under {@code RetryUtils.retry}: each attempt opens a fresh input stream and a fresh
   * {@link FileOutputStream} on {@code outFile}, and {@code outFile::delete} is registered as the cleanup action.
   *
   * @param inputSource    supplier of the input stream; asked for a new stream on every attempt
   * @param outFile        file to write data
   * @param fetchBuffer    a buffer to copy data from the input stream to the file
   * @param retryCondition condition which should be satisfied for retry
   * @param numTries       number of tries handed to the retry helper
   * @param messageOnRetry log message on retry
   *
   * @return the number of bytes copied
   *
   * @throws IOException wrapping whatever exception the retry helper finally threw
   */
  public static long copyLarge(
      InputStreamSupplier inputSource,
      File outFile,
      byte[] fetchBuffer,
      Predicate<Throwable> retryCondition,
      int numTries,
      String messageOnRetry
  ) throws IOException
  {
    try {
      return RetryUtils.retry(
          () -> {
            try (InputStream in = inputSource.openStream();
                 OutputStream out = new FileOutputStream(outFile)) {
              return IOUtils.copyLarge(in, out, fetchBuffer);
            }
          },
          retryCondition,
          outFile::delete,
          numTries,
          messageOnRetry
      );
    }
    catch (Exception e) {
      // Callers only have to deal with IOException; the real failure is preserved as the cause.
      throw new IOException(e);
    }
  }

  /**
   * Returns the size of {@code file} in bytes. For a directory, the result is the sum of {@link File#length()} over
   * its immediate children only (a depth of 1); nested directories are not descended into.
   *
   * @param file file or directory to measure, may be null
   *
   * @return the computed size; 0 if {@code file} is null or if the directory's children cannot be listed
   */
  public static long getFileSize(File file)
  {
    if (file == null) {
      return 0;
    }
    if (!file.isDirectory()) {
      return file.length();
    }

    // listFiles() yields null when the directory cannot be listed.
    final File[] entries = file.listFiles();
    return entries == null ? 0 : Arrays.stream(entries).mapToLong(File::length).sum();
  }

  /**
   * Creates a temporary directory inside the configured temporary space (java.io.tmpdir), without supplying a
   * prefix. Similar to the method {@link com.google.common.io.Files#createTempDir()} from Guava, but has nicer error
   * messages.
   *
   * @return the newly created directory
   *
   * @throws IllegalStateException if the directory could not be created
   */
  public static File createTempDir()
  {
    final String noPrefix = null;
    return createTempDir(noPrefix);
  }

  /**
   * Creates a temporary directory inside the configured temporary space (java.io.tmpdir). Similar to the method
   * {@link com.google.common.io.Files#createTempDir()} from Guava, but has nicer error messages.
   *
   * @param prefix base directory name; if null/empty then this method will use "druid"
   *
   * @return the newly created directory
   *
   * @throws IllegalStateException if the directory could not be created
   */
  public static File createTempDir(@Nullable final String prefix)
  {
    final Path tmpRoot = getTempDir();
    return createTempDirInLocation(tmpRoot, prefix);
  }

  /**
   * Returns the location named by the {@code java.io.tmpdir} system property as a {@link Path}.
   *
   * @throws IllegalStateException if the system property is not set
   */
  public static Path getTempDir()
  {
    final String tmpDirProperty = System.getProperty("java.io.tmpdir");

    if (tmpDirProperty != null) {
      return new File(tmpDirProperty).toPath();
    }

    // Not expected.
    throw new ISE("System property java.io.tmpdir is not set, cannot create temporary directories");
  }

  /**
   * Creates a temporary directory underneath {@code parentDirectory}, translating the most common failures into
   * exceptions with clearer messages.
   *
   * @param parentDirectory directory in which the temporary directory is created
   * @param prefix          base directory name; if null/empty then this method will use "druid"
   *
   * @return the newly created directory
   *
   * @throws IllegalStateException if the directory could not be created; the underlying {@link IOException} is
   *                               attached as the cause
   */
  @SuppressForbidden(reason = "Files#createTempDirectory")
  public static File createTempDirInLocation(final Path parentDirectory, @Nullable final String prefix)
  {
    try {
      final Path tmpPath = Files.createTempDirectory(
          parentDirectory,
          prefix == null || prefix.isEmpty() ? "druid" : prefix
      );
      return tmpPath.toFile();
    }
    catch (IOException e) {
      // Some inspection to improve error messages.
      // The exception message may be null, so guard the substring check; otherwise a NullPointerException would
      // hide the real failure.
      final String message = e.getMessage();
      final boolean readOnlyFileSystem = e instanceof FileSystemException
                                         && message != null
                                         && message.contains("Read-only file system");

      if (e instanceof NoSuchFileException && !parentDirectory.toFile().exists()) {
        throw new ISE(e, "Path [%s] does not exist", parentDirectory);
      } else if (readOnlyFileSystem || e instanceof AccessDeniedException) {
        throw new ISE(e, "Path [%s] is not writable, check permissions", parentDirectory);
      } else {
        // Well, maybe it was something else.
        throw new ISE(e, "Failed to create temporary directory in path [%s]", parentDirectory);
      }
    }
  }

  /**
   * Ensures that "directory" exists, creating it and any missing intermediate directories. Returns quietly when the
   * directory was created or is already present; throws an IOException otherwise.
   *
   * Simpler to use than {@link File#mkdirs()}, and more reliable since it is safe from races where two threads try
   * to create the same directory at the same time.
   *
   * The name is inspired by UNIX {@code mkdir -p}, which has the same behavior.
   *
   * @param directory directory that should exist after this call
   *
   * @throws IOException if the directory does not exist and could not be created
   */
  @SuppressForbidden(reason = "File#mkdirs")
  public static void mkdirp(final File directory) throws IOException
  {
    // mkdirs() reports failure to the loser of a concurrent creation race, so a failed mkdirs() is only an error
    // when the directory is still absent afterwards.
    if (directory.mkdirs() || directory.isDirectory()) {
      return;
    }

    throw new IOE("Cannot create directory [%s]", directory);
  }

  /**
   * Deletes a directory by delegating to {@link org.apache.commons.io.FileUtils#deleteDirectory(File)}, to which
   * this method is equivalent. Exists here mostly so callers can avoid dealing with our FileUtils and the Commons
   * FileUtils having the same name.
   *
   * @param directory directory to delete
   *
   * @throws IOException if the Commons IO call fails
   */
  @SuppressForbidden(reason = "FilesUtils#deleteDirectory")
  public static void deleteDirectory(final File directory) throws IOException
  {
    org.apache.commons.io.FileUtils.deleteDirectory(directory);
  }

  /**
   * Hard-link "src" as "dest", if possible. If not possible -- perhaps they are on separate filesystems, or the
   * filesystem does not support hard links at all -- then copy "src" to "dest", replacing "dest" if it exists.
   *
   * @param src  existing file to link or copy from
   * @param dest location of the link or copy
   *
   * @return whether a link or copy was made. Can be safely ignored if you don't care.
   *
   * @throws IOException if neither linking nor copying worked; the link failure is attached to the copy failure as a
   *                     suppressed exception
   */
  public static LinkOrCopyResult linkOrCopy(final File src, final File dest) throws IOException
  {
    try {
      Files.createLink(dest.toPath(), src.toPath());
      return LinkOrCopyResult.LINK;
    }
    catch (IOException | UnsupportedOperationException linkFailure) {
      // Files.createLink signals "hard links are not supported here" with UnsupportedOperationException rather than
      // IOException, so both must trigger the copy fallback.
      try {
        Files.copy(src.toPath(), dest.toPath(), StandardCopyOption.REPLACE_EXISTING);
      }
      catch (IOException copyFailure) {
        // Keep the reason linking failed; it is often the more informative of the two.
        copyFailure.addSuppressed(linkFailure);
        throw copyFailure;
      }
      return LinkOrCopyResult.COPY;
    }
  }

  /**
   * Callback that writes to an {@link OutputStream} and produces a result, used by
   * {@link #writeAtomically(File, OutputStreamConsumer)}. Being an interface with a single method, it can be
   * implemented with a lambda.
   *
   * @param <T> type of the value produced after writing
   */
  @FunctionalInterface
  public interface OutputStreamConsumer<T>
  {
    /**
     * @param outputStream stream to write to
     *
     * @return a caller-defined result
     *
     * @throws IOException if writing fails
     */
    T apply(OutputStream outputStream) throws IOException;
  }

  /**
   * Source of input streams. Like {@link ByteSource}, but this is an interface, which allows use of lambdas.
   */
  @FunctionalInterface
  public interface InputStreamSupplier
  {
    /**
     * @return an input stream to read from
     *
     * @throws IOException if the stream cannot be opened
     */
    InputStream openStream() throws IOException;
  }
}