All downloads are free. Search and download functionality uses the official Maven repository.

edu.isi.nlp.files.FileUtils Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.files;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Predicates.not;
import static com.google.common.collect.Iterables.skip;
import static com.google.common.collect.Iterables.transform;
import static java.nio.file.Files.walkFileTree;

import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Splitter;
import com.google.common.collect.Collections2;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.io.ByteSink;
import com.google.common.io.ByteSource;
import com.google.common.io.CharSink;
import com.google.common.io.CharSource;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;
import com.google.common.primitives.Ints;
import edu.isi.nlp.IsiNlpImmutable;
import edu.isi.nlp.StringUtils;
import edu.isi.nlp.collections.KeyValueSink;
import edu.isi.nlp.collections.MapUtils;
import edu.isi.nlp.io.GZIPByteSink;
import edu.isi.nlp.io.GZIPByteSource;
import edu.isi.nlp.symbols.Symbol;
import edu.isi.nlp.symbols.SymbolUtils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.charset.Charset;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.immutables.value.Value;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utilities for working with files.
 *
 * @author Ryan Gabbard, Jay DeYoung, Constantine Lignos, Nicolas Ward
 */
@Value.Enclosing
public final class FileUtils {

  // Class-wide SLF4J logger; also the default logger returned by BackupRequest.logger().
  private static final Logger log = LoggerFactory.getLogger(FileUtils.class);

  /** Not instantiable: this class only holds static utilities, so construction always fails. */
  private FileUtils() {
    throw new UnsupportedOperationException();
  }

  /**
   * Makes sure the directory that would contain {@code f} exists, creating it together with any
   * missing ancestors. Does nothing when {@code f} has no parent path component.
   *
   * @param f the file whose parent directory is required
   * @throws IOException if the directories cannot be created
   */
  public static void ensureParentDirectoryExists(File f) throws IOException {
    final File containingDir = f.getParentFile();
    if (containingDir == null) {
      return;
    }
    java.nio.file.Files.createDirectories(containingDir.toPath());
  }

  /**
   * Reads a UTF-8 encoded file listing one filename per line and returns the corresponding File
   * objects. Delegates to {@link #loadFileList(CharSource)}, so blank lines and lines beginning
   * with "#" are ignored.
   *
   * @param fileList the file containing the list
   * @throws IOException if the file cannot be read
   */
  public static ImmutableList loadFileList(final File fileList) throws IOException {
    final CharSource utf8Source = Files.asCharSource(fileList, Charsets.UTF_8);
    return loadFileList(utf8Source);
  }

  /**
   * Takes a {@link com.google.common.io.CharSource} with filenames listed one per line and returns
   * a list of the corresponding File objects. Leading and trailing whitespace is trimmed from each
   * filename. Ignores blank (empty or whitespace-only) lines and lines beginning with "#".
   *
   * @param source the source to read filenames from
   * @return the listed files, in the order they appear
   * @throws IOException if the source cannot be read
   */
  public static ImmutableList<File> loadFileList(final CharSource source) throws IOException {
    final ImmutableList.Builder<File> ret = ImmutableList.builder();

    for (final String line : source.readLines()) {
      // Trim before testing for emptiness so that a whitespace-only line is skipped instead of
      // being turned into new File("").
      final String filename = line.trim();
      if (!filename.isEmpty() && !isCommentLine(line)) {
        ret.add(new File(filename));
      }
    }

    return ret.build();
  }

  /**
   * Takes filenames and returns the corresponding files. Leading and trailing whitespace is
   * trimmed from each name, and names that are empty after trimming are ignored.
   *
   * @param fileNames the filenames to convert
   * @return the files, in iteration order of {@code fileNames}
   */
  public static ImmutableList<File> loadFileList(final Iterable<String> fileNames)
      throws IOException {
    final ImmutableList.Builder<File> ret = ImmutableList.builder();

    for (final String rawName : fileNames) {
      // Trim first: a whitespace-only entry must be skipped, not become new File("").
      final String filename = rawName.trim();
      if (!filename.isEmpty()) {
        ret.add(new File(filename));
      }
    }

    return ret.build();
  }

  /**
   * Writes the absolute path of every given file, one per line and in iteration order. The lines
   * are written through {@code writeUnixLines}, i.e. each is terminated by a Unix newline.
   *
   * @param files the files whose absolute paths are written
   * @param sink where the lines are written
   * @throws IOException if writing fails
   */
  public static void writeFileList(Iterable files, CharSink sink) throws IOException {
    writeUnixLines(transform(files, toAbsolutePathFunction()), sink);
  }

  /**
   * Variant of {@link #loadFileList(java.io.File)} that reads the list through GZIP decompression
   * when the list file's name ends in ".gz" or ".tgz". Handy for large document lists that are
   * stored compressed. The content is read as UTF-8 either way.
   *
   * @param fileList the (possibly compressed) list file
   * @throws IOException if the file cannot be read
   */
  public static ImmutableList loadPossiblyCompressedFileList(File fileList)
      throws IOException {
    final String listName = fileList.getName();
    final boolean gzipped = listName.endsWith(".gz") || listName.endsWith(".tgz");
    final CharSource source =
        gzipped
            ? GZIPByteSource.fromCompressed(fileList).asCharSource(Charsets.UTF_8)
            : Files.asCharSource(fileList, Charsets.UTF_8);
    return loadFileList(source);
  }

  /**
   * Takes a UTF-8 file with relative pathnames listed one per line and returns a list of the
   * corresponding {@link java.io.File} objects, resolved against the provided base path using the
   * {@link java.io.File#File(java.io.File, String)} constructor. Leading and trailing whitespace
   * is trimmed from each pathname. Ignores blank (empty or whitespace-only) lines and lines
   * beginning with "#".
   *
   * @param fileList the file containing the relative pathnames
   * @param basePath the directory the pathnames are resolved against; may not be null
   * @return the resolved files, in the order they are listed
   * @throws IOException if {@code fileList} cannot be read
   */
  public static ImmutableList<File> loadFileListRelativeTo(File fileList, File basePath)
      throws IOException {
    checkNotNull(basePath);
    final ImmutableList.Builder<File> ret = ImmutableList.builder();

    for (final String line : Files.readLines(fileList, Charsets.UTF_8)) {
      // Trim before the emptiness test; otherwise a whitespace-only line would resolve to the
      // base path itself.
      final String filename = line.trim();
      if (!filename.isEmpty() && !isCommentLine(line)) {
        ret.add(new File(basePath, filename));
      }
    }

    return ret.build();
  }

  /**
   * Returns another file just like the input but with a different extension. If the input file's
   * name contains a ".", everything after the last "." is replaced with {@code newExtension}.
   * Otherwise, "." and {@code newExtension} are appended to the path. Only the final path
   * component is examined, so dots in parent directory names are left alone. Note that unless you
   * want double .s, newExtension should not begin with a .
   *
   * @param f the file to derive from; may not be null or an existing directory
   * @param newExtension the replacement extension; may not be null
   * @return a file at the absolute path of {@code f} with the extension swapped
   */
  public static File swapExtension(final File f, final String newExtension) {
    checkNotNull(f);
    checkNotNull(newExtension);
    Preconditions.checkArgument(!f.isDirectory());

    final String absolutePath = f.getAbsolutePath();
    final int dotIndex = absolutePath.lastIndexOf('.');
    // A dot only marks an extension when it lies after the last separator; otherwise it belongs
    // to a parent directory name (e.g. "/data/v1.2/notes") and the path must not be cut there.
    final int lastSeparatorIndex = absolutePath.lastIndexOf(File.separatorChar);
    final String basePath =
        dotIndex > lastSeparatorIndex ? absolutePath.substring(0, dotIndex) : absolutePath;

    return new File(String.format("%s.%s", basePath, newExtension));
  }

  /**
   * Builds a new {@link File} whose path is the absolute path of {@code f} followed by "." and
   * the given extension.
   *
   * @param f the base file; may not be null or an existing directory
   * @param extension the extension to append; may not be null or empty
   */
  public static File addExtension(final File f, final String extension) {
    checkNotNull(f);
    checkNotNull(extension);
    checkArgument(!extension.isEmpty());
    checkArgument(!f.isDirectory());

    return new File(f.getAbsolutePath() + "." + extension);
  }

  /**
   * Reads a symbol-to-file map from a UTF-8 encoded file by delegating to {@link
   * #loadSymbolToFileMap(CharSource)}.
   *
   * @throws IOException if the file cannot be read
   */
  public static ImmutableMap loadSymbolToFileMap(final File f) throws IOException {
    final CharSource utf8Source = Files.asCharSource(f, Charsets.UTF_8);
    return loadSymbolToFileMap(utf8Source);
  }

  /**
   * Reads a map from a {@link CharSource} via {@code loadMap}, converting keys with {@link
   * SymbolUtils#symbolizeFunction()} and values with {@code FileFunction.INSTANCE}.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableMap loadSymbolToFileMap(final CharSource source)
      throws IOException {
    return loadMap(
        source,
        SymbolUtils.symbolizeFunction(),
        FileFunction.INSTANCE);
  }

  /**
   * Reads a list multimap from a {@link CharSource} via {@code loadMultimap}, converting keys with
   * {@link SymbolUtils#symbolizeFunction()} and values with {@code FileFunction.INSTANCE}.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableListMultimap loadSymbolToFileListMultimap(
      final CharSource source) throws IOException {
    return loadMultimap(
        source,
        SymbolUtils.symbolizeFunction(),
        FileFunction.INSTANCE);
  }

  /**
   * Writes every entry of a symbol-to-file map to the sink by handing the map's entry set to
   * {@code writeSymbolToFileEntries}, which emits one line per entry with the key and the file's
   * absolute path separated by a single tab.
   *
   * @param symbolToFileMap the map to write
   * @param sink where the lines are written
   * @throws IOException if writing fails
   */
  public static void writeSymbolToFileMap(Map symbolToFileMap, CharSink sink)
      throws IOException {
    writeSymbolToFileEntries(
        symbolToFileMap.entrySet(),
        sink);
  }

  // Function applied to each entry in writeSymbolToFileEntries to produce one output line; it is
  // built by MapUtils.toStringWithKeyValueSeparator with a tab as the separator.
  // NOTE(review): the declared type reads "Function, String>", which is not valid Java. The type
  // arguments appear to have been lost (presumably Function<Map.Entry<...>, String>) — confirm
  // against the upstream source before relying on this declaration.
  private static final Function, String> TO_TAB_SEPARATED_ENTRY =
      MapUtils.toStringWithKeyValueSeparator("\t");

  /**
   * Writes map entries from symbols to file absolute paths to a file. Each line has a mapping with
   * the key and value separated by a single tab. The file will have a trailing newline. Note that
   * the same "key" may appear in the file with multiple mappings.
   *
   * <p>NOTE(review): the parameter type reads {@code Iterable> entries}, which is not valid Java;
   * the type arguments appear to have been lost (presumably an iterable of map entries) — confirm
   * against the upstream source.
   *
   * @param entries the entries to write; values are converted with {@link
   *     #toAbsolutePathFunction()}
   * @param sink where the lines are written
   * @throws IOException if writing fails
   */
  public static void writeSymbolToFileEntries(
      final Iterable> entries, final CharSink sink) throws IOException {

    // Values are first mapped to absolute paths, then each entry is rendered as a tab-separated
    // line and written via writeUnixLines.
    writeUnixLines(
        transform(
            MapUtils.transformValues(entries, toAbsolutePathFunction()), TO_TAB_SEPARATED_ENTRY),
        sink);
  }

  /**
   * Loads a symbol-to-file map with {@link #loadSymbolToFileMap(CharSource)} and returns a view of
   * it (see {@link Maps#transformValues}) whose values are UTF-8 {@link CharSource}s for the
   * mapped files.
   *
   * @throws IOException if the source cannot be read
   */
  public static Map loadSymbolToFileCharSourceMap(CharSource source)
      throws IOException {
    return Maps.transformValues(
        loadSymbolToFileMap(source),
        FileUtils.asUTF8CharSourceFunction());
  }

  /**
   * Loads a {@link Map} from a UTF-8 encoded {@link File} in which every line holds a key, a tab
   * character ("\t"), and a value. Delegates to {@link #loadStringToFileMap(CharSource)}; blank
   * lines and lines beginning with "#" are ignored.
   *
   * @throws IOException if the file cannot be read
   */
  public static Map loadStringToFileMap(final File f) throws IOException {
    final CharSource utf8Source = Files.asCharSource(f, Charsets.UTF_8);
    return loadStringToFileMap(utf8Source);
  }

  /**
   * Loads a {@link Map} from a {@link CharSource} in which every line holds a key, a tab character
   * ("\t"), and a value. Keys are passed through unchanged and values are converted with {@code
   * FileFunction.INSTANCE}. Blank lines and lines beginning with "#" are ignored.
   *
   * @throws IOException if the source cannot be read
   */
  public static Map loadStringToFileMap(final CharSource source) throws IOException {
    return loadMap(
        source,
        Functions.identity(),
        FileFunction.INSTANCE,
        IsCommentLine.INSTANCE);
  }

  /**
   * Loads an {@link ImmutableListMultimap} from a {@link CharSource} in which every line holds a
   * key, a tab character ("\t"), and a value. Keys are passed through unchanged and values are
   * converted with {@code FileFunction.INSTANCE}. Blank lines and lines beginning with "#" are
   * ignored.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableListMultimap loadStringToFileListMultimap(
      final CharSource source) throws IOException {
    return loadMultimap(
        source,
        Functions.identity(),
        FileFunction.INSTANCE,
        IsCommentLine.INSTANCE);
  }

  /**
   * Loads an {@link ImmutableMap} from a {@link CharSource} in which every line holds a key, a tab
   * character ("\t"), and a value. Blank lines and lines beginning with "#" are ignored.
   *
   * @param source the source to read
   * @param keyFunction converts the key text of each line into a map key
   * @param valueFunction converts the value text of each line into a map value
   * @throws IOException if the source cannot be read
   */
  public static ImmutableMap loadMap(
      final CharSource source,
      final Function keyFunction,
      final Function valueFunction)
      throws IOException {
    // Same as the four-argument overload with comment lines as the lines to skip.
    return loadMap(source, keyFunction, valueFunction, IsCommentLine.INSTANCE);
  }

  /**
   * Loads an {@link ImmutableMap} from a {@link CharSource} in which every line holds a key, a tab
   * character ("\t"), and a value. Blank lines and lines matching {@code skipLinePredicate} are
   * ignored.
   *
   * @param source the source to read
   * @param keyFunction converts the key text of each line into a map key
   * @param valueFunction converts the value text of each line into a map value
   * @param skipLinePredicate selects the lines to ignore
   * @throws IOException if the source cannot be read
   */
  public static ImmutableMap loadMap(
      final CharSource source,
      final Function keyFunction,
      final Function valueFunction,
      final Predicate skipLinePredicate)
      throws IOException {
    final ImmutableMap.Builder mapBuilder = ImmutableMap.builder();
    loadMapToSink(
        source,
        MapUtils.asMapSink(mapBuilder),
        keyFunction,
        valueFunction,
        skipLinePredicate);
    return mapBuilder.build();
  }

  /**
   * Loads an {@link ImmutableMap} from a UTF-8 encoded {@link File} in which every line holds a
   * key, a tab character ("\t"), and a value. Blank lines and lines beginning with "#" are
   * ignored.
   *
   * @param file the file to read
   * @param keyFunction converts the key text of each line into a map key
   * @param valueFunction converts the value text of each line into a map value
   * @throws IOException if the file cannot be read
   */
  public static ImmutableMap loadMap(
      final File file,
      final Function keyFunction,
      final Function valueFunction)
      throws IOException {
    final CharSource utf8Source = Files.asCharSource(file, Charsets.UTF_8);
    return loadMap(utf8Source, keyFunction, valueFunction);
  }

  /**
   * Loads an {@link ImmutableListMultimap} from a {@link CharSource} in which every line holds a
   * key, a tab character ("\t"), and a value. Blank lines and lines beginning with "#" are
   * ignored.
   *
   * @param source the source to read
   * @param keyFunction converts the key text of each line into a multimap key
   * @param valueFunction converts the value text of each line into a multimap value
   * @throws IOException if the source cannot be read
   */
  public static ImmutableListMultimap loadMultimap(
      final CharSource source,
      final Function keyFunction,
      final Function valueFunction)
      throws IOException {
    // Same as the four-argument overload with comment lines as the lines to skip.
    return loadMultimap(source, keyFunction, valueFunction, IsCommentLine.INSTANCE);
  }

  /**
   * Loads an {@link ImmutableListMultimap} from a UTF-8 encoded {@link File} in which every line
   * holds a key, a tab character ("\t"), and a value. Blank lines and lines beginning with "#" are
   * ignored.
   *
   * @param file the file to read
   * @param keyFunction converts the key text of each line into a multimap key
   * @param valueFunction converts the value text of each line into a multimap value
   * @throws IOException if the file cannot be read
   */
  public static ImmutableListMultimap loadMultimap(
      final File file,
      final Function keyFunction,
      final Function valueFunction)
      throws IOException {
    final CharSource utf8Source = Files.asCharSource(file, Charsets.UTF_8);
    return loadMultimap(utf8Source, keyFunction, valueFunction, IsCommentLine.INSTANCE);
  }

  /**
   * Loads an {@link ImmutableListMultimap} from a {@link CharSource} in which every line holds a
   * key, a tab character ("\t"), and a value. Lines matching {@code skipLinePredicate} are
   * ignored.
   *
   * @param source the source to read
   * @param keyFunction converts the key text of each line into a multimap key
   * @param valueFunction converts the value text of each line into a multimap value
   * @param skipLinePredicate selects the lines to ignore
   * @throws IOException if the source cannot be read
   */
  public static ImmutableListMultimap loadMultimap(
      final CharSource source,
      final Function keyFunction,
      final Function valueFunction,
      final Predicate skipLinePredicate)
      throws IOException {
    final ImmutableListMultimap.Builder multimapBuilder = ImmutableListMultimap.builder();
    loadMapToSink(
        source,
        MapUtils.asMapSink(multimapBuilder),
        keyFunction,
        valueFunction,
        skipLinePredicate);
    return multimapBuilder.build();
  }

  /**
   * Feeds the lines of {@code source} to a {@code MapLineProcessor} built from the given sink,
   * conversion functions and skip predicate. Lines are split on tabs with the pieces trimmed.
   *
   * @throws IOException if the source cannot be read
   */
  private static void loadMapToSink(
      final CharSource source,
      final KeyValueSink mapSink,
      final Function keyFunction,
      final Function valueFunction,
      final Predicate skipLinePredicate)
      throws IOException {
    final Splitter tabSplitter = Splitter.on("\t").trimResults();
    // Streaming through a LineProcessor avoids holding the whole file in memory, which matters
    // for multi-gigabyte Gigaword-scale maps.
    final MapLineProcessor processor =
        new MapLineProcessor<>(
            mapSink, keyFunction, valueFunction, skipLinePredicate, tabSplitter);
    source.readLines(processor);
  }

  /**
   * Writes a single integer to the beginning of a file, overwriting what was there originally but
   * leaving the rest of the file intact. This is useful when you are writing a long binary file
   * with a size header, but don't know how many elements are there until the end.
   *
   * @param f the file to patch; it is opened in "rw" mode
   * @param num the value to write as four bytes at offset 0
   * @throws IOException if the file cannot be opened or written
   */
  public static void writeIntegerToStart(final File f, final int num) throws IOException {
    // try-with-resources guarantees the file handle is released even when writeInt throws.
    try (RandomAccessFile fixupFile = new RandomAccessFile(f, "rw")) {
      fixupFile.writeInt(num);
    }
  }

  /**
   * Reads an int array stored as a four-byte element count followed by that many four-byte ints
   * (the layout read by {@link DataInputStream#readInt()}).
   *
   * @param inSup supplies the bytes to read
   * @param compressed whether the bytes must be GZIP-decompressed first
   * @return the decoded array
   * @throws IOException if the stream cannot be opened or read
   */
  public static int[] loadBinaryIntArray(final ByteSource inSup, final boolean compressed)
      throws IOException {
    final InputStream rawStream = inSup.openStream();
    InputStream dataStream = rawStream;
    if (compressed) {
      try {
        dataStream = new GZIPInputStream(rawStream);
      } catch (final IOException e) {
        // The GZIP wrapper failed to initialize, so the underlying stream must be closed here.
        rawStream.close();
        throw e;
      }
    }

    try (DataInputStream dis = new DataInputStream(dataStream)) {
      final int[] values = new int[dis.readInt()];
      int idx = 0;
      while (idx < values.length) {
        values[idx] = dis.readInt();
        ++idx;
      }
      return values;
    }
  }

  /**
   * Reads a UTF-8 text file containing one integer per line into an array, in line order.
   *
   * @param f the file to read
   * @throws NumberFormatException if a line is not parseable by {@link Integer#parseInt(String)}
   * @throws IOException if the file cannot be read
   */
  public static int[] loadTextIntArray(final File f) throws NumberFormatException, IOException {
    final List<String> lines = Files.readLines(f, Charsets.UTF_8);
    final int[] parsed = new int[lines.size()];

    for (int i = 0; i < parsed.length; ++i) {
      parsed[i] = Integer.parseInt(lines.get(i));
    }

    return parsed;
  }

  /**
   * Writes an int array as a four-byte element count followed by each element as four bytes (the
   * layout written by {@link DataOutputStream#writeInt(int)}).
   *
   * @param arr the values to write
   * @param outSup the sink that receives the bytes
   * @throws IOException if the sink cannot be opened or written
   */
  public static void writeBinaryIntArray(final int[] arr, final ByteSink outSup)
      throws IOException {
    // Closing the DataOutputStream also closes the buffered stream it wraps.
    try (DataOutputStream dos = new DataOutputStream(outSup.openBufferedStream())) {
      dos.writeInt(arr.length);
      for (int i = 0; i < arr.length; ++i) {
        dos.writeInt(arr[i]);
      }
    }
  }

  /**
   * Backs up {@code f} by building a {@link BackupRequest} with all other settings left at their
   * defaults and executing it.
   *
   * @throws IOException if the backup fails
   */
  public static void backup(final File f) throws IOException {
    final BackupRequest request = new BackupRequest.Builder().fileToBackup(f).build();
    request.doBackup();
  }

  /**
   * Backs up {@code f} by building a {@link BackupRequest} with the given backup extension and
   * executing it.
   *
   * @throws IOException if the backup fails
   */
  public static void backup(final File f, final String extension) throws IOException {
    final BackupRequest request =
        new BackupRequest.Builder().fileToBackup(f).extension(extension).build();
    request.doBackup();
  }

  /**
   * Describes the backup of a single file. Nothing happens until {@link #doBackup()} is called.
   */
  @IsiNlpImmutable
  @Value.Immutable
  public abstract static class BackupRequest {

    /** The file to back up. */
    public abstract File fileToBackup();

    /**
     * A human-readable name for what is being backed up (e.g. "geonames database"). A log message
     * is written only when this is present.
     */
    public abstract Optional nameOfThingToBackup();

    /** The logger that receives the log message. Defaults to the logger of {@link FileUtils}. */
    @Value.Default
    public Logger logger() {
      return FileUtils.log;
    }

    /**
     * The extension appended to the backup file's name; the separating "." is added
     * automatically. Defaults to "bak".
     */
    @Value.Default
    public String extension() {
      return "bak";
    }

    /** Whether the original is moved rather than copied. Defaults to {@code false}. */
    @Value.Default
    public boolean deleteOriginal() {
      return false;
    }

    /** Rejects requests with an empty extension. */
    @Value.Check
    protected void check() {
      checkArgument(!extension().isEmpty(), "Backup extension may not be empty");
    }

    /**
     * Executes the request. Does nothing unless {@link #fileToBackup()} is an existing regular
     * file; otherwise moves or copies it to the same path with the extension added, replacing any
     * existing backup.
     *
     * @throws IOException if the move or copy fails
     */
    public final void doBackup() throws IOException {
      final File original = fileToBackup();
      if (!original.isFile()) {
        return;
      }

      final File backupFile = addExtension(original, extension());
      final boolean moveOriginal = deleteOriginal();
      if (moveOriginal) {
        java.nio.file.Files.move(
            original.toPath(), backupFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
      } else {
        java.nio.file.Files.copy(
            original.toPath(), backupFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
      }

      if (!nameOfThingToBackup().isPresent()) {
        return;
      }
      final String operationMessage = moveOriginal ? "Moved" : "Copied";
      logger()
          .info(
              "{} existing {} from {} to {}",
              operationMessage,
              nameOfThingToBackup().get(),
              original.getAbsolutePath(),
              backupFile.getAbsolutePath());
    }

    /** Builder for {@link BackupRequest}; extends the builder of the generated implementation. */
    public static class Builder extends ImmutableFileUtils.BackupRequest.Builder {}
  }

  /**
   * Returns a File for a sibling of {@code f}: an entry with the given name inside {@code f}'s
   * parent directory.
   *
   * @param f the reference file; a runtime exception is thrown if it has no parent (e.g. the
   *     filesystem root)
   * @param siblingDirName the non-empty name of the sibling directory
   */
  public static File siblingDirectory(final File f, final String siblingDirName) {
    checkNotNull(f);
    checkNotNull(siblingDirName);
    checkArgument(!siblingDirName.isEmpty());

    final File parent = f.getParentFile();
    if (parent == null) {
      throw new RuntimeException(
          String.format(
              "Cannot create sibling directory %s of %s because the latter has no parent.",
              siblingDirName, f));
    }
    return new File(parent, siblingDirName);
  }

  /**
   * Opens a UTF-8 {@link BufferedReader} over a file, optionally decompressing its content with
   * GZIP. The caller is responsible for closing the returned reader.
   *
   * @param f the file to read
   * @param compressed whether the file content is GZIP-compressed
   * @throws IOException if the file cannot be opened or the GZIP header cannot be read
   */
  public static BufferedReader optionallyCompressedBufferedReader(
      final File f, final boolean compressed) throws IOException {
    final InputStream fileStream = new BufferedInputStream(new FileInputStream(f));
    InputStream textStream = fileStream;
    if (compressed) {
      try {
        textStream = new GZIPInputStream(fileStream);
      } catch (final IOException e) {
        // Nothing else holds the file stream at this point, so close it before propagating.
        fileStream.close();
        throw e;
      }
    }
    final InputStreamReader utf8Reader = new InputStreamReader(textStream, Charsets.UTF_8);
    return new BufferedReader(utf8Reader);
  }

  /**
   * Loads a symbol list from a UTF-8 encoded file by delegating to {@link
   * #loadSymbolList(CharSource)}.
   *
   * @throws IOException if the file cannot be read
   */
  public static ImmutableList loadSymbolList(final File symbolListFile) throws IOException {
    final CharSource utf8Source = Files.asCharSource(symbolListFile, Charsets.UTF_8);
    return loadSymbolList(utf8Source);
  }

  /**
   * Loads {@link Symbol}s from a source holding one per line. The lines come from {@link
   * #loadStringList(CharSource)}, so comment lines (those starting with "#") are skipped.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableList loadSymbolList(final CharSource source) throws IOException {
    return SymbolUtils.listFrom(
        loadStringList(source));
  }

  /** Returns a function mapping a {@link File} to its name, as given by {@link File#getName()}. */
  public static Function<File, String> toNameFunction() {
    return ToNameEnum.INSTANCE;
  }

  /** Enum singleton backing {@link #toNameFunction()}. */
  private enum ToNameEnum implements Function<File, String> {
    INSTANCE;

    @Override
    public String apply(final File f) {
      return f.getName();
    }
  }

  /**
   * Returns a function mapping a {@link File} to its absolute path, as given by {@link
   * File#getAbsolutePath()}.
   */
  public static Function<File, String> toAbsolutePathFunction() {
    return ToAbsolutePathFunction.INSTANCE;
  }

  /** Enum singleton backing {@link #toAbsolutePathFunction()}. */
  private enum ToAbsolutePathFunction implements Function<File, String> {
    INSTANCE;

    @Override
    public String apply(final File input) {
      return input.getAbsolutePath();
    }
  }

  /**
   * Tests whether the given path is an existing directory with no entries.
   *
   * @param directory the path to test
   * @return {@code true} only if {@code directory} is a directory whose listing is empty; {@code
   *     false} if it does not exist, is not a directory, or cannot be listed
   */
  public static boolean isEmptyDirectory(final File directory) {
    // File.list() returns null when the path is not a directory or the listing fails with an I/O
    // error, so the null check covers both cases without risking a NullPointerException.
    final String[] children = directory.list();
    return children != null && children.length == 0;
  }

  /**
   * Creates an {@code EndsWithPredicate} for the specified suffix, for testing files by the end of
   * their name.
   *
   * @param suffix May not be null or empty.
   */
  public static Predicate endsWithPredicate(final String suffix) {
    final boolean suffixHasContent = !suffix.isEmpty();
    checkArgument(suffixHasContent);

    return new EndsWithPredicate(suffix);
  }

  /**
   * Loads a file in the format {@code key value1 value2 value3} (tab-separated) into a {@link
   * com.google.common.collect.Multimap} of {@code String} to {@code String}. Each key should only
   * appear on one line, and there should be no duplicate values. Each key and value has whitespace
   * trimmed off. Skips empty lines and comment lines. If a key has no values, it will not show up
   * in the keySet of the returned multimap.
   *
   * @param multimapSource the source to read
   * @throws IOException if the source cannot be read
   */
  public static ImmutableMultimap<String, String> loadStringMultimap(CharSource multimapSource)
      throws IOException {
    final ImmutableMultimap.Builder<String, String> ret = ImmutableMultimap.builder();

    for (final String line : multimapSource.readLines()) {
      if (isCommentLine(line)) {
        continue;
      }
      final List<String> parts = multimapSplitter.splitToList(line);
      if (parts.isEmpty()) {
        continue;
      }
      // First field is the key; every remaining field is one of its values.
      ret.putAll(parts.get(0), skip(parts, 1));
    }

    return ret.build();
  }

  /**
   * Loads a file in the format {@code key value1 value2 value3} (tab-separated) into a {@link
   * com.google.common.collect.Multimap} of {@link Symbol} to Symbol. Each key should only appear on
   * one line, and there should be no duplicate values. Each key and value has whitespace trimmed
   * off. Skips empty lines and allows comment-lines with {@code #} in the first position. If a key
   * has no values, it will not show up in the keySet of the returned multimap.
   */
  public static ImmutableMultimap loadSymbolMultimap(CharSource multimapSource)
      throws IOException {
    final ImmutableMultimap stringMM = loadStringMultimap(multimapSource);
    final ImmutableMultimap.Builder ret = ImmutableMultimap.builder();

    for (final Map.Entry> entry : stringMM.asMap().entrySet()) {
      ret.putAll(
          Symbol.from(entry.getKey()), Collections2.transform(entry.getValue(), Symbol.FromString));
    }

    return ret.build();
  }

  // Splits a map line on tabs into at most two trimmed, non-empty pieces (key and value); used by
  // the private loadStringMap.
  private static final Splitter MAP_SPLITTER =
      Splitter.on("\t").trimResults().omitEmptyStrings().limit(2);

  /**
   * Loads a tab-separated {@code key value} file into a {@link
   * com.google.common.collect.ImmutableMap} of {@link String}s. Keys and values have surrounding
   * whitespace trimmed; empty lines and comment lines are skipped. A line holding only a key is
   * rejected with a {@link RuntimeException}.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableMap loadStringMap(CharSource source) throws IOException {
    final boolean allowEmptyValues = false;
    return loadStringMap(source, allowEmptyValues);
  }

  /**
   * Same as {@link #loadStringMap(CharSource)}, except that a line holding only a key is accepted
   * and mapped to the empty string. This makes it possible to state explicitly that a key has no
   * value.
   *
   * @see FileUtils#loadStringMap(CharSource)
   * @throws IOException if the source cannot be read
   */
  public static ImmutableMap loadStringMapAllowingEmptyValues(CharSource source)
      throws IOException {
    final boolean allowEmptyValues = true;
    return loadStringMap(source, allowEmptyValues);
  }

  /**
   * Shared implementation of the string-map loaders. Comment lines and lines that split into no
   * pieces are skipped. A line with a key and a value is added as-is; a key-only line is added
   * with an empty value when {@code allowEmptyValues} is set and is otherwise rejected.
   *
   * @throws RuntimeException naming the source, the 1-based line number and the line content when
   *     a line is invalid
   * @throws IOException if the source cannot be read
   */
  private static ImmutableMap loadStringMap(
      CharSource source, final boolean allowEmptyValues) throws IOException {
    final ImmutableMap.Builder<String, String> mapBuilder = ImmutableMap.builder();
    final List<String> lines = source.readLines();

    for (int idx = 0; idx < lines.size(); ++idx) {
      final String line = lines.get(idx);
      if (isCommentLine(line)) {
        continue;
      }
      final List<String> parts = MAP_SPLITTER.splitToList(line);
      final int numParts = parts.size();
      if (numParts == 0) {
        continue;
      }

      final String value;
      if (numParts == 2) {
        value = parts.get(1);
      } else if (numParts == 1 && allowEmptyValues) {
        value = "";
      } else {
        throw new RuntimeException(
            "When reading a map from " + source + ", line " + (idx + 1) + " is invalid: " + line);
      }
      mapBuilder.put(parts.get(0), value);
    }

    return mapBuilder.build();
  }

  /**
   * Loads a file in the format {@code key value} (tab-separated) into a {@link
   * com.google.common.collect.ImmutableMap} of {@link Symbol} to {@link Symbol}. The file is
   * parsed by {@link #loadStringMap(CharSource)} and every key and value is then converted with
   * {@link Symbol#from}, so the same format rules apply: whitespace is trimmed and empty and
   * comment lines are skipped.
   *
   * @param source the source to read
   * @throws IOException if the source cannot be read
   */
  public static ImmutableMap<Symbol, Symbol> loadSymbolMap(CharSource source) throws IOException {
    final ImmutableMap<String, String> stringMap = loadStringMap(source);
    final ImmutableMap.Builder<Symbol, Symbol> ret = ImmutableMap.builder();

    for (final Map.Entry<String, String> row : stringMap.entrySet()) {
      ret.put(Symbol.from(row.getKey()), Symbol.from(row.getValue()));
    }

    return ret.build();
  }

  public static void writeSymbolMultimap(Multimap mm, CharSink charSink)
      throws IOException {
    final Joiner tabJoiner = Joiner.on('\t');
    writeUnixLines(
        transform(
            mm.asMap().entrySet(),
            new Function>, String>() {
              @Override
              public String apply(Map.Entry> input) {
                return input.getKey() + "\t" + tabJoiner.join(input.getValue());
              }
            }),
        charSink);
  }

  /**
   * Loads a table of {@link Symbol}s from a source in which every line holds exactly three fields
   * as split by {@code StringUtils.onTabs()}: row key, column key and value. Every line must
   * match; no lines are skipped.
   *
   * @param input the source to read
   * @throws IOException if the source cannot be read or a line does not have exactly three fields;
   *     the message carries the 1-based line number and the line content
   */
  public static ImmutableTable<Symbol, Symbol, Symbol> loadSymbolTable(CharSource input)
      throws IOException {
    final ImmutableTable.Builder<Symbol, Symbol, Symbol> ret = ImmutableTable.builder();

    int lineNo = 0;
    for (final String line : input.readLines()) {
      // Count before validating so error messages use 1-based line numbers, matching the other
      // loaders in this class.
      ++lineNo;
      final List<String> parts = StringUtils.onTabs().splitToList(line);
      if (parts.size() != 3) {
        throw new IOException(
            String.format("Invalid line %d when reading symbol table: %s", lineNo, line));
      }
      ret.put(Symbol.from(parts.get(0)), Symbol.from(parts.get(1)), Symbol.from(parts.get(2)));
    }

    return ret.build();
  }

  // Splits a multimap line on tabs into trimmed, non-empty pieces (key first, then its values);
  // used by loadStringMultimap.
  private static final Splitter multimapSplitter =
      Splitter.on("\t").trimResults().omitEmptyStrings();

  /** Enum singleton backing {@link #asUTF8CharSourceFunction()}. */
  private enum AsUTF8CharSource implements Function<File, CharSource> {
    INSTANCE;

    @Override
    public CharSource apply(File f) {
      return Files.asCharSource(f, Charsets.UTF_8);
    }
  }

  /** Transforms a file to a {@link com.google.common.io.CharSource} with UTF-8 encoding. */
  public static Function<File, CharSource> asUTF8CharSourceFunction() {
    return AsUTF8CharSource.INSTANCE;
  }

  /**
   * Verifies that the supplied path is an existing directory.
   *
   * @param directory the path to check
   * @throws IOException if {@code directory} does not exist or is not a directory
   */
  public static void assertDirectoryExists(File directory) throws IOException {
    if (directory.isDirectory()) {
      return;
    }
    throw new IOException(directory + " does not exist or is not a directory");
  }

  /**
   * Like {@link Files#asByteSource(java.io.File)}, except that the file's byte source is wrapped
   * with {@code GZIPByteSource.fromCompressed} so the data read from it is GZIP-decompressed.
   */
  public static ByteSource asCompressedByteSource(File f) throws IOException {
    final ByteSource rawBytes = Files.asByteSource(f);
    return GZIPByteSource.fromCompressed(rawBytes);
  }

  /**
   * Like {@link Files#asByteSink(java.io.File, com.google.common.io.FileWriteMode...)}, except
   * that the file's byte sink is wrapped with {@code GZIPByteSink.gzipCompress} so the data
   * written to it is GZIP-compressed.
   */
  public static ByteSink asCompressedByteSink(File f) throws IOException {
    final ByteSink rawBytes = Files.asByteSink(f);
    return GZIPByteSink.gzipCompress(rawBytes);
  }

  /**
   * Like {@link Files#asCharSource(java.io.File, java.nio.charset.Charset)}, except that the
   * characters are decoded from {@link #asCompressedByteSource(File)}, i.e. after GZIP
   * decompression.
   */
  public static CharSource asCompressedCharSource(File f, Charset charSet) throws IOException {
    final ByteSource decompressedBytes = asCompressedByteSource(f);
    return decompressedBytes.asCharSource(charSet);
  }

  /**
   * Like {@link Files#asCharSink(java.io.File, java.nio.charset.Charset,
   * com.google.common.io.FileWriteMode...)}, except that the encoded characters go to {@link
   * #asCompressedByteSink(File)}, i.e. they are GZIP-compressed on the way to the file.
   */
  public static CharSink asCompressedCharSink(File f, Charset charSet) throws IOException {
    final ByteSink compressingBytes = asCompressedByteSink(f);
    return compressingBytes.asCharSink(charSet);
  }

  // Guava predicates and functions
  /** Returns a predicate that is true for files for which {@link File#isDirectory()} is true. */
  public static Predicate<File> isDirectoryPredicate() {
    return new Predicate<File>() {
      @Override
      public boolean apply(final File input) {
        return input.isDirectory();
      }
    };
  }

  /** wraps any IOException and throws a RuntimeException */
  public static Function> toLinesFunction(final Charset charset) {
    return new Function>() {
      @Override
      public Iterable apply(final File input) {
        try {
          return Files.readLines(input, charset);
        } catch (IOException e) {
          e.printStackTrace();
          throw new RuntimeException(e);
        }
      }
    };
  }

  /**
   * Loads a set of {@link Symbol}s from a source holding one per line. The symbols come from
   * {@link #loadSymbolList(CharSource)}, so comment lines (those starting with "#") are skipped.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableSet loadSymbolSet(final CharSource source) throws IOException {
    return ImmutableSet.copyOf(
        loadSymbolList(source));
  }

  /**
   * Returns the lines of the provided {@link CharSource} in the order given, leaving out the
   * lines matched by {@code IsCommentLine}.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableList loadStringList(final CharSource source) throws IOException {
    final ImmutableList<String> allLines = source.readLines();
    return ImmutableList.copyOf(Collections2.filter(allLines, not(IsCommentLine.INSTANCE)));
  }

  /**
   * Loads a set of {@link String}s from a source holding one per line. The strings come from
   * {@link #loadStringList(CharSource)}, so comment lines (those starting with "#") are skipped.
   *
   * @throws IOException if the source cannot be read
   */
  public static ImmutableSet loadStringSet(final CharSource source) throws IOException {
    return ImmutableSet.copyOf(
        loadStringList(source));
  }

  /**
   * Deletes a directory together with everything beneath it. A path that does not exist is
   * silently accepted.
   *
   * @param directory the directory to delete
   * @throws IllegalArgumentException if the path exists but is not a directory
   * @throws IOException if a deletion fails
   */
  public static void recursivelyDeleteDirectory(File directory) throws IOException {
    if (directory.exists()) {
      checkArgument(directory.isDirectory(), "Cannot recursively delete a non-directory");
      walkFileTree(directory.toPath(), new DeletionFileVisitor());
    }
  }

  private static class DeletionFileVisitor implements FileVisitor {

    @Override
    public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs)
        throws IOException {
      return FileVisitResult.CONTINUE;
    }

    @Override
    public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs)
        throws IOException {
      java.nio.file.Files.delete(file);
      return FileVisitResult.CONTINUE;
    }

    @Override
    public FileVisitResult visitFileFailed(final Path file, final IOException exc)
        throws IOException {
      return FileVisitResult.CONTINUE;
    }

    @Override
    public FileVisitResult postVisitDirectory(final Path dir, final IOException exc)
        throws IOException {
      java.nio.file.Files.delete(dir);
      return FileVisitResult.CONTINUE;
    }
  }

  /**
   * Registers a JVM shutdown hook which calls {@link #recursivelyDeleteDirectory(File)} on
   * {@code directory}.
   *
   * <p>An {@link IOException} raised by the deletion is rethrown inside the hook thread wrapped in
   * a {@link RuntimeException}.
   *
   * @param directory the directory to delete when the JVM shuts down
   */
  public static void recursivelyDeleteDirectoryOnExit(final File directory) {
    final Runnable deleteDirectory =
        new Runnable() {
          @Override
          public void run() {
            try {
              recursivelyDeleteDirectory(directory);
            } catch (IOException e) {
              throw new RuntimeException(e);
            }
          }
        };
    final Thread shutdownHook = new Thread(deleteDirectory);
    Runtime.getRuntime().addShutdownHook(shutdownHook);
  }

  /**
   * Copies the directory tree rooted at {@code sourceDir} to {@code destDir}.
   *
   * <p>The destination directory is created first (along with any missing parents), then the
   * source tree is walked and mirrored beneath it.
   *
   * @param sourceDir the source directory
   * @param destDir the destination directory, which does not need to already exist
   * @param copyOption options to be used for copying files
   * @throws NullPointerException if any argument is {@code null}
   * @throws IllegalArgumentException if {@code sourceDir} is not a directory
   * @throws IOException if creating the destination or copying an entry fails
   */
  public static void recursivelyCopyDirectory(
      final File sourceDir, final File destDir, final StandardCopyOption copyOption)
      throws IOException {
    checkNotNull(sourceDir);
    checkNotNull(destDir);
    checkArgument(sourceDir.isDirectory(), "Source directory does not exist");

    final Path destRoot = destDir.toPath();
    java.nio.file.Files.createDirectories(destRoot);

    final Path sourceRoot = sourceDir.toPath();
    final CopyFileVisitor copier = new CopyFileVisitor(sourceRoot, destRoot, copyOption);
    walkFileTree(sourceRoot, copier);
  }

  /**
   * A {@link FileVisitor} which mirrors the tree rooted at {@code sourcePath} beneath
   * {@code destPath}: directories are created as they are entered and files are copied with the
   * configured copy option.
   *
   * <p>Errors reported by the tree walk are propagated to the caller, so an incomplete copy is
   * never reported as success.
   */
  private static class CopyFileVisitor implements FileVisitor<Path> {

    private final Path sourcePath;
    private final Path destPath;
    private final StandardCopyOption copyOption;

    /**
     * @param sourcePath root of the tree being copied
     * @param destPath root under which the copy is created
     * @param copyOption option passed to every file copy
     * @throws NullPointerException if any argument is {@code null}
     */
    private CopyFileVisitor(Path sourcePath, Path destPath, final StandardCopyOption copyOption) {
      this.sourcePath = checkNotNull(sourcePath);
      this.destPath = checkNotNull(destPath);
      this.copyOption = checkNotNull(copyOption);
    }

    /** Maps a path inside the source tree to the corresponding path inside the destination. */
    private Path destinationFor(final Path pathInSource) {
      return destPath.resolve(sourcePath.relativize(pathInSource));
    }

    /** Creates the destination counterpart of {@code dir} unless something already exists there. */
    @Override
    public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs)
        throws IOException {
      final Path newPath = destinationFor(dir);
      if (!java.nio.file.Files.exists(newPath)) {
        java.nio.file.Files.createDirectory(newPath);
      }
      return FileVisitResult.CONTINUE;
    }

    /** Copies {@code file} to its destination counterpart. */
    @Override
    public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs)
        throws IOException {
      java.nio.file.Files.copy(file, destinationFor(file), copyOption);
      return FileVisitResult.CONTINUE;
    }

    /**
     * Called when {@code file} could not be visited.
     *
     * @throws IOException always; {@code exc} is rethrown because quietly ending the walk here
     *     would leave a partial copy that the caller cannot detect
     */
    @Override
    public FileVisitResult visitFileFailed(final Path file, final IOException exc)
        throws IOException {
      throw exc;
    }

    /**
     * Called after the entries of {@code dir} have been visited.
     *
     * @throws IOException {@code exc} if listing the directory ended with an error, since some
     *     entries may then not have been copied
     */
    @Override
    public FileVisitResult postVisitDirectory(final Path dir, final IOException exc)
        throws IOException {
      if (exc != null) {
        throw exc;
      }
      return FileVisitResult.CONTINUE;
    }
  }

  /**
   * Writes {@code lines} to {@code sink}, terminating each line with a Unix line ending
   * ({@code "\n"}).
   *
   * <p>{@link CharSink#writeLines(Iterable)} uses the OS default line separator, whereas our code
   * always works with Unix line endings regardless of platform; use this method instead.
   *
   * @param lines the lines to write
   * @param sink the destination
   * @throws IOException if writing to {@code sink} fails
   */
  public static void writeUnixLines(Iterable lines, CharSink sink)
      throws IOException {
    final String unixLineSeparator = "\n";
    sink.writeLines(lines, unixLineSeparator);
  }

  /**
   * Returns the shared function which turns a {@link String} into a {@link File} using the
   * {@link File} constructor. The returned function rejects {@code null} input.
   *
   * <p>NOTE(review): this is an instance method although it reads no instance state; it may have
   * been intended to be {@code static} - confirm against callers before changing.
   */
  public Function asFileFunction() {
    final FileFunction stringToFile = FileFunction.INSTANCE;
    return stringToFile;
  }

  /** Singleton function which converts a path {@link String} into a {@link File}. */
  private enum FileFunction implements Function<String, File> {
    INSTANCE;

    /**
     * @param input the path name; must not be {@code null}
     * @return a {@link File} for {@code input}
     * @throws NullPointerException if {@code input} is {@code null}
     */
    @Override
    public File apply(final String input) {
      return new File(checkNotNull(input));
    }
  }

  /**
   * A {@link LineProcessor} which splits each line into exactly two fields, converts them with the
   * key and value functions and writes the resulting pair to a {@link KeyValueSink}.
   *
   * <p>Empty lines and lines accepted by the skip predicate are ignored. The processor produces no
   * result of its own; the sink is populated as a side effect.
   *
   * <p>NOTE(review): the sink, function and predicate fields are raw types because their type
   * arguments are not visible from this class - confirm and parameterize.
   */
  private static class MapLineProcessor implements LineProcessor<Void> {

    /** Number of lines seen so far (1-based once a line has been read); used in error messages. */
    private int lineNo;
    private final KeyValueSink mapSink;
    private final Function keyFunction;
    private final Function valueFunction;
    private final Predicate skipLinePredicate;
    private final Splitter splitter;

    /**
     * @param mapSink receives one key/value pair per processed line
     * @param keyFunction converts the first field of a line into the key
     * @param valueFunction converts the second field of a line into the value
     * @param skipLinePredicate lines for which this returns {@code true} are ignored
     * @param splitter divides a line into its fields
     * @throws NullPointerException if any argument is {@code null}
     */
    private MapLineProcessor(
        final KeyValueSink mapSink,
        final Function keyFunction,
        final Function valueFunction,
        final Predicate skipLinePredicate,
        final Splitter splitter) {
      this.mapSink = checkNotNull(mapSink);
      this.keyFunction = checkNotNull(keyFunction);
      this.valueFunction = checkNotNull(valueFunction);
      this.skipLinePredicate = checkNotNull(skipLinePredicate);
      this.splitter = checkNotNull(splitter);
    }

    /**
     * Parses one line and stores the resulting entry in the sink.
     *
     * @return always {@code true}, so that every line of the input is processed
     * @throws RuntimeException if the line does not split into exactly two fields
     * @throws IOException if converting or storing the entry fails with an
     *     {@link IllegalArgumentException}
     */
    @Override
    public boolean processLine(final String line) throws IOException {
      ++lineNo;
      if (line.isEmpty() || skipLinePredicate.apply(line)) {
        // Skip this line and go to the next one
        return true;
      }

      final Iterator<String> fields = splitter.split(line).iterator();
      // The splitter never yields null elements, so null here means "field missing".
      final String key = fields.hasNext() ? fields.next() : null;
      final String value = fields.hasNext() ? fields.next() : null;

      // Too few fields, or anything left over after the second one, makes the line invalid.
      if (key == null || value == null || fields.hasNext()) {
        throw new RuntimeException(String.format("Corrupt line #%d: %s", lineNo, line));
      }

      try {
        mapSink.put(keyFunction.apply(key), valueFunction.apply(value));
      } catch (IllegalArgumentException iae) {
        throw new IOException(
            String.format("Error processing line %d of file map: %s", lineNo, line), iae);
      }
      // all lines should be processed
      return true;
    }

    /** Always returns {@code null}; the output of this processor is what it wrote to the sink. */
    @Override
    public Void getResult() {
      return null;
    }
  }

  /**
   * Tests whether {@code line} is a comment line, as decided by {@link IsCommentLine}.
   *
   * @param line the line to test
   * @return {@code true} if {@code line} is a comment line
   */
  private static boolean isCommentLine(final String line) {
    final IsCommentLine commentTest = IsCommentLine.INSTANCE;
    return commentTest.apply(line);
  }

  /** Singleton predicate which accepts lines starting with "#", i.e. comment lines. */
  private enum IsCommentLine implements Predicate<String> {
    INSTANCE;

    /**
     * @param input the line to test; must not be {@code null}
     * @return {@code true} if {@code input} starts with "#"
     * @throws NullPointerException if {@code input} is {@code null}
     */
    @Override
    public boolean apply(final String input) {
      checkNotNull(input);
      return input.startsWith("#");
    }
  }

  /**
   * Predicate which accepts a {@link File} when its name (the last element of its path) ends with
   * a fixed suffix.
   */
  private static class EndsWithPredicate implements Predicate<File> {

    private final String suffix;

    /**
     * @param suffix the suffix a file name must end with; it is not validated here, so a
     *     {@code null} suffix only fails once the predicate is applied
     */
    public EndsWithPredicate(final String suffix) {
      this.suffix = suffix;
    }

    /**
     * @param f the file to test
     * @return {@code true} if the name of {@code f} ends with the suffix
     */
    @Override
    public boolean apply(final File f) {
      return f.getName().endsWith(suffix);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy