All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.isi.nlp.files.DocIDToFileMapContains Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.files;

import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import edu.isi.nlp.StringUtils;
import java.io.File;
import java.io.IOException;
import java.util.Set;

/**
 * Utility to check that a doc ID-to-file-map contains all documents on a given list.
 *
 * <p>Command-line usage: {@code DocIDToFileMapContains docIDToFileMap fileList}. The process exits
 * with status 0 when every entry of the list is a key of the map, and with status 1 otherwise
 * (missing entries, wrong argument count, or any exception).
 *
 * @author Ryan Gabbard
 */
public final class DocIDToFileMapContains {
  /** Not instantiable: this class only exposes a command-line entry point. */
  private DocIDToFileMapContains() {
    throw new UnsupportedOperationException();
  }

  /**
   * Command-line entry point.
   *
   * @param argv exactly two arguments: the path of the doc ID-to-file map and the path of the
   *     document list
   */
  public static void main(String[] argv) {
    // we wrap the main method in this way to
    // ensure a non-zero return value on failure
    try {
      trueMain(argv);
    } catch (Exception e) {
      e.printStackTrace();
      System.exit(1);
    }
  }

  /**
   * Reads the document list (UTF-8, each line is treated as one ID), loads the map through
   * {@code FileUtils.loadStringToFileMap}, and prints to standard output every listed ID that is
   * not a key of the map.
   *
   * <p>This method always terminates the JVM: status 0 if nothing is missing, status 1 if at
   * least one ID is missing or the argument count is wrong.
   *
   * @param argv the command-line arguments, see {@link #main(String[])}
   * @throws IOException if either input file cannot be read
   */
  private static void trueMain(String[] argv) throws IOException {
    if (argv.length != 2) {
      System.err.println("usage: DocIDToFileMapContains docIDToFileMap fileList");
      System.exit(1);
    }

    final File docIdMapFile = new File(argv[0]);
    final File docListFile = new File(argv[1]);

    // Parameterized types instead of raw ones, so the compiler checks element types.
    final ImmutableSet<String> idsInDocList =
        ImmutableSet.copyOf(Files.asCharSource(docListFile, Charsets.UTF_8).readLines());
    // NOTE(review): assumes loadStringToFileMap returns a map keyed by String, as its name
    // suggests; confirm against FileUtils.
    final Set<String> idsMapped = FileUtils.loadStringToFileMap(docIdMapFile).keySet();
    // Listed IDs that have no entry in the map.
    final ImmutableSet<String> difference =
        Sets.difference(idsInDocList, idsMapped).immutableCopy();

    if (!difference.isEmpty()) {
      // Report the missing IDs, then signal failure through the exit status.
      System.out.println(StringUtils.unixNewlineJoiner().join(difference));
      System.exit(1);
    } else {
      System.exit(0);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy