All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.isi.nlp.files.SubtractFileMaps Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.files;

import com.google.common.base.Charsets;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.io.Files;
import edu.isi.nlp.IsiNlpImmutable;
import edu.isi.nlp.parameters.Parameters;
import edu.isi.nlp.symbols.Symbol;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import org.immutables.value.Value;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Given two key-to-file maps, produces a new map which includes all key-value mappsing from the
 * first where the key is not present in the second.
 *
 * @author Ryan Gabbard
 */
public final class SubtractFileMaps {

  private static final Logger log = LoggerFactory.getLogger(SubtractFileMaps.class);

  private SubtractFileMaps() {
    throw new UnsupportedOperationException();
  }

  public static void main(String[] argv) {
    // we wrap the main method in this way to
    // ensure a non-zero return value on failure
    try {
      trueMain(argv);
    } catch (Exception e) {
      e.printStackTrace();
      System.exit(1);
    }
  }

  private static void trueMain(String[] argv) throws IOException {
    final Parameters params = Parameters.loadSerifStyle(new File(argv[0]));
    log.info(params.dump());

    final File inputFileMapFile = params.getExistingFile("com.bbn.subtractFileMaps.inputMap");
    final File outputFile = params.getCreatableFile("com.bbn.subtractFileMaps.outputMap");
    final StuffToSubtract stuffToSubtract = loadStuffToSubtract(params);

    final ImmutableMap inputFiles = FileUtils.loadSymbolToFileMap(inputFileMapFile);

    final ImmutableMap.Builder outputFilesB = ImmutableMap.builder();
    for (final Map.Entry inputEntry : inputFiles.entrySet()) {
      if (stuffToSubtract.docIDs().contains(inputEntry.getKey())) {
        // put nothing, this entry is deleted
        if (stuffToSubtract.docIDsToFileMap().isPresent()) {
          // if paths are available and matching was requested, they must match
          final File pathInSubtractMap =
              stuffToSubtract.docIDsToFileMap().get().get(inputEntry.getKey());
          final boolean pathsMatch = pathInSubtractMap.equals(inputEntry.getValue());
          if (!pathsMatch) {
            throw new RuntimeException(
                "Mismatch in file maps: for "
                    + inputEntry.getKey()
                    + " input has "
                    + inputEntry.getValue()
                    + " to subtract has "
                    + pathInSubtractMap);
          }
        }
      } else {
        outputFilesB.put(inputEntry);
      }
    }

    final ImmutableMap outputFileMap = outputFilesB.build();

    log.info(
        "Subtracting {}'s {} files from {}'s {} files and writing {} files to to {}",
        stuffToSubtract.path(),
        stuffToSubtract.docIDs().size(),
        inputFileMapFile,
        inputFiles.size(),
        outputFileMap.size(),
        outputFile);

    FileUtils.writeSymbolToFileMap(outputFileMap, Files.asCharSink(outputFile, Charsets.UTF_8));
  }

  public static StuffToSubtract loadStuffToSubtract(final Parameters params) throws IOException {
    final File toSubtractFile = params.getExistingFile("com.bbn.subtractFileMaps.toSubtract");
    final boolean toSubtractIsMap =
        params.getOptionalBoolean("com.bbn.subtractFileMaps.subtrahendIsMap").or(true);
    final boolean requirePathMatch =
        params.getOptionalBoolean("com.bbn.subtractFileMaps.requirePathMatch").or(false);

    if (toSubtractIsMap) {
      final ImmutableMap filesToSubtract =
          FileUtils.loadSymbolToFileMap(toSubtractFile);
      final StuffToSubtract.Builder ret =
          new StuffToSubtract.Builder().path(toSubtractFile).docIDs(filesToSubtract.keySet());
      if (requirePathMatch) {
        ret.docIDsToFileMap(filesToSubtract).build();
      }
      return ret.build();
    } else {
      return new StuffToSubtract.Builder()
          .path(toSubtractFile)
          .docIDs(FileUtils.loadSymbolSet(Files.asCharSource(toSubtractFile, Charsets.UTF_8)))
          .build();
    }
  }
}

@IsiNlpImmutable
@Value.Immutable
abstract class StuffToSubtract {

  public abstract File path();

  public abstract ImmutableSet docIDs();

  public abstract Optional> docIDsToFileMap();

  public static class Builder extends ImmutableStuffToSubtract.Builder {}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy