All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.util.ArrayStringFilter Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.util;

import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Predicate;

/**
 * Filters Strings based on whether they match any string in the
 * array the filter is initially constructed with.  How a candidate is
 * compared against the stored words is selected by {@link Mode}:
 * exact equality, prefix match, or case-insensitive equality.
 * <br>
 * Saves some time over using regexes if the array of strings is small
 * enough.  No specific experiments exist for how long the array can
 * be before performance is worse than a regex, but the English
 * dependencies code was helped by replacing disjunction regexes of 6
 * words or fewer with this.
 * <br>
 * The stored words are expected to be non-null; a null word causes a
 * {@link NullPointerException} when it is compared against an input.
 *
 * @author John Bauer
 */
public class ArrayStringFilter implements Predicate<String>, Serializable {
  /** Private copy of the words to match against; never exposed or modified. */
  private final String[] words;
  /** Number of stored words.  Kept as a field so the serialized form stays unchanged. */
  private final int length;
  /** How an input is compared against each stored word. */
  private final Mode mode;

  /** The comparison strategies supported by this filter. */
  public enum Mode {
    /** The input must be equal to one of the words. */
    EXACT,
    /** The input must start with one of the words. */
    PREFIX,
    /** The input must be equal to one of the words, ignoring case. */
    CASE_INSENSITIVE
  }

  /**
   * Builds a filter which accepts strings matching any of the given words.
   *
   * @param mode how inputs are compared against the words; must not be null
   * @param words the words to match; the array is copied, so later changes
   *              made by the caller do not affect this filter
   * @throws NullPointerException if {@code mode} or {@code words} is null
   */
  public ArrayStringFilter(Mode mode, String ... words) {
    if (mode == null) {
      throw new NullPointerException("Cannot handle null mode");
    }
    this.mode = mode;
    // Defensive copy: the caller keeps ownership of the array it passed in.
    this.words = words.clone();
    this.length = this.words.length;
  }

  /**
   * Tests whether the input matches any of the stored words under this
   * filter's mode.  A null input never matches.
   *
   * @param input the string to test, possibly null
   * @return true if at least one stored word matches the input
   */
  @Override
  public boolean test(String input) {
    switch (mode) {
    case EXACT:
      for (String word : words) {
        if (word.equals(input)) {
          return true;
        }
      }
      return false;
    case PREFIX:
      // startsWith is invoked on the input, so null has to be ruled out first.
      if (input == null) {
        return false;
      }
      for (String word : words) {
        if (input.startsWith(word)) {
          return true;
        }
      }
      return false;
    case CASE_INSENSITIVE:
      for (String word : words) {
        if (word.equalsIgnoreCase(input)) {
          return true;
        }
      }
      return false;
    default:
      throw new IllegalArgumentException("Unknown mode " + mode);
    }
  }

  /**
   * Returns the mode name, a colon, and the comma-separated words.
   */
  @Override
  public String toString() {
    return mode.toString() + ':' + String.join(",", words);
  }

  /**
   * Hashes the distinct words only.  {@link #equals(Object)} compares the
   * words as sets, so duplicates must not contribute to the hash; otherwise
   * two equal filters could produce different hash codes.
   */
  @Override
  public int hashCode() {
    // A Set's hashCode is the sum of its elements' hash codes, which is
    // independent of order and counts each distinct word exactly once.
    return 1 + new HashSet<>(Arrays.asList(words)).hashCode();
  }

  /**
   * Two filters are equal when they have the same mode, the same number of
   * words, and the same set of distinct words (order is irrelevant).
   */
  @Override
  public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    if (!(other instanceof ArrayStringFilter)) {
      return false;
    }
    ArrayStringFilter filter = (ArrayStringFilter) other;
    if (filter.mode != this.mode || filter.length != this.length) {
      return false;
    }
    Set<String> myWords = new HashSet<>(Arrays.asList(this.words));
    Set<String> otherWords = new HashSet<>(Arrays.asList(filter.words));
    return myWords.equals(otherWords);
  }

  private static final long serialVersionUID = 1;

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy