All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.coref.hybrid.sieve.Sieve Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.coref.hybrid.sieve;
import edu.stanford.nlp.util.logging.Redwood;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;

import edu.stanford.nlp.coref.data.CorefCluster;
import edu.stanford.nlp.coref.data.Dictionaries;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.data.Mention;
import edu.stanford.nlp.coref.data.Dictionaries.MentionType;
import edu.stanford.nlp.coref.data.Dictionaries.Person;
import edu.stanford.nlp.coref.hybrid.HybridCorefPrinter;
import edu.stanford.nlp.coref.hybrid.HybridCorefProperties;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Generics;

public abstract class Sieve implements Serializable  {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(Sieve.class);

  private static final long serialVersionUID = 3986463332365306868L;

  /** The kinds of sieve that {@link #loadSieve(Properties, String)} knows how to create. */
  public enum ClassifierType {RULE, RF, ORACLE}

  /** Kind of this sieve; {@code null} unless something assigns it. */
  public ClassifierType classifierType = null;

  /** Language of this sieve; the no-argument constructor uses {@link Locale#ENGLISH}. */
  protected Locale lang;

  /** Name of this sieve; used as the key when looking up per-sieve properties. */
  public final String sievename;

  /** the maximum sentence distance for linking two mentions */
  public int maxSentDist = -1;

  /** type of mention we want to resolve. e.g., if mType is PRONOMINAL, we only resolve pronoun mentions */
  public final Set<MentionType> mType;

  /** type of mention we want to compare to. e.g., if aType is PROPER, the resolution can be done only with PROPER antecedent  */
  public final Set<MentionType> aType;

  /** Mention type filter in string form; empty when built by the no-argument constructor. */
  public final Set<String> mTypeStr;

  /** Antecedent type filter in string form; empty when built by the no-argument constructor. */
  public final Set<String> aTypeStr;

  /** Properties attached to this sieve; stays {@code null} until assigned (the constructors do not set it). */
  public Properties props = null;

  public Sieve() {
    this.lang = Locale.ENGLISH;
    this.sievename = this.getClass().getSimpleName();
    this.aType = new HashSet<>(Arrays.asList(MentionType.values()));
    this.mType = new HashSet<>(Arrays.asList(MentionType.values()));
    this.maxSentDist = 1000;
    this.mTypeStr = Generics.newHashSet();
    this.aTypeStr = Generics.newHashSet();
  }

  public Sieve(Properties props){
    this.lang = HybridCorefProperties.getLanguage(props);
    this.sievename = this.getClass().getSimpleName();
    this.aType = HybridCorefProperties.getAntecedentType(props, sievename);
    this.mType = HybridCorefProperties.getMentionType(props, sievename);
    this.maxSentDist = HybridCorefProperties.getMaxSentDistForSieve(props, sievename);
    this.mTypeStr = HybridCorefProperties.getMentionTypeStr(props, sievename);
    this.aTypeStr = HybridCorefProperties.getAntecedentTypeStr(props, sievename);
  }

  /**
   * Creates a sieve with an explicit name and reads its language, type filters and
   * maximum sentence distance from {@code props}.
   *
   * @param props properties queried through {@link HybridCorefProperties}
   * @param sievename name stored on the sieve and used as the per-sieve property key
   */
  public Sieve(Properties props, String sievename) {
    // Only the name needs "this." because the parameter shadows the field.
    this.sievename = sievename;
    lang = HybridCorefProperties.getLanguage(props);

    // Antecedent / mention type filters, as enum sets.
    aType = HybridCorefProperties.getAntecedentType(props, sievename);
    mType = HybridCorefProperties.getMentionType(props, sievename);

    maxSentDist = HybridCorefProperties.getMaxSentDistForSieve(props, sievename);

    // The same filters in their string form.
    mTypeStr = HybridCorefProperties.getMentionTypeStr(props, sievename);
    aTypeStr = HybridCorefProperties.getAntecedentTypeStr(props, sievename);
  }

  /**
   * Runs this sieve over the predicted mentions of {@code document}.
   *
   * <p>Mentions are visited sentence by sentence, in list order. Each mention that is not
   * rejected by {@code skipMentionType} is handed to
   * {@link #findCoreferentAntecedent(Mention, int, Document, Dictionaries, Properties, StringBuilder)}
   * together with its index inside its sentence's mention list.
   *
   * @param document document whose {@code predictedMentions} are processed
   * @param dict dictionaries passed through to {@code findCoreferentAntecedent}
   * @param props properties used for the debug check and passed through to the sieve
   * @return the accumulated log text; starts with a separator line and the raw document
   *     dump when {@code HybridCorefProperties.debug(props)} is true
   * @throws Exception whatever {@code findCoreferentAntecedent} throws
   */
  public String resolveMention(Document document, Dictionaries dict, Properties props) throws Exception {
    StringBuilder sbLog = new StringBuilder();

    if (HybridCorefProperties.debug(props)) {
      sbLog.append("=======================================================");
      sbLog.append(HybridCorefPrinter.printRawDoc(document, true, true));
    }

    // The element type must be spelled out: with a raw List, get() yields Object and the
    // assignment to Mention below does not compile.
    for (List<Mention> mentionsInSent : document.predictedMentions) {
      for (int mIdx = 0; mIdx < mentionsInSent.size(); mIdx++) {
        Mention m = mentionsInSent.get(mIdx);
        if (skipMentionType(m, props)) {
          continue;
        }
        findCoreferentAntecedent(m, mIdx, document, dict, props, sbLog);
      }
    }
    return sbLog.toString();
  }

  /**
   * Sieve-specific resolution step for a single mention; implemented by subclasses.
   * {@code resolveMention} calls this once for every mention it does not skip.
   *
   * @param m the mention to resolve
   * @param mIdx index of {@code m} within its sentence's list of predicted mentions
   * @param document the document being processed
   * @param dict dictionaries handed through from {@code resolveMention}
   * @param props properties handed through from {@code resolveMention}
   * @param sbLog log buffer whose contents {@code resolveMention} returns
   * @throws Exception implementation-defined
   */
  public abstract void findCoreferentAntecedent(Mention m, int mIdx, Document document, Dictionaries dict, Properties props, StringBuilder sbLog) throws Exception;


  /**
   * Creates the sieve called {@code sievename}, either by instantiating a deterministic
   * sieve, by reading a serialized model, or by building an oracle sieve, depending on
   * {@code HybridCorefProperties.getClassifierType(props, sievename)}.
   *
   * <ul>
   *   <li>{@code RULE}: instantiates the class of that name in package
   *       {@code edu.stanford.nlp.coref.hybrid.sieve} through its no-argument constructor,
   *       then sets its {@code props} and {@code lang}.</li>
   *   <li>{@code RF}: reads the sieve from the configured model path and sets its merge
   *       threshold; {@code props} is not assigned on this path.</li>
   *   <li>{@code ORACLE}: builds an {@code OracleSieve} and sets its {@code props}.</li>
   * </ul>
   *
   * @param props configuration consulted for the classifier type and sieve settings
   * @param sievename name of the sieve to create
   * @return the ready-to-use sieve
   * @throws Exception on reflection or model-loading failures
   * @throws RuntimeException if the classifier type is none of the above
   */
  public static Sieve loadSieve(Properties props, String sievename) throws Exception {
    switch (HybridCorefProperties.getClassifierType(props, sievename)) {
      case RULE: {
        String className = "edu.stanford.nlp.coref.hybrid.sieve." + sievename;
        DeterministicCorefSieve ruleSieve =
            (DeterministicCorefSieve) Class.forName(className).getConstructor().newInstance();
        ruleSieve.props = props;
        ruleSieve.lang = HybridCorefProperties.getLanguage(props);
        return ruleSieve;
      }

      case RF: {
        String modelPath = HybridCorefProperties.getPathModel(props, sievename);
        log.info("Loading sieve: " + sievename + " from " + modelPath + " ... ");
        RFSieve forestSieve = IOUtils.readObjectFromURLOrClasspathOrFileSystem(modelPath);
        forestSieve.thresMerge = HybridCorefProperties.getMergeThreshold(props, sievename);
        log.info("done. Merging threshold: " + forestSieve.thresMerge);
        return forestSieve;
      }

      case ORACLE: {
        OracleSieve goldSieve = new OracleSieve(props, sievename);
        goldSieve.props = props;
        return goldSieve;
      }

      default:
        throw new RuntimeException("no sieve type specified");
    }
  }


  /**
   * Loads the sieves listed in the comma-separated sieve property, in order.
   *
   * <p>When {@code HybridCorefProperties.getCurrentSieveForTrain(props)} is non-null, only
   * the sieves listed <em>before</em> the entry equal to that name are loaded. Names are
   * compared as whole entries, so a training sieve whose name is a substring of another
   * sieve name (or contains regex metacharacters) no longer cuts the list in the wrong
   * place, and a training sieve in first position yields an empty list instead of an
   * attempt to load a sieve with an empty name. If the name is not listed, all sieves are
   * loaded.
   *
   * @param props configuration holding the sieve list and, optionally, the training sieve
   * @return the loaded sieves, in listed order; possibly empty
   * @throws Exception if loading any individual sieve fails
   */
  public static List<Sieve> loadSieves(Properties props) throws Exception {
    String sieveProp = HybridCorefProperties.getSieves(props);
    String currentSieveForTrain = HybridCorefProperties.getCurrentSieveForTrain(props);

    List<Sieve> sieves = new ArrayList<>();
    for (String entry : sieveProp.trim().split(",\\s*")) {
      String sievename = entry.trim();
      if (sievename.isEmpty()) {
        continue;   // blank entry, e.g. from a stray comma
      }
      if (sievename.equals(currentSieveForTrain)) {
        break;      // stop in front of the sieve that is being trained
      }
      sieves.add(loadSieve(props, sievename));
    }
    return sieves;
  }

  /**
   * Tells whether any token is the word "that" tagged {@code IN}; both comparisons ignore
   * case. Tokens whose word or tag is {@code null} simply do not match.
   *
   * @param words tokens to scan
   * @return {@code true} if at least one token matches
   */
  public static boolean hasThat(List<CoreLabel> words) {
    for (CoreLabel cl : words) {
      // Literal-first comparison keeps a missing word or tag from throwing.
      if ("that".equalsIgnoreCase(cl.word()) && "IN".equalsIgnoreCase(cl.tag())) {
        return true;
      }
    }
    return false;
  }

  // NOTE(review): this region looks truncated. The loop header on the second code line stops
  // after "i" and continues with "types) {", so the body of hasToVerb and the beginning of a
  // later method (one that uses a collection "types" and a mention "m", neither declared
  // here) are missing from this text. Restore both from the original source before editing.
  public static boolean hasToVerb(List words) {
    for(int i=0 ; i types) {
    // An empty "types" collection matches everything.
    if(types.isEmpty()) return true;
    // Otherwise a single matching entry is enough.
    for(String type : types) {
      if(matchedMentionType(m, type)) return true;
    }
    return false;
  }
  /**
   * Tells whether mention {@code m} matches a single type specifier.
   *
   * <p>A specifier matches when, ignoring case, it is
   * <ul>
   *   <li>{@code "all"}, or the string form of {@code m.mentionType};</li>
   *   <li>one of {@code he, she, you, I, it, they, we}, and {@code m} is pronominal with the
   *       corresponding {@link Person};</li>
   *   <li>{@code "ne:X"}, and {@code X} starts with the first (up to) three characters of
   *       {@code m.nerString}.</li>
   * </ul>
   * Case folding for the named-entity check uses {@link Locale#ROOT}, so the result does not
   * depend on the JVM default locale (under a Turkish locale, "MISC" would otherwise
   * lower-case to a string that "misc" does not match).
   *
   * @param m the mention to test
   * @param type the type specifier; {@code null} never matches
   * @return {@code true} if {@code m} matches {@code type}
   */
  protected static boolean matchedMentionType(Mention m, String type) {
    if (type == null) {
      return false;
    }
    if (type.equalsIgnoreCase("all") || type.equalsIgnoreCase(m.mentionType.toString())) {
      return true;
    }

    // check pronoun specific type
    if (type.equalsIgnoreCase("he") && m.isPronominal() && m.person == Person.HE) return true;
    if (type.equalsIgnoreCase("she") && m.isPronominal() && m.person == Person.SHE) return true;
    if (type.equalsIgnoreCase("you") && m.isPronominal() && m.person == Person.YOU) return true;
    if (type.equalsIgnoreCase("I") && m.isPronominal() && m.person == Person.I) return true;
    if (type.equalsIgnoreCase("it") && m.isPronominal() && m.person == Person.IT) return true;
    if (type.equalsIgnoreCase("they") && m.isPronominal() && m.person == Person.THEY) return true;
    if (type.equalsIgnoreCase("we") && m.isPronominal() && m.person == Person.WE) return true;

    // check named entity type
    String loweredType = type.toLowerCase(Locale.ROOT);
    if (loweredType.startsWith("ne:")) {
      if (m.nerString == null) {
        return false;   // no NER label to compare against
      }
      String loweredNer = m.nerString.toLowerCase(Locale.ROOT);
      String nerPrefix = loweredNer.substring(0, Math.min(3, loweredNer.length()));
      return loweredType.substring(3).startsWith(nerPrefix);
    }

    return false;
  }

  public static List getOrderedAntecedents(
      Mention m,
      int antecedentSentence,
      int mPosition,
      List> orderedMentionsBySentence,
      Dictionaries dict) {
    List orderedAntecedents = new ArrayList<>();
    // ordering antecedents
    if (antecedentSentence == m.sentNum) {   // same sentence
      orderedAntecedents.addAll(orderedMentionsBySentence.get(m.sentNum).subList(0, mPosition));

      if(dict.relativePronouns.contains(m.spanToString())) Collections.reverse(orderedAntecedents);
      else {
        orderedAntecedents = sortMentionsByClause(orderedAntecedents, m);
      }

    } else {    // previous sentence
      orderedAntecedents.addAll(orderedMentionsBySentence.get(antecedentSentence));
    }
    return orderedAntecedents;
  }

  /**
   * Divides a sentence into clauses and sorts the antecedents for pronoun matching.
   *
   * <p>Walks from the parent of {@code m1.mentionSubTree} up to the root of
   * {@code m1.contextParseTree}. At every ancestor labelled {@code TOP}, {@code NP}, or
   * starting with {@code S}, the mentions of {@code l} dominated by that ancestor and not
   * yet collected are appended, so mentions in the innermost enclosing clause come first.
   * Mentions not dominated by any such ancestor are left out of the result.
   *
   * @param l candidate antecedents, in their original order
   * @param m1 the mention whose position in the parse tree drives the ordering
   * @return the reordered candidates; {@code l} itself when {@code m1} has no parse tree, no
   *     subtree, or its subtree has no parent in the tree (previously the last case threw a
   *     {@code NullPointerException})
   */
  private static List<Mention> sortMentionsByClause(List<Mention> l, Mention m1) {
    Tree tree = m1.contextParseTree;
    Tree current = m1.mentionSubTree;
    if (tree == null || current == null) {
      return l;
    }

    Tree ancestor = current.ancestor(1, tree);
    if (ancestor == null) {
      // The mention subtree is the root, or is not part of this tree: nothing to walk.
      return l;
    }

    List<Mention> sorted = new ArrayList<>();
    for ( ; ancestor != null; ancestor = ancestor.ancestor(1, tree)) {
      String curLabel = ancestor.label().value();
      if ("TOP".equals(curLabel) || curLabel.startsWith("S") || curLabel.equals("NP")) {
        for (Mention m : l) {
          if (!sorted.contains(m) && ancestor.dominates(m.mentionSubTree)) {
            sorted.add(m);
          }
        }
      }
    }
    return sorted;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy