All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.tagger.maxent.ExtractorVerbalVBNZero Maven / Gradle / Ivy

package edu.stanford.nlp.tagger.maxent;

import java.util.regex.Pattern;


/**
 * Look for verbs selecting a VBN verb.
 * This is now a zeroth order observed data only feature.
 * But reminiscent of what was done in Toutanova and Manning 2000.
 * It doesn't seem to help tagging performance any more.
 * <p>
 * The feature fires (value {@code "1"}) when one of the up to {@code bound}
 * words preceding the current word is an auxiliary-like cue (a form of
 * <i>have</i>, <i>be</i> or <i>get</i>, see {@link #vbnWord}) and no
 * conjunction or punctuation stopper (see {@link #stopper}) intervenes.
 * <p>
 * NOTE(review): earlier versions of this class decremented the look-back
 * index twice per iteration and therefore only examined every other
 * preceding word. Models trained with that behavior will see different
 * values for this feature; confirm whether retraining is required.
 *
 * @author Christopher Manning
 */
public class ExtractorVerbalVBNZero extends DictionaryExtractor {

  private static final String vbnTag = "VBN";
  private static final String vbdTag = "VBD";
  private static final String jjTag = "JJ";
  private static final String edSuff = "ed";
  private static final String enSuff = "en";
  /** Feature value returned when a preceding cue verb was found. */
  private static final String oneSt = "1";
  /** Word value at which the look-back scan stops (compared against what {@code PairsHolder.getWord} returns). */
  private static final String naWord = "NA";

  /** Maximum number of preceding words inspected by {@link #extract}. */
  private final int bound;
  /** Words that end the look-back scan without a match (whole-word, case-insensitive). */
  private static final Pattern stopper = Pattern.compile("(?i:and|or|but|,|;|-|--)");
  /** Cue words that make the feature fire (whole-word, case-insensitive). */
  private static final Pattern vbnWord = Pattern.compile("(?i:have|has|having|had|is|am|are|was|were|be|being|been|'ve|'s|s|'d|'re|'m|gotten|got|gets|get|getting)"); // cf. list in EnglishPTBTreebankCorrector


  /**
   * @param bound how many words to the left of the current word are searched
   *              for a cue verb
   */
  public ExtractorVerbalVBNZero(int bound) {
    this.bound = bound;
  }


  /**
   * Reports whether this extractor applies to the given tag.
   * Also writes a diagnostic line to {@code System.err} on every call.
   *
   * @param tag the candidate tag
   * @return {@code true} iff {@code tag} is VBN, VBD or JJ
   */
  @Override
  public boolean precondition(String tag) {
    // Evaluate the test once and reuse it for both the log line and the result.
    boolean applies = tag.equals(vbnTag) || tag.equals(vbdTag) || tag.equals(jjTag);
    System.err.println("VBN: Testing precondition on " + tag + ": " + applies);
    return applies;
  }


  /**
   * Computes the feature value for the current word of the history.
   *
   * @param h  the history identifying the current position
   * @param pH source of the words around that position
   * @return {@code "1"} if a cue verb precedes the current word within
   *         {@code bound} words with no stopper in between; otherwise
   *         {@code zeroSt}
   */
  @Override
  String extract(History h, PairsHolder pH) {
    String cword = pH.getWord(h, 0);
    int allCount = dict.sum(cword);
    int vBNCount = dict.getCount(cword, vbnTag);
    int vBDCount = dict.getCount(cword, vbdTag);

    // Conditions for deciding inapplicable
    // Word with a zero dictionary count that does not look like a participle (-ed / -en).
    if ((allCount == 0) && (!(cword.endsWith(edSuff) || cword.endsWith(enSuff)))) {
      return zeroSt;
    }
    // Word has counts, but VBN + VBD make up at most allCount / 100 of them (integer division).
    if ((allCount > 0) && (vBNCount + vBDCount <= allCount / 100)) {
      return zeroSt;
    }

    // Walk leftwards one word at a time: -1, -2, ..., -bound.
    for (int index = -1; index >= -bound; index--) {
      String word2 = pH.getWord(h, index);
      // Stop at the "NA" word or at a conjunction / punctuation stopper.
      if (naWord.equals(word2) || stopper.matcher(word2).matches()) {
        break;
      }
      if (vbnWord.matcher(word2).matches()) {
        System.err.println("VBN: For " + cword + ", found preceding VBN cue " + word2);
        return oneSt;
      }
    }

    return zeroSt;
  }

  @Override
  public String toString() {
    return "ExtractorVerbalVBNZero(bound=" + bound + ')';
  }



  private static final long serialVersionUID = -5881204185400060636L;

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy