All downloads are free. The search and download features use the official Maven repository.

edu.stanford.nlp.patterns.surface.PatternToken Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.patterns.surface;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import edu.stanford.nlp.patterns.ConstantsAndVariables;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;

/**
 * Class to represent a target phrase. Note that you can give additional negative constraints
 * in getTokenStr(List), but those are not used by the toStringToWrite, hashCode and equals methods.
 * 
 * Author: Sonal Gupta ([email protected])
 */

public class PatternToken implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Tag value; used as a prefix in the tag restriction and only consulted when {@link #useTag} is true. */
  String tag;

  /** Whether the tag restriction is emitted and compared. */
  boolean useTag;

  /** Upper bound of the {@code {1,n}} repetition emitted by {@link #getTokenStr(List)}; forced to 1 when compound phrases are disabled. */
  int numWordsCompound;

  /** Whether the NER restriction is emitted and compared. */
  boolean useNER = false;

  /** NER tag value; the constructor rejects {@code null} when {@link #useNER} is true. */
  String nerTag = null;

  /** Whether the grandparent parse tag restriction is emitted and compared. */
  boolean useTargetParserParentRestriction = false;

  /** Grandparent parse tag; only assigned when {@link #useTargetParserParentRestriction} is true. */
  String grandparentParseTag;

  /**
   * Creates a token description.
   *
   * @param tag tag value used when {@code useTag} is true
   * @param useTag whether to restrict on {@code tag}
   * @param getCompoundPhrases when false, {@code numWordsCompound} is ignored and 1 is stored instead
   * @param numWordsCompound maximum repetition count stored when {@code getCompoundPhrases} is true
   * @param nerTag NER tag value; must not be {@code null} when {@code useNER} is true
   * @param useNER whether to restrict on {@code nerTag}
   * @param useTargetParserParentRestriction whether to restrict on {@code grandparentParseTag}
   * @param grandparentParseTag grandparent parse tag; a {@code null} value is replaced by the
   *        literal string {@code "null"} when the restriction is enabled, and the argument is
   *        not stored at all when the restriction is disabled
   * @throws RuntimeException if {@code useNER} is true and {@code nerTag} is {@code null}
   */
  public PatternToken(String tag, boolean useTag, boolean getCompoundPhrases,
      int numWordsCompound, String nerTag, boolean useNER,
      boolean useTargetParserParentRestriction, String grandparentParseTag) {
    if (useNER && nerTag == null) {
      throw new RuntimeException("NER tag is null and using NER restriction is true. Check your data.");
    }
    this.tag = tag;
    this.useTag = useTag;
    this.numWordsCompound = getCompoundPhrases ? numWordsCompound : 1;
    this.nerTag = nerTag;
    this.useNER = useNER;
    this.useTargetParserParentRestriction = useTargetParserParentRestriction;
    if (useTargetParserParentRestriction) {
      if (grandparentParseTag == null) {
        Redwood.log(ConstantsAndVariables.extremedebug, "Grand parent parse tag null ");
        // Store a placeholder so later string building and comparisons never see a null.
        this.grandparentParseTag = "null";
      } else {
        this.grandparentParseTag = grandparentParseTag;
      }
    }
  }

  /**
   * Returns a compact textual form: {@code "X"} followed by {@code ":" + value} for each enabled
   * restriction (tag, NER tag, grandparent parse tag, in that order) and, when
   * {@code numWordsCompound > 1}, a trailing {@code "{n}"}.
   *
   * @return the compact representation of this token
   */
  public String toStringToWrite() {
    String s = "X";
    if (useTag) {
      s += ":" + tag;
    }
    if (useNER) {
      s += ":" + nerTag;
    }
    if (useTargetParserParentRestriction) {
      s += ":" + grandparentParseTag;
    }
    if (numWordsCompound > 1) {
      s += "{" + numWordsCompound + "}";
    }
    return s;
  }

  /**
   * Builds the pattern fragment for this token, of the form
   * {@code " (?$term [r1 & r2 & ...]{1,n})"}, where each {@code r} is one enabled restriction
   * and {@code n} is {@link #numWordsCompound}. The result is passed through
   * {@code StringUtils.toAscii} before being returned.
   * NOTE(review): the syntax looks like a TokensRegex pattern - confirm against the caller.
   *
   * @param notAllowedClasses names to add as negated restrictions of the form
   *        {@code !{name:name}}; may be {@code null} or empty, in which case none are added
   * @return the pattern fragment string
   */
  String getTokenStr(List<String> notAllowedClasses) {
    List<String> restrictions = new ArrayList<>();
    if (useTag) {
      restrictions.add("{tag:/" + tag + ".*/}");
    }

    if (useNER) {
      restrictions.add("{ner:" + nerTag + "}");
    }

    if (useTargetParserParentRestriction) {
      restrictions.add("{grandparentparsetag:\"" + grandparentParseTag + "\"}");
    }

    if (notAllowedClasses != null && !notAllowedClasses.isEmpty()) {
      for (String na : notAllowedClasses) {
        restrictions.add("!{" + na + ":" + na + "}");
      }
    }

    String str = " (?$term "
        + "[" + StringUtils.join(restrictions, " & ") + "]{1,"
        + numWordsCompound + "}"
        + ")";

    return StringUtils.toAscii(str);
  }

  /**
   * Two tokens are equal when their three restriction flags and {@code numWordsCompound} match
   * and, for every enabled restriction, the corresponding value matches. Values of disabled
   * restrictions are ignored. Value comparison is null-safe.
   */
  @Override
  public boolean equals(Object b) {
    if (this == b) {
      return true;
    }
    if (!(b instanceof PatternToken)) {
      return false;
    }
    PatternToken t = (PatternToken) b;
    if (this.useNER != t.useNER
        || this.useTag != t.useTag
        || this.useTargetParserParentRestriction != t.useTargetParserParentRestriction
        || this.numWordsCompound != t.numWordsCompound) {
      return false;
    }

    // Objects.equals avoids a NullPointerException when an enabled restriction has a null value
    // (the constructor does not validate tag, and the fields are mutable).
    if (useTag && !Objects.equals(this.tag, t.tag)) {
      return false;
    }

    if (useNER && !Objects.equals(this.nerTag, t.nerTag)) {
      return false;
    }

    if (useTargetParserParentRestriction
        && !Objects.equals(this.grandparentParseTag, t.grandparentParseTag)) {
      return false;
    }

    return true;
  }

  /**
   * Hash of the pattern fragment built without negative constraints. That string is built only
   * from the enabled restriction values and {@code numWordsCompound}, so tokens that are equal
   * under {@link #equals(Object)} produce the same string.
   */
  @Override
  public int hashCode() {
    return getTokenStr(null).hashCode();
  }

  /**
   * Creates a new token by passing this token's current field values back through the constructor.
   *
   * @return a new {@code PatternToken} built from this token's fields
   */
  public PatternToken copy() {
    return new PatternToken(tag, useTag, numWordsCompound > 1, numWordsCompound, nerTag, useNER,
        useTargetParserParentRestriction, grandparentParseTag);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy