edu.stanford.nlp.ie.EmpiricalNERPriorBIO Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation
Show all versions of stanford-corenlp Show documentation
Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.
package edu.stanford.nlp.ie;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import java.util.List;
/**
* @author Mengqiu Wang
*/
public class EmpiricalNERPriorBIO extends EntityCachingAbstractSequencePriorBIO {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(EmpiricalNERPriorBIO.class);
private double[][] entityMatrix, subEntityMatrix;
private SeqClassifierFlags flags;
protected double p1 = Math.log(0.01);
protected double p2 = Math.log(2.0);
protected int ORGIndex, LOCIndex;
public static boolean DEBUG = false;
public EmpiricalNERPriorBIO(String backgroundSymbol, Index classIndex, Index tagIndex, List doc, Pair matrices, SeqClassifierFlags flags) {
super(backgroundSymbol, classIndex, tagIndex, doc);
entityMatrix = matrices.first();
subEntityMatrix = matrices.second();
this.flags = flags;
ORGIndex = tagIndex.indexOf("ORG");
LOCIndex = tagIndex.indexOf("LOC");
}
@Override
public double scoreOf(int[] sequence) {
double p = 0.0;
for (int i = 0; i < entities.length; i++) {
EntityBIO entity = entities[i];
if ((i == 0 || entities[i-1] != entity) && entity != null) {
int length = entity.words.size();
int tag1 = entity.type;
// String tag1 = classIndex.get(entity.type);
int[] other = entities[i].otherOccurrences;
for (int otherOccurrence : other) {
EntityBIO otherEntity = null;
for (int k = otherOccurrence; k < otherOccurrence + length && k < entities.length; k++) {
otherEntity = entities[k];
if (otherEntity != null) {
break;
}
}
// singleton + other instance null?
if (otherEntity == null) {
continue;
}
int oLength = otherEntity.words.size();
// String tag2 = classIndex.get(otherEntity.type);
int tag2 = otherEntity.type;
// exact match??
boolean exact = false;
int[] oOther = otherEntity.otherOccurrences;
for (int index : oOther) {
if (index >= i && index <= i + length - 1) {
exact = true;
break;
}
}
double factor; // initialized in 2 cases below
if (exact) {
if (DEBUG) {
log.info("Exact match of tag1=" + tagIndex.get(tag1) + ", tag2=" + tagIndex.get(tag2));
}
// entity not complete
if (length != oLength) {
// if (DEBUG)
// log.info("Entity Not Complete");
if (tag1 == tag2) {
p += Math.abs(oLength - length) * p1;
} else if (!(tag1 == ORGIndex && tag2 == LOCIndex) &&
!(tag1 == LOCIndex && tag2 == ORGIndex)) {
// shorter
p += (oLength + length) * p1;
}
}
factor = entityMatrix[tag1][tag2];
} else {
if (DEBUG)
log.info("Sub match of tag1=" + tagIndex.get(tag1) + ", tag2=" + tagIndex.get(tag2));
factor = subEntityMatrix[tag1][tag2];
}
if (tag1 == tag2) {
if (flags.matchNERIncentive) {
factor = p2;
// factor *= -1;
} else
factor = 0;
}
if (DEBUG)
log.info(" of factor=" + factor + ", p += " + (length * factor));
p += length * factor;
}
}
}
return p;
}
}