// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
package edu.stanford.nlp.ling.tokensregex;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Pattern;
/**
* Table used to lookup multi-word phrases.
* This class provides functions for looking up all instances of known phrases in a document in an efficient manner.
*
* Phrases can be added to the phrase table using
* <ul>
* <li><code>readPhrases</code></li>
* <li><code>readPhrasesWithTagScores</code></li>
* <li><code>addPhrase</code></li>
* </ul>
*
* You can lookup phrases in the table using
* <ul>
* <li><code>get</code></li>
* <li><code>lookup</code></li>
* </ul>
*
* You can find phrases occurring in a piece of text using
* <ul>
* <li><code>findAllMatches</code></li>
* <li><code>findNonOverlappingPhrases</code></li>
* </ul>
*
* @author Angel Chang
*/
public class PhraseTable implements Serializable
{
// Key under which a phrase is stored in a tree node once all of its words have been consumed by the path.
private static final String PHRASE_END = "";
private static final long serialVersionUID = 1L;
// Root of the lookup tree, keyed by word. Values seen in this file are a Phrase, a List of phrases, or a nested Map.
// Created by the sized constructor or lazily on the first addPhrase; reset to null by clear().
Map rootTree;
// If true, words are passed through StringUtils.normalize when building their normalized form.
public boolean normalize = true;
// If true, words are lower-cased when building their normalized form.
public boolean caseInsensitive = false;
// If true, punctuation characters (and whitespace around them) are removed from each word.
public boolean ignorePunctuation = false;
// Only consulted when ignorePunctuation is false: if true, a word consisting solely of one
// punctuation character (with optional surrounding whitespace) normalizes to the empty string.
public boolean ignorePunctuationTokens = true;
public Annotator tokenizer; // tokenizing annotator; when null, splitText falls back to regex splitting
// Number of distinct Phrase objects created by addPhrase.
int nPhrases = 0;
// Number of insertions processed by the tree-level addPhrase (incremented on every call).
int nStrings = 0;
// Cache from a word to its normalized form. Transient, so it is not part of the serialized state.
transient CacheMap normalizedCache = new CacheMap<>(5000);
/** Creates an empty phrase table using the default normalization settings. */
public PhraseTable()
{
}
/**
 * Creates an empty phrase table whose root map is pre-sized.
 * @param initSize - initial capacity of the root map
 */
public PhraseTable(int initSize)
{
  this.rootTree = new HashMap<>(initSize);
}
/**
 * Creates an empty phrase table with explicit normalization settings.
 * @param normalize - whether words are normalized with StringUtils.normalize
 * @param caseInsensitive - whether words are lower-cased before matching
 * @param ignorePunctuation - whether punctuation characters are stripped from words
 */
public PhraseTable(boolean normalize, boolean caseInsensitive, boolean ignorePunctuation)
{
  this.ignorePunctuation = ignorePunctuation;
  this.caseInsensitive = caseInsensitive;
  this.normalize = normalize;
}
/**
 * Indicates whether any phrase has been added to this table.
 * @return true if the phrase count is zero
 */
public boolean isEmpty()
{
  return nPhrases == 0;
}
/**
 * Checks whether the given key resolves to a phrase in this table.
 * @param key - phrase text (String) or WordList to look up
 * @return true if get(key) finds a phrase
 */
public boolean containsKey(Object key)
{
  Phrase found = get(key);
  return found != null;
}
/**
 * Looks up a phrase by key.
 * @param key - a String (phrase text) or a WordList; any other type yields null
 * @return the matching phrase, or null if there is none
 */
public Phrase get(Object key)
{
  if (key instanceof String) {
    return lookup((String) key);
  }
  if (key instanceof WordList) {
    return lookup((WordList) key);
  }
  return null;
}
/**
 * Removes all phrases: resets both counters and drops the lookup tree.
 */
public void clear()
{
  nStrings = 0;
  nPhrases = 0;
  rootTree = null;
}
/**
 * Replaces the normalization cache with a new one of the given size,
 * carrying over the entries of the current cache.
 * @param cacheSize - size passed to the new CacheMap
 */
public void setNormalizationCacheSize(int cacheSize)
{
  CacheMap newNormalizedCache = new CacheMap<>(cacheSize);
  // The cache field is transient, so it is null on a deserialized table; nothing to copy then.
  if (normalizedCache != null) {
    newNormalizedCache.putAll(normalizedCache);
  }
  normalizedCache = newNormalizedCache;
}
/*
* Input functions to read in phrases to the table
*/
// Default column delimiter (a single tab) used by the readPhrases / readPhrasesWithTagScores overloads.
private static final Pattern tabPattern = Pattern.compile("\t");
/**
 * Reads phrases from a tab delimited file.
 * @param filename - Name of file
 * @param checkTag - Indicates if there is a tag column (assumed to be 2nd column).
 *                   If false, treats entire line as the phrase
 * @throws IOException if the file cannot be read
 */
public void readPhrases(String filename, boolean checkTag) throws IOException
{
  Pattern delimiter = tabPattern;
  readPhrases(filename, checkTag, delimiter);
}
/**
 * Reads phrases from a file whose columns are separated by a regular expression.
 * @param filename - Name of file
 * @param checkTag - Indicates if there is a tag column (assumed to be 2nd column).
 *                   If false, treats entire line as the phrase
 * @param delimiterRegex - Regex for identifying column delimiter
 * @throws IOException if the file cannot be read
 */
public void readPhrases(String filename, boolean checkTag, String delimiterRegex) throws IOException
{
  Pattern delimiterPattern = Pattern.compile(delimiterRegex);
  readPhrases(filename, checkTag, delimiterPattern);
}
/**
 * Reads phrases from a file, one phrase per line.
 * @param filename - Name of file
 * @param checkTag - If true, each line is split into at most two columns (phrase, tag)
 *                   using the delimiter; a line without a delimiter is added untagged.
 *                   If false, the entire line is the phrase
 * @param delimiterPattern - Pattern for identifying the column delimiter
 * @throws IOException if the file cannot be read
 */
public void readPhrases(String filename, boolean checkTag, Pattern delimiterPattern) throws IOException
{
  Timing timer = new Timing();
  timer.doing("Reading phrases: " + filename);
  // try-with-resources so the reader is closed even when reading or adding a phrase fails
  try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
    String line;
    while ((line = br.readLine()) != null) {
      if (checkTag) {
        // limit of 2: everything after the first delimiter is the tag
        String[] columns = delimiterPattern.split(line, 2);
        if (columns.length == 1) {
          addPhrase(columns[0]);
        } else {
          addPhrase(columns[0], columns[1]);
        }
      } else {
        addPhrase(line);
      }
    }
  }
  timer.done();
}
/**
 * Reads phrases where each phrase has a score of being associated with a certain tag.
 * The file format is assumed to be
 *   phrase\ttag1 count\ttag2 count...
 * where the phrase and the tag entries are delimited by tabs, and each tag and its count
 * are delimited by whitespace.
 * @param filename - Name of file
 * @throws IOException if the file cannot be read
 */
public void readPhrasesWithTagScores(String filename) throws IOException
{
  Pattern fieldDelimiter = tabPattern;
  Pattern countDelimiter = whitespacePattern;
  readPhrasesWithTagScores(filename, fieldDelimiter, countDelimiter);
}
/**
 * Reads phrases with tag scores, with both delimiters given as regular expressions.
 * @param filename - Name of file
 * @param fieldDelimiterRegex - Regex separating the phrase and the tag entries
 * @param countDelimiterRegex - Regex separating a tag from its count
 * @throws IOException if the file cannot be read
 */
public void readPhrasesWithTagScores(String filename, String fieldDelimiterRegex,
    String countDelimiterRegex) throws IOException
{
  Pattern fieldDelimiterPattern = Pattern.compile(fieldDelimiterRegex);
  Pattern countDelimiterPattern = Pattern.compile(countDelimiterRegex);
  readPhrasesWithTagScores(filename, fieldDelimiterPattern, countDelimiterPattern);
}
/**
 * Reads phrases with tag scores. Each line holds a phrase followed by any number of
 * "tag count" fields; the counts are collected into a Counter that is stored as the phrase data.
 * @param filename - Name of file
 * @param fieldDelimiterPattern - Pattern separating the phrase and the tag entries
 * @param countDelimiterPattern - Pattern separating a tag from its count
 * @throws IOException if the file cannot be read
 * @throws RuntimeException if a tag entry does not have exactly a tag and a numeric count
 */
public void readPhrasesWithTagScores(String filename, Pattern fieldDelimiterPattern, Pattern countDelimiterPattern) throws IOException
{
  Timing timer = new Timing();
  timer.doing("Reading phrases: " + filename);
  // try-with-resources so the reader is closed even when a malformed line aborts the read
  try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
    String line;
    int lineno = 0;
    while ((line = br.readLine()) != null) {
      // Count the line before processing it so error messages report 1-based line numbers
      lineno++;
      String[] columns = fieldDelimiterPattern.split(line);
      String phrase = columns[0];
      // Pick map factory to use depending on number of tags we have
      MapFactory mapFactory = (columns.length < 20)?
          MapFactory.arrayMapFactory(): MapFactory.linkedHashMapFactory();
      Counter counts = new ClassicCounter<>(mapFactory);
      for (int i = 1; i < columns.length; i++) {
        String[] tagCount = countDelimiterPattern.split(columns[i], 2);
        if (tagCount.length == 2) {
          try {
            counts.setCount(tagCount[0], Double.parseDouble(tagCount[1]));
          } catch (NumberFormatException ex) {
            throw new RuntimeException("Error processing field " + i + ": '" + columns[i] +
                "' from (" + filename + ":" + lineno + "): " + line, ex);
          }
        } else {
          throw new RuntimeException("Error processing field " + i + ": '" + columns[i] +
              "' from (" + filename + ":" + lineno + "): " + line);
        }
      }
      addPhrase(phrase, null, counts);
    }
  }
  timer.done();
}
/**
 * Reads phrases from a tab delimited file, taking the phrase and the tag from given columns.
 * @param filename - Name of file
 * @param phraseColIndex - Index of the column holding the phrase (must not be negative)
 * @param tagColIndex - Index of the column holding the tag; negative means no tag
 * @throws IOException if the file cannot be read
 * @throws IllegalArgumentException if phraseColIndex is negative
 */
public void readPhrases(String filename, int phraseColIndex, int tagColIndex) throws IOException
{
  if (phraseColIndex < 0) {
    throw new IllegalArgumentException("Invalid phraseColIndex " + phraseColIndex);
  }
  Timing timer = new Timing();
  timer.doing("Reading phrases: " + filename);
  // try-with-resources so the reader is closed even when a line has too few columns
  try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
    String line;
    while ((line = br.readLine()) != null) {
      String[] columns = tabPattern.split(line);
      String phrase = columns[phraseColIndex];
      String tag = (tagColIndex >= 0)? columns[tagColIndex]: null;
      addPhrase(phrase, tag);
    }
  }
  timer.done();
}
/**
 * Picks the longest phrase from a list, as judged by Phrase.isLonger.
 * On ties the earliest phrase in the list wins.
 * @param phrases - candidate phrases
 * @return the longest phrase, or null if the list is empty
 */
public static Phrase getLongestPhrase(List phrases)
{
  Phrase best = null;
  for (Phrase candidate:phrases) {
    if (best != null && !candidate.isLonger(best)) {
      continue;
    }
    best = candidate;
  }
  return best;
}
/**
 * Splits phrase text into words.
 * With a tokenizer configured, the text is annotated and the word of each token is returned.
 * Otherwise a possessive 's (followed by whitespace or end of text) is detached into its own
 * word and the text is split on runs of whitespace, underscores and hyphens.
 * @param phraseText - text to split
 * @return the words of the text
 */
public String[] splitText(String phraseText)
{
  if (tokenizer == null) {
    String detached = possPattern.matcher(phraseText).replaceAll(" 's$1");
    return delimPattern.split(detached);
  }
  Annotation annotation = new Annotation(phraseText);
  tokenizer.annotate(annotation);
  List tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
  String[] words = new String[tokens.size()];
  for (int k = 0; k < tokens.size(); k++) {
    words[k] = tokens.get(k).word();
  }
  return words;
}
/**
 * Converts phrase text to a word list without normalizing the words.
 * @param phraseText - text to split
 * @return word list over the result of splitText
 */
public WordList toWordList(String phraseText)
{
  return new StringList(splitText(phraseText));
}
/**
 * Converts phrase text to a word list of normalized words.
 * Words whose normalized form is empty are dropped.
 * @param phraseText - text to split and normalize
 * @return word list of the non-empty normalized words
 */
public WordList toNormalizedWordList(String phraseText)
{
  String[] rawWords = splitText(phraseText);
  List kept = new ArrayList<>(rawWords.length);
  for (int k = 0; k < rawWords.length; k++) {
    String normalized = getNormalizedForm(rawWords[k]);
    if (!normalized.isEmpty()) {
      kept.add(normalized);
    }
  }
  return new StringList(kept);
}
/**
 * Adds every phrase text of the collection to the table, without a tag.
 * @param phraseTexts - phrase texts to add
 */
public void addPhrases(Collection phraseTexts)
{
  for (String text:phraseTexts) {
    addPhrase(text, null);
  }
}
/**
 * Adds every phrase text of the map to the table, tagged with the value it maps to.
 * @param taggedPhraseTexts - map from phrase text to tag
 */
public void addPhrases(Map taggedPhraseTexts)
{
  for (String text:taggedPhraseTexts.keySet()) {
    String tag = taggedPhraseTexts.get(text);
    addPhrase(text, tag);
  }
}
/**
 * Adds an untagged phrase given as text.
 * @param phraseText - text of the phrase
 * @return result of addPhrase(phraseText, null)
 */
public boolean addPhrase(String phraseText)
{
  final String noTag = null;
  return addPhrase(phraseText, noTag);
}
/**
 * Adds a tagged phrase given as text, without phrase data.
 * @param phraseText - text of the phrase
 * @param tag - tag for the phrase
 * @return result of addPhrase(phraseText, tag, null)
 */
public boolean addPhrase(String phraseText, String tag)
{
  final Object noData = null;
  return addPhrase(phraseText, tag, noData);
}
/**
 * Adds a phrase given as text; the text is split and normalized to obtain the words used as keys.
 * @param phraseText - text of the phrase
 * @param tag - tag for the phrase
 * @param phraseData - data to attach to the phrase
 * @return true if a new phrase or a new form of an existing phrase was added
 */
public boolean addPhrase(String phraseText, String tag, Object phraseData)
{
  WordList normalizedWords = toNormalizedWordList(phraseText);
  boolean added = addPhrase(phraseText, tag, normalizedWords, phraseData);
  return added;
}
/**
 * Adds an untagged phrase given as a list of tokens.
 * @param tokens - words of the phrase
 * @return result of addPhrase(tokens, null)
 */
public boolean addPhrase(List tokens)
{
  final String noTag = null;
  return addPhrase(tokens, noTag);
}
/**
 * Adds a tagged phrase given as a list of tokens, without phrase data.
 * @param tokens - words of the phrase
 * @param tag - tag for the phrase
 * @return result of addPhrase(tokens, tag, null)
 */
public boolean addPhrase(List tokens, String tag)
{
  final Object noData = null;
  return addPhrase(tokens, tag, noData);
}
/**
 * Adds a phrase given as a list of tokens. The tokens are used as given (no normalization
 * is applied here) and the phrase text is the tokens joined by single spaces.
 * @param tokens - words of the phrase
 * @param tag - tag for the phrase
 * @param phraseData - data to attach to the phrase
 * @return true if a new phrase or a new form of an existing phrase was added
 */
public boolean addPhrase(List tokens, String tag, Object phraseData)
{
  WordList words = new StringList(tokens);
  String joinedText = StringUtils.join(tokens, " ");
  return addPhrase(joinedText, tag, words, phraseData);
}
// Once a per-word list of phrases grows beyond this size during addPhrase, it is converted to a nested map.
private int MAX_LIST_SIZE = 20;
/**
 * Adds a phrase to the table starting at the root of the lookup tree,
 * creating the root map first if it does not exist yet.
 * @param phraseText - text of the phrase
 * @param tag - tag for the phrase
 * @param wordList - words used as keys into the tree
 * @param phraseData - data to attach to the phrase
 * @return true if a new phrase or a new form of an existing phrase was added
 */
private synchronized boolean addPhrase(String phraseText, String tag, WordList wordList, Object phraseData)
{
  if (rootTree == null) {
    rootTree = new HashMap<>();
  }
  final int firstWord = 0;
  return addPhrase(rootTree, phraseText, tag, wordList, phraseData, firstWord);
}
/**
 * Places an existing Phrase object into a tree node, keyed by its word at wordIndex
 * (or by PHRASE_END when the phrase has no word at that index).
 * Used when a list of phrases is converted to a map.
 * @param tree - tree node to insert into
 * @param phrase - phrase to insert
 * @param wordIndex - index of the word of the phrase to use as key
 * @throws RuntimeException if the node holds a value of an unexpected class
 */
private synchronized void addPhrase(Map tree, Phrase phrase, int wordIndex)
{
  final String key;
  if (wordIndex < phrase.wordList.size()) {
    key = phrase.wordList.getWord(wordIndex);
  } else {
    key = PHRASE_END;
  }
  Object existing = tree.get(key);
  if (existing == null) {
    // Free slot: the phrase itself becomes the node
    tree.put(key, phrase);
    return;
  }
  if (existing instanceof Phrase) {
    // Slot holds a single phrase: replace it by a list with the new phrase first
    List bucket = new ArrayList(2);
    bucket.add(phrase);
    bucket.add(existing);
    tree.put(key, bucket);
    return;
  }
  if (existing instanceof Map) {
    addPhrase((Map) existing, phrase, wordIndex + 1);
    return;
  }
  if (existing instanceof List) {
    ((List) existing).add(phrase);
    return;
  }
  throw new RuntimeException("Unexpected class " + existing.getClass() + " while adding word "
      + wordIndex + "(" + key + ") in phrase " + phrase.getText());
}
/**
* Inserts a phrase into the lookup tree, starting at the given tree node and word index.
* Walks the words of wordList from wordIndex, descending through nested maps, until the phrase is
* stored as a new Phrase, merged into an existing matching phrase as an additional form, or all
* words are consumed (in which case it is handled under the PHRASE_END key of the last map reached).
* When an existing phrase matches, only addForm(phraseText) is called on it; the tag and
* phraseData of this call are not used.
* Always increments nStrings; increments nPhrases only when a new Phrase object was created.
* @param tree - tree node to start inserting at
* @param phraseText - original text of the phrase
* @param tag - tag passed to the Phrase constructor when a new phrase is created
* @param wordList - words of the phrase, used as keys into the tree
* @param phraseData - data passed to the Phrase constructor when a new phrase is created
* @param wordIndex - index of the first word of wordList to process
* @return true if a new phrase was created, or if addForm on an existing phrase returned true
* @throws RuntimeException if a tree node or list element has an unexpected class
*/
private synchronized boolean addPhrase(Map tree,
String phraseText, String tag, WordList wordList, Object phraseData, int wordIndex)
{
// Find place to insert this item
boolean phraseAdded = false; // True if this phrase was successfully added to the phrase table
boolean newPhraseAdded = false; // True if the phrase was a new phrase
boolean oldPhraseNewFormAdded = false; // True if the phrase already exists, and this was new form added to old phrase
for (int i = wordIndex; i < wordList.size(); i++) {
String word = Interner.globalIntern(wordList.getWord(i));
Object node = tree.get(word);
if (node == null) {
// insert here
Phrase phrase = new Phrase(wordList, phraseText, tag, phraseData);
tree.put(word, phrase);
phraseAdded = true;
newPhraseAdded = true;
} else if (node instanceof Phrase) {
// check rest of the phrase matches
Phrase oldphrase = (Phrase) node;
int matchedTokenEnd = checkWordListMatch(
oldphrase, wordList, 0, wordList.size(), i+1, true);
if (matchedTokenEnd >= 0) {
// Same word sequence: record this text as another form of the existing phrase
oldPhraseNewFormAdded = oldphrase.addForm(phraseText);
} else {
// create list with this phrase and other and put it here
Phrase newphrase = new Phrase(wordList, phraseText, tag, phraseData);
List list = new ArrayList(2);
list.add(oldphrase);
list.add(newphrase);
tree.put(word, list);
newPhraseAdded = true;
}
phraseAdded = true;
} else if (node instanceof Map) {
// Descend one level; the next loop iteration looks up the next word in this map
tree = (Map) node;
} else if (node instanceof List) {
// Search through list for matches to word (at this point, the table is small, so no Map)
List lookupList = (List) node;
int nMaps = 0;
for (Object obj:lookupList) {
if (obj instanceof Phrase) {
// check rest of the phrase matches
Phrase oldphrase = (Phrase) obj;
int matchedTokenEnd = checkWordListMatch(
oldphrase, wordList, 0, wordList.size(), i, true);
if (matchedTokenEnd >= 0) {
oldPhraseNewFormAdded = oldphrase.addForm(phraseText);
phraseAdded = true;
break;
}
} else if (obj instanceof Map) {
if (nMaps == 1) {
throw new RuntimeException("More than one map in list while adding word "
+ i + "(" + word + ") in phrase " + phraseText);
}
tree = (Map) obj;
nMaps++;
} else {
throw new RuntimeException("Unexpected class in list " + obj.getClass() + " while adding word "
+ i + "(" + word + ") in phrase " + phraseText);
}
}
if (!phraseAdded && nMaps == 0) {
// add to list
Phrase newphrase = new Phrase(wordList, phraseText, tag, phraseData);
lookupList.add(newphrase);
newPhraseAdded = true;
phraseAdded = true;
if (lookupList.size() > MAX_LIST_SIZE) {
// convert lookupList (should consist only of phrases) to map
Map newMap = new HashMap(lookupList.size());
for (Object obj:lookupList) {
if (obj instanceof Phrase) {
Phrase oldphrase = (Phrase) obj;
// Key each phrase by its next word (index i+1) inside the new map
addPhrase(newMap, oldphrase, i+1);
} else {
throw new RuntimeException("Unexpected class in list " + obj.getClass() + " while converting list to map");
}
}
tree.put(word,newMap);
}
}
} else {
throw new RuntimeException("Unexpected class in list " + node.getClass() + " while adding word "
+ i + "(" + word + ") in phrase " + phraseText);
}
if (phraseAdded) {
break;
}
}
if (!phraseAdded) {
// All words were consumed by descending through maps (or there were no words to process)
if (wordList.size() == 0) {
System.err.println("WARNING: " + phraseText + " not added");
} else {
// The phrase ends exactly at this tree node: it belongs under the PHRASE_END key
Phrase oldphrase = (Phrase) tree.get(PHRASE_END);
if (oldphrase != null) {
int matchedTokenEnd = checkWordListMatch(
oldphrase, wordList, 0, wordList.size(), wordList.size(), true);
if (matchedTokenEnd >= 0) {
oldPhraseNewFormAdded = oldphrase.addForm(phraseText);
} else {
// create list with this phrase and other and put it here
Phrase newphrase = new Phrase(wordList, phraseText, tag, phraseData);
List list = new ArrayList(2);
list.add(oldphrase);
list.add(newphrase);
tree.put(PHRASE_END, list);
newPhraseAdded = true;
}
} else {
Phrase newphrase = new Phrase(wordList, phraseText, tag, phraseData);
tree.put(PHRASE_END, newphrase);
newPhraseAdded = true;
}
}
}
// nStrings counts every call; nPhrases only counts newly created Phrase objects
if (newPhraseAdded) {
nPhrases++;
nStrings++;
} else {
nStrings++;
}
return (newPhraseAdded || oldPhraseNewFormAdded);
}
/**
 * Returns the normalized form of a word, using the normalization cache.
 * On a cache miss the form is computed with createNormalizedForm and stored in the cache.
 * @param word - word to normalize
 * @return normalized form of the word (may be the empty string)
 */
public String getNormalizedForm(String word)
{
  String normalized;
  // Read under the same lock that guards the write below
  synchronized (this) {
    if (normalizedCache == null) {
      // The cache field is transient and its initializer is not run on deserialization,
      // so recreate it here (same size as the field initializer)
      normalizedCache = new CacheMap<>(5000);
    }
    normalized = normalizedCache.get(word);
  }
  if (normalized == null) {
    // Compute outside the lock; a concurrent duplicate computation yields the same value
    normalized = createNormalizedForm(word);
    synchronized (this) {
      normalizedCache.put(word, normalized);
    }
  }
  return normalized;
}
// A single punctuation character together with any whitespace around it.
private static final Pattern punctWhitespacePattern = Pattern.compile("\\s*(\\p{Punct})\\s*");
// A run of one or more whitespace characters.
private static final Pattern whitespacePattern = Pattern.compile("\\s+");
// Word delimiter used by splitText when no tokenizer is set: runs of whitespace, underscores and hyphens.
private static final Pattern delimPattern = Pattern.compile("[\\s_-]+");
// Possessive 's followed by whitespace or end of input; group 1 captures what follows the 's.
private static final Pattern possPattern = Pattern.compile("'s(\\s+|$)");
/**
 * Computes the normalized form of a word according to the table's settings:
 * optional StringUtils.normalize, optional lower-casing, optional removal of punctuation
 * (or blanking of words that are a single punctuation character), and finally
 * removal of all whitespace.
 * @param word - word to normalize
 * @return normalized form of the word (may be the empty string)
 */
private String createNormalizedForm(String word)
{
  if (normalize) {
    word = StringUtils.normalize(word);
  }
  if (caseInsensitive) {
    // Locale.ROOT keeps the result independent of the JVM's default locale
    // (e.g. a Turkish default locale would lower-case 'I' to a dotless i)
    word = word.toLowerCase(Locale.ROOT);
  }
  if (ignorePunctuation) {
    word = punctWhitespacePattern.matcher(word).replaceAll("");
  } else if (ignorePunctuationTokens) {
    // Only drop the word when all of it is one punctuation character (plus whitespace)
    if (punctWhitespacePattern.matcher(word).matches()) {
      word = "";
    }
  }
  word = whitespacePattern.matcher(word).replaceAll("");
  return word;
}
/**
 * Looks up a phrase by its text; the text is split into words but not normalized.
 * @param phrase - text of the phrase
 * @return the matching phrase, or null if there is none
 */
public Phrase lookup(String phrase)
{
  WordList words = toWordList(phrase);
  return lookup(words);
}
/**
 * Looks up a phrase by its text; the text is split into words and each word is normalized.
 * @param phrase - text of the phrase
 * @return the matching phrase, or null if there is none
 */
public Phrase lookupNormalized(String phrase)
{
  WordList normalizedWords = toNormalizedWordList(phrase);
  return lookup(normalizedWords);
}
/**
 * Looks up the phrase whose words are exactly the words of the given word list.
 * The words are used as given (no normalization is applied here).
 * @param wordList - words of the phrase to find
 * @return the matching phrase, or null if there is none (also for a null word list or an empty table)
 * @throws RuntimeException if a tree node or list element has an unexpected class
 */
public Phrase lookup(WordList wordList)
{
  if (wordList == null || rootTree == null) return null;
  Map tree = rootTree;
  for (int i = 0; i < wordList.size(); i++) {
    String word = wordList.getWord(i);
    Object node = tree.get(word);
    if (node == null) {
      return null;
    } else if (node instanceof Phrase) {
      // A single Phrase stored under this word is the only candidate for this prefix:
      // it either matches completely or nothing does. Continuing the loop here would look up
      // the next word in the same map and could return an unrelated phrase.
      Phrase phrase = (Phrase) node;
      int matchedTokenEnd = checkWordListMatch(
          phrase, wordList, 0, wordList.size(), i, true);
      return (matchedTokenEnd >= 0)? phrase:null;
    } else if (node instanceof Map) {
      // Descend one level; the next iteration looks up the next word in this map
      tree = (Map) node;
    } else if (node instanceof List) {
      // Search through list for matches to word (at this point, the table is small, so no Map)
      List lookupList = (List) node;
      int nMaps = 0;
      for (Object obj:lookupList) {
        if (obj instanceof Phrase) {
          // check rest of the phrase matches
          Phrase phrase = (Phrase) obj;
          int matchedTokenEnd = checkWordListMatch(
              phrase, wordList, 0, wordList.size(), i, true);
          if (matchedTokenEnd >= 0) {
            return phrase;
          }
        } else if (obj instanceof Map) {
          if (nMaps == 1) {
            throw new RuntimeException("More than one map in list while looking up word "
                + i + "(" + word + ") in phrase " + wordList.toString());
          }
          tree = (Map) obj;
          nMaps++;
        } else {
          throw new RuntimeException("Unexpected class in list " + obj.getClass() + " while looking up word "
              + i + "(" + word + ") in phrase " + wordList.toString());
        }
      }
      if (nMaps == 0) {
        // No phrase in the list matched and there is no deeper level to search
        return null;
      }
    } else {
      throw new RuntimeException("Unexpected class in list " + node.getClass() + " while looking up word "
          + i + "(" + word + ") in phrase " + wordList.toString());
    }
  }
  // All words consumed by descending through maps: the phrase, if present, sits under PHRASE_END
  Phrase phrase = (Phrase) tree.get(PHRASE_END);
  if (phrase != null) {
    int matchedTokenEnd = checkWordListMatch(
        phrase, wordList, 0, wordList.size(), wordList.size(), true);
    return (matchedTokenEnd >= 0)? phrase:null;
  } else {
    return null;
  }
}
/**
 * Finds all spans (PhraseMatch) of the given text that correspond to a phrase in the table.
 * The text is split and normalized first.
 * @param text Input text to search over
 * @return List of all matched spans
 */
public List findAllMatches(String text)
{
  WordList normalizedTokens = toNormalizedWordList(text);
  int end = normalizedTokens.size();
  return findAllMatches(normalizedTokens, 0, end, false);
}
/**
 * Finds all spans (PhraseMatch) of the given tokens that correspond to a phrase in the table.
 * The tokens are normalized before matching.
 * @param tokens List of tokens to search over
 * @return List of all matched spans
 */
public List findAllMatches(WordList tokens)
{
  int end = tokens.size();
  return findAllMatches(tokens, 0, end, true);
}
/**
 * Finds all spans (PhraseMatch) of the given text that correspond to one of the acceptable phrases.
 * The text is split and normalized first.
 * @param acceptablePhrases - What phrases to look for (need to be subset of phrases already in table)
 * @param text Input text to search over
 * @return List of all matched spans
 */
public List findAllMatches(List acceptablePhrases, String text)
{
  WordList normalizedTokens = toNormalizedWordList(text);
  int end = normalizedTokens.size();
  return findAllMatches(acceptablePhrases, normalizedTokens, 0, end, false);
}
/**
 * Finds all spans (PhraseMatch) of the given tokens that correspond to one of the acceptable phrases.
 * The tokens are normalized before matching.
 * @param acceptablePhrases - What phrases to look for (need to be subset of phrases already in table)
 * @param tokens List of tokens to search over
 * @return List of all matched spans
 */
public List findAllMatches(List acceptablePhrases, WordList tokens)
{
  int end = tokens.size();
  return findAllMatches(acceptablePhrases, tokens, 0, end, true);
}
/**
 * Finds all spans within a token range that correspond to any phrase in the table.
 * @param tokens - tokens to search over
 * @param tokenStart - index of the first token to consider
 * @param tokenEnd - index one past the last token to consider
 * @param needNormalization - whether the tokens still have to be normalized
 * @return List of all matched spans
 */
public List findAllMatches(WordList tokens,
    int tokenStart, int tokenEnd,
    boolean needNormalization)
{
  final boolean findAll = true;
  final boolean matchEnd = false; // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEnd);
}
/**
 * Finds all spans within a token range that correspond to one of the acceptable phrases.
 * @param acceptablePhrases - What phrases to look for
 * @param tokens - tokens to search over
 * @param tokenStart - index of the first token to consider
 * @param tokenEnd - index one past the last token to consider
 * @param needNormalization - whether the tokens still have to be normalized
 * @return List of all matched spans
 */
public List findAllMatches(List acceptablePhrases,
    WordList tokens,
    int tokenStart, int tokenEnd,
    boolean needNormalization)
{
  final boolean findAll = true;
  final boolean matchEnd = false; // don't need to match end exactly
  return findMatches(acceptablePhrases, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEnd);
}
/**
 * Finds phrases that start at the beginning of the given text (does not restart the search
 * at later positions). The text is split and normalized first.
 * @param text - text to search over
 * @return List of matched spans
 */
public List findMatches(String text)
{
  WordList normalizedTokens = toNormalizedWordList(text);
  int end = normalizedTokens.size();
  return findMatches(normalizedTokens, 0, end, false);
}
/**
 * Finds phrases that start at the beginning of the given tokens (does not restart the search
 * at later positions). The tokens are normalized before matching.
 * @param tokens - tokens to search over
 * @return List of matched spans
 */
public List findMatches(WordList tokens)
{
  int end = tokens.size();
  return findMatches(tokens, 0, end, true);
}
/**
 * Finds phrases that start at tokenStart within the given token range
 * (does not restart the search at later positions).
 * @param tokens - tokens to search over
 * @param tokenStart - index of the first token to consider
 * @param tokenEnd - index one past the last token to consider
 * @param needNormalization - whether the tokens still have to be normalized
 * @return List of matched spans
 */
public List findMatches(WordList tokens,
    int tokenStart, int tokenEnd,
    boolean needNormalization)
{
  final boolean findAll = false;  // don't need to find all
  final boolean matchEnd = false; // don't need to match end exactly
  return findMatches(null, tokens, tokenStart, tokenEnd, needNormalization, findAll, matchEnd);
}
/**
 * Finds phrases that start at tokenStart within a token range of the given text.
 * The text is always split and normalized here, so the needNormalization argument is not
 * consulted and the token indices refer to the normalized word list.
 * @param text - text to search over
 * @param tokenStart - index of the first token to consider
 * @param tokenEnd - index one past the last token to consider
 * @param needNormalization - not used by this overload
 * @return List of matched spans
 */
public List findMatches(String text,
    int tokenStart, int tokenEnd,
    boolean needNormalization)
{
  WordList normalizedTokens = toNormalizedWordList(text);
  final boolean alreadyNormalized = false;
  return findMatches(normalizedTokens, tokenStart, tokenEnd, alreadyNormalized);
}
/**
 * Checks whether the words of a phrase, aligned so that the phrase starts at tokenStart,
 * equal the tokens from checkStart onwards. Tokens before checkStart are not compared.
 * @param phrase - phrase to check
 * @param tokens - tokens to compare against
 * @param tokenStart - token index at which the phrase is assumed to start
 * @param tokenEnd - index one past the last token that may be used
 * @param checkStart - token index at which the comparison starts
 * @param matchEnd - if true, the phrase must end exactly at tokenEnd
 * @return token index one past the end of the phrase if it matches, -1 otherwise
 */
protected int checkWordListMatch(Phrase phrase, WordList tokens,
    int tokenStart, int tokenEnd,
    int checkStart,
    boolean matchEnd)
{
  if (checkStart < tokenStart) {
    return -1;
  }
  final int phraseSize = phrase.wordList.size();
  int pos = checkStart;
  while (pos < tokenEnd && pos - tokenStart < phraseSize) {
    String actual = tokens.getWord(pos);
    String expected = phrase.wordList.getWord(pos - tokenStart);
    if (!expected.equals(actual)) {
      return -1;
    }
    pos++;
  }
  if (pos - tokenStart != phraseSize) {
    // Ran out of tokens before the phrase ended (or checkStart was beyond the phrase)
    return -1;
  }
  // All tokens in phrase has been matched!
  if (matchEnd && pos != tokenEnd) {
    return -1;
  }
  return pos;
}
/**
 * Reduces a list of phrase matches to matches that do not overlap, as selected by
 * IntervalTree.getNonOverlapping with PHRASEMATCH_LENGTH_ENDPOINTS_COMPARATOR.
 * A list with at most one match is returned as is.
 * @param phraseMatches - matches to filter
 * @return non-overlapping matches
 */
public List findNonOverlappingPhrases(List phraseMatches)
{
  if (phraseMatches.size() <= 1) {
    return phraseMatches;
  }
  return IntervalTree.getNonOverlapping(phraseMatches, PHRASEMATCH_LENGTH_ENDPOINTS_COMPARATOR);
}
/**
* Finds phrase matches within a token range, optionally normalizing the tokens first.
* When needNormalization is true, each token in the range is normalized, tokens whose normalized
* form is empty are dropped, matching runs over the remaining tokens, and the begin/end indices
* of each match are mapped back to indices into the original token list.
* @param acceptablePhrases - phrases to restrict matching to, or null for no restriction
* @param tokens - tokens to search over
* @param tokenStart - index of the first token to consider
* @param tokenEnd - index one past the last token to consider
* @param needNormalization - whether the tokens still have to be normalized
* @param findAll - passed through to findMatchesNormalized
* @param matchEnd - passed through to findMatchesNormalized
* @return List of matched spans
*/
protected List findMatches(Collection acceptablePhrases,
WordList tokens, int tokenStart, int tokenEnd,
boolean needNormalization, boolean findAll, boolean matchEnd)
{
if (needNormalization) {
assert(tokenStart >= 0);
// NOTE(review): with assertions enabled this rejects an empty range (tokenEnd == tokenStart) - confirm that is intended
assert(tokenEnd > tokenStart);
int n = tokenEnd - tokenStart;
List normalized = new ArrayList<>(n);
// tokenIndexMap[k] is the original index of the k-th kept (non-empty) normalized token;
// one extra slot holds the end position that follows the last kept token
int[] tokenIndexMap = new int[n+1];
int j = 0, last = 0;
for (int i = tokenStart; i < tokenEnd; i++) {
String word = tokens.getWord(i);
word = getNormalizedForm(word);
if (word.length() != 0) {
normalized.add(word);
tokenIndexMap[j] = i;
last = i;
j++;
}
}
tokenIndexMap[j] = Math.min(last+1, tokenEnd);
List matched = findMatchesNormalized(acceptablePhrases, new StringList(normalized),
0, normalized.size(), findAll, matchEnd);
// Translate indices into the normalized list back to indices into the original tokens
for (PhraseMatch pm:matched) {
assert(pm.tokenBegin >= 0);
assert(pm.tokenEnd >= pm.tokenBegin);
assert(pm.tokenEnd <= normalized.size());
if (pm.tokenEnd > 0 && pm.tokenEnd > pm.tokenBegin) {
// Non-empty match: end is one past the original index of the last matched token
pm.tokenEnd = tokenIndexMap[pm.tokenEnd-1]+1;
} else {
pm.tokenEnd = tokenIndexMap[pm.tokenEnd];
}
pm.tokenBegin = tokenIndexMap[pm.tokenBegin];
assert(pm.tokenBegin >= 0);
assert(pm.tokenEnd >= pm.tokenBegin);
}
return matched;
} else {
return findMatchesNormalized(acceptablePhrases, tokens, tokenStart, tokenEnd, findAll, matchEnd);
}
}
/**
 * Finds phrase matches within a range of already normalized tokens by walking the lookup tree.
 * Starting at tokenStart, follows the tokens through the tree and records every phrase whose
 * words match the tokens. With findAll, the search is afterwards restarted from each later
 * start position in the range.
 * @param acceptablePhrases - phrases to restrict matching to, or null for no restriction
 * @param tokens - normalized tokens to search over
 * @param tokenStart - index of the first token to consider
 * @param tokenEnd - index one past the last token to consider
 * @param findAll - if true, look for matches starting at every position, not just tokenStart
 * @param matchEnd - if true, a phrase only matches when it ends exactly at tokenEnd
 * @return List of matched spans (empty if the table holds no phrases)
 * @throws RuntimeException if a tree node or list element has an unexpected class
 */
protected List findMatchesNormalized(Collection acceptablePhrases,
    WordList tokens, int tokenStart, int tokenEnd,
    boolean findAll, boolean matchEnd)
{
  List matched = new ArrayList<>();
  if (rootTree == null) {
    // Nothing has been added yet (or the table was cleared): there is no tree to walk
    return matched;
  }
  Stack todoStack = new Stack<>();
  todoStack.push(new StackEntry(rootTree, tokenStart, tokenStart, tokenEnd, findAll? tokenStart+1:-1));
  while (!todoStack.isEmpty()) {
    StackEntry cur = todoStack.pop();
    Map tree = cur.tree;
    for (int i = cur.tokenNext; i <= cur.tokenEnd; i++) {
      // A phrase may end exactly at this tree node
      if (tree.containsKey(PHRASE_END)) {
        Phrase phrase = (Phrase) tree.get(PHRASE_END);
        if (acceptablePhrases == null || acceptablePhrases.contains(phrase)) {
          int matchedTokenEnd = checkWordListMatch(
              phrase, tokens, cur.tokenStart, cur.tokenEnd, i, matchEnd);
          if (matchedTokenEnd >= 0) {
            matched.add(new PhraseMatch(phrase, cur.tokenStart, matchedTokenEnd));
          }
        }
      }
      // The loop runs one step past the last token only for the PHRASE_END check above
      if (i == cur.tokenEnd) break;
      String word = tokens.getWord(i);
      Object node = tree.get(word);
      if (node == null) {
        break;
      } else if (node instanceof Phrase) {
        // check rest of the phrase matches
        Phrase phrase = (Phrase) node;
        if (acceptablePhrases == null || acceptablePhrases.contains(phrase)) {
          int matchedTokenEnd = checkWordListMatch(
              phrase, tokens, cur.tokenStart, cur.tokenEnd, i+1, matchEnd);
          if (matchedTokenEnd >= 0) {
            matched.add(new PhraseMatch(phrase, cur.tokenStart, matchedTokenEnd));
          }
        }
        break;
      } else if (node instanceof Map) {
        tree = (Map) node;
      } else if (node instanceof List) {
        // Search through list for matches to word (at this point, the table is small, so no Map)
        List lookupList = (List) node;
        for (Object obj:lookupList) {
          if (obj instanceof Phrase) {
            // check rest of the phrase matches
            Phrase phrase = (Phrase) obj;
            if (acceptablePhrases == null || acceptablePhrases.contains(phrase)) {
              int matchedTokenEnd = checkWordListMatch(
                  phrase, tokens, cur.tokenStart, cur.tokenEnd, i+1, matchEnd);
              if (matchedTokenEnd >= 0) {
                matched.add(new PhraseMatch(phrase, cur.tokenStart, matchedTokenEnd));
              }
            }
          } else if (obj instanceof Map) {
            // Continue this same start position later, inside the nested map
            todoStack.push(new StackEntry((Map) obj, cur.tokenStart, i+1, cur.tokenEnd, -1));
          } else {
            throw new RuntimeException("Unexpected class in list " + obj.getClass() + " while looking up " + word);
          }
        }
        break;
      } else {
        throw new RuntimeException("Unexpected class " + node.getClass() + " while looking up " + word);
      }
    }
    // Entries with a non-negative continueAt schedule a fresh search from the next start position
    if (cur.continueAt >= 0) {
      int newStart = (cur.continueAt > cur.tokenStart)? cur.continueAt: cur.tokenStart+1;
      if (newStart < cur.tokenEnd) {
        todoStack.push(new StackEntry(cur.tree, newStart, newStart, cur.tokenEnd, newStart+1));
      }
    }
  }
  return matched;
}
/**
 * Creates an iterator over this table, backed by a PhraseTableIterator.
 * @return a new iterator for this table
 */
public Iterator iterator()
{
  PhraseTableIterator phraseIterator = new PhraseTableIterator(this);
  return phraseIterator;
}
private static class PhraseTableIterator extends AbstractIterator {
private PhraseTable phraseTable;
private Stack> iteratorStack = new Stack<>();
private Phrase next = null;
public PhraseTableIterator(PhraseTable phraseTable) {
this.phraseTable = phraseTable;
this.iteratorStack.push(this.phraseTable.rootTree.values().iterator());
this.next = getNext();
}
private Phrase getNext() {
while (!iteratorStack.isEmpty()) {
Iterator