// Class: com.as.text_understanding.tree_util.head.HeadFinder
// Artifact: text-understanding
package com.as.text_understanding.tree_util.head;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.function.Function;
import org.apache.commons.lang3.mutable.MutableInt;
import com.as.text_understanding.common.TextUnderstandingException;
import com.as.text_understanding.representation.tree.TreeItem;
import com.as.text_understanding.representation.tree.TreeNode;
/**
* Finds the head item of the right-hand side of a context-free rule.
* For a context-free rule (X ::= Y_1 Y_2 Y_3 ... Y_n) this class decides which of
* (Y_1, Y_2, ..., Y_n) is the head of that rule.
*
* Note that a context-free rule is expressed by every parent-and-children construct of a constituency parse tree, so
* this class actually finds the child that is considered the head of its parent.
*
* This class, and all the classes in this package, are an implementation of the rules described in Appendix A of Michael Collins' thesis:
* "A Description of The Head Rules", page 238. Michael Collins, Ph.D. Dissertation, 1999. It is available on the web (download
* the Collins parser, and find the thesis inside the downloaded tar.gz file).
*
*
* TODO Change the implementation to return the nodes themselves, rather than indexes.
* TODO List.get() is used by this class.
*
*
*
* Date: Mar 10, 2016
* @author asher
*
*/
public class HeadFinder
{
public static int findHead(TreeNode treeNode)
{
return templatedFindHead(treeItemTagExtractor.apply(treeNode.getItem()), treeNode.getChildren(), treeNodeTagExtractor);
}
public static int findHead(String lhsTag, List items)
{
return templatedFindHead(lhsTag, items, treeItemTagExtractor);
}
public static TreeNode findTerminalHead(TreeNode treeNode)
{
if (treeNode.getItem().isTerminal()) {return treeNode;}
int childHeadIndex = findHead(treeNode);
List children = treeNode.getChildren();
if (childHeadIndex >= children.size()) {throw new TextUnderstandingException("Wrong head index has been returned from findHead()");}
return findTerminalHead(children.get(childHeadIndex));
}
private static int scanLeftToRight(List items, List prioritizedTags, Function tagExtractor)
{
int size = items.size();
if (size==0) throw new TextUnderstandingException("empty vector of items");
for (String tag : prioritizedTags)
{
Iterator itemIterator = items.iterator();
for (int index=0; index int scanRightToLeft(List items, List prioritizedTags, Function tagExtractor)
{
final int size = items.size();
if (size==0) throw new TextUnderstandingException("empty vector of items");
for (String tag : prioritizedTags)
{
ListIterator itemIterator = items.listIterator(size);
for (int index=size-1; index>=0; --index)
{
T item = itemIterator.previous();
String itemString = tagExtractor.apply(item);
if (tag.equals(itemString))
{
return index;
}
}
}
return size-1;
}
private static boolean scanOptionalSetLeftToRight(List items, Set tags, Function tagExtractor, MutableInt index)
{
final int size = items.size();
Iterator itemIterator = items.iterator();
for (int i=0; i boolean scanOptionalSetRightToLeft(List items, Set tags, Function tagExtractor, MutableInt index)
{
final int size = items.size();
ListIterator itemIterator = items.listIterator(size);
for (int i=size-1; i>=0; --i)
{
T item = itemIterator.previous();
String itemTag = tagExtractor.apply(item);
if (tags.contains(itemTag))
{
index.setValue(i);
return true;
}
}
return false;
}
private static int findHeadOfNP(String lhsTag, List items, Function tagExtractor)
{
final int size = items.size();
int headIndex = 0;
final String lastWordTag = tagExtractor.apply(items.get(size-1));
if (lastWordTag.equals("POS"))
{
headIndex = size-1;
}
else
{
final List npRules = PrioritizedTagsContainer.INSTANCE.getNpRules();
boolean detected = false;
for (SetAndDirection rule : npRules)
{
if (rule.getDirection()==Direction.LEFT_TO_RIGHT)
{
MutableInt mHeadIndex = new MutableInt(headIndex);
detected = scanOptionalSetLeftToRight(items, rule.getSet(), tagExtractor, mHeadIndex);
headIndex = mHeadIndex.intValue();
}
else
{
MutableInt mHeadIndex = new MutableInt(headIndex);
detected = scanOptionalSetRightToLeft(items, rule.getSet(), tagExtractor, mHeadIndex);
headIndex = mHeadIndex.intValue();
}
if (detected)
{
break;
}
}
if (!detected)
{
headIndex = size-1;
}
}
if (headIndex>=2) // indexes start from 0
{
final String oneBeforeTag = tagExtractor.apply(items.get(headIndex-1));
if (oneBeforeTag.equals("CC"))
{
headIndex=headIndex-2;
}
}
return headIndex;
}
private static int templatedFindHead(String lhsTag, List items, Function tagExtractor)
{
if (lhsTag.equals("NP"))
{
return findHeadOfNP(lhsTag, items, tagExtractor);
}
else // not NP
{
List prioritizedTags = PrioritizedTagsContainer.INSTANCE.getPrioritizedTagsForLhsTag(lhsTag);
switch(PrioritizedTagsContainer.INSTANCE.getDirectionOfLhsTag(lhsTag))
{
case LEFT_TO_RIGHT:
return scanLeftToRight(items, prioritizedTags, tagExtractor);
case RIGHT_TO_LEFT:
return scanRightToLeft(items, prioritizedTags, tagExtractor);
default:
throw new TextUnderstandingException("bug");
}
}
}
private static class TreeItemTagExtractor implements Function
{
@Override
public String apply(TreeItem t)
{
if (null==t) return "";
String ret = null;
if (t.isTerminal())
{
ret = t.getTerminal().getTag();
}
else
{
ret = t.getSymbol();
}
if (null==ret) ret = "";
return ret;
}
}
private static class TreeNodeTagExtractor implements Function
{
@Override
public String apply(TreeNode t)
{
if (null==t) return null;
return treeItemTagExtractor.apply(t.getItem());
}
}
private static final TreeItemTagExtractor treeItemTagExtractor = new TreeItemTagExtractor();
private static final TreeNodeTagExtractor treeNodeTagExtractor = new TreeNodeTagExtractor();
}