com.as.text_understanding.tree_util.head.HeadFinder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of text-understanding Show documentation
Analyzing natural-language text, in particular predicate-argument structure.
The newest version!
package com.as.text_understanding.tree_util.head;

import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.function.Function;

import org.apache.commons.lang3.mutable.MutableInt;

import com.as.text_understanding.common.TextUnderstandingException;
import com.as.text_understanding.representation.tree.TreeItem;
import com.as.text_understanding.representation.tree.TreeNode;

/**
 * Finds the head item of the right-side of a context free rule. 
 * For a context-free rule (X ::= Y_1 Y_2 Y_3 ... Y_n) this class decides which of
 * (Y_1, Y_2, ..., Y_n) is the head of that context-free-rule.
 * 

 * Note that context free rule is actually expressed in every parent-and-children construct of a constituency-parse-tree. So
 * this class actually finds the child that is considered the head.
 * 
 * This class, and all the classes in this package, are implementation of the rules described in Michael Collins thesis Appendix A:
 * "A Description of The Head Rules". page 238. Michael Collins, Ph.D. Dissertation, 1999. It is available in the web (download
 * Collins parser, and find the thesis inside the downloaded tar.gz file).
 * 

 * 
 * TODO Change the implementation to return the nodes themselves, rather than indexes.
 * TODO List.get() is used by this class.
 *  
 *
 * 
 * Date: Mar 10, 2016
 * @author asher
 *
 */
public class HeadFinder
{
	public static int findHead(TreeNode treeNode)
	{
		return templatedFindHead(treeItemTagExtractor.apply(treeNode.getItem()), treeNode.getChildren(), treeNodeTagExtractor);
	}

	public static int findHead(String lhsTag, List items)
	{
		return templatedFindHead(lhsTag, items, treeItemTagExtractor);
	}
	
	public static TreeNode findTerminalHead(TreeNode treeNode)
	{
		if (treeNode.getItem().isTerminal()) {return treeNode;}
		int childHeadIndex = findHead(treeNode);
		List children = treeNode.getChildren();
		if (childHeadIndex >= children.size()) {throw new TextUnderstandingException("Wrong head index has been returned from findHead()");}
		return findTerminalHead(children.get(childHeadIndex));
	}

	
	private static  int scanLeftToRight(List items, List prioritizedTags, Function tagExtractor)
	{
		int size = items.size();
		if (size==0) throw new TextUnderstandingException("empty vector of items");
		for (String tag : prioritizedTags)
		{
			Iterator itemIterator = items.iterator();
			for (int index=0; index int scanRightToLeft(List items, List prioritizedTags, Function tagExtractor)
	{
		final int size = items.size();
		if (size==0) throw new TextUnderstandingException("empty vector of items");
		for (String tag : prioritizedTags)
		{
			ListIterator itemIterator = items.listIterator(size);
			for (int index=size-1; index>=0; --index)
			{
				T item = itemIterator.previous();
				String itemString = tagExtractor.apply(item);
				if (tag.equals(itemString))
				{
					return index;
				}
			}
		}
		return size-1;
	}
	
	
	
	private static  boolean scanOptionalSetLeftToRight(List items, Set tags, Function tagExtractor, MutableInt index)
	{
		final int size = items.size();
		Iterator itemIterator = items.iterator();
		for (int i=0; i boolean scanOptionalSetRightToLeft(List items, Set tags, Function tagExtractor, MutableInt index)
	{
		final int size = items.size();
		ListIterator itemIterator = items.listIterator(size);
		for (int i=size-1; i>=0; --i)
		{
			T item = itemIterator.previous();
			String itemTag = tagExtractor.apply(item);
			if (tags.contains(itemTag))
			{
				index.setValue(i);
				return true;
			}
		}
		return false;
	}
	
	
	
	private static  int findHeadOfNP(String lhsTag, List items, Function tagExtractor)
	{
		final int size = items.size();
		int headIndex = 0;
		final String lastWordTag = tagExtractor.apply(items.get(size-1));
		if (lastWordTag.equals("POS"))
		{
			headIndex = size-1;
		}
		else
		{
			final List npRules = PrioritizedTagsContainer.INSTANCE.getNpRules();
			boolean detected = false;
			for (SetAndDirection rule : npRules)
			{
				if (rule.getDirection()==Direction.LEFT_TO_RIGHT)
				{
					MutableInt mHeadIndex = new MutableInt(headIndex);
					detected = scanOptionalSetLeftToRight(items, rule.getSet(), tagExtractor, mHeadIndex);
					headIndex = mHeadIndex.intValue();
				}
				else
				{
					MutableInt mHeadIndex = new MutableInt(headIndex);
					detected = scanOptionalSetRightToLeft(items, rule.getSet(), tagExtractor, mHeadIndex);
					headIndex = mHeadIndex.intValue();
				}
				if (detected)
				{
					break;
				}
			}
			if (!detected)
			{
				headIndex = size-1;
			}
		}
		
		if (headIndex>=2) // indexes start from 0
		{
			final String oneBeforeTag = tagExtractor.apply(items.get(headIndex-1));
			if (oneBeforeTag.equals("CC"))
			{
				headIndex=headIndex-2;
			}
		}
		return headIndex;
	}
	
	private static  int templatedFindHead(String lhsTag, List items, Function tagExtractor)
	{
		if (lhsTag.equals("NP"))
		{
			return findHeadOfNP(lhsTag, items, tagExtractor);
		}
		else // not NP
		{
			List prioritizedTags = PrioritizedTagsContainer.INSTANCE.getPrioritizedTagsForLhsTag(lhsTag);
			switch(PrioritizedTagsContainer.INSTANCE.getDirectionOfLhsTag(lhsTag))
			{
			case LEFT_TO_RIGHT:
				return scanLeftToRight(items, prioritizedTags, tagExtractor);
			case RIGHT_TO_LEFT:
				return scanRightToLeft(items, prioritizedTags, tagExtractor);
			default:
				throw new TextUnderstandingException("bug");
			}
		}
	}
	
	private static class TreeItemTagExtractor implements Function
	{
		@Override
		public String apply(TreeItem t)
		{
			if (null==t) return "";
			String ret = null;
			if (t.isTerminal())
			{
				ret = t.getTerminal().getTag();
			}
			else
			{
				ret = t.getSymbol();
			}
			if (null==ret) ret = "";
			return ret;
		}
	}
	
	private static class TreeNodeTagExtractor implements Function
	{
		@Override
		public String apply(TreeNode t)
		{
			if (null==t) return null;
			return treeItemTagExtractor.apply(t.getItem());
		}
		
	}
	
	private static final TreeItemTagExtractor treeItemTagExtractor = new TreeItemTagExtractor();
	private static final TreeNodeTagExtractor treeNodeTagExtractor = new TreeNodeTagExtractor();
}