All downloads are free. The search and download features use the official Maven repository.

edu.emory.mathcs.nlp.conversion.C2DConverter Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2014, Emory University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.emory.mathcs.nlp.conversion;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import edu.emory.mathcs.nlp.common.constituent.CTNode;
import edu.emory.mathcs.nlp.common.constituent.CTTagEn;
import edu.emory.mathcs.nlp.common.constituent.CTTree;
import edu.emory.mathcs.nlp.common.util.PatternUtils;
import edu.emory.mathcs.nlp.component.template.node.NLPNode;
import edu.emory.mathcs.nlp.conversion.util.C2DInfo;
import edu.emory.mathcs.nlp.conversion.util.HeadRule;
import edu.emory.mathcs.nlp.conversion.util.HeadRuleMap;
import edu.emory.mathcs.nlp.conversion.util.HeadTagSet;

/**
 * @author Jinho D. Choi ({@code [email protected]})
 */
abstract public class C2DConverter
{
	/** Headrules looked up by constituent tag in {@link #setHeads(CTNode)}. */
	protected HeadRuleMap headrule_map;
	/** Headrule used by {@link #setHeads(CTNode)} when {@link #headrule_map} has no entry for a tag. */
	protected HeadRule    default_rule;
	
	/**
	 * Creates a converter that consults the given headrules.
	 * Both arguments are stored as given; no copy is made.
	 * @param headruleMap the headrules, looked up by constituent tag.
	 * @param defaultRule the headrule to fall back on when no headrule is found for a tag.
	 */
	public C2DConverter(HeadRuleMap headruleMap, HeadRule defaultRule)
	{
		this.headrule_map = headruleMap;
		this.default_rule = defaultRule;
	}
	
	/**
	 * Assigns head information to the specific node and, recursively, to every node under it.
	 * Children are always processed before their parent.
	 * <ul>
	 * <li>A terminal node becomes its own head.</li>
	 * <li>The {@code TOP} node receives no head information of its own.</li>
	 * <li>A node with exactly one child takes that child as its head.</li>
	 * <li>Any other node is delegated to {@link #setHeadsAux(HeadRule, CTNode)} together with the
	 *     headrule registered for its constituent tag, or the default headrule if none is registered
	 *     (in which case an error message is printed to {@code System.err}).</li>
	 * </ul>
	 * @param curr the node to start from.
	 */
	protected void setHeads(CTNode curr)
	{
		if (curr.isTerminal())
		{
			// a terminal is the head of itself
			curr.setC2DInfo(new C2DInfo(curr));
			return;
		}
		
		// bottom-up: every child is resolved before the current node
		for (CTNode kid : curr.getChildrenList())
		{
			setHeads(kid);
		}
		
		// nothing is assigned to the top node itself
		if (curr.isConstituentTag(CTTagEn.TOP))
		{
			return;
		}
		
		if (curr.getChildrenSize() == 1)
		{
			// an only child is trivially the head
			curr.setC2DInfo(new C2DInfo(curr.getChild(0)));
		}
		else
		{
			String   tag      = curr.getConstituentTag();
			HeadRule headRule = headrule_map.get(tag);
			
			if (headRule == null)
			{
				// unknown tag: report it and continue with the default headrule
				System.err.println("Error: headrules not found for \""+tag+"\"");
				headRule = default_rule;
			}
			
			// language-specific head selection
			setHeadsAux(headRule, curr);
		}
	}
	
	/**
	 * @return the head of the input node-list according to the headrule.
	 * Every other node in the list becomes the dependent of the head node.
	 * @param rule the headrule to be consulted.
	 * @param nodes the list of nodes.
	 * @param flagSize the number of head flags.
	 * <p>
	 * NOTE(review): this copy of the file is damaged. The {@code for} statement that opens the
	 * flag loop is cut off after {@code i}, so the remainder of this method and the signature of
	 * the method that follows are missing. The lines after the cut return the only node that is
	 * not an empty-category terminal, or {@code null}; this is presumably the body of
	 * {@code getDefaultHead}, which is called at the top of this method. Restore both methods
	 * from the upstream source before editing.
	 */
	protected CTNode getHead(HeadRule rule, List nodes, int flagSize)
	{
		// shortcut: ask getDefaultHead first; the headrule is only consulted when it yields null
		CTNode head = getDefaultHead(nodes);
		
		if (head == null)
		{
			// work on a copy so that reversing it does not reorder the caller's list
			nodes = new ArrayList<>(nodes);
			// right-to-left headrules see the nodes in reverse order
			if (rule.isRightToLeft()) Collections.reverse(nodes);
			
			int i, size = nodes.size(), flag;
			// one flag slot per node
			int[] flags = new int[size];
			CTNode child;
			
			// NOTE(review): the source is truncated inside the next line; see the note in the Javadoc above
			for (i=0; i nodes)
	{
		CTNode head = null;
		
		for (CTNode node : nodes)
		{
			// empty-category terminals are never candidates
			if (!node.isEmptyCategoryTerminal())
			{
				// a second candidate means there is no unique choice
				if (head != null) return null;
				head = node;
			}
		}

		// the single candidate, or null if every node is an empty-category terminal (or the list is empty)
		return head;
	}
	
	/**
	 * Creates the dependency nodes for the specific constituent tree, without head information.
	 * Index {@code 0} of the returned array holds the artificial root node; the node created for
	 * each token is stored at index {@code tokenID + 1}, carrying the token's word form (with
	 * brackets reverted), its constituent tag as the part-of-speech tag, the feature map of its
	 * {@link C2DInfo}, and an empty list of secondary heads.
	 * <p>
	 * The array has one slot more than the number of tokens because slot {@code 0} is taken by the
	 * root. This assumes token IDs are contiguous and start at 0, as the {@code + 1} shift implies;
	 * TODO confirm against {@code CTNode#getTokenID()}.
	 * @param cTree the constituent tree whose tokens are to be converted.
	 * @return the array of dependency nodes, the root first.
	 */
	protected NLPNode[] initDEPTree(CTTree cTree)
	{
		List<CTNode> cNodes = cTree.getTokenList();
		// +1: slot 0 is reserved for the root, tokens occupy slots 1..size
		NLPNode[] dNodes = new NLPNode[cNodes.size() + 1];
		
		dNodes[0] = new NLPNode().toRoot();
		
		for (CTNode cNode : cNodes)
		{
			// shift by one to make room for the root at index 0
			int    id   = cNode.getTokenID() + 1;
			String form = PatternUtils.revertBrackets(cNode.getWordForm());
			String pos  = cNode.getConstituentTag();
			
			NLPNode dNode = new NLPNode(id, form, pos, cNode.getC2DInfo().getFeatMap());
			dNode.setSecondaryHeads(new ArrayList<>());
			dNodes[id] = dNode;
		}
		
		return dNodes;
	}
	
	/**
	 * Sets the head of the specific constituent node using the specific headrule.
	 * Called by {@link #setHeads(CTNode)} as its final step, after all children of {@code curr}
	 * have been processed, and only when {@code curr} is not a terminal, is not the {@code TOP}
	 * node, and does not have exactly one child.
	 * @param rule the headrule registered for the constituent tag of {@code curr},
	 *             or the default headrule if none is registered.
	 * @param curr the constituent node whose head is to be set.
	 */
	abstract protected void setHeadsAux(HeadRule rule, CTNode curr);
	
	/**
	 * Returns the head flag of the specific constituent node.
	 * The meaning of the flag values is left to the implementing class.
	 * @param child the constituent node to get the head flag of.
	 * @return the head flag of the specific constituent node.
	 * @see EnglishC2DConverter#getHeadFlag(CTNode)
	 */
	abstract protected int getHeadFlag(CTNode child);
	
	/**
	 * Returns a dependency label given the specific phrase structure.
	 * The labels themselves are defined by the implementing class.
	 * @param C the current node.
	 * @param P the parent of {@code C}.
	 * @param p the head of {@code P}.
	 * @return a dependency label given the specific phrase structure.
	 */
	abstract protected String getDEPLabel(CTNode C, CTNode P, CTNode p);
	
	/**
	 * Returns the dependency tree converted from the specific constituent tree.
	 * If the constituent tree contains only empty categories, returns {@code null}.
	 * @param cTree the constituent tree to convert.
	 * @return the dependency tree converted from the specific constituent tree,
	 *         or {@code null} if the constituent tree contains only empty categories.
	 */
	abstract public NLPNode[] toDependencyGraph(CTTree cTree);
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy