All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.emory.mathcs.nlp.common.propbank.PBLib Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2015, Emory University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.emory.mathcs.nlp.common.propbank;

import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.List;
import java.util.regex.Pattern;

import edu.emory.mathcs.nlp.common.constant.StringConst;
import edu.emory.mathcs.nlp.common.constituent.CTReader;
import edu.emory.mathcs.nlp.common.constituent.CTTree;
import edu.emory.mathcs.nlp.common.util.IOUtils;
import edu.emory.mathcs.nlp.common.util.PatternUtils;
import edu.emory.mathcs.nlp.common.util.StringUtils;


/**
 * @author Jinho D. Choi ({@code [email protected]})
 */
public class PBLib
{
	static public final String PREFIX_CONCATENATION	= "C-";
	static public final String PREFIX_REFERENT		= "R-";
	static public final String PREFIX_LINK			= "LINK-";
	static public final String DELIM_FUNCTION_TAG	= StringConst.HYPHEN;
	static public final Pattern P_R_ARG	= Pattern.compile("^"+PREFIX_REFERENT+"(.+)");
	static public final Pattern P_ARGN	= Pattern.compile("^(A|C-A|R-A)(RG)?(\\d|A)");
	static public final Pattern P_ARGM	= Pattern.compile("^A(RG)?M-(.+)");
	
	static private final Pattern P_LINK			= Pattern.compile("^"+PREFIX_LINK+"(.+)");
	static private final Pattern P_ARGN_CORE	= Pattern.compile("^A(RG)?(\\d|A)");
	
	private PBLib() {}
	
	static public List getTreeList(InputStream treebank, InputStream propbank)
	{
		return getTreeList(treebank, propbank, false);
	}
	
	static public List getTreeList(InputStream treebank, InputStream propbank, boolean normalize)
	{
		List trees = new CTReader(treebank).getTreeList();
		PBReader reader = new PBReader(propbank);
		PBInstance instance;
		CTTree tree;
		
		for (CTTree t : trees)
		{
			if (normalize) t.normalizeIndices();
			t.initPBLocations();
		}
		
		while ((instance = reader.nextInstance()) != null)
		{
			if (!isIllegalRolesetID(instance.getRolesetID()))
			{
				tree = trees.get(instance.getTreeID());
				tree.initPBInstance(instance);
			}
		}
		
		return trees;
	}
	
	/** @param out internally casted to {@code new PrintStream(new BufferedOutputStream(out))}. */
	static public void printInstances(List instances, OutputStream out)
	{
		PrintStream stream = IOUtils.createBufferedPrintStream(out);
		
		for (PBInstance instance : instances)
			stream.println(instance.toString());
				
		stream.close();
	}

	static public boolean isNumberedArgument(String label)
	{
		return P_ARGN.matcher(label).find();
	}

	static public boolean isCoreNumberedArgument(String label)
	{
		return P_ARGN_CORE.matcher(label).find();
	}
	
	static public boolean isLinkArgument(String label)
	{
		return label.startsWith(PREFIX_LINK);
	}
	
	static public boolean isConcatenatedArgument(String label)
	{
		return label.startsWith(PREFIX_CONCATENATION);
	}
	
	static public boolean isReferentArgument(String label)
	{
		return label.startsWith(PREFIX_REFERENT);
	}
	
	static public boolean isModifier(String label)
	{
		return P_ARGM.matcher(label).find();
	}
	
	static public boolean isIllegalRolesetID(String rolesetID)
	{
		return StringUtils.endsWithAny(rolesetID, "ER","NN","IE","YY");
	}
	
	static public boolean isUndefinedLabel(String label)
	{
		return label.endsWith("UNDEF");
	}
	
	static public boolean isLightVerbRoleset(String rolesetID)
	{
		return rolesetID.endsWith("LV");
	}
	
	static public String getShortLabel(String label)
	{
		return PBTag.PB_REL.equals(label) ? PBTag.PB_C_V : "A"+label.substring(3);
	}
	
	/**
	 * @return the number of an numbered argument (e.g., "0", "A").
	 * If the label is not a numbered argument, returns {@code null}. 
	 */
	static public String getNumber(String label)
	{
		return PatternUtils.getGroup(P_ARGN, label, 3);
	}
	
	static public String getLinkType(String label)
	{
		return PatternUtils.getGroup(P_LINK, label, 1);
	}
	
	/**@return the type of the modifier if exists (e.g., "TMP", "LOC"); otherwise, {@code null}. */
	static public String getModifierType(String label)
	{
		return PatternUtils.getGroup(P_ARGM, label, 2);
	}
	
	/** @return the label discarding prefixes such as C- or R-. */
	static public String getBaseLabel(String label)
	{
		if (label.startsWith(PREFIX_CONCATENATION))
			return label.substring(PREFIX_CONCATENATION.length());
		else if (label.startsWith(PREFIX_REFERENT))
			return label.substring(PREFIX_REFERENT.length());
		else
			return label;
	}
	
//	static public void toReferentArgument(SRLArc arc)
//	{
//		String label = arc.getLabel();
//		
//		if (label.startsWith("A"))
//			arc.setLabel(PREFIX_REFERENT + label);
//		else if (label.startsWith(PREFIX_CONCATENATION))
//			arc.setLabel(PREFIX_REFERENT + label.substring(PREFIX_CONCATENATION.length()));
//	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy