All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.emory.mathcs.nlp.common.propbank.PBReader Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2015, Emory University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.emory.mathcs.nlp.common.propbank;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import edu.emory.mathcs.nlp.common.constant.StringConst;
import edu.emory.mathcs.nlp.common.constituent.CTReader;
import edu.emory.mathcs.nlp.common.constituent.CTTree;
import edu.emory.mathcs.nlp.common.util.IOUtils;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;

/**
 * @author Jinho D. Choi ({@code [email protected]})
 */
public class PBReader
{
	private BufferedReader f_in;
	
	public PBReader() {}
	
	/** @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}}. */
	public PBReader(InputStream in)
	{
		open(in);
	}
	
	/** @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}}. */
	public void open(InputStream in)
	{
		f_in = IOUtils.createBufferedReader(in);
	}
	
	public void close()
	{
		try
		{
			f_in.close();
		}
		catch (IOException e) {e.printStackTrace();}
	}
	
	/** @return the next instance if exists; otherwise, {@code null}. */
	public PBInstance nextInstance()
	{
		try
		{
			String line = f_in.readLine();
			
			if (line != null)
				return new PBInstance(line);
		}
		catch (IOException e) {e.printStackTrace();}
		
		return null;
	}
	
	public Int2ObjectMap> getInstanceMap()
	{
		Int2ObjectMap> map = new Int2ObjectOpenHashMap>();
		List list;
		PBInstance instance;
		
		while ((instance = nextInstance()) != null)
		{
			if (map.containsKey(instance.getTreeID()))
				list = map.get(instance.getTreeID());
			else
			{
				list = new ArrayList<>();
				map.put(instance.getTreeID(), list);
			}
			
			list.add(instance);
		}
		
		return map;
	}

	/** @return the sorted list of instances. */
	public List getSortedInstanceList()
	{
		List list = new ArrayList<>();
		PBInstance instance;
		
		while ((instance = nextInstance()) != null)
			list.add(instance);

		close();
		Collections.sort(list);
		
		return list;
	}
	
	/** @return {@link #getSortedInstanceList(String, boolean)}, where {@code normalize=false}. */
	public List getSortedInstanceList(String treeDir)
	{
		return getSortedInstanceList(treeDir, false);
	}
	
	/**
	 * @return the sorted list of instances including constituent trees associated with them.
	 * @param treeDir the Treebank directory path.
	 * @param normalize if {@code true}, calls {@link CTTree#normalizeIndices()}.
	 */
	public List getSortedInstanceList(String treeDir, boolean normalize)
	{
		List list = getSortedInstanceList();
		CTReader reader = new CTReader();
		CTTree   tree   = null;
		String treeFile = "";
		int    treeID   = -1;
		
		for (PBInstance instance : list)
		{
			if (!instance.isTreePath(treeFile))
			{
				treeFile = instance.getTreePath();
				treeID   = -1;
				reader.close();
				reader.open(IOUtils.createFileInputStream(treeDir + StringConst.FW_SLASH + treeFile));
			}
			
			for (; treeID < instance.getTreeID(); treeID++)
				tree = reader.nextTree();
			
			if (normalize)	tree.normalizeIndices();
			tree.initPBLocations();
			instance.setTree(tree);
		}
		
		return list;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy