![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.io.PennTreebankReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.io;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees;
import edu.berkeley.nlp.util.ConcatenationIterator;
/**
* @author Dan Klein
*/
public class PennTreebankReader {
static class TreeCollection extends AbstractCollection> {
List files;
Charset charset;
static class TreeIteratorIterator implements Iterator>> {
Iterator fileIterator;
Iterator> nextTreeIterator;
Charset charset;
BufferedReader currentFileReader, lastReader, readerToClose;
public boolean hasNext() {
return nextTreeIterator != null;
}
public Iterator> next() {
Iterator> currentTreeIterator = nextTreeIterator;
advance();
return currentTreeIterator;
}
public void remove() {
throw new UnsupportedOperationException();
}
private void advance() {
nextTreeIterator = null;
while (nextTreeIterator == null && fileIterator.hasNext()) {
File file = fileIterator.next();
// System.out.println(file);
try {
if (readerToClose!=null) {
// System.out.println("closing "+lastReader.toString());
readerToClose.close();
}
readerToClose = lastReader;
lastReader = currentFileReader;
// currentFileReader = new BufferedReader(
// new InputStreamReader(new FileInputStream(file), this.charset));
// nextTreeIterator = new Trees.PennTreeReader(currentFileReader);
nextTreeIterator = new Trees.PennTreeReader(new BufferedReader(
new InputStreamReader(new FileInputStream(file), this.charset)));
} catch (FileNotFoundException e) {
} catch (UnsupportedCharsetException e) {
throw new Error("Unsupported charset in file "+file.getPath());
}
catch (IOException e) {
new Error("Error closing file handle");
}
}
if (readerToClose!=null){
try {
readerToClose.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
TreeIteratorIterator(List files, Charset charset) {
this.fileIterator = files.iterator();
this.charset = charset;
advance();
}
}
@Override
public Iterator> iterator() {
return new ConcatenationIterator>(new TreeIteratorIterator(files, this.charset));
}
@Override
public int size() {
int size = 0;
Iterator i = iterator();
while (i.hasNext()) {
size++;
i.next();
}
return size;
}
private List getFilesUnder(String path, FileFilter fileFilter) {
File root = new File(path);
List files = new ArrayList();
addFilesUnder(root, files, fileFilter);
return files;
}
private void addFilesUnder(File root, List files, FileFilter fileFilter) {
if (! fileFilter.accept(root)) return;
if (root.isFile()) {
files.add(root);
return;
}
if (root.isDirectory()) {
SortedSet children = new TreeSet(Arrays.asList(root.listFiles()));
for (File child : children) {
addFilesUnder(child, files, fileFilter);
}
}
}
public TreeCollection(String path, int lowFileNum, int highFileNum, Charset charset) {
FileFilter fileFilter = new NumberRangeFileFilter(".mrg", lowFileNum, highFileNum, true);
this.files = getFilesUnder(path, fileFilter);
// for (File f : files) System.out.println(f.toString());
this.charset = charset;
}
public TreeCollection(String path, int lowFileNum, int highFileNum, String charsetName) {
this(path,lowFileNum,highFileNum,Charset.forName(charsetName));
}
public TreeCollection(String path, int lowFileNum, int highFileNum) {
this(path,lowFileNum,highFileNum,Charset.defaultCharset());
}
}
public static Collection> readTrees(String path, Charset charset) {
return readTrees(path, -1, Integer.MAX_VALUE, charset);
}
public static Collection> readTrees(String path, int lowFileNum, int highFileNumber, Charset charset) {
return new TreeCollection(path, lowFileNum, highFileNumber, charset);
}
public static void main(String[] args) {
Collection> trees = readTrees(args[0], Charset.defaultCharset());
for (Tree tree : trees) {
tree = (new Trees.StandardTreeNormalizer()).transformTree(tree);
System.out.println(Trees.PennTreeRenderer.render(tree));
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy