All downloads are free. Search and download functionality uses the official Maven repository.

edu.washington.cs.knowitall.nlp.PreChunkedSentenceReader Maven / Gradle / Ivy

There is a newer version: 1.4.3
Show newest version
package edu.washington.cs.knowitall.nlp;

import java.io.BufferedReader;
import java.io.IOException;
import java.text.ParseException;
import java.util.Iterator;

import com.google.common.collect.AbstractIterator;

/***
 * A class for reading in sentences that have already been chunked by the
 * OpenNLP sentence chunker.
 *
 * @author afader
 *
 */
public class PreChunkedSentenceReader implements Iterable {

    private final BufferedReader input;

    public PreChunkedSentenceReader(BufferedReader input) {
        this.input = input;
    }

    @Override
    public Iterator iterator() {

        final OpenNlpChunkedSentenceParser parser = new OpenNlpChunkedSentenceParser();

        return new AbstractIterator() {
            protected ChunkedSentence computeNext() {
                String line;
                try {
                    while ((line = input.readLine()) != null) {
                        try {
                            return parser.parseSentence(line);
                        } catch (ParseException e) {
                            continue;
                        }
                    }
                    return endOfData();
                } catch (IOException e) {
                    return endOfData();
                }

            }
        };

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy