edu.stanford.nlp.objectbank.DelimitRegExIterator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation
Show all versions of stanford-corenlp Show documentation
Stanford CoreNLP provides a set of natural language analysis tools that can take raw English-language text as input and give the base forms of words, their parts of speech, and whether they are names of companies, people, etc.; normalize dates, times, and numeric quantities; mark up the structure of sentences in terms of phrases and word dependencies; and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher-level text understanding applications.
package edu.stanford.nlp.objectbank;
import java.util.function.Function;
import edu.stanford.nlp.util.AbstractIterator;
import java.io.BufferedReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
* An Iterator that reads the contents of a Reader, splits them on the specified
* delimiter, and then passes each piece through a Function to produce
* Objects of type T.
*
* @author Jenny Finkel getFactory(String delim) {
return DelimitRegExIteratorFactory.defaultDelimitRegExIteratorFactory(delim);
}
/**
 * Returns a factory that vends DelimitRegExIterators which read the contents of the
 * given Reader, split on the specified delimiter, apply {@code op} to each piece,
 * and return the results.
 *
 * @param <T> the type of object produced by {@code op}
 * @param delim the delimiter on which the Reader's contents are split
 * @param op the function applied to each delimited piece
 * @return a factory configured with {@code delim} and {@code op}
 */
public static <T> IteratorFromReaderFactory<T> getFactory(String delim, Function<String, T> op) {
  return new DelimitRegExIteratorFactory<>(delim, op);
}
/**
 * A serializable factory that creates {@link DelimitRegExIterator}s, each one
 * configured with the same delimiter and the same function.
 *
 * @param <T> the type of object produced by the iterators this factory creates
 */
public static class DelimitRegExIteratorFactory<T> implements IteratorFromReaderFactory<T>, Serializable {

  private static final long serialVersionUID = 6846060575832573082L;

  /** Delimiter handed to every iterator this factory creates. */
  private final String delim;

  /** Function handed to every iterator this factory creates. */
  private final Function<String, T> op;

  /**
   * Creates a factory that pairs the given delimiter with an IdentityFunction.
   *
   * @param delim the delimiter handed to every iterator the factory creates
   * @return a new factory using {@code delim} and an IdentityFunction
   */
  public static DelimitRegExIteratorFactory<String> defaultDelimitRegExIteratorFactory(String delim) {
    return new DelimitRegExIteratorFactory<>(delim, new IdentityFunction<>());
  }

  /**
   * Creates a factory from a delimiter and a function.
   *
   * @param delim the delimiter handed to every iterator the factory creates
   * @param op the function handed to every iterator the factory creates
   */
  public DelimitRegExIteratorFactory(String delim, Function<String, T> op) {
    this.delim = delim;
    this.op = op;
  }

  /**
   * Builds a new DelimitRegExIterator over the given Reader using this factory's
   * delimiter and function.
   *
   * @param r the Reader passed to the new iterator
   * @return a new iterator over {@code r}
   */
  public Iterator<T> getIterator(Reader r) {
    return new DelimitRegExIterator<>(r, delim, op);
  }
}
/**
 * Small demo: feeds a hard-coded sample string to a default DelimitRegExIterator
 * with the delimiter "\n\n" and prints every element the iterator yields, framed
 * by lines of asterisks.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  String sample = "@@123\nthis\nis\na\nsentence\n\n@@124\nThis\nis\nanother\n.\n\n@125\nThis\nis\nthe\nlast\n";
  StringReader source = new StringReader(sample);
  for (DelimitRegExIterator chunks = DelimitRegExIterator.defaultDelimitRegExIterator(source, "\n\n");
       chunks.hasNext(); ) {
    Object chunk = chunks.next();
    System.out.println("****\n" + chunk + "\n****");
  }
}
}