All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.objectbank.DelimitRegExIterator Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.objectbank;

import java.util.function.Function;
import edu.stanford.nlp.util.AbstractIterator;

import java.io.BufferedReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * An Iterator that reads the contents of a Reader, splits them on the specified
 * delimiter, and then passes each piece through a Function to produce
 * Objects of type T.
 *
 * @author Jenny Finkel  getFactory(String delim) {
    return DelimitRegExIteratorFactory.defaultDelimitRegExIteratorFactory(delim);
  }

  /**
   * Returns a factory that vends DelimitRegExIterators that read the contents of the
   * given Reader, split on the specified delimiter, apply op, then return the result.
   *
   * @param delim the delimiter stored in the returned factory
   * @param op the function stored in the returned factory
   * @param <T> the type of object produced by {@code op}
   * @return a new {@code DelimitRegExIteratorFactory} built from {@code delim} and {@code op}
   */
  public static <T> IteratorFromReaderFactory<T> getFactory(String delim, Function<String,T> op) {
    return new DelimitRegExIteratorFactory<>(delim, op);
  }

  /**
   * Factory that creates {@code DelimitRegExIterator}s over a Reader. Every iterator
   * it creates is constructed with the delimiter and the function held by this factory.
   *
   * @param <T> the type of object produced by the function held by this factory
   */
  public static class DelimitRegExIteratorFactory<T> implements IteratorFromReaderFactory<T>, Serializable {

    private static final long serialVersionUID = 6846060575832573082L;

    /** Delimiter passed to every iterator created by this factory. */
    private final String delim;

    /**
     * Function passed to every iterator created by this factory.
     * NOTE(review): this field is not transient, so serializing the factory
     * requires the supplied function to be serializable as well - confirm callers.
     */
    private final Function<String,T> op;

    /**
     * Creates a factory that pairs the given delimiter with an {@code IdentityFunction}.
     *
     * @param delim the delimiter stored in the returned factory
     * @return a new factory built from {@code delim} and a new {@code IdentityFunction}
     */
    public static DelimitRegExIteratorFactory<String> defaultDelimitRegExIteratorFactory(String delim) {
      return new DelimitRegExIteratorFactory<>(delim, new IdentityFunction<>());
    }

    /**
     * Creates a factory holding the given delimiter and function.
     *
     * @param delim the delimiter passed to each created iterator
     * @param op the function passed to each created iterator
     */
    public DelimitRegExIteratorFactory(String delim, Function<String,T> op) {
      this.delim = delim;
      this.op = op;
    }

    /**
     * Creates a new {@code DelimitRegExIterator} over the given Reader, using this
     * factory's delimiter and function.
     *
     * @param r the Reader handed to the new iterator
     * @return a new iterator over {@code r}
     */
    public Iterator<T> getIterator(Reader r) {
      return new DelimitRegExIterator<>(r, delim, op);
    }

  }

  /**
   * Small demo: builds an iterator over a hard-coded sample string via
   * {@code defaultDelimitRegExIterator}, passing "\n\n" as the delimiter, and
   * prints every element it yields between "****" marker lines.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String s = "@@123\nthis\nis\na\nsentence\n\n@@124\nThis\nis\nanother\n.\n\n@125\nThis\nis\nthe\nlast\n";
    // Parameterized instead of raw so that next() is statically typed as String.
    DelimitRegExIterator<String> di = DelimitRegExIterator.defaultDelimitRegExIterator(new StringReader(s), "\n\n");
    while (di.hasNext()) {
      System.out.println("****\n" + di.next() + "\n****");
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy