All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.deeplearning4j.text.sentenceiterator.UimaResultSetIterator Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta7
Show newest version
package org.deeplearning4j.text.sentenceiterator;

import org.apache.uima.cas.CAS;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.token.type.Sentence;
import org.deeplearning4j.text.annotator.SentenceAnnotator;
import org.deeplearning4j.text.annotator.TokenizerAnnotator;
import org.deeplearning4j.text.uima.UimaResource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Iterates over the rows of a JDBC {@link ResultSet} and returns the text of one column
 * sentence by sentence. Each row's text is run through the analysis engine of the
 * supplied {@link UimaResource}, and the covered text of every resulting
 * {@link Sentence} annotation is handed out in order.
 *
 * Database version of UimaSentenceIterator, based off Adam Gibson's UimaSentenceIterator
 * but extending BasicResultSetIterator.
 *
 * Please note: for reset functionality, the underlying JDBC ResultSet must not be of TYPE_FORWARD_ONLY.
 * To achieve this using postgres you can make your query using:
 * connection.prepareStatement(sql,ResultSet.TYPE_SCROLL_INSENSITIVE,ResultSet.CONCUR_READ_ONLY);
 *
 * @author Brad Heap [email protected]
 */
public class UimaResultSetIterator extends BasicResultSetIterator {

    private static final Logger log = LoggerFactory.getLogger(UimaResultSetIterator.class);

    /** Supplies the CAS instances and the analysis engine used to split rows into sentences. */
    private final UimaResource resource;

    /** Sentences of the row currently being consumed; {@code null} before the first row and after reset/finish. */
    protected volatile Iterator<String> sentences;

    /**
     * Constructor which builds a new UimaResource object from an aggregate of the
     * {@link TokenizerAnnotator} and {@link SentenceAnnotator} descriptions.
     *
     * @param rs the database result set object to iterate over
     * @param columnName the name of the column containing text
     * @throws ResourceInitializationException if the analysis engine cannot be created
     */
    public UimaResultSetIterator(ResultSet rs, String columnName) throws ResourceInitializationException {
        this(rs, columnName,
                        new UimaResource(AnalysisEngineFactory.createEngine(AnalysisEngineFactory
                                        .createEngineDescription(TokenizerAnnotator.getDescription(),
                                                        SentenceAnnotator.getDescription()))));
    }

    /**
     * Constructor which takes an existing UimaResource object
     *
     * @param rs the database result set object to iterate over
     * @param columnName the name of the column containing text
     * @param resource the resource providing the CAS and analysis engine used for sentence splitting
     */
    public UimaResultSetIterator(ResultSet rs, String columnName, UimaResource resource) {
        super(rs, columnName);
        this.resource = resource;
    }

    /**
     * Returns the next sentence. While the current row still has unread sentences they are
     * served first; otherwise the next row is fetched from the superclass and split.
     *
     * @return the next sentence (pre-processed when a pre-processor is set), or an empty
     *         string when the superclass returns {@code null} for the row or the row's
     *         text contains no sentence annotations
     * @throws RuntimeException wrapping any exception raised while fetching or analysing a row
     */
    @Override
    public synchronized String nextSentence() {
        // Sentences left over from the current row are served before touching the database.
        if (sentences != null && sentences.hasNext()) {
            return applyPreProcessor(sentences.next());
        }

        try {
            String text = super.nextSentence();
            if (text == null) {
                return "";
            }

            // NOTE(review): the CAS obtained here is never handed back to the resource in
            // this method - confirm whether UimaResource expects callers to release it.
            CAS cas = resource.retrieve();
            cas.setDocumentText(text);
            resource.getAnalysisEngine().process(cas);

            List<String> found = new ArrayList<>();
            for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
                found.add(sentence.getCoveredText());
            }
            sentences = found.iterator();

            // A row without any sentence annotation (e.g. empty text) is treated like a
            // null row instead of failing with NoSuchElementException.
            if (!sentences.hasNext()) {
                log.debug("Row text produced no sentences; returning an empty string");
                return "";
            }

            return applyPreProcessor(sentences.next());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs the given sentence through the configured pre-processor, if there is one.
     *
     * @param sentence the raw sentence text
     * @return the pre-processed sentence, or the input unchanged when no pre-processor is set
     */
    private String applyPreProcessor(String sentence) {
        if (this.getPreProcessor() != null) {
            return this.getPreProcessor().preProcess(sentence);
        }
        return sentence;
    }

    /**
     * Reports whether another call to {@link #nextSentence()} can be made.
     *
     * @return {@code true} if the current row has unread sentences, otherwise whatever the
     *         superclass reports
     * @throws RuntimeException wrapping any exception raised by the check
     */
    @Override
    public synchronized boolean hasNext() {
        try {
            return (sentences != null && sentences.hasNext()) || super.hasNext();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Discards any buffered sentences and delegates to the superclass reset.
     */
    @Override
    public void reset() {
        sentences = null;
        super.reset();
    }

    /**
     * Discards any buffered sentences and delegates to the superclass finish.
     */
    @Override
    public void finish() {
        sentences = null;
        super.finish();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy