All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.deeplearning4j.text.sentenceiterator.BasicLineIterator Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
package org.deeplearning4j.text.sentenceiterator;

import lombok.NonNull;

import java.io.*;
import java.util.Iterator;

/**
 * Primitive single-line iterator, without any options involved.
 * Can be used over InputStream or File.
 *
 * Please note: for reset functionality, mark/reset should be supported by underlying InputStream.
 *
 * @author [email protected]
  */
public class BasicLineIterator implements SentenceIterator, Iterable {

    private BufferedReader reader;
    private InputStream backendStream;
    private SentencePreProcessor preProcessor;
    private boolean internal = false;

    public BasicLineIterator(@NonNull File file) throws FileNotFoundException {
        this(new FileInputStream(file));
        this.internal = true;
    }

    public BasicLineIterator(@NonNull InputStream stream) {
        this.backendStream = stream;
        reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(backendStream, 10 * 1024 * 1024)));
    }

    public BasicLineIterator(@NonNull String filePath) throws FileNotFoundException {
        this(new FileInputStream(filePath));
        this.internal = true;
    }

    @Override
    public synchronized String nextSentence() {
        try {
            return (preProcessor != null) ? this.preProcessor.preProcess(reader.readLine()) : reader.readLine();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public synchronized boolean hasNext() {
        try {
            return reader.ready();
        } catch (Exception e) {
            return false;
        }
    }

    @Override
    public synchronized void reset() {
        try {
            if (backendStream instanceof FileInputStream) {
                ((FileInputStream) backendStream).getChannel().position(0);
            } else backendStream.reset();
            reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(backendStream, 8192)));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    @Override
    public void finish() {
        try {
            if (this.internal && backendStream != null) backendStream.close();
            if (reader != null) reader.close();
        } catch (Exception e) {
            // do nothing here
        }
    }

    @Override
    public SentencePreProcessor getPreProcessor() {
        return preProcessor;
    }

    @Override
    public void setPreProcessor(SentencePreProcessor preProcessor) {
        this.preProcessor = preProcessor;
    }

    @Override
    protected void finalize() throws Throwable {
        try {
            if (this.internal && backendStream != null) backendStream.close();
            if (reader != null) reader.close();
        } catch (Exception e) {
            // do nothing here
            e.printStackTrace();
        }
        super.finalize();
    }

    /**
     * Implentation for Iterable interface.
     * Please note: each call for iterator() resets underlying SentenceIterator to the beginning;
     *
     * @return
     */
    @Override
    public Iterator iterator() {
        this.reset();
        Iterator ret =  new Iterator() {
            @Override
            public boolean hasNext() {
                return BasicLineIterator.this.hasNext();
            }

            @Override
            public String next() {
                return BasicLineIterator.this.nextSentence();
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };

        return ret;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy