// org.deeplearning4j.text.sentenceiterator.BasicLineIterator (Maven / Gradle / Ivy)
/*
* ******************************************************************************
* *
* *
* * This program and the accompanying materials are made available under the
* * terms of the Apache License, Version 2.0 which is available at
* * https://www.apache.org/licenses/LICENSE-2.0.
* *
* * See the NOTICE file distributed with this work for additional
* * information regarding copyright ownership.
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* * License for the specific language governing permissions and limitations
* * under the License.
* *
* * SPDX-License-Identifier: Apache-2.0
* *****************************************************************************
*/
package org.deeplearning4j.text.sentenceiterator;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import java.io.*;
import java.util.Iterator;
@Slf4j
public class BasicLineIterator implements SentenceIterator, Iterable {
private BufferedReader reader;
private InputStream backendStream;
private SentencePreProcessor preProcessor;
private boolean internal = false;
public BasicLineIterator(@NonNull File file) throws FileNotFoundException {
this(new FileInputStream(file));
this.internal = true;
}
public BasicLineIterator(@NonNull InputStream stream) {
this.backendStream = stream;
reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(backendStream, 10 * 1024 * 1024)));
}
public BasicLineIterator(@NonNull String filePath) throws FileNotFoundException {
this(new FileInputStream(filePath));
this.internal = true;
}
@Override
public synchronized String nextSentence() {
try {
return (preProcessor != null) ? this.preProcessor.preProcess(reader.readLine()) : reader.readLine();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
public synchronized boolean hasNext() {
try {
return reader.ready();
} catch (Exception e) {
return false;
}
}
@Override
public synchronized void reset() {
try {
if (backendStream instanceof FileInputStream) {
((FileInputStream) backendStream).getChannel().position(0);
} else
backendStream.reset();
reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(backendStream, 10 * 10 * 1024)));
} catch (Exception e) {
throw new RuntimeException(e);
}
}
@Override
public void finish() {
try {
if (this.internal && backendStream != null)
backendStream.close();
if (reader != null)
reader.close();
} catch (Exception e) {
// do nothing here
}
}
@Override
public SentencePreProcessor getPreProcessor() {
return preProcessor;
}
@Override
public void setPreProcessor(SentencePreProcessor preProcessor) {
this.preProcessor = preProcessor;
}
@Override
protected void finalize() throws Throwable {
try {
if (this.internal && backendStream != null)
backendStream.close();
if (reader != null)
reader.close();
} catch (Exception e) {
// do nothing here
log.error("",e);
}
super.finalize();
}
/**
* Implentation for Iterable interface.
* Please note: each call for iterator() resets underlying SentenceIterator to the beginning;
*
* @return
*/
@Override
public Iterator iterator() {
this.reset();
Iterator ret = new Iterator() {
@Override
public boolean hasNext() {
return BasicLineIterator.this.hasNext();
}
@Override
public String next() {
return BasicLineIterator.this.nextSentence();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
return ret;
}
}