
// com.github.nicosensei.textbatch.input.AbstractInputFileReader (Maven / Gradle / Ivy)
/**
*
*/
package com.github.nicosensei.textbatch.input;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.LinkedList;
import java.util.List;
import com.github.nicosensei.textbatch.BatchExecutor;
/**
 * Base class for readers that consume a text file through a
 * {@link BufferedReader}, either one line at a time ({@link #readLine()}) or
 * in sections of at most {@link #getSectionSize()} lines
 * ({@link #readSection()}). Each raw line is converted by the subclass through
 * {@link #parseLine(String)}.
 *
 * <p>{@link #readLine()}, {@link #readSection()}, {@link #close()} and
 * {@link #countLines(String)} are synchronized on this instance. The counter
 * getters are not synchronized.
 *
 * <p>NOTE(review): {@code L} is used as a type throughout this class but no
 * type parameter declaration is visible here, and the collections are raw;
 * the generic arguments look like they were lost from this copy of the file.
 * Confirm against the original declaration before compiling.
 *
 * @author ngiraud
 */
public abstract class AbstractInputFileReader
        implements InputFileReader {

    /**
     * Charset name used to decode input files, obtained once at class
     * initialisation via
     * {@code BatchExecutor.getProperty(InputFileReader.class, "encoding")}.
     */
    private static final String ENCODING = BatchExecutor.getInstance().getProperty(
            InputFileReader.class, "encoding");

    /** Maximum number of parsed lines returned by one {@link #readSection()} call. */
    private final int sectionSize;

    /** When {@code true}, lines that are empty after trimming are skipped while reading. */
    private final boolean ignoreEmptyLines;

    /** Buffered reader for the input file. */
    private final BufferedReader inputFile;

    /** Path of the input file, kept for error reporting. */
    private final String inputFilePath;

    /** Lines that would be handed to {@link #parseLine(String)}, as of the last {@link #countLines(String)}. */
    private int usableLineCount;

    /** Lines that are empty after trimming, as of the last {@link #countLines(String)}. */
    private int emptyLineCount;

    /**
     * Opens the given file for reading with the charset named by
     * {@link #getEncoding()}.
     *
     * @param inputFile path of the file to read
     * @param sectionSize maximum number of lines per section
     * @param ignoreEmptyLines whether lines that are empty after trimming are skipped
     * @throws InputFileException if the file cannot be found or the charset
     *         name is not supported
     */
    public AbstractInputFileReader(
            String inputFile,
            int sectionSize,
            boolean ignoreEmptyLines) throws InputFileException {
        this.inputFilePath = inputFile;
        this.ignoreEmptyLines = ignoreEmptyLines;
        this.sectionSize = sectionSize;

        FileInputStream stream;
        try {
            stream = new FileInputStream(inputFile);
        } catch (FileNotFoundException e) {
            throw InputFileException.fileNotFound(inputFile);
        }

        // The stream is already open here: if wrapping it fails (unsupported
        // or missing charset name) it must be closed, otherwise the file
        // handle leaks.
        boolean opened = false;
        try {
            this.inputFile = new BufferedReader(
                    new InputStreamReader(stream, getEncoding()));
            opened = true;
        } catch (UnsupportedEncodingException e) {
            throw InputFileException.ioError(inputFilePath, e);
        } finally {
            if (!opened) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // The failure that brought us here is the one worth reporting.
                }
            }
        }

        BatchExecutor.getInstance().logInfo("Processing input file by chunks of "
                + sectionSize + " lines.");
    }

    /**
     * Closes the underlying reader.
     *
     * @throws InputFileException if closing the reader fails
     */
    public synchronized void close() throws InputFileException {
        try {
            inputFile.close();
        } catch (IOException e) {
            throw InputFileException.closeFailed(inputFilePath, e);
        }
    }

    /**
     * Atomically reads the next section of the input file: up to
     * {@link #getSectionSize()} parsed lines, fewer if the end of the file is
     * reached first.
     *
     * @return a section holding the parsed lines; its second constructor
     *         argument is {@code true} when fewer than
     *         {@link #getSectionSize()} lines could be read
     * @throws InputFileException if reading or parsing a line fails
     */
    public synchronized InputFileSection readSection()
            throws InputFileException {
        // NOTE(review): raw collection kept as found; the element type is
        // presumably L, to be confirmed together with the class declaration.
        List lines = new LinkedList();
        while (lines.size() < sectionSize) {
            String raw = nextUsableLine();
            if (raw == null) {
                break; // end of file
            }
            lines.add(parseLine(raw));
        }
        return new InputFileSection(lines, lines.size() < sectionSize);
    }

    /**
     * Atomically reads and parses the next line of the input file. Lines for
     * which {@link #parseLine(String)} returns {@code null} are passed over.
     *
     * @return the next parsed line, or {@code null} once the end of the file
     *         is reached
     * @throws InputFileException if reading or parsing a line fails
     */
    public synchronized L readLine() throws InputFileException {
        L line = null;
        while (line == null) {
            String raw = nextUsableLine();
            if (raw == null) {
                break; // end of file
            }
            line = parseLine(raw);
        }
        return line;
    }

    /**
     * Returns the total number of lines seen by the last
     * {@link #countLines(String)} call.
     *
     * @return total line count, empty lines included
     */
    public int getLineCount() {
        // When empty lines are not ignored they are already part of
        // usableLineCount; adding emptyLineCount again would count them twice.
        return ignoreEmptyLines
                ? usableLineCount + emptyLineCount
                : usableLineCount;
    }

    /**
     * @return number of lines that were empty after trimming in the last
     *         {@link #countLines(String)} call
     */
    public int getEmptyLineCount() {
        return emptyLineCount;
    }

    /**
     * Returns the number of usable lines found by the last
     * {@link #countLines(String)} call. Note that when empty lines are not
     * ignored, empty lines are usable and therefore included in this value.
     *
     * @return usable line count
     */
    public int getNonEmptyLineCount() {
        return usableLineCount;
    }

    /**
     * @return the charset name used to decode input files
     */
    @Override
    public String getEncoding() {
        return ENCODING;
    }

    /**
     * Converts one raw line of text into its parsed form.
     *
     * @param line the raw line, without line terminator
     * @return the parsed line
     * @throws InputFileException if the line cannot be parsed
     */
    protected abstract L parseLine(String line) throws InputFileException;

    /**
     * @return the path this reader was opened on
     */
    protected String getInputFilePath() {
        return inputFilePath;
    }

    /**
     * Counts the lines of the given file and stores the result for
     * {@link #getLineCount()}, {@link #getEmptyLineCount()} and
     * {@link #getNonEmptyLineCount()}. The file is decoded with the same
     * charset as the main reader so that both agree on line boundaries and
     * emptiness. Counters are replaced only when the whole file was read
     * successfully.
     *
     * @param inputFile path of the file whose lines are counted
     * @throws InputFileException if the file cannot be found or read
     */
    public synchronized void countLines(String inputFile)
            throws InputFileException {
        int usable = 0;
        int empty = 0;
        try {
            FileInputStream stream = new FileInputStream(inputFile);
            try {
                BufferedReader tmp = new BufferedReader(
                        new InputStreamReader(stream, getEncoding()));
                String l;
                while ((l = tmp.readLine()) != null) {
                    if (lineIsEmpty(l)) {
                        empty++;
                        if (!ignoreEmptyLines) {
                            usable++;
                        }
                    } else {
                        usable++;
                    }
                }
            } finally {
                // Closing the raw stream also covers the case where wrapping
                // it in a reader failed.
                stream.close();
            }
        } catch (FileNotFoundException e) {
            throw InputFileException.fileNotFound(inputFile);
        } catch (IOException e) {
            throw InputFileException.readError(inputFile, e);
        }

        this.usableLineCount = usable;
        this.emptyLineCount = empty;
        BatchExecutor.getInstance().logInfo(
                usableLineCount + " lines to process in " + inputFile);
    }

    /**
     * @return the maximum number of lines per section
     */
    @Override
    public int getSectionSize() {
        return sectionSize;
    }

    /**
     * Reads raw lines from the main reader until one should be processed.
     * Callers must hold this instance's monitor.
     *
     * @return the next raw line to parse, or {@code null} at end of file
     * @throws InputFileException if the underlying read fails
     */
    private String nextUsableLine() throws InputFileException {
        while (true) {
            String raw;
            try {
                raw = inputFile.readLine();
            } catch (IOException e) {
                throw InputFileException.readError(inputFilePath, e);
            }
            if (raw == null || !ignoreEmptyLines || !lineIsEmpty(raw)) {
                return raw;
            }
            // Empty line and empty lines are ignored: keep reading.
        }
    }

    /** Tells whether a line contains nothing but whitespace as defined by {@link String#trim()}. */
    private boolean lineIsEmpty(String l) {
        return l.trim().isEmpty();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy