All downloads are free. Search and download functionality uses the official Maven repository.

com.github.nicosensei.textbatch.input.AbstractInputFileReader Maven / Gradle / Ivy

/**
 *
 */
package com.github.nicosensei.textbatch.input;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.LinkedList;
import java.util.List;

import com.github.nicosensei.textbatch.BatchExecutor;


/**
 * Base class for readers that consume a text file either line by line
 * ({@link #readLine()}) or in sections of a fixed number of lines
 * ({@link #readSection()}). Each raw text line is converted by the
 * subclass-provided {@link #parseLine(String)}.
 *
 * <p>{@link #close()}, {@link #readSection()}, {@link #readLine()} and
 * {@link #countLines(String)} are synchronized on this instance. The line
 * count getters are not synchronized.
 *
 * <p>NOTE(review): the type name {@code L} used by {@link #readLine()} and
 * {@link #parseLine(String)} has no declaration visible in this file;
 * presumably it is a type parameter of this class whose declaration was lost
 * when the source was extracted. Confirm against the original sources.
 *
 * @author ngiraud
 *
 */
public abstract class AbstractInputFileReader
implements InputFileReader {

    /**
     * Character encoding used to decode input files, looked up once from the
     * {@link BatchExecutor} property "encoding" of {@link InputFileReader}.
     */
    private static final String ENCODING = BatchExecutor.getInstance().getProperty(
            InputFileReader.class, "encoding");

    /**
     * Maximum number of lines per section.
     */
    private final int sectionSize;

    /**
     * When true, lines that are blank after trimming are skipped while reading
     * and are not counted as usable by {@link #countLines(String)}.
     */
    private final boolean ignoreEmptyLines;

    /**
     * Buffered reader for the input file.
     */
    private final BufferedReader inputFile;

    /**
     * Path of the input file, used in error reporting.
     */
    private final String inputFilePath;

    /**
     * Number of lines that will be handed to {@link #parseLine(String)}:
     * non-empty lines, plus empty lines when they are not ignored.
     */
    private int usableLineCount;

    /**
     * Number of lines that are blank after trimming.
     */
    private int emptyLineCount;

    /**
     * Opens the given file for reading, decoding it with
     * {@link #getEncoding()}.
     *
     * @param inputFile path of the file to read
     * @param sectionSize maximum number of lines returned by one call to
     *        {@link #readSection()}
     * @param ignoreEmptyLines whether blank lines must be skipped
     * @throws InputFileException if the file cannot be found or the encoding
     *         is not supported
     */
    public AbstractInputFileReader(
            String inputFile,
            int sectionSize,
            boolean ignoreEmptyLines) throws InputFileException {

        this.inputFilePath = inputFile;
        this.ignoreEmptyLines = ignoreEmptyLines;
        this.sectionSize = sectionSize;
        this.inputFile = openReader(inputFile, getEncoding());

        BatchExecutor.getInstance().logInfo("Processing input file by chunks of "
                + sectionSize + " lines.");
    }

    /**
     * Closes the reader.
     *
     * @throws InputFileException if closing the underlying reader fails
     */
    public synchronized void close() throws InputFileException {
        try {
            inputFile.close();
        } catch (IOException e) {
            throw InputFileException.closeFailed(inputFilePath, e);
        }
    }

    /**
     * Atomically reads the next section of the input file: up to
     * {@link #getSectionSize()} usable lines, each converted with
     * {@link #parseLine(String)}.
     *
     * @return a section holding the parsed lines; its second constructor
     *         argument is true when fewer lines than the section size could
     *         be read, i.e. the end of the file was reached
     * @throws InputFileException if reading or parsing fails
     */
    public synchronized InputFileSection readSection()
    throws InputFileException {

        List lines = new LinkedList();

        while (lines.size() < sectionSize) {
            String raw = nextUsableLine();
            if (raw == null) {
                break; // end of file
            }
            lines.add(parseLine(raw));
        }

        return new InputFileSection(lines, lines.size() < sectionSize);

    }

    /**
     * Atomically reads the next usable line of the input file. Lines for
     * which {@link #parseLine(String)} returns null are skipped.
     *
     * @return the next parsed line, or null when the end of the file has
     *         been reached
     * @throws InputFileException if reading or parsing fails
     */
    public synchronized L readLine() throws InputFileException {

        L line = null;
        while (line == null) {
            String raw = nextUsableLine();
            if (raw == null) {
                break; // end of file
            }
            line = parseLine(raw);
        }

        return line;

    }

    /**
     * Returns the total number of lines (empty or not) found by the last
     * call to {@link #countLines(String)}.
     *
     * @return the total line count
     */
    public int getLineCount() {
        // When empty lines are not ignored they are already part of the
        // usable count, so adding the empty count again would count them twice.
        return ignoreEmptyLines
                ? usableLineCount + emptyLineCount
                : usableLineCount;
    }

    /**
     * Returns the number of blank lines found by the last call to
     * {@link #countLines(String)}.
     *
     * @return the empty line count
     */
    public int getEmptyLineCount() {
        return emptyLineCount;
    }

    /**
     * Returns the number of usable lines found by the last call to
     * {@link #countLines(String)}. Despite its name, this value includes
     * empty lines when the reader was created with
     * {@code ignoreEmptyLines == false}.
     *
     * @return the usable line count
     */
    public int getNonEmptyLineCount() {
        return usableLineCount;
    }

    @Override
    public String getEncoding() {
        return ENCODING;
    }

    /**
     * Converts a raw text line into its parsed representation.
     *
     * @param line the raw line, without line terminator
     * @return the parsed line
     * @throws InputFileException if the line cannot be parsed
     */
    protected abstract L parseLine(String line) throws InputFileException;

    /**
     * @return the path of the file this reader was opened on
     */
    protected String getInputFilePath() {
        return inputFilePath;
    }

    /**
     * Counts the lines of the given file and stores the result, replacing any
     * previous count. The file is decoded with {@link #getEncoding()}, like
     * the file being read, so that both agree on line contents.
     *
     * @param inputFile path of the file whose lines must be counted
     * @throws InputFileException if the file cannot be found, the encoding is
     *         not supported, or reading fails
     */
    public synchronized void countLines(String inputFile)
    throws InputFileException {
        // Reset both counters so that repeated calls do not accumulate.
        this.usableLineCount = 0;
        this.emptyLineCount = 0;

        BufferedReader tmp = openReader(inputFile, getEncoding());

        int usable = 0;
        int empty = 0;
        try {
            try {
                String l;
                while ((l = tmp.readLine()) != null) {
                    if (lineIsEmpty(l)) {
                        empty++;
                        if (!ignoreEmptyLines) {
                            usable++;
                        }
                    } else {
                        usable++;
                    }
                }
            } finally {
                tmp.close();
            }
        } catch (IOException e) {
            throw InputFileException.readError(inputFile, e);
        }

        this.usableLineCount = usable;
        this.emptyLineCount = empty;

        BatchExecutor.getInstance().logInfo(
                usableLineCount + " lines to process in " + inputFile);
    }

    @Override
    public int getSectionSize() {
        return sectionSize;
    }

    /**
     * Reads raw lines until one that must be processed is found.
     * Callers must hold the lock on this instance.
     *
     * @return the next raw line to process, or null at end of file
     * @throws InputFileException if reading fails
     */
    private String nextUsableLine() throws InputFileException {
        while (true) {
            String l;
            try {
                l = inputFile.readLine();
            } catch (IOException e) {
                throw InputFileException.readError(inputFilePath, e);
            }
            if (l == null || !(ignoreEmptyLines && lineIsEmpty(l))) {
                return l;
            }
            // blank line while blank lines are ignored: keep reading
        }
    }

    /**
     * Opens a buffered reader on a file using the given encoding, closing the
     * underlying stream if the reader cannot be created.
     */
    private static BufferedReader openReader(String path, String encoding)
    throws InputFileException {
        FileInputStream stream;
        try {
            stream = new FileInputStream(path);
        } catch (FileNotFoundException e) {
            throw InputFileException.fileNotFound(path);
        }

        boolean opened = false;
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(stream, encoding));
            opened = true;
            return reader;
        } catch (UnsupportedEncodingException e) {
            throw InputFileException.ioError(path, e);
        } finally {
            if (!opened) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // the failure that prevented opening is the one to report
                }
            }
        }
    }

    /**
     * Tells whether a line contains nothing but whitespace.
     */
    private boolean lineIsEmpty(String l) {
        return l.trim().isEmpty();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy