com.opsmatters.core.documents.InputFileReader Maven / Gradle / Ivy
/*
* Copyright 2018 Gerald Curley
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.opsmatters.core.documents;
import java.io.InputStream;
import java.io.Reader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.TimeZone;
import java.util.NoSuchElementException;
import java.util.logging.Logger;
import java.text.SimpleDateFormat;
import com.opencsv.CSVReader;
import com.opsmatters.core.CommonFiles;
import com.opsmatters.core.util.Formats;
/**
* Defines the methods to parse and reference an input file in a CSV, XLS or XLSX format.
*
* @author Gerald Curley (opsmatters)
*/
public class InputFileReader
{
private static final Logger logger = Logger.getLogger(InputFileReader.class.getName());
private String name = "";
private FileDelimiter delimiter = FileDelimiter.COMMA;
private String worksheet = "";
private boolean trim = true;
private boolean removeQuotes = false;
private InputStream stream;
private String[] headers;
private List rows = new ArrayList();
/**
* Default constructor.
*/
public InputFileReader()
{
}
/**
* Constructor that takes a name.
* @param name The name of the input file
*/
public InputFileReader(String name)
{
this.name = name;
}
/**
* Sets the name of the input file.
* @param name The name of the input file
*/
public void setName(String name)
{
this.name = name;
}
/**
* Returns the name of the input file.
* @return The name of the input file
*/
public String getName()
{
return name;
}
/**
* Sets the delimiter used in the input file (CSV only).
* @param delimiter The delimiter used in the input file (CSV only)
*/
public void setDelimiter(FileDelimiter delimiter)
{
this.delimiter = delimiter;
}
/**
* Returns the delimiter used in the input file (CSV only).
* @return The delimiter used in the input file (CSV only)
*/
public FileDelimiter getDelimiter()
{
return delimiter;
}
/**
* Set to true
if the input file cells should be trimmed for leading and trailing whitespace.
* @param trim true
if the input file cells should be trimmed for leading and trailing whitespace
*/
public void setTrim(boolean trim)
{
this.trim = trim;
}
/**
* Returns true
if the input file cells should be trimmed for leading and trailing whitespace.
* @return true
if the input file cells should be trimmed for leading and trailing whitespace
*/
public boolean getTrim()
{
return trim;
}
/**
* Returns the worksheet to be opened in the input file (XLS, XLSX only).
* @return The worksheet to be opened in the input file (XLS, XLSX only)
*/
public String getWorksheet()
{
return worksheet;
}
/**
* Sets the worksheet to be opened in the input file (XLS, XLSX only).
* @param worksheet The worksheet to be opened in the input file (XLS, XLSX only)
*/
public void setWorksheet(String worksheet)
{
this.worksheet = worksheet;
}
/**
* Sets to true
if quotes should be removed when parsing the file.
* @param removeQuotes true
if quotes should be removed when parsing the file
*/
public void setRemoveQuotes(boolean removeQuotes)
{
this.removeQuotes = removeQuotes;
}
/**
* Returns true
if quotes should be removed when parsing the file.
* @return true
if quotes should be removed when parsing the file
*/
public boolean getRemoveQuotes()
{
return removeQuotes;
}
/**
* Sets the input stream for the input file.
* @param stream The input stream for the input file
*/
public void setInputStream(InputStream stream)
{
this.stream = stream;
}
/**
* Returns the input stream for the input file.
* @return The input stream for the input file
*/
public InputStream getInputStream()
{
return stream;
}
/**
* Returns the number of rows in the input file.
* @return The number of rows in the input file
*/
public int numRows()
{
return rows.size();
}
/**
* Returns the number of columns in the input file.
* @return The number of columns in the input file
*/
public int numColumns()
{
return headers != null ? headers.length : -1;
}
/**
* Returns the column headers in the input file.
* @return The column headers in the input file
*/
public String[] getHeaders()
{
return headers;
}
/**
* Returns the column header in the input file at the given index.
* @param col The column in the input file
* @return The column header in the input file at the given index
*/
public String getHeader(int col)
{
return headers != null ? headers[col] : null;
}
/**
* Returns the lines in the input file.
* @return The lines in the input file
*/
public List getRows()
{
return rows;
}
/**
* Returns the line in the input file for the given row.
* @param row The row in the input file
* @return The line in the input file for the given row
*/
public String[] getRow(int row)
{
return rows.get(row);
}
/**
* Returns the value from the input file at the given co-ordinates.
* @param row The row in the input file
* @param col The column in the input file
* @return The value at the given co-ordinates
*/
public String getValue(int row, int col)
{
return rows.get(row)[col];
}
/**
* Parses the input file into an array in memory.
* @return The rows of data from the input file
* @throws IOException if there is a problem reading the input file or it does not exist
*/
public List parse() throws IOException
{
if(stream == null)
throw new IllegalArgumentException("input stream null");
return parse(stream);
}
/**
* Parses the input file into an array in memory.
* @param stream The input stream with the file contents
* @return The rows of data from the input file
* @throws IOException if there is a problem reading the input file or it does not exist
*/
public List parse(InputStream stream) throws IOException
{
int columnCount = -1;
List lines = new ArrayList();
rows.clear();
// Excel spreadsheet
if(CommonFiles.isExcelFile(name))
{
if(worksheet != null && worksheet.length() > 0)
{
SimpleDateFormat df = new SimpleDateFormat(Formats.DATETIME_FORMAT);
df.setTimeZone(TimeZone.getTimeZone("GMT"));
Workbook workbook = Workbook.getWorkbook(name, stream);
Worksheet sheet = workbook.getSheet(worksheet);
if(sheet != null)
{
columnCount = sheet.getColumns();
for(int i = 0; i < sheet.getRows(); i++)
{
String[] values = sheet.getRow(i, df);
if(values != null)
lines.add(values);
}
}
else
{
throw new IllegalStateException("Worksheet '"+worksheet+"' not found");
}
workbook.close();
}
else
{
throw new IllegalArgumentException("Worksheet not specified");
}
}
else if(CommonFiles.isCsvFile(name))
{
Reader reader = new InputStreamReader(stream);
CSVReader csv = new CSVReader(reader, delimiter.separator().charAt(0));
String [] line;
while ((line = csv.readNext()) != null)
{
if(columnCount == -1)
columnCount = line.length;
lines.add(line);
}
reader.close();
}
int lineCount = lines.size();
logger.fine("InputFileReader.parse: lines="+lineCount+" columns="+columnCount);
// Error if the file is empty
if(lineCount <= 0 || columnCount == 0)
throw new IllegalStateException("input file does not contain any data");
// Parse the file headers and rows
int count = 0;
for(String[] line : lines)
{
// Ignore the line if there is no data
if(isBlankRow(line))
continue;
String[] row = new String[columnCount]; // the row of data
for(int column = 0; column < columnCount; column++)
{
// Check that the row column count matches the header column count
if(column >= line.length)
{
if(line.length > 0 && isCompleteLine(line))
break;
else
throw new IllegalStateException("Row "+count+" has more columns than the header row");
}
// The first row gives the column headers
if(count == 0)
{
try
{
row[column] = line[column].trim();
}
catch(NoSuchElementException e)
{
row[column] = "";
}
}
else // The subsequent rows give the actual data
{
try
{
String token = line[column];
// Remove quotes
if(removeQuotes && token.startsWith("\""))
{
token = token.substring(1);
if(token.endsWith("\""))
token = token.substring(0,token.length()-1);
// Replace all other double quotes with a single quote
token = token.replaceAll("\"\"", "\"");
}
// Trim the value (if required)
if(trim)
token.trim();
// Put the processed token in the array
row[column] = token;
logger.fine("InputFileReader.parse: row["+count+","+headers[column]+"]="+token);
}
catch(NoSuchElementException e)
{
row[column] = "";
}
}
}
if(count == 0)
headers = row;
else
rows.add(row);
++count;
}
return rows;
}
/**
* Returns true
if the given array of strings represents a complete line.
*
* Returns false
if end of the last item in the given array of strings has a line break in the middle.
*/
private boolean isCompleteLine(String[] line)
{
// If the last string starts with a quote, but doesnt't end with one
// then it must be a linefeed in the middle of a data field
String lastStr = line[line.length-1];
boolean isPartial = lastStr != null
&& (lastStr.equals("\"") || line.length == 1
|| (lastStr.startsWith("\"") && !lastStr.endsWith("\"")));
return !isPartial;
}
/**
* Returns true
if the given array of strings represents a blank line.
*/
private boolean isBlankRow(String[] line)
{
boolean ret = true;
for(int i = 0; i < line.length && ret; i++)
{
if(line[i] != null && line[i].trim().length() > 0)
ret = false;
}
return ret;
}
/**
* Returns a builder for the reader.
* @return The builder instance.
*/
public static Builder builder()
{
return new Builder();
}
/**
* Builder to make reader construction easier.
*/
public static class Builder
{
private InputFileReader reader = new InputFileReader();
/**
* Sets the name of the input file.
* @param name The name of the input file
* @return This object
*/
public Builder name(String name)
{
reader.setName(name);
return this;
}
/**
* Set to true
if the input file cells should be trimmed for leading and trailing whitespace.
* @param trim true
if the input file cells should be trimmed for leading and trailing whitespace
* @return This object
*/
public Builder trim(boolean trim)
{
reader.setTrim(trim);
return this;
}
/**
* Sets the worksheet to be opened in the input file (XLS, XLSX only).
* @param worksheet The worksheet to be opened in the input file (XLS, XLSX only)
* @return This object
*/
public Builder worksheet(String worksheet)
{
reader.setWorksheet(worksheet);
return this;
}
/**
* Sets the delimiter used in the input file (CSV only).
* @param delimiter The delimiter used in the input file (CSV only)
* @return This object
*/
public Builder delimiter(FileDelimiter delimiter)
{
reader.setDelimiter(delimiter);
return this;
}
/**
* Sets the input stream for the input file.
* @param stream The input stream for the input file
* @return This object
*/
public Builder withInputStream(InputStream stream)
{
reader.setInputStream(stream);
return this;
}
/**
* Returns the configured reader instance
* @return The reader instance
*/
public InputFileReader build()
{
return reader;
}
}
}