data.CsvData Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of java-timeseries Show documentation
Show all versions of java-timeseries Show documentation
Time Series Analysis in Java
The newest version!
/*
* Copyright (c) 2016 Jacob Rachiele
*
*/
package data;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* Data from a CSV file.
*
* @author Jacob Rachiele
*/
final class CsvData {
private final File csvFile;
private final List parsedRecords;
private final List header;
/**
* Create a new CSV data object using the given file path. This constructor assumes no header row is present.
*
* @param csvFilePath the path to the file.
*/
CsvData(final String csvFilePath) {
this(csvFilePath, false);
}
/**
* Create a new CSV data object using the given file path and header flag.
*
* @param csvFilePath the path to the file.
* @param headerRow indicates whether the file contains a header row.
*/
CsvData(final String csvFilePath, final boolean headerRow) {
URL resource = getClass().getClassLoader().getResource(csvFilePath);
if (resource == null) {
throw new IllegalArgumentException(
"The resource given by " + csvFilePath + " could not be found on the file system.");
} else {
this.csvFile = new File(resource.getFile());
}
final CSVParser parser = getParser(csvFile);
List records;
if (parser == null) {
throw new IllegalArgumentException(
"The resource given by " + csvFilePath + " could not be found on the file system.");
} else {
try {
records = parser.getRecords();
} catch (IOException ie) {
throw new RuntimeException("The csv parser failed to retrieve the records.");
} finally {
try {
parser.close();
} catch (IOException ie) {
ie.printStackTrace();
}
}
// TODO: Add method for large csv files where we don't read all records at once.
if (headerRow) {
this.header = new ArrayList<>();
CSVRecord headerRecord = records.remove(0);
for (String s : headerRecord) {
this.header.add(s);
}
} else {
this.header = Collections.emptyList();
}
this.parsedRecords = records;
}
}
private CSVParser getParser(final File csvFile) {
try {
return CSVParser.parse(csvFile, Charset.defaultCharset(), CSVFormat.DEFAULT);
} catch (IOException ie) {
ie.printStackTrace();
System.out.println("Error parsing csv file. Path : " + csvFile.getAbsolutePath());
return null;
}
}
/**
* The csv file.
*
* @return the csv file.
*/
File csvFile() {
return this.csvFile;
}
/**
* The data row-by-row.
*
* @return the data row-by-row.
*/
List getRows() {
return this.parsedRecords;
}
/**
* The ith row of data.
*
* @param i the index of the row.
* @return the ith row of data.
*/
CSVRecord getRow(final int i) {
return this.parsedRecords.get(i);
}
/**
* The ith column of data.
*
* @param i the index of the column.
* @return the ith column of data.
*/
List getColumn(final int i) {
List column = new ArrayList<>(this.parsedRecords.size());
for (CSVRecord record : parsedRecords) {
column.add(record.get(i));
}
return column;
}
/**
* The header row.
*
* @return the header row.
*/
List getHeader() {
return Collections.unmodifiableList(this.header);
}
/**
* The data column-by-column.
*
* @return the data column-by-column.
*/
List> getColumns() {
int nCols = this.parsedRecords.get(0).size();
List> columns = new ArrayList<>(nCols);
List column;
for (int i = 0; i < nCols; i++) {
column = new ArrayList<>(this.parsedRecords.size());
for (CSVRecord row : this.parsedRecords) {
column.add(row.get(i));
}
columns.add(column);
}
return columns;
//return Collections.unmodifiableList(columns);
}
/**
* The column with the given name.
*
* @param columnName the name of the column.
* @return the column with the given name.
*/
List getColumn(final String columnName) {
List headerRow = getHeader();
for (int i = 0; i < headerRow.size(); i++) {
if (headerRow.get(i).equals(columnName)) {
return getColumn(i);
}
}
throw new IllegalArgumentException("A column with the name " + columnName + " was not found.");
}
/**
* Create a new dataframe with the column types set according to the given schema.
*
* @param schema a list of data types for each column in the dataframe.
* @return a new dataframe with the column types set according to the given schema.
*/
DataFrame createDataFrame(Schema schema) {
List> columns = getColumns();
if (schema.size() != columns.size()) {
throw new IllegalArgumentException(
"The schema size was " + schema.size() + " but the number of columns was " + columns.size() +
". There must be one data type for each column.");
}
DataFrame df = new DataFrame(schema.size());
int colId = 0;
String stringColId;
DataType type;
List column;
List header = this.getHeader();
for (int i = 0; i < schema.size(); i++) {
type = schema.get(i);
stringColId = (header.isEmpty()) ? Integer.toString(colId++) : header.get(i);
column = columns.get(i);
switch (type) {
case DOUBLE:
Double[] doubles = Types.toDoubleArray(column);
df.add(stringColId, Double[].class, doubles);
break;
case BOOLEAN:
Boolean[] booleans = Types.toBooleanArray(column);
df.add(stringColId, Boolean[].class, booleans);
break;
case LOCAL_DATE_TIME:
LocalDateTime[] localDateTimes = Types.toLocalDateTimeArray(column);
df.add(stringColId, LocalDateTime[].class, localDateTimes);
break;
case OFFSET_DATE_TIME:
OffsetDateTime[] offsetDateTimes = Types.toOffsetDateTimeArray(column);
df.add(stringColId, OffsetDateTime[].class, offsetDateTimes);
break;
case STRING:
df.add(stringColId, String[].class, Types.toStringArray(column));
break;
default:
df.add(stringColId, String[].class, Types.toStringArray(column));
}
}
return df;
}
/**
* Create a dataframe from this csv data with each column type set to String.
*
* @return a new dataframe from this csv data with each column type set to String.
*/
DataFrame createDataFrame() {
List> columns = getColumns();
DataFrame df = new DataFrame(columns.size());
int colId = 0;
String stringColId;
List column;
List header = this.getHeader();
for (int i = 0; i < columns.size(); i++) {
stringColId = (header.isEmpty()) ? Integer.toString(colId++) : header.get(i);
column = columns.get(i);
df.add(stringColId, String[].class, Types.toStringArray(column));
}
return df;
}
}