// at.spardat.xma.datasource.TabularData Maven / Gradle / Ivy
// The newest version!
/*******************************************************************************
* Copyright (c) 2003, 2007 s IT Solutions AT Spardat GmbH .
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* s IT Solutions AT Spardat GmbH - initial API and implementation
*******************************************************************************/
// @(#) $Id: TabularData.java 2089 2007-11-28 13:56:13Z s3460 $
package at.spardat.xma.datasource;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.StringTokenizer;
import at.spardat.xma.mdl.Atom;
/**
* A TabularData table stores data in a row/column matrix of Atoms in
* memory, with column header names.
* It can be read from a File or an InputStream and written
* to an OutputStream or a File in CSV format. The "UTF-8"
* encoding is used for both reading and writing.
*
*/
public class TabularData implements ITabularData {
/** Separator placed between the fields of a line; used both when tokenizing input lines and when writing. */
private final static String COL_DELIM = ",";
/** Line terminator written after the header line and after every row. */
private final static String NEW_LINE = "\r\n";
/** Character encoding name handed to the stream reader and the stream writer. */
private final static String ENCODING = "UTF-8";
/**
* Holds the column names.
*/
TabularDataHeader header = new TabularDataHeader();
/**
* Holds the rows of the table, type of contained objects is TabularDataRow.
* Declared as a raw ArrayList; elements are cast to TabularDataRow on access.
*/
ArrayList rows = new ArrayList();
/**
 * Constructs a table without any columns or rows. The header and the row
 * list are set up by their field initializers.
 */
public TabularData() {
    super();
}
/**
 * Creates a table from a CSV file by opening a FileInputStream on it and
 * delegating to {@link #readFrom(InputStream)}.
 *
 * As parsed by that method, the first line carries the column names and each
 * following non-blank line carries one row; fields on a line are separated
 * by commas and the text is decoded as UTF-8.
 *
 * @param file the file to read from
 * @return the table read
 * @throws IOException if the file cannot be opened or read
 */
public static TabularData readFrom (File file) throws IOException {
    InputStream source = new FileInputStream (file);
    return readFrom (source);
}
/**
 * Creates a table by reading UTF-8 encoded CSV text from an InputStream.
 * The first line supplies the column names. Every following line is trimmed;
 * lines that are empty after trimming are skipped, all others are parsed as
 * one row. Depending on the column names the result is a TabularDomData or a
 * plain TabularData. The stream is closed before this method returns,
 * whether reading succeeds or fails.
 *
 * @param inputStream the InputStream to read from; must not be null.
 * @return the table read.
 * @throws IllegalArgumentException if inputStream is null.
 * @throws IOException if the stream holds no line at all, the header line is
 *         rejected, or the stream cannot be read. Row parsing may raise
 *         further (runtime) exceptions.
 */
public static TabularData readFrom (InputStream inputStream) throws IOException {
    if (inputStream == null) {
        throw new IllegalArgumentException("Inputstream is null.");
    }
    InputStreamReader streamReader = null;
    BufferedReader reader = null;
    try {
        streamReader = new InputStreamReader(inputStream, ENCODING);
        reader = new BufferedReader(streamReader);

        // Header: the first line is mandatory.
        String firstLine = reader.readLine();
        if (firstLine == null) throw new IOException ("Not a single line in stream.");
        String [] colNames = readHeader (firstLine);

        // Create TabularDomData or TabularData, depending on header information.
        TabularData result = TabularDomData.isDomainColStructure(colNames) ? new TabularDomData() : new TabularData();
        for (int i = 0; i < colNames.length; i++) {
            result.addColumn(colNames[i]);
        }

        // Data: one row per non-blank line.
        String line;
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            if (line.length() == 0) continue;
            result.readRow (line);
        }
        return result;
    } finally {
        // Close only the outermost wrapper that was actually created; closing
        // it also closes what it wraps. This avoids a NullPointerException in
        // cleanup that would hide the original failure.
        if (reader != null) {
            reader.close();
        } else if (streamReader != null) {
            streamReader.close();
        } else {
            inputStream.close();
        }
    }
}
/**
 * Splits the header line into column names. The line is tokenized at the
 * column delimiter and every token is trimmed.
 *
 * @param line the line of text holding the header info; must not be null
 * @return array of column names, containing at least one element
 * @throws IllegalArgumentException if line is null
 * @throws IOException if a token is empty after trimming or the line yields no token at all
 */
private static String[] readHeader (String line) throws IOException {
    if (line == null) {
        throw new IllegalArgumentException();
    }
    ArrayList names = new ArrayList();
    for (StringTokenizer tokens = new StringTokenizer(line, COL_DELIM); tokens.hasMoreTokens(); ) {
        String name = tokens.nextToken().trim();
        if (name.length() == 0) {
            throw new IOException("Empty column name.");
        }
        names.add(name);
    }
    if (names.isEmpty()) {
        throw new IOException ("Not a single column available.");
    }
    return (String[]) names.toArray (new String[names.size()]);
}
/**
 * Parses one line of text into a row and appends that row to this table.
 * The line is tokenized at the column delimiter; each token is converted to
 * an Atom via Atom.newTransportInstance.
 *
 * @param line the text of the row to parse
 * @throws IOException declared for callers; conversion failures are raised
 *         by the called methods
 */
private void readRow (String line) throws IOException {
    StringTokenizer cells = new StringTokenizer(line, COL_DELIM);
    TabularDataRow parsedRow = new TabularDataRow (this);
    while (cells.hasMoreTokens()) {
        parsedRow.add(Atom.newTransportInstance(cells.nextToken()));
    }
    addRow(parsedRow);
}
/**
* Creates a table containing given data
*
* @param columnNames - The column header names
* @param rows - The data as a matrix of strings
*/
public TabularData(String[] columnNames, Atom[][] rows)
throws IllegalArgumentException {
// Header
for (int i = 0; i < columnNames.length; i++) {
addColumn(columnNames[i]);
}
// Rows
for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) {
TabularDataRow row = new TabularDataRow(this);
for (int colIndex = 0;
colIndex < rows[rowIndex].length;
colIndex++) {
Atom atom = rows[rowIndex][colIndex];
row.add(atom);
}
addRow(row);
}
}
/**
 * Appends a column to the header. Columns can only be added while the table
 * holds no rows.
 *
 * @param name the name of the column to be added.
 * @exception IllegalArgumentException if the name is null or empty; the header
 *            may reject a name as well.
 * @exception IllegalStateException if there are already rows in the table.
 */
public void addColumn (String name) throws IllegalArgumentException {
    boolean nameMissing = (name == null) || (name.length() == 0);
    if (nameMissing) {
        throw new IllegalArgumentException ();
    }
    boolean hasRows = size() > 0;
    if (hasRows) {
        throw new IllegalStateException("Not empty.");
    }
    header.addColumn(name);
}
/**
 * Returns the number of rows currently stored in this table.
 *
 * @see at.spardat.xma.datasource.ITabularData#size()
 */
public int size() {
    return this.rows.size();
}
/**
 * Returns the number of columns, as reported by the header.
 *
 * @see at.spardat.xma.datasource.ITabularData#numCols()
 */
public int numCols() {
    return this.header.size();
}
/**
 * Returns the row stored at the given index.
 *
 * @param row the index of the row.
 * @return the TabularDataRow stored at that index
 * @exception RuntimeException if row is out of bounds (the row list raises
 *            an IndexOutOfBoundsException).
 */
public TabularDataRow getRow(int row) {
    Object stored = rows.get(row); // IndexOutOfBoundsEx if out of bounds
    return (TabularDataRow) stored;
}
/**
 * Appends a row to the end of the table.
 *
 * @param r the row to add.
 * @exception IllegalArgumentException if no columns have been defined yet.
 */
public void addRow (TabularDataRow r) throws IllegalArgumentException {
    boolean hasColumns = header.size() != 0;
    if (!hasColumns) {
        throw new IllegalArgumentException("No columns known.");
    }
    rows.add(r);
}
/**
 * Returns the value of the cell addressed by row index and column index.
 *
 * @param row the row index.
 * @param col the column index.
 * @return the Atom the row holds at that column
 * @see at.spardat.xma.datasource.ITabularData#getCell(int, int)
 */
public Atom getCell(int row, int col) {
    return (Atom) getRow(row).get(col);
}
/**
 * Returns the value of the cell addressed by row index and column name.
 *
 * @param row the row index.
 * @param columnName the name of the column.
 * @return the Atom the row holds for that column name
 */
public Atom getCell(int row, String columnName) {
    return (Atom) getRow(row).get(columnName);
}
/**
 * Looks up the index of the column named colName by asking the header.
 *
 * @param colName the name of the column
 * @return the index of the column; per the original documentation -1 if
 *         there is no such column (decided by the header implementation).
 */
public int getColumnIndex(String colName) {
    int index = header.getColumnIndex(colName);
    return index;
}
/**
 * Returns the header name of the n'th column by asking the header.
 *
 * @param col the column index
 * @return the name of that column
 * @exception IllegalArgumentException if col is out of range, per the
 *            original documentation (raised by the header implementation).
 */
public String getColumnName (int col) {
    String columnName = header.getColumnName(col);
    return columnName;
}
/**
* Writes a TabularData to an OutputStream.
*
* @param outputStream - The OutputStream to write the TabularData to.
* @throws IOException
*/
public void write (OutputStream outputStream) throws IOException {
OutputStreamWriter oWriter = null;
BufferedWriter bWriter = null;
try {
oWriter = new OutputStreamWriter (outputStream, ENCODING);
bWriter = new BufferedWriter (oWriter);
// Write header
writeHeader (bWriter);
// Write rows
for (int i=0, size=rows.size(); iWriter.
*
* @param writer the Writer to write the header to.
* @throws IOException
*/
private void writeHeader (Writer writer) throws IOException {
int numCols = header.size();
for (int col = 0; col < numCols; col++) {
if (col > 0) {
writer.write (COL_DELIM);
}
writer.write (header.getColumnName(col));
}
writer.write (NEW_LINE);
}
/**
 * Writes one row to a Writer: the transport string of each cell in column
 * order, separated by the column delimiter and terminated by the line
 * separator. The number of cells written equals the number of header columns.
 *
 * @param writer the Writer to write the row to.
 * @param row the index of the row to write.
 * @throws IOException if writing fails.
 */
private void writeRow (Writer writer, int row) throws IOException {
    final int columnCount = header.size();
    final TabularDataRow cells = getRow(row);
    int col = 0;
    while (col < columnCount) {
        if (col != 0) {
            writer.write (COL_DELIM);
        }
        Atom cell = cells.get(col);
        writer.write (cell.toTransportString());
        col++;
    }
    writer.write (NEW_LINE);
}
/**
 * Stores the table in the given file by opening a FileOutputStream on it and
 * passing that stream to write(OutputStream).
 *
 * The stream is closed here in every case, so the file handle is released
 * even if writing fails. Should write(OutputStream) already have closed the
 * stream, the additional close has no effect.
 *
 * @param file the file to write to.
 * @throws IOException if the file cannot be opened or written.
 */
public void save(File file) throws IOException {
    FileOutputStream out = new FileOutputStream(file);
    try {
        write(out);
    } finally {
        out.close();
    }
}
/**
 * Removes all rows from the table. The header is not touched, so the
 * columns stay defined.
 */
public void clear() {
    this.rows.clear();
}
/**
 * Maps this TabularData to an integer hash code by XOR-ing the hash of the
 * header with the hash of the row list. Since this hash code is used for
 * probabilistic uptodate checks, be sure to include any table data in the
 * calculation and ensure that the generated hash is equally distributed
 * over the int domain.
 *
 * @see java.lang.Object#hashCode()
 */
public int hashCode() {
    return header.hashCode() ^ rows.hashCode();
}
}