at.spardat.xma.datasource.TabularData Maven / Gradle / Ivy

Go to download
/*******************************************************************************
 * Copyright (c) 2003, 2007 s IT Solutions AT Spardat GmbH .
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     s IT Solutions AT Spardat GmbH - initial API and implementation
 *******************************************************************************/

// @(#) $Id: TabularData.java 2089 2007-11-28 13:56:13Z s3460 $
package at.spardat.xma.datasource;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.StringTokenizer;

import at.spardat.xma.mdl.Atom;

/**
 * A TabularData table stores data in a row/column matrix of Atoms in
 * memory, together with the column header names.
 * It can be read from a File or an InputStream and written
 * to an OutputStream or File in CSV format. The "UTF-8"
 * encoding is used.
 *
 */
public class TabularData implements ITabularData {

    /** Separator placed between the cells of one CSV line; also used to split lines when reading. */
    private final static String COL_DELIM = ",";
    /** Line terminator written after the header line and after every data row. */
    private final static String NEW_LINE = "\r\n";
    /** Character encoding used for the readers and writers created by this class. */
    private final static String ENCODING = "UTF-8";

    /**
     * Holds the column names.
     */
    TabularDataHeader       header = new TabularDataHeader();

    /**
     * Holds the rows of the table; the contained objects are of type TabularDataRow.
     */
    ArrayList               rows = new ArrayList();

    /**
     * Constructs a table that has neither columns nor rows.
     * Columns must be registered with {@link #addColumn(String)} before
     * any row can be added.
     */
    public TabularData() {
        // nothing to do: header and rows are created by their field initializers
    }

    /**
     * Loads a table from a UTF-8 encoded CSV file.
     *
     * The first line of the file carries the column header names; every
     * following line holds one record. Fields on a line are separated by
     * commas. Spaces before and after the comma are not allowed. Every
     * value must be delimited using double quotes.
     *
     * The work is delegated to {@link #readFrom(InputStream)}, which also
     * closes the stream opened on the file.
     *
     * @param file the file to read from
     * @return the table read
     * @throws IOException if the file cannot be opened or read
     */
    public static TabularData readFrom (File file) throws IOException {
        return readFrom (new FileInputStream (file));
    }

    /**
     * Creates a table by reading it from an InputStream in UTF-8-encoded CSV
     * format. This method closes the inputStream when finished, whether or
     * not reading succeeded.
     *
     * The first line is parsed as the header. Depending on the column names,
     * either a TabularDomData or a plain TabularData is created. Every
     * following line that is not empty after trimming is parsed as one row.
     *
     * @param inputStream - The InputStream to read from.
     * @return The table read.
     * @throws IllegalArgumentException if inputStream is null.
     * @throws IOException and RuntimeExceptions if inputStream could not be read.
     */
    public static TabularData readFrom (InputStream inputStream) throws IOException {
        if (inputStream == null) {
            throw new IllegalArgumentException("Inputstream is null.");
        }

        TabularData         result = null;
        InputStreamReader   streamReader = null;
        BufferedReader      reader = null;

        try {
            streamReader = new InputStreamReader(inputStream, ENCODING);
            reader = new BufferedReader(streamReader);

            // Read header
            String          firstLine = reader.readLine();
            if (firstLine == null) throw new IOException ("Not a single line in stream.");
            String []       colNames = readHeader (firstLine);

            // create TabularDomData or TabularData, depending on header information
            result = TabularDomData.isDomainColStructure(colNames) ? new TabularDomData() : new TabularData();
            // add columns
            for (int i = 0; i < colNames.length; i++) {
                result.addColumn(colNames[i]);
            }
            // Read data, skipping lines that are empty after trimming
            String          line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) continue;
                result.readRow (line);
            }
        } finally {
            // Close only the outermost wrapper that was actually created; closing it
            // also closes everything it wraps. This avoids a NullPointerException
            // (which would hide the original failure) when a wrapper could not be
            // constructed, and avoids closing the same stream several times.
            if (reader != null) {
                reader.close();
            } else if (streamReader != null) {
                streamReader.close();
            } else {
                inputStream.close();
            }
        }
        return result;
    }

    /**
     * Extracts the column names from the header line of a CSV stream.
     *
     * The line is split at the column delimiter and every token is trimmed.
     * Because a StringTokenizer is used, consecutive delimiters are treated
     * as a single separator.
     *
     * @param line the line of text holding the header info
     * @return Array of column names, never empty
     * @throws IllegalArgumentException if line is null
     * @throws IOException if a column name is empty after trimming or the
     *         line does not contain a single column
     */
    private static String[] readHeader (String line) throws IOException {
        if (line == null) {
            throw new IllegalArgumentException();
        }

        ArrayList names = new ArrayList();
        for (StringTokenizer tokens = new StringTokenizer(line, COL_DELIM); tokens.hasMoreTokens(); ) {
            String name = tokens.nextToken().trim();
            if (name.length() == 0) {
                throw new IOException("Empty column name.");
            }
            names.add(name);
        }

        if (names.isEmpty()) {
            throw new IOException ("Not a single column available.");
        }
        return (String[]) names.toArray (new String[names.size()]);
    }

    /**
     * Parses one data line and appends it to this table as a new row.
     *
     * The line is split at the column delimiter; every token is converted
     * to an Atom via Atom.newTransportInstance and added to the row in
     * order of appearance.
     *
     * @param line the text line holding the cell values of one row
     * @throws IOException declared by the signature; nothing in this method
     *         throws it directly
     */
    private void readRow (String line) throws IOException {
        StringTokenizer cells = new StringTokenizer(line, COL_DELIM);
        TabularDataRow  parsedRow = new TabularDataRow (this);

        for (; cells.hasMoreTokens(); ) {
            parsedRow.add(Atom.newTransportInstance(cells.nextToken()));
        }
        addRow(parsedRow);
    }

    /**
     * Creates a table containing given data
     *
     * @param columnNames - The column header names
     * @param rows - The data as a matrix of strings
     */
    public TabularData(String[] columnNames, Atom[][] rows)
        throws IllegalArgumentException {

        // Header
        for (int i = 0; i < columnNames.length; i++) {
            addColumn(columnNames[i]);
        }

        // Rows
        for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) {
            TabularDataRow row = new TabularDataRow(this);
            for (int colIndex = 0;
                colIndex < rows[rowIndex].length;
                colIndex++) {
                Atom atom = rows[rowIndex][colIndex];
                row.add(atom);
            }
            addRow(row);
        }
    }

    /**
     * Appends a column to the header. Columns may only be added while the
     * table does not contain any rows.
     *
     * @param name the name of the column to be added.
     * @exception IllegalArgumentException if the name is null or empty, or if it is rejected by the header.
     * @exception IllegalStateException if there are already rows in the table.
     */
    public void addColumn (String name) throws IllegalArgumentException {
        boolean nameMissing = (name == null) || (name.length() == 0);
        if (nameMissing) {
            throw new IllegalArgumentException ();
        }

        // the column layout is frozen as soon as the first row exists
        boolean hasRows = size() > 0;
        if (hasRows) {
            throw new IllegalStateException("Not empty.");
        }

        header.addColumn(name);
    }

    /**
     * Returns the number of rows currently stored in the table.
     *
     * @see at.spardat.xma.datasource.ITabularData#size()
     */
    public int size() {
        final int rowCount = rows.size();
        return rowCount;
    }

    /**
     * Returns the number of columns defined in the header.
     *
     * @see at.spardat.xma.datasource.ITabularData#numCols()
     */
    public int numCols() {
        final int columnCount = header.size();
        return columnCount;
    }

    /**
     * Returns the table row stored at the given index.
     *
     * @param row the index of the row.
     * @return the TabularDataRow stored at that index
     * @exception RuntimeException if row is out of bounds
     *            (IndexOutOfBoundsException raised by the row list).
     */
    public TabularDataRow getRow(int row) {
        Object entry = rows.get(row);  // fails with IndexOutOfBoundsException if out of bounds
        return (TabularDataRow) entry;
    }

    /**
     * Appends a row to the end of the table.
     *
     * @param r the row to add.
     * @exception IllegalArgumentException if no columns have been defined yet.
     */
    public void addRow (TabularDataRow r) throws IllegalArgumentException {
        boolean hasColumns = header.size() != 0;
        if (!hasColumns) {
            throw new IllegalArgumentException("No columns known.");
        }
        rows.add(r);
    }

    /**
     * Returns the value of the cell addressed by row index and column index.
     *
     * @param row - The row index.
     * @param col - The column index.
     * @return the Atom stored in that cell
     * @see at.spardat.xma.datasource.ITabularData#getCell(int, int)
     */
    public Atom getCell(int row, int col) {
        return (Atom) getRow(row).get(col);
    }

    /**
     * Returns the value of the cell addressed by row index and column name.
     *
     * @param row - The row index.
     * @param columnName - The name of the column.
     * @return the Atom stored in that cell
     */
    public Atom getCell(int row, String columnName) {
        return (Atom) getRow(row).get(columnName);
    }

    /**
     * Looks up the position of the column named colName; the lookup is
     * delegated to the header.
     *
     * @param colName the name of the column
     * @return the index of the column or -1, if there is no such column.
     */
    public int getColumnIndex(String colName) {
        final int index = header.getColumnIndex(colName);
        return index;
    }

    /**
     * Returns the header name of the column at the given position; the
     * lookup is delegated to the header.
     *
     * @param col the column index
     * @return the name of that column
     * @exception IllegalArgumentException if col is out of range.
     */
    public String getColumnName (int col) {
        final String columnName = header.getColumnName(col);
        return columnName;
    }

    /**
     * Writes a TabularData to an OutputStream.
     *
     * @param outputStream - The OutputStream to write the TabularData to.
     * @throws IOException
     */
    public void write (OutputStream outputStream) throws IOException {

        OutputStreamWriter oWriter = null;
        BufferedWriter     bWriter = null;

        try {

            oWriter = new OutputStreamWriter (outputStream, ENCODING);
            bWriter = new BufferedWriter (oWriter);
            // Write header
            writeHeader (bWriter);

            // Write rows
            for (int i=0, size=rows.size(); iWriter.
     *
     * @param writer the Writer to write the header to.
     * @throws IOException
     */
    private void writeHeader (Writer writer) throws IOException {
        // Emits all column names on one line, separated by COL_DELIM.
        final int columnCount = header.size();

        int index = 0;
        while (index < columnCount) {
            // a delimiter precedes every column name except the first one
            if (index != 0) {
                writer.write (COL_DELIM);
            }
            writer.write (header.getColumnName(index));
            index++;
        }
        // terminate the header line
        writer.write (NEW_LINE);
    }

    /**
     * Writes one row to a Writer as a single line: the transport strings
     * of its cells, separated by the column delimiter and followed by the
     * line terminator. As many cells are written as the header has columns.
     *
     * @param writer the Writer to write the row to.
     * @param row the index of the row to write.
     * @throws IOException if the writer fails.
     */
    private void writeRow (Writer writer, int row) throws IOException {
        final int columnCount = header.size();
        final TabularDataRow cells = getRow(row);

        int index = 0;
        while (index < columnCount) {
            // a delimiter precedes every cell except the first one
            if (index != 0) {
                writer.write (COL_DELIM);
            }
            Atom cell = cells.get(index);
            writer.write (cell.toTransportString());
            index++;
        }
        writer.write (NEW_LINE);
    }

    /**
     * Stores the table in the given file, in CSV format, by delegating to
     * {@link #write(OutputStream)}.
     *
     * The file stream is always closed before this method returns, even if
     * writing fails, so the file handle cannot leak.
     *
     * @param file - The file to write to.
     * @throws IOException if the file cannot be opened, written or closed.
     */
    public void save(File file) throws IOException {
        FileOutputStream fileStream = new FileOutputStream(file);
        try {
            write(fileStream);
        } finally {
            // Closing an already closed FileOutputStream has no effect, so this is
            // safe even when write() has closed the stream itself.
            fileStream.close();
        }
    }

    /**
     * Removes all rows from the table. The column definitions held by the
     * header are not touched.
     */
    public void clear() {
        this.rows.clear();
    }

    /**
     * Maps this TabularData to an integer hash code. Since this hash code is
     * used for probabilistic up-to-date checks, all table data has to take
     * part in the calculation and the generated hash should be equally
     * distributed over the int domain.
     *
     * The result combines the hash of the header with the hash of the row
     * list by exclusive or.
     *
     * @see java.lang.Object#hashCode()
     */
    public int hashCode() {
        return header.hashCode() ^ rows.hashCode();
    }

}