All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pepsoft.util.CSVDataSource Maven / Gradle / Ivy

There is a newer version: 2.15.18
Show newest version
package org.pepsoft.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.google.common.base.Strings.isNullOrEmpty;
import static java.util.Arrays.asList;
import static java.util.stream.Collectors.joining;

/**
 * Utility class for reading and writing RFC 4180 CSV files. This class does not
 * support line breaks in fields, and it only supports files with headers.
 */
public class CSVDataSource {
    /**
     * Open a CSV formatted character stream for reading. A
     * {@code CSVDataSource} that has been used for reading once cannot be
     * reused.
     *
     * @param in The character stream to read.
     * @throws IOException If an I/O error occurs reading the headers or the
     * first row of data.
     */
    public void openForReading(Reader in) throws IOException {
        if ((this.in != null) || (out != null)) {
            throw new IllegalStateException("Already open");
        }
        if (in instanceof BufferedReader) {
            this.in = (BufferedReader) in;
        } else {
            this.in = new BufferedReader(in);
        }
        readHeaders();
        readValues();
    }

    /**
     * Open a CSV formatted character stream for writing. A
     * {@code CSVDataSource} that has been used for writing once cannot be
     * reused.
     *
     * @param out The character stream to write to.
     * @param columnNames The names of all the columns that will be written.
     * @throws IOException If an I/O error occurs writing the headers.
     */
    public void openForWriting(Writer out, String... columnNames) throws IOException {
        if ((in != null) || (this.out != null)) {
            throw new IllegalStateException("Already open");
        }
        this.out = out;
        columnsByIndex = asList(columnNames);
        columnsByName = new HashMap<>();
        for (int i = 0; i < columnNames.length; i++) {
            columnsByName.put(columnNames[i], i);
        }
        writeHeaders();
        currentRow = asList(new String[columnNames.length]);
    }

    /**
     * Indicates whether the end of the file has been reached. Only applicable
     * when reading.
     *
     * @return {@code true} if the end of the file has been reached.
     */
    public boolean isEndOfFile() {
        return currentRow == null;
    }

    /**
     * Advance to the next row, if any. When writing this will write a row of
     * values to the stream. When reading, {@link #isEndOfFile()} should be
     * invoked afterwards, and before trying to get data, to determine whether
     * the end of the file had been reached.
     *
     * @throws IOException If an I/O error occurs reading from or writing to the
     * stream.
     */
    public void next() throws IOException {
        if (in != null) {
            readValues();
        } else {
            writeValues();
        }
    }

    /**
     * Get a string-typed value by column name.
     *
     * @param columnName The name of the column.
     * @return The value of the specified column in the current row.
     */
    public String getString(String columnName) {
        checkColumnName(columnName);
        return getString(columnsByName.get(columnName));
    }

    /**
     * Get a string-typed value by column name.
     *
     * @param columnName The name of the column.
     * @param defaultValue The value to return if the specified column is not present, or the value is not set.
     * @return The value of the specified column in the current row.
     */
    public String getString(String columnName, String defaultValue) {
        return columnsByName.containsKey(columnName) ? getString(columnsByName.get(columnName), defaultValue) : defaultValue;
    }

    /**
     * Get a string-typed value by column index.
     *
     * @param columnIndex The index of the column.
     * @return The value of the specified column in the current row.
     */
    public String getString(int columnIndex) {
        return currentRow.get(columnIndex);
    }

    /**
     * Get a string-typed value by column index.
     *
     * @param columnIndex The index of the column.
     * @return The value of the specified column in the current row, or {@code defaultValue} if the column does not
     * exist or the value is not set.
     */
    public String getString(int columnIndex, String defaultValue) {
        final String value = currentRow.get(columnIndex);
        return isNullOrEmpty(value) ? defaultValue : value;
    }

    /**
     * Set a string-typed value by column name. {@code null} values are
     * supported but are converted into empty strings.
     *
     * @param columnName The name of the column.
     * @param value The value to store in the column.
     */
    public void setString(String columnName, String value) {
        checkColumnName(columnName);
        setString(columnsByName.get(columnName), value);
    }

    /**
     * Set a string-typed value by column index. {@code null} values are
     * supported but are converted into empty strings.
     *
     * @param columnIndex The index of the column.
     * @param value The value to store in the column.
     */
    public void setString(int columnIndex, String value) {
        currentRow.set(columnIndex, value);
    }

    public int getInt(String columnName) {
        checkColumnName(columnName);
        try {
            return Integer.parseInt(getString(columnName));
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Not a valid integer value for column " + columnName + ": \"" + getString(columnName) + "\"", e);
        }
    }

    public int getInt(String columnName, int defaultValue) {
        if (! columnsByName.containsKey(columnName)) {
            return defaultValue;
        } else {
            final String stringValue = getString(columnName);
            try {
                return (stringValue != null) ? Integer.parseInt(stringValue) : defaultValue;
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("Not a valid integer value for column " + columnName + ": \"" + getString(columnName) + "\"", e);
            }
        }
    }

    public void setInt(String columnName, int value) {
        checkColumnName(columnName);
        setString(columnName, Integer.toString(value));
    }

    public boolean getBoolean(String columnName) {
        checkColumnName(columnName);
        return Boolean.parseBoolean(getString(columnName));
    }

    public boolean getBoolean(String columnName, boolean defaultValue) {
        if (! columnsByName.containsKey(columnName)) {
            return defaultValue;
        } else {
            final String stringValue = getString(columnName);
            return (stringValue != null) ? Boolean.parseBoolean(stringValue) : defaultValue;
        }
    }

    public void setBoolean(String columnName, boolean value) {
        checkColumnName(columnName);
        setString(columnName, Boolean.toString(value));
    }

    private void checkColumnName(String columnName) {
        if (! columnsByName.containsKey(columnName)) {
            throw new IllegalArgumentException("There is no column named \"" + columnName + "\"");
        }
    }

    private void readHeaders() throws IOException {
        List headers = readLine();
        columnsByName = new HashMap<>();
        for (int i = 0; i < headers.size(); i++) {
            columnsByName.put(headers.get(i), i);
        }
    }

    private void writeHeaders() throws IOException {
        out.write(columnsByIndex.stream().map(this::quoteIfNecessary).collect(joining(",")));
    }

    private void readValues() throws IOException {
        currentRow = readLine();
    }

    private void writeValues() throws IOException {
        out.write("\r\n");
        out.write(currentRow.stream()
                .map(str -> (str != null) ? str : "")
                .map(this::quoteIfNecessary)
                .collect(joining(",")));
        currentRow.replaceAll(str -> null);
    }

    private String quoteIfNecessary(String text) {
        return (text.contains(",") || text.contains("\""))
                ? ('"' + text.replace("\"", "\"\"") + '"')
                : text;
    }

    private List readLine() throws IOException {
        String line = in.readLine();
        if (line == null) {
            return null;
        }

        List values = new ArrayList<>();
        StringBuilder currentValue = new StringBuilder();
        final int IDLE = 0;
        final int READING_QUOTED_VALUE = 1;
        final int READING_UNQUOTED_VALUE = 2;
        final int QUOTE_ENCOUNTERED_IN_QUOTED_VALUE = 3;
        int state = IDLE;
        for (char c: line.toCharArray()) {
            switch (state) {
                case IDLE:
                    if (c == '"') {
                        state = READING_QUOTED_VALUE;
                    } else if (c == ',') {
                        values.add(currentValue.toString());
                        currentValue.setLength(0);
                    } else {
                        currentValue.append(c);
                        state = READING_UNQUOTED_VALUE;
                    }
                    break;
                case READING_QUOTED_VALUE:
                    if (c == '"') {
                        state = QUOTE_ENCOUNTERED_IN_QUOTED_VALUE;
                    } else {
                        currentValue.append(c);
                    }
                    break;
                case READING_UNQUOTED_VALUE:
                    if (c == ',') {
                        values.add(currentValue.toString());
                        currentValue.setLength(0);
                        state = IDLE;
                    } else {
                        currentValue.append(c);
                    }
                    break;
                case QUOTE_ENCOUNTERED_IN_QUOTED_VALUE:
                    if (c == '"') {
                        currentValue.append('"');
                        state = READING_QUOTED_VALUE;
                    } else if (c == ',') {
                        values.add(currentValue.toString());
                        currentValue.setLength(0);
                        state = IDLE;
                    } else {
                        throw new IOException("Single double quote encountered in quoted field (line: \"" + line + "\")");
                    }
                    break;
            }
        }
        switch (state) {
            case IDLE:
            case READING_UNQUOTED_VALUE:
            case QUOTE_ENCOUNTERED_IN_QUOTED_VALUE:
                values.add(currentValue.toString());
                break;
            case READING_QUOTED_VALUE:
                throw new IOException("Quoted field not closed (line: \"" + line + "\")");
        }

        return values;
    }

    private BufferedReader in;
    private Writer out;
    private Map columnsByName;
    private List columnsByIndex;
    private List currentRow;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy