All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.cq.commerce.pim.common.Csv Maven / Gradle / Ivy

/*
 * Copyright 1997-2008 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.adobe.cq.commerce.pim.common;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * A facility to read from and write to CSV (comma separated values) files.
 * <p>
 * This is a simplified version of the CSV tool of the H2 database:
 * http://code.google.com/p/h2database/source/browse/trunk/h2/src/main/org/h2/tools/Csv.java
 * (also written by Thomas Mueller)
 * <p>
 * An instance keeps the current reader, the current writer and the parse
 * position in plain mutable fields without any synchronization, so it can
 * serve one input and one output at a time. Calling one of the {@code read}
 * methods again restarts parsing on the new input.
 *
 * @author Thomas Mueller
 */
public class Csv {

    /** Number of characters requested from the reader per refill. */
    private static final int IO_BUFFER_SIZE = 4 * 1024;

    private char fieldSeparatorRead = ',';
    private char commentLineStart = '#';
    private String fieldSeparatorWrite = ",";
    private String rowSeparatorWrite;
    private char fieldDelimiter = '\"';
    private char escapeCharacter = '\"';
    private String lineSeparator = System.getProperty("line.separator");
    private String nullString = "";
    private Reader input;
    /** Sliding window over the input; grows when a single value does not fit. */
    private char[] inputBuffer;
    /** Index of the next character to hand out from {@link #inputBuffer}. */
    private int inputBufferPos;
    /** Start of the value currently being scanned, or -1 if nothing must be kept on refill. */
    private int inputBufferStart = -1;
    /** One past the last valid character in {@link #inputBuffer}. */
    private int inputBufferEnd;
    private Writer output;
    private boolean endOfLine, endOfFile;

    /**
     * Reads from the CSV file and returns an iterator over its rows.
     * <p>
     * The first row is parsed before this method returns; every further row is
     * parsed when the previous one is fetched from the iterator. The stream is
     * not closed when the last row has been read; call {@link #close()} to
     * release it.
     * <p>
     * Depending on the contents of the file, the first line of the result may
     * or may not contain the column names.
     *
     * @param in the input stream
     * @param charset the charset or null to use the system default charset (see
     *            system property file.encoding)
     * @return the iterator; each element is one row
     * @throws IOException if the charset is not supported
     * @throws RuntimeException if reading the first row fails with an
     *             IOException (the IOException is the cause)
     */
    public Iterator<String[]> read(InputStream in, String charset) throws IOException {
        if (charset == null) {
            charset = System.getProperty("file.encoding");
        }
        in = new BufferedInputStream(in, IO_BUFFER_SIZE);
        input = new InputStreamReader(in, charset);
        return read();
    }

    /**
     * Reads from the CSV file and returns an iterator over its rows.
     * <p>
     * The first row is parsed before this method returns; every further row is
     * parsed when the previous one is fetched from the iterator. The reader is
     * not closed when the last row has been read; call {@link #close()} to
     * release it.
     * <p>
     * Depending on the contents of the file, the first line of the result may
     * or may not contain the column names.
     *
     * @param reader the reader; wrapped in a BufferedReader unless it already is one
     * @return the iterator; each element is one row
     * @throws IOException declared for API compatibility
     * @throws RuntimeException if reading the first row fails with an
     *             IOException (the IOException is the cause)
     */
    public Iterator<String[]> read(Reader reader) throws IOException {
        if (!(reader instanceof BufferedReader)) {
            reader = new BufferedReader(reader);
        }
        input = reader;
        return read();
    }

    /**
     * Starts parsing the reader stored in {@link #input}.
     *
     * @return an iterator that already holds the first row
     */
    private Iterator<String[]> read() throws IOException {
        inputBuffer = new char[IO_BUFFER_SIZE * 2];
        // Reset the parse state: after a previous input hit end-of-file the
        // flags and positions would otherwise make the new input look empty.
        inputBufferPos = 0;
        inputBufferStart = -1;
        inputBufferEnd = 0;
        endOfLine = false;
        endOfFile = false;
        return new RowIterator();
    }

    /**
     * Initialize writing.
     *
     * @param out the output stream
     * @param charset the character set or null to use the system default
     *            charset (see system property file.encoding)
     * @throws IOException if the charset is not supported
     */
    public void writeInit(OutputStream out, String charset) throws IOException {
        if (charset == null) {
            charset = System.getProperty("file.encoding");
        }
        out = new BufferedOutputStream(out, IO_BUFFER_SIZE);
        output = new BufferedWriter(new OutputStreamWriter(out, charset));
    }

    /**
     * Initialize writing.
     *
     * @param writer the writer; wrapped in a BufferedWriter unless it already is one
     * @throws IOException declared for API compatibility
     */
    public void writeInit(Writer writer) throws IOException {
        if (!(writer instanceof BufferedWriter)) {
            writer = new BufferedWriter(writer);
        }
        output = writer;
    }

    /**
     * Write a row, followed by the optional row separator and the line
     * separator. The output is buffered; it is flushed when {@link #close()}
     * closes the writer.
     * <p>
     * A null value is written as the null string (nothing at all if the null
     * string is empty). Any other value is escaped and enclosed in the field
     * delimiter, unless the escape character is 0, in which case it is written
     * verbatim.
     *
     * @param values the values
     * @throws IOException if writing fails
     */
    public void writeRow(String... values) throws IOException {
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                if (fieldSeparatorWrite != null) {
                    output.write(fieldSeparatorWrite);
                }
            }
            String s = values[i];
            if (s != null) {
                if (escapeCharacter != 0) {
                    if (fieldDelimiter != 0) {
                        output.write(fieldDelimiter);
                    }
                    output.write(escape(s));
                    if (fieldDelimiter != 0) {
                        output.write(fieldDelimiter);
                    }
                } else {
                    output.write(s);
                }
            } else if (nullString != null && nullString.length() > 0) {
                output.write(nullString);
            }
        }
        if (rowSeparatorWrite != null) {
            output.write(rowSeparatorWrite);
        }
        output.write(lineSeparator);
    }

    /**
     * Prefixes every field delimiter and escape character in the value with
     * the escape character.
     *
     * @param data the raw value
     * @return the escaped value, or the same instance if nothing needs escaping
     */
    private String escape(String data) {
        if (data.indexOf(fieldDelimiter) < 0) {
            if (escapeCharacter == fieldDelimiter || data.indexOf(escapeCharacter) < 0) {
                return data;
            }
        }
        StringBuilder buff = new StringBuilder(data.length());
        for (int i = 0; i < data.length(); i++) {
            char ch = data.charAt(i);
            if (ch == fieldDelimiter || ch == escapeCharacter) {
                buff.append(escapeCharacter);
            }
            buff.append(ch);
        }
        return buff.toString();
    }

    /** Makes the character returned by the last {@link #readChar()} available again. */
    private void pushBack() {
        inputBufferPos--;
    }

    /**
     * Returns the next character, refilling the buffer when it is exhausted.
     *
     * @return the character, or -1 at the end of the input
     */
    private int readChar() throws IOException {
        if (inputBufferPos >= inputBufferEnd) {
            return readBuffer();
        }
        return inputBuffer[inputBufferPos++];
    }

    /**
     * Refills the buffer from the reader and returns the first new character.
     * The characters of the value currently being scanned (from
     * {@link #inputBufferStart} on) are moved to the front of the buffer so
     * the value can still be cut out of the buffer once it is complete.
     *
     * @return the character, or -1 at the end of the input
     */
    private int readBuffer() throws IOException {
        if (endOfFile) {
            return -1;
        }
        int keep;
        if (inputBufferStart >= 0) {
            keep = inputBufferPos - inputBufferStart;
            if (keep > 0) {
                char[] src = inputBuffer;
                // grow when the kept part plus one refill would not fit
                if (keep + IO_BUFFER_SIZE > src.length) {
                    inputBuffer = new char[src.length * 2];
                }
                System.arraycopy(src, inputBufferStart, inputBuffer, 0, keep);
            }
            inputBufferStart = 0;
        } else {
            keep = 0;
        }
        inputBufferPos = keep;
        int len = input.read(inputBuffer, keep, IO_BUFFER_SIZE);
        if (len == -1) {
            // ensure bufferPos > bufferEnd
            // even after pushBack
            inputBufferEnd = -1024;
            endOfFile = true;
            // ensure the right number of characters are read
            // in case the input buffer is still used
            inputBufferPos++;
            return -1;
        }
        inputBufferEnd = keep + len;
        return inputBuffer[inputBufferPos++];
    }

    /**
     * Reads the next value of the current line and sets {@link #endOfLine} if
     * the value was terminated by a line break or the end of the input.
     *
     * @return the value, or null for an empty field, an un-delimited value
     *         equal to the null string, a comment, or an empty rest of line
     */
    private String readValue() throws IOException {
        endOfLine = false;
        inputBufferStart = inputBufferPos;
        while (true) {
            int ch = readChar();
            if (ch == fieldDelimiter) {
                // delimited value
                boolean containsEscape = false;
                inputBufferStart = inputBufferPos;
                // number of characters consumed after the end of the value
                int sep;
                while (true) {
                    ch = readChar();
                    if (ch == fieldDelimiter) {
                        ch = readChar();
                        if (ch != fieldDelimiter) {
                            // closing delimiter plus the character after it
                            sep = 2;
                            break;
                        }
                        // doubled delimiter inside the value
                        containsEscape = true;
                    } else if (ch == escapeCharacter) {
                        ch = readChar();
                        if (ch < 0) {
                            sep = 1;
                            break;
                        }
                        containsEscape = true;
                    } else if (ch < 0) {
                        // input ended inside a delimited value
                        sep = 1;
                        break;
                    }
                }
                String s = new String(inputBuffer, inputBufferStart, inputBufferPos - inputBufferStart - sep);
                if (containsEscape) {
                    s = unEscape(s);
                }
                inputBufferStart = -1;
                // skip blanks up to the separator or the end of the line
                while (true) {
                    if (ch == fieldSeparatorRead) {
                        break;
                    } else if (ch == '\n' || ch < 0 || ch == '\r') {
                        endOfLine = true;
                        break;
                    } else if (ch == ' ' || ch == '\t') {
                        // ignore
                    } else {
                        pushBack();
                        break;
                    }
                    ch = readChar();
                }
                return s;
            } else if (ch == '\n' || ch < 0 || ch == '\r') {
                endOfLine = true;
                return null;
            } else if (ch == fieldSeparatorRead) {
                // null
                return null;
            } else if (ch <= ' ') {
                // ignore spaces
                continue;
            } else if (ch == commentLineStart) {
                // comment until end of line
                inputBufferStart = -1;
                while (true) {
                    ch = readChar();
                    if (ch == '\n' || ch < 0 || ch == '\r') {
                        break;
                    }
                }
                endOfLine = true;
                return null;
            } else {
                // un-delimited value
                while (true) {
                    ch = readChar();
                    if (ch == fieldSeparatorRead) {
                        break;
                    } else if (ch == '\n' || ch < 0 || ch == '\r') {
                        endOfLine = true;
                        break;
                    }
                }
                String s = new String(inputBuffer, inputBufferStart, inputBufferPos - inputBufferStart - 1);
                inputBufferStart = -1;
                // check un-delimited value for nullString
                return readNull(s.trim());
            }
        }
    }

    /** Maps a value equal to the null string to null. */
    private String readNull(String s) {
        return s.equals(nullString) ? null : s;
    }

    /**
     * Removes the escaping from a delimited value: each escape character (or,
     * if there is none left, each field delimiter) is dropped and the
     * character following it is taken literally.
     */
    private String unEscape(String s) {
        StringBuilder buff = new StringBuilder(s.length());
        int start = 0;
        char[] chars = null;
        while (true) {
            int idx = s.indexOf(escapeCharacter, start);
            if (idx < 0) {
                idx = s.indexOf(fieldDelimiter, start);
                if (idx < 0) {
                    break;
                }
            }
            if (chars == null) {
                chars = s.toCharArray();
            }
            buff.append(chars, start, idx - start);
            if (idx == s.length() - 1) {
                // dangling escape at the very end: drop it
                start = s.length();
                break;
            }
            buff.append(chars[idx + 1]);
            start = idx + 2;
        }
        buff.append(s.substring(start));
        return buff.toString();
    }

    /**
     * Reads the next non-empty row. Empty lines and comment-only lines are
     * skipped. An empty field at the very end of a line is not added to the
     * row; empty fields elsewhere are added as null.
     *
     * @return the row, or null if there is no reader or the input is exhausted
     * @throws RuntimeException if the reader throws an IOException
     */
    private String[] readRow() {
        if (input == null) {
            return null;
        }
        ArrayList<String> row = new ArrayList<String>();
        try {
            while (true) {
                String v = readValue();
                if (v == null) {
                    if (endOfLine) {
                        if (row.size() == 0) {
                            if (endOfFile) {
                                return null;
                            }
                            // empty line
                            continue;
                        }
                        break;
                    }
                }
                row.add(v);
                if (endOfLine) {
                    break;
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("IOException while reading", e);
        }
        return row.toArray(new String[row.size()]);
    }

    /**
     * Closes the reader and the writer, if set. Closing the writer flushes
     * any buffered rows. The writer is closed even if closing the reader
     * fails.
     *
     * @throws IOException if closing the reader or the writer fails; if both
     *             fail, the exception of the writer is thrown
     */
    public void close() throws IOException {
        try {
            if (input != null) {
                Reader in = input;
                input = null;
                in.close();
            }
        } finally {
            if (output != null) {
                Writer out = output;
                output = null;
                out.close();
            }
        }
    }

    /**
     * Override the field separator for writing. The default is ",".
     *
     * @param fieldSeparatorWrite the field separator
     */
    public void setFieldSeparatorWrite(String fieldSeparatorWrite) {
        this.fieldSeparatorWrite = fieldSeparatorWrite;
    }

    /**
     * Get the current field separator for writing.
     *
     * @return the field separator
     */
    public String getFieldSeparatorWrite() {
        return fieldSeparatorWrite;
    }

    /**
     * Override the field separator for reading. The default is ','.
     *
     * @param fieldSeparatorRead the field separator
     */
    public void setFieldSeparatorRead(char fieldSeparatorRead) {
        this.fieldSeparatorRead = fieldSeparatorRead;
    }

    /**
     * Get the current field separator for reading.
     *
     * @return the field separator
     */
    public char getFieldSeparatorRead() {
        return fieldSeparatorRead;
    }

    /**
     * Get the current row separator for writing.
     *
     * @return the row separator
     */
    public String getRowSeparatorWrite() {
        return rowSeparatorWrite;
    }

    /**
     * Override the end-of-row marker for writing. The default is null. After
     * writing the end-of-row marker, the line separator is written (by default
     * the value of the system property line.separator).
     *
     * @param rowSeparatorWrite the row separator
     */
    public void setRowSeparatorWrite(String rowSeparatorWrite) {
        this.rowSeparatorWrite = rowSeparatorWrite;
    }

    /**
     * Set the field delimiter. The default is " (a double quote).
     * The value 0 means no field delimiter is written.
     *
     * @param fieldDelimiter the field delimiter
     */
    public void setFieldDelimiter(char fieldDelimiter) {
        this.fieldDelimiter = fieldDelimiter;
    }

    /**
     * Get the current field delimiter.
     *
     * @return the field delimiter
     */
    public char getFieldDelimiter() {
        return fieldDelimiter;
    }

    /**
     * Set the escape character (used to escape the field delimiter). The
     * default is " (a double quote). The value 0 means values are written
     * without escaping and without field delimiters.
     *
     * @param escapeCharacter the escape character
     */
    public void setEscapeCharacter(char escapeCharacter) {
        this.escapeCharacter = escapeCharacter;
    }

    /**
     * Get the current escape character.
     *
     * @return the escape character
     */
    public char getEscapeCharacter() {
        return escapeCharacter;
    }

    /**
     * Set the line separator written after each row.
     *
     * @param lineSeparator the line separator
     */
    public void setLineSeparator(String lineSeparator) {
        this.lineSeparator = lineSeparator;
    }

    /**
     * Set the value that represents NULL. The default is an empty string.
     *
     * @param nullString the null
     */
    public void setNullString(String nullString) {
        this.nullString = nullString;
    }

    /**
     * Get the current null string.
     *
     * @return the null string.
     */
    public String getNullString() {
        return nullString;
    }

    /**
     * Iterator over the rows of the current input. It always holds the next
     * row already parsed, so {@link #hasNext()} does no I/O.
     */
    class RowIterator implements Iterator<String[]> {

        /** The row to be returned by the next call to {@link #next()}, or null at the end. */
        private String[] current;

        RowIterator() {
            readNext();
        }

        private void readNext() {
            current = readRow();
        }

        public boolean hasNext() {
            return current != null;
        }

        /**
         * Returns the pre-fetched row and parses the following one.
         *
         * @return the row, or null if the input is exhausted (no
         *         NoSuchElementException is thrown; kept for backward
         *         compatibility)
         * @throws RuntimeException if reading the following row fails with an
         *             IOException
         */
        public String[] next() {
            String[] r = current;
            readNext();
            return r;
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy