All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.day.text.csv.Csv Maven / Gradle / Ivy

The newest version!
/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * __________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/
package com.day.text.csv;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * A facility to read from and write to CSV (comma separated values) files.
 *
 * This is a simplified version of the CSV tool of the H2 database:
 * http://code.google.com/p/h2database/source/browse/trunk/h2/src/main/org/h2/tools/Csv.java
 * (also written by Thomas Mueller)
 *
 * @author Thomas Mueller
 */
public class Csv {

    /** Number of characters requested from the underlying reader per refill. */
    private static final int IO_BUFFER_SIZE = 4 * 1024;

    private char fieldSeparatorRead = ',';
    private char commentLineStart = '#';
    private String fieldSeparatorWrite = ",";
    private String rowSeparatorWrite;
    private char fieldDelimiter = '\"';
    private char escapeCharacter = '\"';
    private String lineSeparator = System.getProperty("line.separator");
    private String nullString = "";
    private Reader input;
    private char[] inputBuffer;
    /** Index of the next character to hand out from {@link #inputBuffer}. */
    private int inputBufferPos;
    /** Start of the value currently being collected, or -1 if nothing needs to be kept. */
    private int inputBufferStart = -1;
    /** Index one past the last valid character in {@link #inputBuffer}. */
    private int inputBufferEnd;
    private Writer output;
    private boolean endOfLine, endOfFile;

    /**
     * Reads from the CSV file and returns an iterator. The rows are parsed on
     * demand (one row ahead of the caller). The input is not closed when the
     * last row has been read; call {@link #close()} when done.
     * <p>
     * Depending on the contents of the file, the first line of the result may
     * or may not contain the column names.
     *
     * @param in the input stream
     * @param charset the charset or null to use the system default charset (see
     *            system property file.encoding)
     * @return the iterator
     * @throws IOException if the charset is not supported
     */
    public Iterator<String[]> read(InputStream in, String charset) throws IOException {
        if (charset == null) {
            charset = System.getProperty("file.encoding");
        }
        in = new BufferedInputStream(in, IO_BUFFER_SIZE);
        input = new InputStreamReader(in, charset);
        return read();
    }

    /**
     * Reads from the CSV file and returns an iterator. The rows are parsed on
     * demand (one row ahead of the caller). The reader is not closed when the
     * last row has been read; call {@link #close()} when done.
     * <p>
     * Depending on the contents of the file, the first line of the result may
     * or may not contain the column names.
     *
     * @param reader the reader (wrapped in a BufferedReader if it is not one already)
     * @return the iterator
     * @throws IOException declared for API compatibility
     */
    public Iterator<String[]> read(Reader reader) throws IOException {
        if (!(reader instanceof BufferedReader)) {
            reader = new BufferedReader(reader);
        }
        input = reader;
        return read();
    }

    /**
     * Prepares the parser for a new input and creates the row iterator.
     * All buffer and end-of-input state is reset so that the same instance
     * can be used for more than one read.
     */
    private Iterator<String[]> read() throws IOException {
        inputBuffer = new char[IO_BUFFER_SIZE * 2];
        // reset state left over from a previous read on this instance;
        // otherwise a stale endOfFile flag or stale buffer offsets would
        // make the new input appear empty or be read from the wrong position
        inputBufferPos = 0;
        inputBufferStart = -1;
        inputBufferEnd = 0;
        endOfLine = false;
        endOfFile = false;
        return new RowIterator();
    }

    /**
     * Initialize writing.
     *
     * @param out the output stream
     * @param charset the character set or null to use the system property
     *            file.encoding
     * @throws IOException if the charset is not supported
     */
    public void writeInit(OutputStream out, String charset) throws IOException {
        if (charset == null) {
            charset = System.getProperty("file.encoding");
        }
        out = new BufferedOutputStream(out, IO_BUFFER_SIZE);
        output = new BufferedWriter(new OutputStreamWriter(out, charset));
    }

    /**
     * Initialize writing.
     *
     * @param writer the writer (wrapped in a BufferedWriter if it is not one already)
     * @throws IOException declared for API compatibility
     */
    public void writeInit(Writer writer) throws IOException {
        if (!(writer instanceof BufferedWriter)) {
            writer = new BufferedWriter(writer);
        }
        output = writer;
    }

    /**
     * Write a row. Non-null values are escaped and enclosed in the field
     * delimiter if an escape character is set; null values are written as the
     * null string. The row is terminated by the optional row separator
     * followed by the line separator.
     *
     * @param values the values
     * @throws IOException if writing fails
     */
    public void writeRow(String... values) throws IOException {
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                if (fieldSeparatorWrite != null) {
                    output.write(fieldSeparatorWrite);
                }
            }
            String s = values[i];
            if (s != null) {
                if (escapeCharacter != 0) {
                    if (fieldDelimiter != 0) {
                        output.write(fieldDelimiter);
                    }
                    output.write(escape(s));
                    if (fieldDelimiter != 0) {
                        output.write(fieldDelimiter);
                    }
                } else {
                    // no escape character: the value is written as is, without delimiters
                    output.write(s);
                }
            } else if (nullString != null && nullString.length() > 0) {
                output.write(nullString);
            }
        }
        if (rowSeparatorWrite != null) {
            output.write(rowSeparatorWrite);
        }
        output.write(lineSeparator);
    }

    /**
     * Prefixes every field delimiter and escape character in the data with
     * the escape character. Returns the same string if nothing needs escaping.
     */
    private String escape(String data) {
        if (data.indexOf(fieldDelimiter) < 0) {
            if (escapeCharacter == fieldDelimiter || data.indexOf(escapeCharacter) < 0) {
                return data;
            }
        }
        StringBuilder buff = new StringBuilder(data.length());
        for (int i = 0; i < data.length(); i++) {
            char ch = data.charAt(i);
            if (ch == fieldDelimiter || ch == escapeCharacter) {
                buff.append(escapeCharacter);
            }
            buff.append(ch);
        }
        return buff.toString();
    }

    /** Steps back one character so that the next readChar() returns it again. */
    private void pushBack() {
        inputBufferPos--;
    }

    /** Returns the next character, refilling the buffer if needed, or -1 at the end of the input. */
    private int readChar() throws IOException {
        if (inputBufferPos >= inputBufferEnd) {
            return readBuffer();
        }
        return inputBuffer[inputBufferPos++];
    }

    /**
     * Refills the input buffer and returns the next character, or -1 at the
     * end of the input. If a value is currently being collected
     * (inputBufferStart >= 0), its characters read so far are moved to the
     * start of the buffer (growing the buffer if required) before reading.
     */
    private int readBuffer() throws IOException {
        if (endOfFile) {
            return -1;
        }
        int keep;
        if (inputBufferStart >= 0) {
            keep = inputBufferPos - inputBufferStart;
            if (keep > 0) {
                char[] src = inputBuffer;
                if (keep + IO_BUFFER_SIZE > src.length) {
                    inputBuffer = new char[src.length * 2];
                }
                System.arraycopy(src, inputBufferStart, inputBuffer, 0, keep);
            }
            inputBufferStart = 0;
        } else {
            keep = 0;
        }
        inputBufferPos = keep;
        int len = input.read(inputBuffer, keep, IO_BUFFER_SIZE);
        if (len == -1) {
            // ensure bufferPos > bufferEnd
            // even after pushBack
            inputBufferEnd = -1024;
            endOfFile = true;
            // ensure the right number of characters are read
            // in case the input buffer is still used
            inputBufferPos++;
            return -1;
        }
        inputBufferEnd = keep + len;
        return inputBuffer[inputBufferPos++];
    }

    /**
     * Reads the next value of the current row. Sets endOfLine if the value
     * was terminated by a line break, a comment, or the end of the input.
     *
     * @return the value, or null for an empty field, an empty or comment
     *         line, or an un-delimited value equal to the null string
     */
    private String readValue() throws IOException {
        endOfLine = false;
        inputBufferStart = inputBufferPos;
        while (true) {
            int ch = readChar();
            if (ch == fieldDelimiter) {
                // delimited value
                boolean containsEscape = false;
                inputBufferStart = inputBufferPos;
                // number of characters consumed after the end of the value
                int sep;
                while (true) {
                    ch = readChar();
                    if (ch == fieldDelimiter) {
                        ch = readChar();
                        if (ch != fieldDelimiter) {
                            // closing delimiter plus the character after it
                            sep = 2;
                            break;
                        }
                        containsEscape = true;
                    } else if (ch == escapeCharacter) {
                        ch = readChar();
                        if (ch < 0) {
                            sep = 1;
                            break;
                        }
                        containsEscape = true;
                    } else if (ch < 0) {
                        sep = 1;
                        break;
                    }
                }
                String s = new String(inputBuffer, inputBufferStart,
                        inputBufferPos - inputBufferStart - sep);
                if (containsEscape) {
                    s = unEscape(s);
                }
                inputBufferStart = -1;
                // skip spaces and tabs up to the field separator or line end
                while (true) {
                    if (ch == fieldSeparatorRead) {
                        break;
                    } else if (ch == '\n' || ch < 0 || ch == '\r') {
                        endOfLine = true;
                        break;
                    } else if (ch == ' ' || ch == '\t') {
                        // ignore
                    } else {
                        pushBack();
                        break;
                    }
                    ch = readChar();
                }
                return s;
            } else if (ch == '\n' || ch < 0 || ch == '\r') {
                endOfLine = true;
                return null;
            } else if (ch == fieldSeparatorRead) {
                // null
                return null;
            } else if (ch <= ' ') {
                // ignore spaces
                continue;
            } else if (ch == commentLineStart) {
                // comment until end of line
                inputBufferStart = -1;
                while (true) {
                    ch = readChar();
                    if (ch == '\n' || ch < 0 || ch == '\r') {
                        break;
                    }
                }
                endOfLine = true;
                return null;
            } else {
                // un-delimited value
                while (true) {
                    ch = readChar();
                    if (ch == fieldSeparatorRead) {
                        break;
                    } else if (ch == '\n' || ch < 0 || ch == '\r') {
                        endOfLine = true;
                        break;
                    }
                }
                String s = new String(inputBuffer, inputBufferStart,
                        inputBufferPos - inputBufferStart - 1);
                inputBufferStart = -1;
                // check un-delimited value for nullString
                return readNull(s.trim());
            }
        }
    }

    /** Returns null if the value equals the null string, otherwise the value itself. */
    private String readNull(String s) {
        return s.equals(nullString) ? null : s;
    }

    /**
     * Removes the escaping from a delimited value: each escape character
     * (or, if none is found, field delimiter) is dropped and the character
     * following it is copied verbatim.
     */
    private String unEscape(String s) {
        StringBuilder buff = new StringBuilder(s.length());
        int start = 0;
        char[] chars = null;
        while (true) {
            int idx = s.indexOf(escapeCharacter, start);
            if (idx < 0) {
                idx = s.indexOf(fieldDelimiter, start);
                if (idx < 0) {
                    break;
                }
            }
            if (chars == null) {
                chars = s.toCharArray();
            }
            buff.append(chars, start, idx - start);
            if (idx == s.length() - 1) {
                // trailing escape character without a following character
                start = s.length();
                break;
            }
            buff.append(chars[idx + 1]);
            start = idx + 2;
        }
        buff.append(s.substring(start));
        return buff.toString();
    }

    /**
     * Reads the next non-empty row. Empty fields are stored as the null string.
     *
     * @return the values of the row, or null if no input is set or the end
     *         of the input was reached
     * @throws RuntimeException if reading from the input fails (wraps the IOException)
     */
    private String[] readRow() {
        if (input == null) {
            return null;
        }
        ArrayList<String> row = new ArrayList<String>();
        try {
            int i = 0;
            while (true) {
                String v = readValue();
                if (v == null) {
                    v = this.nullString;
                    if (endOfLine) {
                        if (i == 0) {
                            if (endOfFile) {
                                return null;
                            }
                            // empty line
                            continue;
                        }
                        break;
                    }
                }
                row.add(v);
                i++;
                if (endOfLine) {
                    break;
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("IOException while reading", e);
        }
        return row.toArray(new String[row.size()]);
    }

    /**
     * Close the input and the output, if set. The output is closed even if
     * closing the input fails. Exceptions are not ignored: an IOException
     * thrown while closing is propagated to the caller.
     *
     * @throws IOException if closing the input or the output fails
     */
    public void close() throws IOException {
        try {
            if (input != null) {
                Reader in = input;
                input = null;
                in.close();
            }
        } finally {
            if (output != null) {
                Writer out = output;
                output = null;
                out.close();
            }
        }
    }

    /**
     * Override the field separator for writing. The default is ",".
     *
     * @param fieldSeparatorWrite the field separator
     */
    public void setFieldSeparatorWrite(String fieldSeparatorWrite) {
        this.fieldSeparatorWrite = fieldSeparatorWrite;
    }

    /**
     * Get the current field separator for writing.
     *
     * @return the field separator
     */
    public String getFieldSeparatorWrite() {
        return fieldSeparatorWrite;
    }

    /**
     * Override the field separator for reading. The default is ','.
     *
     * @param fieldSeparatorRead the field separator
     */
    public void setFieldSeparatorRead(char fieldSeparatorRead) {
        this.fieldSeparatorRead = fieldSeparatorRead;
    }

    /**
     * Get the current field separator for reading.
     *
     * @return the field separator
     */
    public char getFieldSeparatorRead() {
        return fieldSeparatorRead;
    }

    /**
     * Get the current row separator for writing.
     *
     * @return the row separator
     */
    public String getRowSeparatorWrite() {
        return rowSeparatorWrite;
    }

    /**
     * Override the end-of-row marker for writing. The default is null. After
     * writing the end-of-row marker, the line separator is written (by
     * default the system property line.separator).
     *
     * @param rowSeparatorWrite the row separator
     */
    public void setRowSeparatorWrite(String rowSeparatorWrite) {
        this.rowSeparatorWrite = rowSeparatorWrite;
    }

    /**
     * Set the field delimiter. The default is " (a double quote).
     * The value 0 means no field delimiter is used.
     *
     * @param fieldDelimiter the field delimiter
     */
    public void setFieldDelimiter(char fieldDelimiter) {
        this.fieldDelimiter = fieldDelimiter;
    }

    /**
     * Get the current field delimiter.
     *
     * @return the field delimiter
     */
    public char getFieldDelimiter() {
        return fieldDelimiter;
    }

    /**
     * Set the escape character (used to escape the field delimiter). The
     * default is " (a double quote). The value 0 means no escape character is used.
     *
     * @param escapeCharacter the escape character
     */
    public void setEscapeCharacter(char escapeCharacter) {
        this.escapeCharacter = escapeCharacter;
    }

    /**
     * Get the current escape character.
     *
     * @return the escape character
     */
    public char getEscapeCharacter() {
        return escapeCharacter;
    }

    /**
     * Set the line separator.
     *
     * @param lineSeparator the line separator
     */
    public void setLineSeparator(String lineSeparator) {
        this.lineSeparator = lineSeparator;
    }

    /**
     * Set the value that represents NULL. The default is an empty string.
     *
     * @param nullString the null
     */
    public void setNullString(String nullString) {
        this.nullString = nullString;
    }

    /**
     * Get the current null string.
     *
     * @return the null string.
     */
    public String getNullString() {
        return nullString;
    }

    /**
     * Iterator over the rows of the current input. The next row is always
     * read ahead, so the first row is parsed when the iterator is created.
     */
    class RowIterator implements Iterator<String[]> {

        /** The row returned by the next call to next(), or null if there is none. */
        private String[] current;

        RowIterator() {
            readNext();
        }

        private void readNext() {
            current = readRow();
        }

        @Override
        public boolean hasNext() {
            return current != null;
        }

        /**
         * Returns the current row and reads the following one.
         * NOTE: returns null (instead of throwing NoSuchElementException)
         * when no rows are left; kept for backward compatibility.
         */
        @Override
        public String[] next() {
            String[] r = current;
            readNext();
            return r;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy