All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.h2.tools.Csv Maven / Gradle / Ivy

There is a newer version: 8.1.2
Show newest version
/*
 * Copyright 2004-2014 H2 Group. Multiple-Licensed under the MPL 2.0,
 * and the EPL 1.0 (http://h2database.com/html/license.html).
 * Initial Developer: H2 Group
 */
package org.h2.tools;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import org.h2.api.ErrorCode;
import org.h2.engine.Constants;
import org.h2.engine.SysProperties;
import org.h2.message.DbException;
import org.h2.store.fs.FileUtils;
import org.h2.util.IOUtils;
import org.h2.util.JdbcUtils;
import org.h2.util.New;
import org.h2.util.StringUtils;

/**
 * A facility to read from and write to CSV (comma separated values) files. When
 * reading, the BOM (the byte-order-mark) character 0xfeff at the beginning of
 * the file is ignored.
 *
 * @author Thomas Mueller, Sylvain Cuaz
 */
public class Csv implements SimpleRowSource {

    private String[] columnNames;

    private String characterSet = SysProperties.FILE_ENCODING;
    private char escapeCharacter = '\"';
    private char fieldDelimiter = '\"';
    private char fieldSeparatorRead = ',';
    private String fieldSeparatorWrite = ",";
    private boolean caseSensitiveColumnNames;
    private boolean preserveWhitespace;
    private boolean writeColumnHeader = true;
    private char lineComment;
    private String lineSeparator = SysProperties.LINE_SEPARATOR;
    private String nullString = "";

    private String fileName;
    private Reader input;
    private char[] inputBuffer;
    private int inputBufferPos;
    private int inputBufferStart = -1;
    private int inputBufferEnd;
    private Writer output;
    private boolean endOfLine, endOfFile;

    private int writeResultSet(ResultSet rs) throws SQLException {
        try {
            int rows = 0;
            ResultSetMetaData meta = rs.getMetaData();
            int columnCount = meta.getColumnCount();
            String[] row = new String[columnCount];
            int[] sqlTypes = new int[columnCount];
            for (int i = 0; i < columnCount; i++) {
                row[i] = meta.getColumnLabel(i + 1);
                sqlTypes[i] = meta.getColumnType(i + 1);
            }
            if (writeColumnHeader) {
                writeRow(row);
            }
            while (rs.next()) {
                for (int i = 0; i < columnCount; i++) {
                    Object o;
                    switch (sqlTypes[i]) {
                    case Types.DATE:
                        o = rs.getDate(i + 1);
                        break;
                    case Types.TIME:
                        o = rs.getTime(i + 1);
                        break;
                    case Types.TIMESTAMP:
                        o = rs.getTimestamp(i + 1);
                        break;
                    default:
                        o = rs.getString(i + 1);
                    }
                    row[i] = o == null ? null : o.toString();
                }
                writeRow(row);
                rows++;
            }
            output.close();
            return rows;
        } catch (IOException e) {
            throw DbException.convertIOException(e, null);
        } finally {
            close();
            JdbcUtils.closeSilently(rs);
        }
    }

    /**
     * Writes the result set to a file in the CSV format.
     *
     * @param writer the writer
     * @param rs the result set
     * @return the number of rows written
     */
    public int write(Writer writer, ResultSet rs) throws SQLException {
        this.output = writer;
        return writeResultSet(rs);
    }

    /**
     * Writes the result set to a file in the CSV format. The result set is read
     * using the following loop:
     *
     * 
     * while (rs.next()) {
     *     writeRow(row);
     * }
     * 
* * @param outputFileName the name of the csv file * @param rs the result set - the result set must be positioned before the * first row. * @param charset the charset or null to use the system default charset * (see system property file.encoding) * @return the number of rows written */ public int write(String outputFileName, ResultSet rs, String charset) throws SQLException { init(outputFileName, charset); try { initWrite(); return writeResultSet(rs); } catch (IOException e) { throw convertException("IOException writing " + outputFileName, e); } } /** * Writes the result set of a query to a file in the CSV format. * * @param conn the connection * @param outputFileName the file name * @param sql the query * @param charset the charset or null to use the system default charset * (see system property file.encoding) * @return the number of rows written */ public int write(Connection conn, String outputFileName, String sql, String charset) throws SQLException { Statement stat = conn.createStatement(); ResultSet rs = stat.executeQuery(sql); int rows = write(outputFileName, rs, charset); stat.close(); return rows; } /** * Reads from the CSV file and returns a result set. The rows in the result * set are created on demand, that means the file is kept open until all * rows are read or the result set is closed. *
* If the columns are read from the CSV file, then the following rules are * used: columns names that start with a letter or '_', and only * contain letters, '_', and digits, are considered case insensitive * and are converted to uppercase. Other column names are considered * case sensitive (that means they need to be quoted when accessed). * * @param inputFileName the file name * @param colNames or null if the column names should be read from the CSV * file * @param charset the charset or null to use the system default charset * (see system property file.encoding) * @return the result set */ public ResultSet read(String inputFileName, String[] colNames, String charset) throws SQLException { init(inputFileName, charset); try { return readResultSet(colNames); } catch (IOException e) { throw convertException("IOException reading " + inputFileName, e); } } /** * Reads CSV data from a reader and returns a result set. The rows in the * result set are created on demand, that means the reader is kept open * until all rows are read or the result set is closed. * * @param reader the reader * @param colNames or null if the column names should be read from the CSV * file * @return the result set */ public ResultSet read(Reader reader, String[] colNames) throws IOException { init(null, null); this.input = reader; return readResultSet(colNames); } private ResultSet readResultSet(String[] colNames) throws IOException { this.columnNames = colNames; initRead(); SimpleResultSet result = new SimpleResultSet(this); makeColumnNamesUnique(); for (String columnName : columnNames) { result.addColumn(columnName, Types.VARCHAR, Integer.MAX_VALUE, 0); } return result; } private void makeColumnNamesUnique() { for (int i = 0; i < columnNames.length; i++) { StringBuilder buff = new StringBuilder(); String n = columnNames[i]; if (n == null || n.length() == 0) { buff.append('C').append(i + 1); } else { buff.append(n); } for (int j = 0; j < i; j++) { String y = columnNames[j]; if (buff.toString().equals(y)) { buff.append('1'); j = -1; } } columnNames[i] = buff.toString(); } } private void init(String newFileName, String charset) { this.fileName = newFileName; if (charset != null) { this.characterSet = charset; } } private void initWrite() throws IOException { if (output == null) { try { OutputStream out = FileUtils.newOutputStream(fileName, false); out = new BufferedOutputStream(out, Constants.IO_BUFFER_SIZE); output = new BufferedWriter(new OutputStreamWriter(out, characterSet)); } catch (Exception e) { close(); throw DbException.convertToIOException(e); } } } private void writeRow(String[] values) throws IOException { for (int i = 0; i < values.length; i++) { if (i > 0) { if (fieldSeparatorWrite != null) { output.write(fieldSeparatorWrite); } } String s = values[i]; if (s != null) { if (escapeCharacter != 0) { if (fieldDelimiter != 0) { output.write(fieldDelimiter); } output.write(escape(s)); if (fieldDelimiter != 0) { output.write(fieldDelimiter); } } else { output.write(s); } } else if (nullString != null && nullString.length() > 0) { output.write(nullString); } } output.write(lineSeparator); } private String escape(String data) { if (data.indexOf(fieldDelimiter) < 0) { if (escapeCharacter == fieldDelimiter || data.indexOf(escapeCharacter) < 0) { return data; } } int length = data.length(); StringBuilder buff = new StringBuilder(length); for (int i = 0; i < length; i++) { char ch = data.charAt(i); if (ch == fieldDelimiter || ch == escapeCharacter) { buff.append(escapeCharacter); } buff.append(ch); } return buff.toString(); } private void initRead() throws IOException { if (input == null) { try { InputStream in = FileUtils.newInputStream(fileName); in = new BufferedInputStream(in, Constants.IO_BUFFER_SIZE); input = new InputStreamReader(in, characterSet); } catch (IOException e) { close(); throw e; } } if (!input.markSupported()) { input = new BufferedReader(input); } input.mark(1); int bom = input.read(); if (bom != 0xfeff) { // Microsoft Excel compatibility // ignore pseudo-BOM input.reset(); } inputBuffer = new char[Constants.IO_BUFFER_SIZE * 2]; if (columnNames == null) { readHeader(); } } private void readHeader() throws IOException { ArrayList list = New.arrayList(); while (true) { String v = readValue(); if (v == null) { if (endOfLine) { if (endOfFile || list.size() > 0) { break; } } else { v = "COLUMN" + list.size(); list.add(v); } } else { if (v.length() == 0) { v = "COLUMN" + list.size(); } else if (!caseSensitiveColumnNames && isSimpleColumnName(v)) { v = StringUtils.toUpperEnglish(v); } list.add(v); if (endOfLine) { break; } } } columnNames = new String[list.size()]; list.toArray(columnNames); } private static boolean isSimpleColumnName(String columnName) { for (int i = 0, length = columnName.length(); i < length; i++) { char ch = columnName.charAt(i); if (i == 0) { if (ch != '_' && !Character.isLetter(ch)) { return false; } } else { if (ch != '_' && !Character.isLetterOrDigit(ch)) { return false; } } } if (columnName.length() == 0) { return false; } return true; } private void pushBack() { inputBufferPos--; } private int readChar() throws IOException { if (inputBufferPos >= inputBufferEnd) { return readBuffer(); } return inputBuffer[inputBufferPos++]; } private int readBuffer() throws IOException { if (endOfFile) { return -1; } int keep; if (inputBufferStart >= 0) { keep = inputBufferPos - inputBufferStart; if (keep > 0) { char[] src = inputBuffer; if (keep + Constants.IO_BUFFER_SIZE > src.length) { inputBuffer = new char[src.length * 2]; } System.arraycopy(src, inputBufferStart, inputBuffer, 0, keep); } inputBufferStart = 0; } else { keep = 0; } inputBufferPos = keep; int len = input.read(inputBuffer, keep, Constants.IO_BUFFER_SIZE); if (len == -1) { // ensure bufferPos > bufferEnd // even after pushBack inputBufferEnd = -1024; endOfFile = true; // ensure the right number of characters are read // in case the input buffer is still used inputBufferPos++; return -1; } inputBufferEnd = keep + len; return inputBuffer[inputBufferPos++]; } private String readValue() throws IOException { endOfLine = false; inputBufferStart = inputBufferPos; while (true) { int ch = readChar(); if (ch == fieldDelimiter) { // delimited value boolean containsEscape = false; inputBufferStart = inputBufferPos; int sep; while (true) { ch = readChar(); if (ch == fieldDelimiter) { ch = readChar(); if (ch != fieldDelimiter) { sep = 2; break; } containsEscape = true; } else if (ch == escapeCharacter) { ch = readChar(); if (ch < 0) { sep = 1; break; } containsEscape = true; } else if (ch < 0) { sep = 1; break; } } String s = new String(inputBuffer, inputBufferStart, inputBufferPos - inputBufferStart - sep); if (containsEscape) { s = unEscape(s); } inputBufferStart = -1; while (true) { if (ch == fieldSeparatorRead) { break; } else if (ch == '\n' || ch < 0 || ch == '\r') { endOfLine = true; break; } else if (ch == ' ' || ch == '\t') { // ignore } else { pushBack(); break; } ch = readChar(); } return s; } else if (ch == '\n' || ch < 0 || ch == '\r') { endOfLine = true; return null; } else if (ch == fieldSeparatorRead) { // null return null; } else if (ch <= ' ') { // ignore spaces continue; } else if (lineComment != 0 && ch == lineComment) { // comment until end of line inputBufferStart = -1; while (true) { ch = readChar(); if (ch == '\n' || ch < 0 || ch == '\r') { break; } } endOfLine = true; return null; } else { // un-delimited value while (true) { ch = readChar(); if (ch == fieldSeparatorRead) { break; } else if (ch == '\n' || ch < 0 || ch == '\r') { endOfLine = true; break; } } String s = new String(inputBuffer, inputBufferStart, inputBufferPos - inputBufferStart - 1); if (!preserveWhitespace) { s = s.trim(); } inputBufferStart = -1; // check un-delimited value for nullString return readNull(s); } } } private String readNull(String s) { return s.equals(nullString) ? null : s; } private String unEscape(String s) { StringBuilder buff = new StringBuilder(s.length()); int start = 0; char[] chars = null; while (true) { int idx = s.indexOf(escapeCharacter, start); if (idx < 0) { idx = s.indexOf(fieldDelimiter, start); if (idx < 0) { break; } } if (chars == null) { chars = s.toCharArray(); } buff.append(chars, start, idx - start); if (idx == s.length() - 1) { start = s.length(); break; } buff.append(chars[idx + 1]); start = idx + 2; } buff.append(s.substring(start)); return buff.toString(); } /** * INTERNAL */ @Override public Object[] readRow() throws SQLException { if (input == null) { return null; } String[] row = new String[columnNames.length]; try { int i = 0; while (true) { String v = readValue(); if (v == null) { if (endOfLine) { if (i == 0) { if (endOfFile) { return null; } // empty line continue; } break; } } if (i < row.length) { row[i++] = v; } if (endOfLine) { break; } } } catch (IOException e) { throw convertException("IOException reading from " + fileName, e); } return row; } private static SQLException convertException(String message, Exception e) { return DbException.get(ErrorCode.IO_EXCEPTION_1, e, message).getSQLException(); } /** * INTERNAL */ @Override public void close() { IOUtils.closeSilently(input); input = null; IOUtils.closeSilently(output); output = null; } /** * INTERNAL */ @Override public void reset() throws SQLException { throw new SQLException("Method is not supported", "CSV"); } /** * Override the field separator for writing. The default is ",". * * @param fieldSeparatorWrite the field separator */ public void setFieldSeparatorWrite(String fieldSeparatorWrite) { this.fieldSeparatorWrite = fieldSeparatorWrite; } /** * Get the current field separator for writing. * * @return the field separator */ public String getFieldSeparatorWrite() { return fieldSeparatorWrite; } /** * Override the case sensitive column names setting. The default is false. * If enabled, the case of all column names is always preserved. * * @param caseSensitiveColumnNames whether column names are case sensitive */ public void setCaseSensitiveColumnNames(boolean caseSensitiveColumnNames) { this.caseSensitiveColumnNames = caseSensitiveColumnNames; } /** * Get the current case sensitive column names setting. * * @return whether column names are case sensitive */ public boolean getCaseSensitiveColumnNames() { return caseSensitiveColumnNames; } /** * Override the field separator for reading. The default is ','. * * @param fieldSeparatorRead the field separator */ public void setFieldSeparatorRead(char fieldSeparatorRead) { this.fieldSeparatorRead = fieldSeparatorRead; } /** * Get the current field separator for reading. * * @return the field separator */ public char getFieldSeparatorRead() { return fieldSeparatorRead; } /** * Set the line comment character. The default is character code 0 (line * comments are disabled). * * @param lineCommentCharacter the line comment character */ public void setLineCommentCharacter(char lineCommentCharacter) { this.lineComment = lineCommentCharacter; } /** * Get the line comment character. * * @return the line comment character, or 0 if disabled */ public char getLineCommentCharacter() { return lineComment; } /** * Set the field delimiter. The default is " (a double quote). * The value 0 means no field delimiter is used. * * @param fieldDelimiter the field delimiter */ public void setFieldDelimiter(char fieldDelimiter) { this.fieldDelimiter = fieldDelimiter; } /** * Get the current field delimiter. * * @return the field delimiter */ public char getFieldDelimiter() { return fieldDelimiter; } /** * Set the escape character. The escape character is used to escape the * field delimiter. This is needed if the data contains the field delimiter. * The default escape character is " (a double quote), which is the same as * the field delimiter. If the field delimiter and the escape character are * both " (double quote), and the data contains a double quote, then an * additional double quote is added. Example: *
     * Data: He said "Hello".
     * Escape character: "
     * Field delimiter: "
     * CSV file: "He said ""Hello""."
     * 
* If the field delimiter is a double quote and the escape character is a * backslash, then escaping is done similar to Java (however, only the field * delimiter is escaped). Example: *
     * Data: He said "Hello".
     * Escape character: \
     * Field delimiter: "
     * CSV file: "He said \"Hello\"."
     * 
* The value 0 means no escape character is used. * * @param escapeCharacter the escape character */ public void setEscapeCharacter(char escapeCharacter) { this.escapeCharacter = escapeCharacter; } /** * Get the current escape character. * * @return the escape character */ public char getEscapeCharacter() { return escapeCharacter; } /** * Set the line separator used for writing. This is usually a line feed (\n * or \r\n depending on the system settings). The line separator is written * after each row (including the last row), so this option can include an * end-of-row marker if needed. * * @param lineSeparator the line separator */ public void setLineSeparator(String lineSeparator) { this.lineSeparator = lineSeparator; } /** * Get the line separator used for writing. * * @return the line separator */ public String getLineSeparator() { return lineSeparator; } /** * Set the value that represents NULL. It is only used for non-delimited * values. * * @param nullString the null */ public void setNullString(String nullString) { this.nullString = nullString; } /** * Get the current null string. * * @return the null string. */ public String getNullString() { return nullString; } /** * Enable or disable preserving whitespace in unquoted text. * * @param value the new value for the setting */ public void setPreserveWhitespace(boolean value) { this.preserveWhitespace = value; } /** * Whether whitespace in unquoted text is preserved. * * @return the current value for the setting */ public boolean getPreserveWhitespace() { return preserveWhitespace; } /** * Enable or disable writing the column header. * * @param value the new value for the setting */ public void setWriteColumnHeader(boolean value) { this.writeColumnHeader = value; } /** * Whether the column header is written. * * @return the current value for the setting */ public boolean getWriteColumnHeader() { return writeColumnHeader; } /** * INTERNAL. * Parse and set the CSV options. * * @param options the the options * @return the character set */ public String setOptions(String options) { String charset = null; String[] keyValuePairs = StringUtils.arraySplit(options, ' ', false); for (String pair : keyValuePairs) { if (pair.length() == 0) { continue; } int index = pair.indexOf('='); String key = StringUtils.trim(pair.substring(0, index), true, true, " "); String value = pair.substring(index + 1); char ch = value.length() == 0 ? 0 : value.charAt(0); if (isParam(key, "escape", "esc", "escapeCharacter")) { setEscapeCharacter(ch); } else if (isParam(key, "fieldDelimiter", "fieldDelim")) { setFieldDelimiter(ch); } else if (isParam(key, "fieldSeparator", "fieldSep")) { setFieldSeparatorRead(ch); setFieldSeparatorWrite(value); } else if (isParam(key, "lineComment", "lineCommentCharacter")) { setLineCommentCharacter(ch); } else if (isParam(key, "lineSeparator", "lineSep")) { setLineSeparator(value); } else if (isParam(key, "null", "nullString")) { setNullString(value); } else if (isParam(key, "charset", "characterSet")) { charset = value; } else if (isParam(key, "preserveWhitespace")) { setPreserveWhitespace(Boolean.parseBoolean(value)); } else if (isParam(key, "writeColumnHeader")) { setWriteColumnHeader(Boolean.parseBoolean(value)); } else if (isParam(key, "caseSensitiveColumnNames")) { setCaseSensitiveColumnNames(Boolean.parseBoolean(value)); } else { throw DbException.getUnsupportedException(key); } } return charset; } private static boolean isParam(String key, String... values) { for (String v : values) { if (key.equalsIgnoreCase(v)) { return true; } } return false; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy