org.ttzero.excel.util.CSVUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of eec Show documentation
A fast and lower memory excel write/read tool 一个非POI底层支持流式处理的高效且超低内存的Excel读写工具
The newest version!
/*
 * Copyright (c) 2017-2019, [email protected] All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.ttzero.excel.util;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Consumer;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import static org.ttzero.excel.util.ExtBufferedWriter.MIN_INTEGER_CHARS;
import static org.ttzero.excel.util.ExtBufferedWriter.MIN_LONG_CHARS;
import static org.ttzero.excel.util.ExtBufferedWriter.getChars;
import static org.ttzero.excel.util.ExtBufferedWriter.stringSize;
import static org.ttzero.excel.util.FileUtil.exists;
import static org.ttzero.excel.util.FileUtil.mkdir;
import static org.ttzero.excel.util.StringUtil.EMPTY;

/**
 * Comma-Separated Values
 * 
 * RFC 4180 standard
 * Reliance on the standard documented by RFC 4180 can simplify CSV exchange. However,
 * this standard only specifies handling of text-based fields.
 * Interpretation of the text of each field is still application-specific.
 * 

 * RFC 4180 formalized CSV. It defines the MIME type "text/csv", and CSV files that follow its
 * rules should be very widely portable. Among its requirements:
 * 

 * MS-DOS-style lines that end with (CR/LF) characters (optional for the last line).
 * An optional header record (there is no sure way to detect whether it is present,
 * so care is required when importing).
 * Each record "should" contain the same number of comma-separated fields.
 * Any field may be quoted (with double quotes).
 * Fields containing a line-break, double-quote or commas should be quoted. (If they are not,
 * the file will likely be impossible to process correctly).
 * A (double) quote character in a field must be represented by two (double) quote characters.
 * 
 * @author guanquan.wang at 2019-02-12 17:27
 */
public class CSVUtil {
    private static final Logger LOGGER = LoggerFactory.getLogger(CSVUtil.class);

    private CSVUtil() { }
    private static final char QUOTE = '"';
    private static final char HT = 9;
    private static final char LF = 10;
    private static final char CR = 13;
    private static final char COMMA = ',';

    // --- Read

    /**
     * Read csv format file by UTF-8 charset.
     *
     * @param path the csv file path
     * @param clazz the class convert to
     * @param  the result type
     * @return ArrayList of clazz
     * @throws IOException if I/O error occur
     */
    public static  List read(Path path, Class clazz) throws IOException {
        return read(path, clazz, null);
    }

    /**
     * Read csv format file by UTF-8 charset.
     *
     * @param path the csv file path
     * @return ArrayList of string
     * @throws IOException if I/O error occur
     */
    public static List read(Path path) throws IOException {
        return read(path, (Charset) null);
    }

    /**
     * Read csv format file by UTF-8 charset.
     *
     * @param path the csv file path
     * @param separator the separator character
     * @return ArrayList of string
     * @throws IOException if I/O error occur
     */
    public static List read(Path path, char separator) throws IOException {
        return read(path, separator, null);
    }

    /**
     * Read csv format file.
     *
     * @param path the csv file path
     * @param clazz the class convert to
     * @param charset the charset to use for encoding
     * @param  the result type
     * @return ArrayList of clazz
     * @throws IOException if I/O error occur
     */
    public static  List read(Path path, Class clazz, Charset charset) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Read csv format file.
     *
     * @param path the csv file path
     * @param charset the charset to use for encoding
     * @return ArrayList of string
     * @throws IOException if I/O error occur
     */
    public static List read(Path path, Charset charset) throws IOException {
        return read(path, (char) 0x0, charset);
    }

    /**
     * Read csv format file.
     *
     * @param path the csv file path
     * @param separator the separator character
     * @param charset the charset to use for encoding
     * @return ArrayList of string
     * @throws IOException if I/O error occur
     */
    public static List read(Path path, char separator, Charset charset) throws IOException {
        // Check comma character and column
        // FileNotFoundException will be occurred
        O o = init(path, separator, charset);
        // Empty file
        if (o == null) {
            return Collections.emptyList();
        }

        // Use iterator
        try (RowsIterator iter = new RowsIterator(o, path, o.charset)) {
            List result = new ArrayList<>();
            while (iter.hasNext()) {
                result.add(iter.next());
            }
            return result;
        }
    }

    /**
     * Create a CSV reader.
     *
     * @param path the csv file path
     * @return a stream CSV format reader
     */
    public static Reader newReader(Path path) {
        return newReader(path, null);
    }

    /**
     * Create a CSV reader.
     *
     * @param path the csv file path
     * @param charset the charset to use for encoding
     * @return a stream CSV format reader
     */
    public static Reader newReader(Path path, Charset charset) {
        return new Reader(path, charset);
    }

    /**
     * Create a CSV reader.
     *
     * @param path the csv file path
     * @param separator the separator character
     * @return a stream CSV format reader
     */
    public static Reader newReader(Path path, char separator) {
        Reader reader = newReader(path);
        reader.separator = separator;
        return reader;
    }

    /**
     * Create a CSV reader.
     *
     * @param path the csv file path
     * @param separator the separator character
     * @param charset the charset to use for encoding
     * @return a stream CSV format reader
     */
    public static Reader newReader(Path path, char separator, Charset charset) {
        Reader reader = newReader(path, charset);
        reader.separator = separator;
        return reader;
    }

    // --- Writer

    /**
     * Save vector object as csv format file
     *
     * @param data the vector object to be save
     * @param path the save path
     * @throws IOException if I/O error occur
     */
    public static void writeTo(List data, Path path) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Create a CSV writer
     *
     * @param path the storage path
     * @return a CSV format writer
     * @throws IOException no permission or other I/O error occur
     */
    public static Writer newWriter(Path path) throws IOException {
        testOrCreate(path);
        return new Writer(path);
    }

    /**
     * Create a CSV writer
     *
     * @param path the storage path
     * @param charset the charset to use for encoding
     * @return a CSV format writer
     * @throws IOException no permission or other I/O error occur
     */
    public static Writer newWriter(Path path, Charset charset) throws IOException {
        testOrCreate(path);
        return new Writer(path, charset);
    }

    /**
     * Create a CSV writer
     *
     * @param path the storage path
     * @param separator the separator character
     * @return a CSV format writer
     * @throws IOException no permission or other I/O error occur
     */
    public static Writer newWriter(Path path, char separator) throws IOException {
        testOrCreate(path);
        Writer writer = new Writer(path);
        writer.separator = separator;
        return writer;
    }

    /**
     * Create a CSV writer
     *
     * @param path the storage path
     * @param separator the separator character
     * @param charset the charset to use for encoding
     * @return a CSV format writer
     * @throws IOException no permission or other I/O error occur
     */
    public static Writer newWriter(Path path, char separator, Charset charset) throws IOException {
        testOrCreate(path);
        Writer writer = new Writer(path, charset);
        writer.separator = separator;
        return writer;
    }

    /**
     * Create a CSV writer
     *
     * @param writer the {@link BufferedWriter}
     * @return a CSV format writer
     */
    public static Writer newWriter(BufferedWriter writer) {
        return new Writer(writer);
    }

    /**
     * Create a CSV writer
     *
     * @param os the {@link OutputStream}
     * @return a CSV format writer
     */
    public static Writer newWriter(OutputStream os) {
        return new Writer(new BufferedWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)));
    }

    private static void testOrCreate(Path path) throws IOException {
        if (!exists(path)) {
            mkdir(path.getParent());
        }
    }

    // --PUBLIC inner reader

    /**
     * A CSV format file reader.
     */
    public static class Reader implements Closeable {

        private RowsIterator iterator;
        private final Path path;
        private final Charset charset;
        private char separator;

        private Reader(Path path, Charset charset) {
            this.path = path;
            this.charset = charset;
            this.separator = (char) 0x0;
        }

        /**
         * Read csv format file.
         *
         * @param clazz the class convert to
         * @param  the result type
         * @return a stream of clazz array
         */
        public  Stream stream(Class clazz) {
            throw new UnsupportedOperationException();
        }

        /**
         * Read csv format file.
         *
         * @return a stream of string array
         * @throws IOException file not exists or read file error.
         */
        public Stream stream() throws IOException {
            // Check comma character and column
            // FileNotFoundException will be occurred
            O o = init(path, separator, charset);
            // Empty file
            if (o == null) {
                return StreamSupport.stream(emptySql, false);
            }

            // Use iterator
            iterator = new RowsIterator(o, path, o.charset);

            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
                iterator, Spliterator.ORDERED | Spliterator.NONNULL), false);
        }

        /**
         * Read csv format file.
         * there has only one string array in memory, so do not call 'collect' or 'toArray' function direct.
         *
         * @return a stream of sheared string array
         * @throws IOException file not exists or read file error.
         */
        public Stream sharedStream() throws IOException {
            return sharedStream((char) 0x0);
        }

        /**
         * Read csv format file.
         * there has only one string array in memory, so do not call 'collect' or 'toArray' function direct.
         *
         * @param separator the separator character
         * @return a stream of sheared string array
         * @throws IOException file not exists or read file error.
         */
        public Stream sharedStream(char separator) throws IOException {
            // Check comma character and column
            // FileNotFoundException will be occurred
            O o = init(path, separator, charset);
            // Empty file
            if (o == null) {
                return StreamSupport.stream(emptySql, false);
            }

            // Use iterator
            iterator = new SharedRowsIterator(o, path, charset);

            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
                iterator, Spliterator.ORDERED | Spliterator.NONNULL), false);
        }

        /**
         * Read csv format file.
         *
         * @return an iterator
         * @throws IOException file not exists or read file error.
         */
        public RowsIterator iterator() throws IOException {
            // Check comma character and column
            // FileNotFoundException will be occur
            O o = init(path, separator, charset);
            // Empty file
            if (o == null) {
                return RowsIterator.createEmptyIterator();
            }

            // Use iterator
            return new RowsIterator(o, path, o.charset);
        }

        /**
         * Read csv format file.
         *
         * @return an iterator
         * @throws IOException file not exists or read file error.
         */
        public RowsIterator sharedIterator() throws IOException {
            // Check comma character and column
            // FileNotFoundException will be occur
            O o = init(path, separator, charset);
            // Empty file
            if (o == null) {
                return SharedRowsIterator.createEmptyIterator();
            }

            // Use iterator
            return new SharedRowsIterator(o, path, charset);
        }

        @Override
        public void close() throws IOException {
            if (iterator != null) {
                try {
                    iterator.close();
                } catch (Exception e) {
                    throw new IOException(e);
                }
            }
        }

        // None item iterator
        private final Spliterator emptySql = new Spliterator() {
            @Override
            public boolean tryAdvance(Consumer action) {
                return false;
            }

            @Override
            public Spliterator trySplit() {
                return null;
            }

            @Override
            public long estimateSize() {
                return 0;
            }

            @Override
            public int characteristics() {
                return 0;
            }
        };
    }

    // -- PRIVATE inner function

    private static class O {
        int offset, line;
        String value;
        boolean newLine;
        Charset charset;
        O(int offset) { this.offset = offset; }
    }

    /**
     * Rows iterator
     */
    public static class RowsIterator implements Closeable, Iterator {
        private int column;
        private final char comma;
        private BufferedReader reader;
        private char[] chars;
        private int offset;
        private int i, _i;
        private int n;
        String[] nextRow;
        private static final int length = 8192;
        private O o;
        boolean EOF, load;

        RowsIterator() {
            this.comma = COMMA;
        }

        RowsIterator(O o, Path path, Charset charset) throws IOException {
            this.column = o.offset;
            this.comma = o.value.charAt(0);
            this.o = o;
            // Default charset UTF-8
            reader = charset != null ? Files.newBufferedReader(path, charset) : Files.newBufferedReader(path);
            // Ignore the Byte-order mark (BOM)
            if (o.line > 0) {
                reader.skip(o.line);
                o.line = 0;
            }
            chars = new char[length];
            nextRow = new String[column];
            this.offset = o.offset = 0;
            load = true;
        }

        @Override
        public boolean hasNext() {
            if (EOF) return false;
            try {
                for ( ; ; ) {
                    if (load) {
                        n = reader.read(chars, offset, length - offset);
                        // EOF
                        if (n <= 0) {
                            EOF = true;
                            // End of {comma}
                            if (chars[o.offset - 1] == comma) {
                                // Contain more than standard comma-separated fields
                                if (i == column) {
                                    String[] _array = new String[++column];
                                    System.arraycopy(nextRow, 0, _array, 0, column - 1);
                                    nextRow = _array;
                                }
                                nextRow[i++] = EMPTY;
                                _i = i;
                            }
                            return nextRow[0] != null;
                        }
                        n += offset;
                        o.offset = 0;
                        load = false;
                    }
                    // Parse a block characters
                    while (parse(chars, n, o, comma)) {
                        offset = o.offset;
                        // Contain more than standard comma-separated fields
                        if (i == column) {
                            String[] _array = new String[++column];
                            System.arraycopy(nextRow, 0, _array, 0, column - 1);
                            nextRow = _array;
                        }
                        nextRow[i++] = o.value;
                        _i = i;
                        // End of block
                        if (offset >= n) {
                            // An integral row
                            if (o.newLine) {
                                o.line++;
                                // Ignore empty row
                                if ((i > 1 || nextRow[0] != null)) {
                                    // Line end of '{comma}'
                                    if (o.value == null) nextRow[i - 1] = EMPTY;
                                    i = 0;
                                    load = true;
                                    offset = 0;
                                    return load;
                                }
                            }
                            break;
                        }
                        if (!o.newLine) continue;
                        // Unquoted (CR/LF) characters
                        o.line++;
                        // Ignore empty row
                        if ((i > 1 || nextRow[0] != null)) {
                            // Line end of '{comma}'
                            if (o.value == null) nextRow[i - 1] = EMPTY;
                            i = 0;
                            return true;
                        }
                        i = 0;
                    }
                    load = true;
                    // Move the last character to header
                    if (offset < n) {
                        System.arraycopy(chars, offset, chars, 0, offset = n - offset);
                    } else offset = 0;
                }
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        }

        @Override
        public String[] next() {
            if (nextRow[0] != null || hasNext()) {
                String[] next = Arrays.copyOf(nextRow, _i);
                nextRow[0] = null;
                return next;
            } else {
                throw new NoSuchElementException();
            }
        }

        @Override
        public void close() throws IOException {
            if (reader != null) {
                reader.close();
            }
        }

        static RowsIterator createEmptyIterator() {
            RowsIterator iterator = new RowsIterator();
            iterator.EOF = true;
            iterator.nextRow = new String[0];
            return iterator;
        }
    }

    /**
     * Shared Row iterator
     */
    public static class SharedRowsIterator extends RowsIterator {
        /*
         A flag to mark the next row is ready.
         */
        private boolean produced;

        protected SharedRowsIterator() {
            super();
        }

        SharedRowsIterator(O o, Path path, Charset charset) throws IOException {
            super(o, path, charset);
        }

        @Override
        public boolean hasNext() {
            if (produced) return true;
            nextRow[0] = null;
            return produced = super.hasNext();
        }

        @Override
        public String[] next() {
            if (produced || hasNext()) {
                produced = false;
                return nextRow;
            } else {
                throw new NoSuchElementException();
            }
        }

        /**
         * Retain current row data
         */
        public void retain() {
            produced = true;
        }
    }

    /**
     * Check comma character and column
     *
     * @param path the csv file path
     * @param separator the separator character
     * @param charset the charset to use for encoding
     * @return comma character and column size
     */
    private static O init(Path path, char separator, Charset charset) throws IOException {
        // Test charset
        Charset bom = charsetTest(path);
        // Default use UTF-8 charset
        if (bom == null && charset == null) {
            charset = StandardCharsets.UTF_8;
        }
        if (bom != null) {
            if (charset == null) {
                charset = bom;
                // Print a warring log or reset charset
            } else if (!charset.equals(bom)) {
                LOGGER.warn("Maybe the charset is " + bom);
            }
        }

        try (BufferedReader reader = Files.newBufferedReader(path, charset)) {
            int n = 0; // read 10 lines
            String[] lines = new String[10];
            String s;
            while ((s = reader.readLine()) != null && n < lines.length) {
                if (!s.isEmpty()) {
                    lines[n++] = s;
                }
            }

            // Empty file
            if (lines[0] == null || lines[0].isEmpty()) {
                return null;
            }
            // No enough information to judge the separator
            if (n < 10) {
                LOGGER.warn("No enough information to judge the separator.");
            }

            // USA/UK CSV file almost use ',' or '\t'
            // European CSV file almost use ';'
            String[] commas = separator == 0
                ? new String[] { String.valueOf(COMMA), String.valueOf(HT), ";" } : new String[] { String.valueOf(separator)};
            int[][] columns = new int[commas.length][n];
            for (int i = 0; i < commas.length; i++) {
                for (int j = 0; j < n; j++) {
                    columns[i][j] = lines[j].length() - lines[j].replace(commas[i], EMPTY).length();
                }
            }

            // Find the most comma character
            int[] nc = new int[commas.length];
            for (int i = 0; i < columns.length; i++) {
                Map c = new HashMap<>();
                for (int j : columns[i]) {
                    if (j == 0) continue;
                    Integer co = c.get(j);
                    c.put(j, co != null ? co + 1 : 1);
                }
                if (c.isEmpty()) continue;

                if (c.size() == 1) {
                    Map.Entry entry = c.entrySet().iterator().next();
                    if (entry.getKey() > 65535) {
                        throw new IOException("Too many columns occur. Max columns 65535 but has " + entry.getKey());
                    }
                    // there only read 10 lines, 4-bits be used
                    nc[i] = (entry.getKey() << 4) + entry.getValue();
                } else {
                    int mv = 0, mk = 0;
                    for (Map.Entry entry : c.entrySet()) {
                        if (entry.getValue() > mv || entry.getValue() == mv && entry.getKey() > mk) {
                            mv = entry.getValue();
                            mk = entry.getKey();
                            nc[i] = (entry.getKey() << 4) + entry.getValue();
                        }
                    }
                }
            }

            O o = new O(0);
            o.line = bom != null ? 1 : 0;
            o.charset = charset;
            n = 0;
            // Find the final comma and column
            for (int i = 0; i < nc.length; i++) {
                int size = nc[i] >>> 4;
                if (size++ == 0) continue;
                int count = nc[i] & 0x0F;
                if (count > n) {
                    n = count;
                    o.offset = size;
                    o.value = commas[i];
                } else if (count == n && size > o.offset) {
                    o.offset = size;
                    o.value = commas[i];
                }
            }

            // Comma character not ',', '\t' or ';'
            if (o.offset == 0) {
                int count = 0;
                for (int c : nc) {
                    count += c;
                }
                // All top 10 row has only one word
                if (count == 0) {
                    o.offset = 1;
                    o.value = commas[0];
                } else {
                    throw new IOException("Unknown comma character, Please specify a separator.");
                }
            }
            return o;
        }
    }

    private static Charset charsetTest(Path path) throws IOException {
        Charset bom = null;
        try (InputStream is = Files.newInputStream(path)) {
            byte[] header = new byte[8];
            int n = is.read(header);
            if (n < 1) return null;
            // 16-bit Unicode
            if (n >= 2) {
                // little-endian byte order
                if ((header[0] & 0xFF) == 0xFF && (header[1] & 0xFF) == 0xFE) {
                    bom = StandardCharsets.UTF_16LE; // UTF-16/UCS-2
                    // 32-bit Unicode
                    if (n >= 4 && header[2] == 0x0 && header[3] == 0x0) {
                        bom = Charset.forName("UTF-32LE"); // UTF-32/UCS-4
                    }
                }
                // big-endian byte order
                else if ((header[0] & 0xFF) == 0xFE && (header[1] & 0xFF) == 0xFF) {
                    bom = StandardCharsets.UTF_16BE;
                }
            }
            // 8-bit Unicode
            if (n >= 3 && (header[0] & 0xFF) == 0xEF && (header[1] & 0xFF) == 0xBB && (header[2] & 0xFF) == 0xBF) {
                bom = StandardCharsets.UTF_8; // UTF-8
            }
            // big-endian byte order UTF-32/UCS-4
            if (n >= 4 && (header[0] & 0xFF) == 0x0 && (header[1] & 0xFF) == 0x0
                && (header[2] & 0xFF) == 0xFE && (header[3] & 0xFF) == 0xFF) {
                bom = Charset.forName("UTF-32BE");
            }
        }
        return bom;
    }

    /**
     * Parse char array
     *
     * @param chars the data array
     * @param len the size of data
     * @param o a cache object
     * @param comma the separate character
     * @return true if the char array has more integral string
     */
    private static boolean parse(char[] chars, int len, O o, char comma) {
        int offset = o.offset, i = offset, iq = -1, qn = 0;
        // the first character is '"'
        boolean quoted = chars[i] == QUOTE
            // an integral string
            , integral = false
            // if data size less than block length
            , last_block = len < chars.length;

        if (quoted) i++;
        for (; i < len; i++) {
            char c = chars[i];
            if (c == QUOTE) {
                if (i >= len - 1) {
                    integral = true;
                    i++;
                    break;
                }
                if (quoted) {
                    if (chars[i + 1] == QUOTE) {
                        iq = ++i;
                        continue;
                    } else if (chars[i + 1] != comma && chars[i + 1] != LF && !(chars[i + 1] == CR && i < len - 2 && chars[i + 2] == LF)) {
                        throw new RuntimeException("line-number: " + o.line + " (zero-base). Comma-separated values" +
                            " format error.\nInvalid char between encapsulated token and delimiter.");
                    }
                } else qn++;
                if (!last_block && chars[i + 1] != comma && chars[i + 1] != LF
                    && !(chars[i + 1] == CR && i < len - 2 && chars[i + 2] == LF)) {
                    throw new RuntimeException("line-number: " + o.line + " (zero-base). Comma-separated values " +
                        "format error.\nA (double) quote character in a field must be represented by two (double) quote characters.");
                }
                i++;
                if (qn == 0) {
                    integral = true;
                    break;
                }
            } else if (c == comma || c == LF) {
                if (!quoted) {
                    integral = true;
                    break;
                }
            }
        }

        if (!integral && last_block) {
            if (!quoted) integral = true;
            else throw new RuntimeException("line-number: " + o.line + " (zero-base). Comma-separated values " +
                "format error.\nEOF reached before encapsulated token finished.");
        }

        if (integral) {
            if (quoted) offset++;
            // an integral string
            if (offset == i && chars[offset] == LF || offset - i == 1 && chars[offset] == CR && chars[i] == LF)
                o.value = null;
            else {
                o.value = i - offset > 0
                    ? trim(chars, offset, quoted || chars[i - 1] == CR ? i - offset - 1 : i - offset, iq) : EMPTY;
            }
            if (i < len - 1 && chars[i] == CR && chars[i + 1] == LF) {
                i += 1;
                o.newLine = true;
            } else if (i < len) {
                o.newLine = chars[i] == LF;
            } else o.newLine = false;
            offset = i + 1;

            // reset the offset
            o.offset = offset;
        }
        return integral;
    }

    /**
     * Returns a string whose value is this string, with any leading and trailing
     * whitespace removed and double-quoted character convert to single-quoted character.
     *
     * @param chars a block data
     * @param offset initial offset of the block data.
     * @param size length of the integral string
     * @return string
     */
    private static String trim(char[] chars, int offset, int size, int iq) {
        if (size > 0) {
            int len = offset + size;
            if (iq >= 0) {
                System.arraycopy(chars, iq, chars, iq - 1, len - iq);
                len--;
                for (int i = iq - 1; i > offset; i--) {
                    if (chars[i] == QUOTE && chars[i - 1] == QUOTE) {
                        System.arraycopy(chars, i, chars, i - 1, len - i);
                        i--;
                        len--;
                    }
                }
            }
            return new String(chars, offset, len - offset);
        }
        return EMPTY;
    }

    // --- PUBLIC inner Writer

    /**
     * A CSV format file writer.
     */
    public static class Writer implements Closeable {

        private final BufferedWriter writer;
        // Comma separator character, default ','
        private char separator = COMMA;
        private int column;
        // The column index
        private int i;
        private char[] cb;
        private int offset;
        private final static int length = 8192;

        /**
         * Line separator string.  This is the value of the line.separator
         * property at the moment that the stream was created.
         */
        private final char[] lineSeparator = System.lineSeparator().toCharArray();

        /**
         * Create a CSV format writer
         *
         * @param path the storage path
         * @throws IOException If an I/O error occurs
         */
        private Writer(Path path) throws IOException {
            this(path, StandardCharsets.UTF_8);
        }

        /**
         * Create a CSV format writer
         *
         * @param path the storage path
         * @param charset the charset to use for encoding
         * @throws IOException If an I/O error occurs
         */
        private Writer(Path path, Charset charset) throws IOException {
            this.writer = Files.newBufferedWriter(path, charset);
            init();
        }

        /**
         * Create a CSV format writer
         *
         * @param writer the output
         */
        private Writer(BufferedWriter writer) {
            this.writer = writer;
            init();
        }

        private void init() {
            cb = new char[length];
        }

        /**
         * Write csv bytes with BOM
         *
         * Note: This property must be set before writing, otherwise it will be ignored
         *
         * @return current writer
         */
        public Writer writeWithBom() {
            if (offset == 0 && i == 0 && column == 0) {
                // Write UTF BOM
                cb[offset++] = '\uFEFF';
            }
            return this;
        }

        /**
         * Writes a single character
         *
         * @param c int specifying a character to be written
         * @throws IOException If an I/O error occurs
         */
        public void writeChar(char c) throws IOException {
            test();
            if (c == QUOTE) {
                checkBound(4);
                cb[offset++] = QUOTE;
                cb[offset++] = QUOTE;
                cb[offset++] = QUOTE;
                cb[offset++] = QUOTE;
            } else if (c == LF || c == HT || c == separator || c == COMMA) {
                checkBound(3);
                cb[offset++] = QUOTE;
                cb[offset++] = c;
                cb[offset++] = QUOTE;
            } else {
                checkBound(1);
                cb[offset++] = c;
            }
        }

        /**
         * Writes a boolean value, will be convert to boolean string upper case
         *
         * @param b the int value to be written
         * @throws IOException If an I/O error occurs
         */
        public void write(boolean b) throws IOException {
            test();
            if (b) {
                checkBound(4);
                cb[offset++] = 'T';
                cb[offset++] = 'R';
                cb[offset++] = 'U';
                cb[offset++] = 'E';
            } else {
                checkBound(5);
                cb[offset++] = 'F';
                cb[offset++] = 'A';
                cb[offset++] = 'L';
                cb[offset++] = 'S';
                cb[offset++] = 'E';
            }
        }

        /**
         * Writes a int value, will be convert to int string
         *
         * @param n the int value to be written
         * @throws IOException If an I/O error occurs
         */
        public void write(int n) throws IOException {
            test();
            toChars(n);
        }

        /**
         * Writes a long value, will be convert to long string
         *
         * @param l the long value to be written
         * @throws IOException If an I/O error occurs
         */
        public void write(long l) throws IOException {
            test();
            toChars(l);
        }

        /**
         * Writes a single-precision floating-point value, will be convert to single-precision string
         *
         * @param f the single-precision floating-point to be written
         * @throws IOException If an I/O error occurs
         */
        public void write(float f) throws IOException {
            test();
            String fs = Float.toString(f);
            int len = fs.length();
            checkBound(len);
            fs.getChars(0, len, cb, offset);
            offset += len;
        }

        /**
         * Writes a double-precision floating-point value, will be convert to double-precision string
         *
         * @param d the double-precision floating-point to be written
         * @throws IOException If an I/O error occurs
         */
        public void write(double d) throws IOException {
            test();
            String ds = Double.toString(d);
            int len = ds.length();
            checkBound(len);
            ds.getChars(0, len, cb, offset);
            offset += len;
        }

        /**
         * Compression and escape char sequence
         * - line-break, double-quote or commas should be quoted.
         * - A (double) quote character in a field must be represented by two (double) quote characters.
         *
         * @param text the string to be written
         * @throws IOException If an I/O error occurs
         */
        public void write(String text) throws IOException {
            if (text != null && !text.isEmpty()) write(text.toCharArray());
            else writeEmpty();
        }

        /**
         * Compression and escape char sequence
         * - line-break, double-quote or commas should be quoted.
         * - A (double) quote character in a field must be represented by two (double) quote characters.
         *
         * @param chars the char sequence to be written
         * @throws IOException If an I/O error occurs
         */
        public void write(char[] chars) throws IOException {
            write(chars, 0, chars.length);
        }

        /**
         * Compression and escape char sequence
         * - line-break, double-quote or commas should be quoted.
         * - A (double) quote character in a field must be represented by two (double) quote characters.
         *
         * @param chars the char sequence to be written
         * @param offset the offset index
         * @param size size of characters
         * @throws IOException If an I/O error occurs
         */
        public void write(char[] chars, int offset, int size) throws IOException {
            test();
            int i = 0;
            int last = offset;
            boolean quoted = false, shouldBeQuoted = false;

            for ( ; i < size; ) {
                char c = chars[i++];

                // A (double) quote character in a field must be represented
                // by two (double) quote characters.
                if (c == QUOTE) {
                    quoted = true;
                    if (last == offset) {
                        checkBound(1);
                        cb[this.offset++] = QUOTE;
                    }
                    checkBound(i - last + 1);
                    System.arraycopy(chars, last, cb, this.offset, i - last);
                    this.offset += (i - last);
                    cb[this.offset++] = QUOTE;
                    last = i;
                }

                else if (c == LF || c == HT || c == separator || c == COMMA) {
                    shouldBeQuoted = true;
                }

            }

            if (quoted) {
                checkBound(i - last + 1);
                System.arraycopy(chars, last, cb, this.offset, i - last);
                this.offset += (i - last);
                cb[this.offset++] = QUOTE;
            }
            else if (shouldBeQuoted) {
                checkBound(size + 2);
                cb[this.offset++] = QUOTE;
                System.arraycopy(chars, offset, cb, this.offset, size);
                this.offset += size;
                cb[this.offset++] = QUOTE;
            }
            else {
                checkBound(size);
                System.arraycopy(chars, offset, cb, this.offset, size);
                this.offset += size;
            }

        }

        /**
         * Writes a empty column.
         * @throws IOException If an I/O error occurs
         */
        public void writeEmpty() throws IOException {
            test();
        }

        /**
         * Writes a line separator.  The line separator string is defined by the
         * system property line.separator, and is not necessarily a single
         * newline ('\n') character.
         *
         * @exception  IOException  If an I/O error occurs
         */
        public void newLine() throws IOException {
            checkBound(lineSeparator.length);
            System.arraycopy(lineSeparator, 0, cb, offset, lineSeparator.length);
            offset += lineSeparator.length;
            if (column == 0) column = i;
            i = 0;
        }

        /**
         * Test the column index
         *
         * @return true if first column
         */
        private boolean test() throws IOException {
            boolean first = i == 0;
            i++;
            if (column > 0 && i > column) {
                // FIXME maybe throw an exception
                LOGGER.warn("Each record should contain the same number of comma-separated fields.");
            }
            if (!first) {
                checkBound(1);
                cb[offset++] = separator;
            }
            return first;
        }

        private void flush() throws IOException {
            writer.write(cb, 0, offset);
            offset = 0;
        }

        private void checkBound(int size) throws IOException {
            if (offset + size > length) {
                flush();
            }
        }

        private void toChars(int i) throws IOException {
            if (i == Integer.MIN_VALUE) {
                checkBound(MIN_INTEGER_CHARS.length);
                System.arraycopy(MIN_INTEGER_CHARS, 0, cb, offset, MIN_INTEGER_CHARS.length);
                offset += MIN_INTEGER_CHARS.length;
            } else {
                int size = stringSize(i);
                checkBound(size);
                getChars(i, offset += size, cb);
            }
        }

        private void toChars(long i) throws IOException {
            if (i == Long.MIN_VALUE) {
                checkBound(MIN_LONG_CHARS.length);
                System.arraycopy(MIN_LONG_CHARS, 0, cb, offset, MIN_LONG_CHARS.length);
                offset += MIN_LONG_CHARS.length;
            } else {
                int size = stringSize(i);
                checkBound(size);
                getChars(i, offset += size, cb);
            }
        }

        @Override
        public void close() throws IOException {
            if (writer != null) {
                if (offset > 0) {
                    flush();
                }
                writer.close();
            }
        }
    }
}