All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.fasterxml.jackson.dataformat.csv.impl.CsvWriter Maven / Gradle / Ivy

There is a newer version: 8.1.2
Show newest version
package com.fasterxml.jackson.dataformat.csv.impl;

import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.dataformat.csv.CsvGenerator;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;

import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;

/**
 * Low-level helper class that handles actual output of CSV, purely
 * based on indexes given without worrying about reordering etc.
 */
public class CsvWriter
{
    /**
     * Length limit for "short" raw writes: {@link #writeRaw(char[], int, int)}
     * copies content shorter than this into the output buffer and hands
     * anything else directly to the underlying writer.
     */
    protected static final int SHORT_WRITE = 32;

    /**
     * Default for {@link #_cfgMaxQuoteCheckChars}: with the loose quoting
     * check, only values up to this length are scanned to see if quotes
     * are optional; longer ones are always quoted.
     */
    protected static final int MAX_QUOTE_CHECK = 24;

    /**
     * Empty array that {@link #_buffered} initially refers to.
     */
    protected final BufferedValue[] NO_BUFFERED = new BufferedValue[0];

    /** Characters output for boolean {@code true}. */
    private static final char[] TRUE_CHARS = "true".toCharArray();

    /** Characters output for boolean {@code false}. */
    private static final char[] FALSE_CHARS = "false".toCharArray();
    
    /*
    /**********************************************************
    /* Configuration
    /**********************************************************
     */

    /**
     * I/O context from which the output buffer is allocated and to which
     * it is released.
     */
    protected final IOContext _ioContext;

    /**
     * Underlying {@link Writer} used for output.
     */
    protected final Writer _out;

    /**
     * Character written between column values.
     */
    protected final char _cfgColumnSeparator;

    /**
     * Character used for quoting values; a negative value means that
     * quoting is disabled.
     */
    protected final int _cfgQuoteCharacter;

    /**
     * Characters written at the end of each row.
     */
    protected final char[] _cfgLineSeparator;

    /**
     * Number of characters of {@link #_cfgLineSeparator} to write.
     */
    protected final int _cfgLineSeparatorLength;

    /**
     * With the loose quoting check, values longer than this are quoted
     * without inspecting their contents.
     */
    protected int _cfgMaxQuoteCheckChars;

    /**
     * Lowest-valued character that is safe to output without using
     * quotes around value
     */
    protected final int _cfgMinSafeChar;

    /**
     * Bit mask of enabled {@link CsvGenerator.Feature}s.
     */
    protected int _csvFeatures;

    /**
     * Marker flag used to determine whether to do optimal (aka "strict")
     * quoting checks or not (looser, conservative check).
     *
     * @since 2.4
     */
    protected boolean _cfgOptimalQuoting;

    /**
     * Whether separators for columns missing from the end of a row are
     * still written out when the row ends.
     *
     * @since 2.4
     */
    protected boolean _cfgIncludeMissingTail;

    /*
    /**********************************************************
    /* Output state
    /**********************************************************
     */

    /**
     * Number of columns a full row has; taken from the schema, or -1 when
     * the writer was constructed without one.
     *
     * @since 2.4
     */
    protected int _columnCount;

    /**
     * Index of the column we expect to write next; starts at 0 for
     * each row.
     */
    protected int _nextColumnToWrite;

    /**
     * Values that arrived out of column order, indexed by column, held
     * until the row ends.
     */
    protected BufferedValue[] _buffered = NO_BUFFERED;

    /**
     * Highest column index with a buffered value, or -1 if nothing
     * is buffered.
     */
    protected int _lastBuffered = -1;
    
    /*
    /**********************************************************
    /* Output buffering, low-level
    /**********************************************************
     */
    
    /**
     * Intermediate buffer in which content is collected before being
     * written to {@link #_out}.
     */
    protected char[] _outputBuffer;

    /**
     * Whether {@link #_outputBuffer} must be handed back to the recycler
     * once we are done with it.
     */
    protected boolean _bufferRecyclable;

    /**
     * Index of the next free position in {@link #_outputBuffer}.
     */
    protected int _outputTail;

    /**
     * Index one past the last usable position of {@link #_outputBuffer};
     * set to the buffer length at construction.
     */
    protected final int _outputEnd;

    /**
     * Number of characters flushed from the output buffer to the
     * underlying writer so far; content still sitting in the buffer is
     * not counted.
     */
    protected int _charsWritten;
    
    /*
    /**********************************************************
    /* Construction, (re)configuration
    /**********************************************************
     */

    public CsvWriter(IOContext ctxt, int csvFeatures, Writer out, CsvSchema schema)
    {
        _ioContext = ctxt;
        _csvFeatures = csvFeatures;
        _cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(csvFeatures);
        _cfgIncludeMissingTail = !CsvGenerator.Feature.OMIT_MISSING_TAIL_COLUMNS.enabledIn(_csvFeatures);
        
        _outputBuffer = ctxt.allocConcatBuffer();
        _bufferRecyclable = true;
        _outputEnd = _outputBuffer.length;
        _out = out;

        _cfgColumnSeparator = schema.getColumnSeparator();
        _cfgQuoteCharacter = schema.getQuoteChar();
        _cfgLineSeparator = schema.getLineSeparator();
        _cfgLineSeparatorLength = (_cfgLineSeparator == null) ? 0 : _cfgLineSeparator.length;
        _columnCount = schema.size();

        _cfgMinSafeChar = _calcSafeChar();

        _cfgMaxQuoteCheckChars = MAX_QUOTE_CHECK;
    }

    /**
     * Creates a writer with default features and explicitly given
     * separator, quote and linefeed settings.
     *
     * @deprecated Since 2.4; scheduled for removal in 2.5
     */
    @Deprecated // since 2.4, remove in 2.5
    public CsvWriter(IOContext ctxt, Writer out, char columnSeparator, char quoteChar,
            char[] linefeed)
    {
        this(ctxt,
                CsvGenerator.Feature.collectDefaults(),
                out,
                columnSeparator, quoteChar, linefeed);
    }

    /**
     * Creates a writer from explicitly given separator, quote and linefeed
     * settings instead of a schema; column count is left as unknown (-1).
     *
     * @param ctxt I/O context used to allocate the output buffer
     * @param csvFeatures Bit mask of enabled {@link CsvGenerator.Feature}s
     * @param out Writer that receives the output
     * @param columnSeparator Character written between column values
     * @param quoteChar Character used for quoting values
     * @param linefeed Characters written at the end of each row
     *
     * @deprecated Since 2.4; scheduled for removal in 2.5
     */
    @Deprecated // since 2.4, remove in 2.5
    public CsvWriter(IOContext ctxt, int csvFeatures, Writer out,
            char columnSeparator, char quoteChar, char[] linefeed)
    {
        // Output target and recyclable buffer
        _ioContext = ctxt;
        _out = out;
        _outputBuffer = ctxt.allocConcatBuffer();
        _outputEnd = _outputBuffer.length;
        _bufferRecyclable = true;

        // Feature-derived settings
        _csvFeatures = csvFeatures;
        _cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(csvFeatures);
        _cfgIncludeMissingTail = !CsvGenerator.Feature.OMIT_MISSING_TAIL_COLUMNS.enabledIn(csvFeatures);
        _cfgMaxQuoteCheckChars = MAX_QUOTE_CHECK;

        // Explicitly passed format settings
        _cfgColumnSeparator = columnSeparator;
        _cfgQuoteCharacter = quoteChar;
        _cfgLineSeparator = linefeed;
        _cfgLineSeparatorLength = linefeed.length;

        // must follow the format settings it is derived from
        _cfgMinSafeChar = _calcSafeChar();

        // not sure how this would be figured out so...
        _columnCount = -1;
    }

    public CsvWriter(CsvWriter base, CsvSchema newSchema)
    {
        _ioContext = base._ioContext;
        _csvFeatures = base._csvFeatures;
        _cfgOptimalQuoting = base._cfgOptimalQuoting;
        _cfgIncludeMissingTail = base._cfgIncludeMissingTail;

        _outputBuffer = base._outputBuffer;
        _bufferRecyclable = base._bufferRecyclable;
        _outputEnd = base._outputEnd;
        _out = base._out;
        _cfgMaxQuoteCheckChars = base._cfgMaxQuoteCheckChars;

        _cfgColumnSeparator = newSchema.getColumnSeparator();
        _cfgQuoteCharacter = newSchema.getQuoteChar();
        _cfgLineSeparator = newSchema.getLineSeparator();
        _cfgLineSeparatorLength = _cfgLineSeparator.length;
        _cfgMinSafeChar = _calcSafeChar();
        _columnCount = newSchema.size();
    }  
    
    /**
     * Calculates the lowest character code that is above the column
     * separator, the quote character and every linefeed character.
     *
     * @return One more than the highest of those character codes
     */
    private final int _calcSafeChar()
    {
        // note: quote char may be -1 to signify "no quoting", in which
        // case the separator is the starting point
        int highest = (_cfgQuoteCharacter > _cfgColumnSeparator)
                ? _cfgQuoteCharacter : _cfgColumnSeparator;
        for (int ix = 0; ix < _cfgLineSeparatorLength; ++ix) {
            final char c = _cfgLineSeparator[ix];
            if (c > highest) {
                highest = c;
            }
        }
        return highest + 1;
    }

    /**
     * Creates a new writer based on this one but configured with the
     * given schema; this instance is not modified.
     *
     * @param schema Schema for the new writer
     * @return Newly constructed writer
     */
    public CsvWriter withSchema(CsvSchema schema)
    {
        final CsvWriter reconfigured = new CsvWriter(this, schema);
        return reconfigured;
    }

    /**
     * Replaces the set of enabled features and refreshes the settings
     * that are derived from it.
     *
     * @param feat Bit mask of enabled {@link CsvGenerator.Feature}s
     * @return This writer, to allow call chaining
     */
    public CsvWriter setFeatures(int feat) {
        if (feat != _csvFeatures) {
            _csvFeatures = feat;
            // Both flags are derived from the feature bits by the constructors,
            // so both need to be recalculated when the bits change
            _cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(feat);
            _cfgIncludeMissingTail = !CsvGenerator.Feature.OMIT_MISSING_TAIL_COLUMNS.enabledIn(feat);
        }
        return this;
    }
    
    /*
    /**********************************************************
    /* Read-access to output state
    /**********************************************************
     */
    
    /**
     * @return The underlying {@link Writer} this writer outputs to
     */
    public Object getOutputTarget()
    {
        return _out;
    }

    /**
     * @return Index of the column that is expected to be written next
     */
    public int nextColumnIndex()
    {
        return _nextColumnToWrite;
    }
    
    /*
    /**********************************************************
    /* Writer API, writes from generator
    /**********************************************************
     */

    /**
     * Writes a String value for the given column: output directly if the
     * column is the next one expected, buffered until end of row otherwise.
     *
     * @param columnIndex Index of the column the value belongs to
     * @param value Value to write
     * @throws IOException If writing to the underlying writer fails
     */
    public final void write(int columnIndex, String value) throws IOException
    {
        if (columnIndex != _nextColumnToWrite) {
            // out of order: hold on to it until the row is ended
            _buffer(columnIndex, BufferedValue.buffered(value));
            return;
        }
        appendValue(value);
        ++_nextColumnToWrite;
    }

    /**
     * Writes a value given as a section of a character array, by
     * converting it to a String first.
     *
     * @param columnIndex Index of the column the value belongs to
     * @param ch Array that contains the value
     * @param offset Index of the first character of the value
     * @param len Number of characters in the value
     * @throws IOException If writing to the underlying writer fails
     */
    public final void write(int columnIndex, char[] ch, int offset, int len) throws IOException
    {
        // !!! TODO: optimize
        final String text = new String(ch, offset, len);
        write(columnIndex, text);
    }
    
    /**
     * Writes an {@code int} value for the given column: output directly if
     * the column is the next one expected, buffered until end of row otherwise.
     *
     * @param columnIndex Index of the column the value belongs to
     * @param value Value to write
     * @throws IOException If writing to the underlying writer fails
     */
    public final void write(int columnIndex, int value) throws IOException
    {
        if (columnIndex != _nextColumnToWrite) {
            // out of order: hold on to it until the row is ended
            _buffer(columnIndex, BufferedValue.buffered(value));
            return;
        }
        appendValue(value);
        ++_nextColumnToWrite;
    }

    /**
     * Writes a {@code long} value for the given column: output directly if
     * the column is the next one expected, buffered until end of row otherwise.
     *
     * @param columnIndex Index of the column the value belongs to
     * @param value Value to write
     * @throws IOException If writing to the underlying writer fails
     */
    public final void write(int columnIndex, long value) throws IOException
    {
        if (columnIndex != _nextColumnToWrite) {
            // out of order: hold on to it until the row is ended
            _buffer(columnIndex, BufferedValue.buffered(value));
            return;
        }
        appendValue(value);
        ++_nextColumnToWrite;
    }

    /**
     * Writes a {@code float} value for the given column: output directly if
     * the column is the next one expected, buffered until end of row otherwise.
     *
     * @param columnIndex Index of the column the value belongs to
     * @param value Value to write
     * @throws IOException If writing to the underlying writer fails
     */
    public final void write(int columnIndex, float value) throws IOException
    {
        if (columnIndex != _nextColumnToWrite) {
            // out of order: hold on to it until the row is ended
            _buffer(columnIndex, BufferedValue.buffered(value));
            return;
        }
        appendValue(value);
        ++_nextColumnToWrite;
    }

    /**
     * Writes a {@code double} value for the given column: output directly if
     * the column is the next one expected, buffered until end of row otherwise.
     *
     * @param columnIndex Index of the column the value belongs to
     * @param value Value to write
     * @throws IOException If writing to the underlying writer fails
     */
    public final void write(int columnIndex, double value) throws IOException
    {
        if (columnIndex != _nextColumnToWrite) {
            // out of order: hold on to it until the row is ended
            _buffer(columnIndex, BufferedValue.buffered(value));
            return;
        }
        appendValue(value);
        ++_nextColumnToWrite;
    }


    /**
     * Writes a {@code boolean} value for the given column: output directly if
     * the column is the next one expected, buffered until end of row otherwise.
     *
     * @param columnIndex Index of the column the value belongs to
     * @param value Value to write
     * @throws IOException If writing to the underlying writer fails
     */
    public final void write(int columnIndex, boolean value) throws IOException
    {
        if (columnIndex != _nextColumnToWrite) {
            // out of order: hold on to it until the row is ended
            _buffer(columnIndex, BufferedValue.buffered(value));
            return;
        }
        appendValue(value);
        ++_nextColumnToWrite;
    }
    
    /**
     * Appends a column name as the next value of the current row.
     *
     * @param name Column name to write
     * @throws IOException If writing to the underlying writer fails
     */
    public final void writeColumnName(String name) throws IOException
    {
        appendValue(name);
        _nextColumnToWrite += 1;
    }

    /**
     * Completes the current row: writes out values that were buffered
     * because they arrived out of order, optionally adds separators for
     * missing trailing columns, and appends the line separator. A row
     * for which nothing was written or buffered produces no output.
     *
     * @throws IOException If writing to the underlying writer fails
     */
    public void endRow() throws IOException
    {
        if (_lastBuffered < 0) {
            // nothing buffered, nothing written: empty line; do nothing
            if (_nextColumnToWrite <= 0) {
                return;
            }
        } else {
            final int lastIndex = _lastBuffered;
            _lastBuffered = -1;
            while (_nextColumnToWrite <= lastIndex) {
                final BufferedValue pending = _buffered[_nextColumnToWrite];
                if (pending == null) {
                    // note: writing a value prepends the separator itself; for
                    // missing values it has to be added explicitly
                    if (_nextColumnToWrite > 0) {
                        appendColumnSeparator();
                    }
                } else {
                    _buffered[_nextColumnToWrite] = null;
                    pending.write(this);
                }
                ++_nextColumnToWrite;
            }
        }

        // Pad for columns missing from the end, if so configured
        if (_cfgIncludeMissingTail) {
            while (_nextColumnToWrite < _columnCount) {
                appendColumnSeparator();
                ++_nextColumnToWrite;
            }
        }

        // Finally the line separator; next row starts from first column
        _nextColumnToWrite = 0;
        final int separatorLength = _cfgLineSeparatorLength;
        if ((_outputTail + separatorLength) > _outputEnd) {
            _flushBuffer();
        }
        System.arraycopy(_cfgLineSeparator, 0, _outputBuffer, _outputTail, separatorLength);
        _outputTail += separatorLength;
    }
    
    /*
    /**********************************************************
    /* Writer API, writes via buffered values
    /**********************************************************
     */

    /**
     * Appends a String value, preceded by the column separator unless
     * this is the first column of the row; the value is quoted if
     * {@link #_mayNeedQuotes} says so.
     *
     * @param value Value to append
     * @throws IOException If writing to the underlying writer fails
     */
    protected void appendValue(String value) throws IOException
    {
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            appendColumnSeparator();
        }
        // Quote only when the (possibly heuristic) check asks for it
        if (_mayNeedQuotes(value, value.length())) {
            _writeQuoted(value);
            return;
        }
        writeRaw(value);
    }
    
    /**
     * Appends an {@code int} value, preceded by the column separator
     * unless this is the first column of the row.
     *
     * @param value Value to append
     * @throws IOException If flushing the output buffer fails
     */
    protected void appendValue(int value) throws IOException
    {
        // up to 10 digits and possible minus sign, leading separator: flush
        // only if that worst case does not fit in the rest of the buffer
        // (check used to compare against _outputTail itself, forcing a
        // flush for every single value)
        if ((_outputTail + 12) > _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            _outputBuffer[_outputTail++] = _cfgColumnSeparator;
        }
        _outputTail = NumberOutput.outputInt(value, _outputBuffer, _outputTail);
    }

    /**
     * Appends a {@code long} value, preceded by the column separator
     * unless this is the first column of the row.
     *
     * @param value Value to append
     * @throws IOException If flushing the output buffer fails
     */
    protected void appendValue(long value) throws IOException
    {
        // up to 20 digits, minus sign, leading separator: flush only if that
        // worst case does not fit in the rest of the buffer (check used to
        // compare against _outputTail itself, forcing a flush for every value)
        if ((_outputTail + 22) > _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            _outputBuffer[_outputTail++] = _cfgColumnSeparator;
        }
        _outputTail = NumberOutput.outputLong(value, _outputBuffer, _outputTail);
    }

    /**
     * Appends a {@code float} value, preceded by the column separator
     * unless this is the first column of the row.
     *
     * @param value Value to append
     * @throws IOException If writing to the underlying writer fails
     */
    protected void appendValue(float value) throws IOException
    {
        String str = NumberOutput.toString(value);
        final int len = str.length();
        // >= to include possible separator too. Must compare against end of
        // buffer: comparing against _outputTail itself was always true and
        // forced a flush for every value
        if ((_outputTail + len) >= _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            _outputBuffer[_outputTail++] = _cfgColumnSeparator;
        }
        // handles values longer than the remaining room on its own
        writeRaw(str);
    }

    /**
     * Appends a {@code double} value, preceded by the column separator
     * unless this is the first column of the row.
     *
     * @param value Value to append
     * @throws IOException If writing to the underlying writer fails
     */
    protected void appendValue(double value) throws IOException
    {
        String str = NumberOutput.toString(value);
        final int len = str.length();
        // >= to include possible separator too. Must compare against end of
        // buffer: comparing against _outputTail itself was always true and
        // forced a flush for every value
        if ((_outputTail + len) >= _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            _outputBuffer[_outputTail++] = _cfgColumnSeparator;
        }
        // handles values longer than the remaining room on its own
        writeRaw(str);
    }

    /**
     * Appends a {@code boolean} value as {@code true} or {@code false},
     * preceded by the column separator unless this is the first column
     * of the row.
     *
     * @param value Value to append
     * @throws IOException If flushing the output buffer fails
     */
    protected void appendValue(boolean value) throws IOException
    {
        char[] ch = value ? TRUE_CHARS : FALSE_CHARS;
        final int len = ch.length;
        // >= to include possible separator too. Must compare against end of
        // buffer: comparing against _outputTail itself was always true and
        // forced a flush for every value
        if ((_outputTail + len) >= _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            _outputBuffer[_outputTail++] = _cfgColumnSeparator;
        }
        System.arraycopy(ch, 0, _outputBuffer, _outputTail, len);
        _outputTail += len;
    }

    /**
     * Appends a single column separator character to the output buffer,
     * flushing the buffer first if it is full.
     *
     * @throws IOException If flushing the output buffer fails
     */
    protected void appendColumnSeparator() throws IOException {
        // Flush only when the buffer is actually full; the check used to
        // compare _outputTail with itself, which flushed on every call
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        _outputBuffer[_outputTail++] = _cfgColumnSeparator;
    }
    
    /*
    /**********************************************************
    /* Output methods, unprocessed ("raw")
    /**********************************************************
     */

    /**
     * Writes given text surrounded by quote characters, doubling every
     * quote character that occurs within the text.
     *
     * @param text Text to write in quoted form
     * @throws IOException If writing to the underlying writer fails
     */
    public void _writeQuoted(String text) throws IOException
    {
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        // NOTE: caller should guarantee quote char is valid (not -1) at this point:
        final char quote = (char) _cfgQuoteCharacter;
        _outputBuffer[_outputTail++] = quote;

        final int length = text.length();
        // Worst case every character gets doubled; unless even that is sure
        // to fit, go through the variant that checks boundaries as it goes
        if ((_outputTail + length + length) >= _outputEnd) {
            _writeLongQuoted(text);
            return;
        }
        int ix = 0;
        while (ix < length) {
            final char ch = text.charAt(ix++);
            if (ch == quote) { // double up
                _outputBuffer[_outputTail++] = quote;
                if (_outputTail >= _outputEnd) {
                    _flushBuffer();
                }
            }
            _outputBuffer[_outputTail++] = ch;
        }
        _outputBuffer[_outputTail++] = quote;
    }
    
    /**
     * Writes the content and closing quote of a quoted value, checking
     * for available room before every character written. The opening
     * quote is expected to have been written by the caller.
     *
     * @param text Text to write
     * @throws IOException If writing to the underlying writer fails
     */
    private final void _writeLongQuoted(String text) throws IOException
    {
        // NOTE: caller should guarantee quote char is valid (not -1) at this point:
        final char quote = (char) _cfgQuoteCharacter;
        final int length = text.length();
        int ix = 0;
        while (ix < length) {
            if (_outputTail >= _outputEnd) {
                _flushBuffer();
            }
            final char ch = text.charAt(ix++);
            if (ch == quote) { // double up
                _outputBuffer[_outputTail++] = quote;
                if (_outputTail >= _outputEnd) {
                    _flushBuffer();
                }
            }
            _outputBuffer[_outputTail++] = ch;
        }
        // and then the closing quote
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        _outputBuffer[_outputTail++] = quote;
    }
    
    /**
     * Writes given text as is, without quoting or escaping.
     *
     * @param text Text to write
     * @throws IOException If writing to the underlying writer fails
     */
    public void writeRaw(String text) throws IOException
    {
        final int length = text.length();
        // Completely full buffer gets flushed first
        if (_outputTail == _outputEnd) {
            _flushBuffer();
        }
        final int available = _outputEnd - _outputTail;
        if (length > available) {
            // does not fit as a whole; needs to be written in pieces
            writeRawLong(text);
            return;
        }
        text.getChars(0, length, _outputBuffer, _outputTail);
        _outputTail += length;
    }

    /**
     * Writes a section of given text as is, without quoting or escaping.
     *
     * @param text Text that contains the section to write
     * @param start Index of the first character to write
     * @param len Number of characters to write
     * @throws IOException If writing to the underlying writer fails
     */
    public void writeRaw(String text, int start, int len) throws IOException
    {
        final int end = start + len;
        // Make as much room as possible if the section does not fit as is
        if ((_outputEnd - _outputTail) < len) {
            _flushBuffer();
        }
        final int available = _outputEnd - _outputTail;
        if (len > available) {
            // still too long; needs to be written in pieces
            writeRawLong(text.substring(start, end));
            return;
        }
        text.getChars(start, end, _outputBuffer, _outputTail);
        _outputTail += len;
    }

    /**
     * Writes a section of given character array as is: content shorter
     * than {@link #SHORT_WRITE} goes through the output buffer, anything
     * else is passed directly to the underlying writer after flushing
     * the buffer.
     *<p>
     * NOTE(review): characters passed directly to the writer are not
     * added to {@code _charsWritten}.
     *
     * @param text Array that contains the characters to write
     * @param offset Index of the first character to write
     * @param len Number of characters to write
     * @throws IOException If writing to the underlying writer fails
     */
    public void writeRaw(char[] text, int offset, int len)
        throws IOException, JsonGenerationException
    {
        if (len >= SHORT_WRITE) {
            // Not worth buffering; better just pass through
            _flushBuffer();
            _out.write(text, offset, len);
            return;
        }
        // Short write: coalesce into the buffer
        if (len > (_outputEnd - _outputTail)) {
            _flushBuffer();
        }
        System.arraycopy(text, offset, _outputBuffer, _outputTail, len);
        _outputTail += len;
    }

    /**
     * Writes a single character as is, flushing the output buffer first
     * if it is full.
     *
     * @param c Character to write
     * @throws IOException If flushing the output buffer fails
     */
    public void writeRaw(char c) throws IOException
    {
        final boolean bufferFull = (_outputTail >= _outputEnd);
        if (bufferFull) {
            _flushBuffer();
        }
        _outputBuffer[_outputTail] = c;
        ++_outputTail;
    }

    /**
     * Writes text that does not fit in the remaining room of the output
     * buffer: fills up and flushes the buffer as many times as needed and
     * leaves the last piece in the buffer.
     *
     * @param text Text to write; expected to be longer than the room
     *   left in the buffer
     * @throws IOException If writing to the underlying writer fails
     */
    private void writeRawLong(String text) throws IOException
    {
        // First fill up whatever room is left, and flush
        final int head = _outputEnd - _outputTail;
        text.getChars(0, head, _outputBuffer, _outputTail);
        _outputTail += head;
        _flushBuffer();

        int from = head;
        int remaining = text.length() - head;

        // Then as many full buffers as it takes
        for (; remaining > _outputEnd; from += _outputEnd, remaining -= _outputEnd) {
            text.getChars(from, from + _outputEnd, _outputBuffer, 0);
            _outputTail = _outputEnd;
            _flushBuffer();
        }
        // And last piece (at most length of buffer), left in the buffer
        text.getChars(from, from + remaining, _outputBuffer, 0);
        _outputTail = remaining;
    }
    
    /*
    /**********************************************************
    /* Writer API, state changes
    /**********************************************************
     */
    
    /**
     * Writes buffered content to the underlying writer and optionally
     * flushes that writer as well.
     *
     * @param flushStream Whether the underlying writer should be flushed too
     * @throws IOException If writing to or flushing the underlying writer fails
     */
    public void flush(boolean flushStream) throws IOException
    {
        _flushBuffer();
        if (!flushStream) {
            return;
        }
        _out.flush();
    }

    /**
     * Writes buffered content to the underlying writer, closes or (if
     * closing is not allowed) flushes that writer, and releases the
     * output buffer.
     *
     * @param autoClose Whether the underlying writer is to be closed
     * @throws IOException If writing to, flushing or closing the underlying writer fails
     */
    public void close(boolean autoClose) throws IOException
    {
        _flushBuffer();
        if (!autoClose) {
            // If we can't close it, we should at least flush
            _out.flush();
        } else {
            _out.close();
        }
        // Internal buffer(s) can now be released as well
        _releaseBuffers();
    }
    
    /*
    /**********************************************************
    /* Internal methods
    /**********************************************************
     */

    /**
     * Helper method that determines whether given String is likely
     * to require quoting; check tries to optimize for speed.
     *
     * @param value Value to check
     * @param length Length of the value
     * @return True if the value should be written quoted
     */
    protected boolean _mayNeedQuotes(String value, int length)
    {
        // 21-Mar-2014, tatu: If quoting disabled, don't quote
        if (_cfgQuoteCharacter < 0) {
            return false;
        }
        // exact checking, if asked for, ignores the length limit
        if (_cfgOptimalQuoting) {
            return _needsQuotingStrict(value);
        }
        // otherwise long values are quoted without looking at the contents
        return (length > _cfgMaxQuoteCheckChars) || _needsQuotingLoose(value);
    }

    /**
     * Conservative quoting check: any character below
     * {@code _cfgMinSafeChar} makes the value need quoting.
     *<p>
     * NOTE: final since checking is not expected to be changed here; override
     * calling method (_mayNeedQuotes) instead, if necessary.
     *
     * @since 2.4
     */
    protected final boolean _needsQuotingLoose(String value)
    {
        final int length = value.length();
        for (int ix = 0; ix < length; ++ix) {
            if (value.charAt(ix) < _cfgMinSafeChar) {
                return true;
            }
        }
        return false;
    }

    /**
     * Exact quoting check: value needs quoting only if it contains the
     * column separator, the quote character, or a carriage return or
     * line feed, below {@code _cfgMinSafeChar}.
     *
     * @since 2.4
     */
    protected boolean _needsQuotingStrict(String value)
    {
        final int length = value.length();
        for (int ix = 0; ix < length; ++ix) {
            final char ch = value.charAt(ix);
            if (ch >= _cfgMinSafeChar) {
                continue;
            }
            if (ch == _cfgColumnSeparator || ch == _cfgQuoteCharacter
                    || ch == '\r' || ch == '\n') {
                return true;
            }
        }
        return false;
    }

    /**
     * Stores a value that arrived out of column order so that it can be
     * written when the row is ended; the holding array is grown as needed.
     *
     * @param index Index of the column the value belongs to
     * @param v Value to hold on to
     */
    protected void _buffer(int index, BufferedValue v)
    {
        if (index > _lastBuffered) {
            _lastBuffered = index;
        }
        if (index >= _buffered.length) {
            final int newSize = Math.max(index + 1, _columnCount);
            _buffered = Arrays.copyOf(_buffered, newSize);
        }
        _buffered[index] = v;
    }

    /**
     * Writes contents of the output buffer, if any, to the underlying
     * writer and marks the buffer as empty.
     *
     * @throws IOException If writing to the underlying writer fails
     */
    protected void _flushBuffer() throws IOException
    {
        final int pending = _outputTail;
        if (pending <= 0) {
            return;
        }
        _charsWritten += pending;
        _out.write(_outputBuffer, 0, pending);
        _outputTail = 0;
    }

    /**
     * Hands the output buffer back to the I/O context, if there is one
     * and it is recyclable.
     */
    public void _releaseBuffers()
    {
        final char[] buffer = _outputBuffer;
        if (buffer == null || !_bufferRecyclable) {
            return;
        }
        _outputBuffer = null;
        _ioContext.releaseConcatBuffer(buffer);
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy