com.univocity.parsers.csv.CsvWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of univocity-parsers Show documentation
Show all versions of univocity-parsers Show documentation
uniVocity's open source parsers for processing different text formats using a consistent API
/*******************************************************************************
* Copyright 2014 Univocity Software Pty Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.univocity.parsers.csv;
import com.univocity.parsers.common.*;
import com.univocity.parsers.common.fields.*;
import java.io.*;
import java.nio.charset.*;
import java.util.*;
/**
* A powerful and flexible CSV writer implementation.
*
* @author Univocity Software Pty Ltd - [email protected]
* @see CsvFormat
* @see CsvWriterSettings
* @see CsvParser
* @see AbstractWriter
*/
public class CsvWriter extends AbstractWriter {
/** Single-character field delimiter; used only while {@link #multiDelimiter} is {@code null}. */
private char delimiter;
/** Characters of a multi-character delimiter, or {@code null} when the delimiter is a single character. */
private char[] multiDelimiter;
/** Character used to surround quoted values, taken from the format's quote setting. */
private char quoteChar;
/** Character written in front of a quote character found inside a value (the format's quote escape). */
private char escapeChar;
/** Character written in front of {@link #escapeChar} itself; {@code '\0'} disables this. */
private char escapeEscape;
/** When {@code true}, every field is written through the quoted path. */
private boolean quoteAllFields;
/** When {@code true}, quote/escape characters of values that end up unquoted are still escaped. */
private boolean escapeUnquoted;
/** Negation of the "input escaped" setting: when {@code true} escape characters are added on output. */
private boolean inputNotEscaped;
/** Normalized newline character of the format. */
private char newLine;
/** Negation of the "normalize line endings within quotes" setting. */
private boolean dontProcessNormalizedNewLines;
/** Lookup table indexed by character value; {@code true} marks a character that forces quoting. */
private boolean[] quotationTriggers;
/** Length of {@link #quotationTriggers} (highest trigger character + 1), or 0 when there is no table. */
private char maxTrigger;
/** Indexes of the columns whose values are always quoted. */
private Set<Integer> quotedColumns;
/** Selector used to resolve {@link #quotedColumns} against the headers; may be {@code null}. */
private FieldSelector quotedFieldSelector;
/** When {@code true}, {@code null} elements are eligible for quoting like any other value. */
private boolean quoteNulls;
/**
 * Creates a CSV writer that has no output resource attached.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 * Important: by not providing an instance of {@link java.io.Writer} to this constructor, only the operations that write to Strings are
 * available.
 *
 * <p>Delegates to {@link #CsvWriter(Writer, CsvWriterSettings)} with a {@code null} writer.
 *
 * @param settings the CSV writer configuration
 */
public CsvWriter(CsvWriterSettings settings) {
this((Writer) null, settings);
}
/**
 * Creates a CSV writer that sends its output to the given {@link java.io.Writer}.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 *
 * <p>Both arguments are passed unchanged to the matching {@link AbstractWriter} constructor.
 *
 * @param writer the output resource that will receive CSV records produced by this class.
 * @param settings the CSV writer configuration
 */
public CsvWriter(Writer writer, CsvWriterSettings settings) {
super(writer, settings);
}
/**
 * Creates a CSV writer that sends its output to the given file.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 *
 * <p>Both arguments are passed unchanged to the matching {@link AbstractWriter} constructor.
 *
 * @param file the output file that will receive CSV records produced by this class.
 * @param settings the CSV writer configuration
 */
public CsvWriter(File file, CsvWriterSettings settings) {
super(file, settings);
}
/**
 * Creates a CSV writer that sends its output to the given file, with the encoding given as a {@code String}.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 *
 * <p>All arguments are passed unchanged to the matching {@link AbstractWriter} constructor.
 *
 * @param file the output file that will receive CSV records produced by this class.
 * @param encoding the encoding of the file
 * @param settings the CSV writer configuration
 */
public CsvWriter(File file, String encoding, CsvWriterSettings settings) {
super(file, encoding, settings);
}
/**
 * Creates a CSV writer that sends its output to the given file, with the encoding given as a {@link Charset}.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 *
 * <p>All arguments are passed unchanged to the matching {@link AbstractWriter} constructor.
 *
 * @param file the output file that will receive CSV records produced by this class.
 * @param encoding the encoding of the file
 * @param settings the CSV writer configuration
 */
public CsvWriter(File file, Charset encoding, CsvWriterSettings settings) {
super(file, encoding, settings);
}
/**
 * Creates a CSV writer that sends its output to the given output stream.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 *
 * <p>Both arguments are passed unchanged to the matching {@link AbstractWriter} constructor.
 *
 * @param output the output stream that will be written with the CSV records produced by this class.
 * @param settings the CSV writer configuration
 */
public CsvWriter(OutputStream output, CsvWriterSettings settings) {
super(output, settings);
}
/**
 * Creates a CSV writer that sends its output to the given output stream, with the encoding given as a {@code String}.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 *
 * <p>All arguments are passed unchanged to the matching {@link AbstractWriter} constructor.
 *
 * @param output the output stream that will be written with the CSV records produced by this class.
 * @param encoding the encoding of the stream
 * @param settings the CSV writer configuration
 */
public CsvWriter(OutputStream output, String encoding, CsvWriterSettings settings) {
super(output, encoding, settings);
}
/**
 * Creates a CSV writer that sends its output to the given output stream, with the encoding given as a {@link Charset}.
 * The CsvWriter supports all settings provided by {@link CsvWriterSettings}, and requires this configuration to be properly initialized.
 *
 * <p>All arguments are passed unchanged to the matching {@link AbstractWriter} constructor.
 *
 * @param output the output stream that will be written with the CSV records produced by this class.
 * @param encoding the encoding of the stream
 * @param settings the CSV writer configuration
 */
public CsvWriter(OutputStream output, Charset encoding, CsvWriterSettings settings) {
super(output, encoding, settings);
}
/**
 * Initializes the CSV writer with CSV-specific configuration.
 *
 * <p>Copies delimiter, quote, escape and newline characters from the {@link CsvFormat}, copies the
 * quoting/escaping flags from the settings, and builds the table of characters that force a value
 * to be quoted. That table always contains {@code '\n'}, {@code '\r'}, the normalized newline and
 * the line separator characters, plus the user supplied quotation triggers and, when quote
 * escaping is enabled, the quote character itself.
 *
 * @param settings the CSV writer configuration
 */
protected final void initialize(CsvWriterSettings settings) {
    final CsvFormat format = settings.getFormat();

    // A one-character delimiter is kept in 'delimiter'; anything else goes to 'multiDelimiter'.
    final char[] delimiterChars = format.getDelimiterString().toCharArray();
    if (delimiterChars.length == 1) {
        this.delimiter = delimiterChars[0];
        this.multiDelimiter = null;
    } else {
        this.multiDelimiter = delimiterChars;
    }

    this.quoteChar = format.getQuote();
    this.escapeChar = format.getQuoteEscape();
    this.escapeEscape = format.getCharToEscapeQuoteEscaping();
    this.newLine = format.getNormalizedNewline();

    this.quoteAllFields = settings.getQuoteAllFields();
    this.quoteNulls = settings.getQuoteNulls();
    this.escapeUnquoted = settings.isEscapeUnquotedValues();
    this.inputNotEscaped = !settings.isInputEscaped();
    this.dontProcessNormalizedNewLines = !settings.isNormalizeLineEndingsWithinQuotes();

    this.quotationTriggers = null;
    this.maxTrigger = 0;
    this.quotedColumns = Collections.emptySet();
    this.quotedFieldSelector = settings.getQuotedFieldSelector();

    final char[] lineSeparator = format.getLineSeparator();
    final char[] userTriggers = settings.getQuotationTriggers();

    // Layout: [user triggers..., (sep[1]), sep[0], newLine, '\r', '\n', (quoteChar)]
    final int triggerCount = 3 + userTriggers.length + lineSeparator.length;
    final boolean quoteIsTrigger = settings.isQuoteEscapingEnabled();
    final char[] triggers = Arrays.copyOf(userTriggers, quoteIsTrigger ? triggerCount + 1 : triggerCount);
    if (quoteIsTrigger) {
        triggers[triggerCount] = quoteChar;
    }
    triggers[triggerCount - 1] = '\n';
    triggers[triggerCount - 2] = '\r';
    triggers[triggerCount - 3] = newLine;
    triggers[triggerCount - 4] = lineSeparator[0];
    if (lineSeparator.length > 1) {
        triggers[triggerCount - 5] = lineSeparator[1];
    }

    // The lookup table is sized by the highest trigger character.
    for (char trigger : triggers) {
        if (trigger > maxTrigger) {
            maxTrigger = trigger;
        }
    }
    if (maxTrigger != 0) {
        maxTrigger++;
        this.quotationTriggers = new boolean[maxTrigger];
        for (char trigger : triggers) {
            quotationTriggers[trigger] = true;
        }
    }
}
/**
 * Writes the elements of one record, separated by the configured delimiter, quoting each
 * element when required.
 *
 * <p>While {@code recordCount} is still zero, the indexes of the always-quoted columns are
 * resolved from the quoted field selector (if any) against the headers.
 *
 * @param row the values of the record to write
 */
@Override
protected void processRow(Object[] row) {
    if (recordCount == 0L && quotedFieldSelector != null) {
        int[] quotedIndexes = quotedFieldSelector.getFieldIndexes(headers);
        // Guard against a selector that cannot resolve any index (e.g. returns null without headers).
        if (quotedIndexes != null && quotedIndexes.length > 0) {
            Set<Integer> selected = new HashSet<Integer>();
            for (int idx : quotedIndexes) {
                selected.add(idx);
            }
            quotedColumns = selected;
        }
    }

    for (int i = 0; i < row.length; i++) {
        if (i != 0) {
            if (multiDelimiter == null) {
                appendToRow(delimiter);
            } else {
                appendToRow(multiDelimiter);
            }
        }

        if (dontProcessNormalizedNewLines) {
            appender.enableDenormalizedLineEndings(false);
        }

        boolean allowTrim = allowTrim(i);
        String nextElement = getStringValue(row[i]);
        // null elements are only eligible for quoting when quoteNulls is enabled
        boolean quoteOn = quoteNulls || row[i] != null;
        int originalLength = appender.length();
        boolean forceQuotes = quoteOn && (quoteAllFields || quotedColumns.contains(i));
        boolean isElementQuoted = append(i, forceQuotes, allowTrim, nextElement) && quoteOn;

        // skipped all whitespaces and wrote nothing: fall back to the null/empty replacement value
        if (appender.length() == originalLength && !usingNullOrEmptyValue) {
            boolean wasNull = nextElement == null;
            // only a non-null element that was flagged as quoted keeps its quotes
            append(i, isElementQuoted && !wasNull, allowTrim, wasNull ? nullValue : emptyValue);
        }

        if (isElementQuoted) {
            appendToRow(quoteChar);
            appendValueToRow();
            appendToRow(quoteChar);
            if (dontProcessNormalizedNewLines) {
                appender.enableDenormalizedLineEndings(true);
            }
        } else {
            appendValueToRow();
        }
    }
}
/**
 * Tests whether the characters of {@code element} beginning at {@code from} are equal to the
 * multi-character delimiter without its first character. Callers compare the first delimiter
 * character themselves and pass the index that follows it.
 *
 * @param element the value being inspected
 * @param from    index in {@code element} to compare against the second delimiter character
 * @return {@code true} if the remaining delimiter characters all match
 */
private boolean matchMultiDelimiter(String element, int from) {
    // number of delimiter characters still to be compared (everything after the first)
    final int remaining = multiDelimiter.length - 1;
    if (from + remaining > element.length()) {
        return false;
    }
    for (int offset = 0; offset < remaining; offset++) {
        if (element.charAt(from + offset) != multiDelimiter[offset + 1]) {
            return false;
        }
    }
    return true;
}
/**
 * Scans {@code element} from {@code start} looking for a character that requires the value
 * to be quoted: the delimiter (single or multi-character), or a quotation trigger. When no
 * trigger table is configured ({@code maxTrigger == 0}) only the normalized newline counts
 * as a trigger.
 *
 * @param start   index of the first character to inspect
 * @param element the value being inspected
 * @return {@code true} if a delimiter or trigger character was found
 */
private boolean quoteElement(int start, String element) {
    final boolean singleCharDelimiter = multiDelimiter == null;
    final boolean hasTriggerTable = maxTrigger != 0;

    for (int pos = start, end = element.length(); pos < end; pos++) {
        final char current = element.charAt(pos);

        final boolean atDelimiter;
        if (singleCharDelimiter) {
            atDelimiter = current == delimiter;
        } else {
            atDelimiter = current == multiDelimiter[0] && matchMultiDelimiter(element, pos + 1);
        }
        if (atDelimiter) {
            return true;
        }

        final boolean isTrigger;
        if (hasTriggerTable) {
            isTrigger = current < maxTrigger && quotationTriggers[current];
        } else {
            isTrigger = current == newLine;
        }
        if (isTrigger) {
            return true;
        }
    }
    return false;
}
/**
 * Appends one value to the appender, switching to the quoted/escaped form when needed.
 *
 * <p>A value is written through {@link #appendQuoted(int, boolean, String)} when the caller asked
 * for it, when its first written character is the quote character, when it is in the first
 * column and its first written character is the comment character, or when a delimiter or
 * quotation trigger is found in it.
 *
 * @param columnIndex     index of the column being written
 * @param isElementQuoted whether the caller requires the value to be quoted
 * @param allowTrim       whether leading/trailing whitespace handling applies to this column
 * @param element         the value to write; {@code null} is replaced by {@code nullValue}
 * @return {@code true} when the value was written in quoted form, which the caller uses to decide
 * whether to surround it with quote characters; {@code false} otherwise
 */
private boolean append(int columnIndex, boolean isElementQuoted, boolean allowTrim, String element) {
    if (element == null) {
        if (nullValue == null) {
            // nothing to write; report the caller's own quoting decision
            return isElementQuoted;
        }
        element = nullValue;
    }

    int start = 0;
    if (allowTrim && this.ignoreLeading) {
        start = skipLeadingWhitespace(whitespaceRangeStart, element);
    }

    final int length = element.length();
    // Both checks look at the first character that will actually be written. When leading
    // whitespace is skipped, a first-column value such as " #x" would otherwise be emitted
    // unquoted as "#x" and the line would start with the comment character.
    if (start < length && (element.charAt(start) == quoteChar || columnIndex == 0 && element.charAt(start) == comment)) {
        isElementQuoted = true;
    }

    if (isElementQuoted) {
        if (usingNullOrEmptyValue && length >= 2) {
            if (element.charAt(0) == quoteChar && element.charAt(length - 1) == quoteChar) {
                // value already starts and ends with the quote character: written verbatim,
                // and the caller must not add another pair of quotes
                appender.append(element);
                return false;
            } else {
                appendQuoted(start, allowTrim, element);
                return true;
            }
        } else {
            appendQuoted(start, allowTrim, element);
            return true;
        }
    }

    // Unquoted path: scan for the first special character. Every branch taken on a special
    // character returns, so each loop effectively stops at the first one it finds.
    int i = start;
    char ch = '\0';
    if (multiDelimiter == null) {
        for (; i < length; i++) {
            ch = element.charAt(i);
            if (ch == quoteChar || ch == delimiter || ch == escapeChar || (ch < maxTrigger && quotationTriggers[ch])) {
                // flush the plain characters seen so far
                appender.append(element, start, i);
                start = i + 1;
                if (ch == quoteChar || ch == escapeChar) {
                    if (quoteElement(i, element)) {
                        // a delimiter/trigger follows: the remainder must be quoted
                        appendQuoted(i, allowTrim, element);
                        return true;
                    } else if (escapeUnquoted) {
                        // escapes are applied, but the value stays unquoted
                        appendQuoted(i, allowTrim, element);
                    } else {
                        appender.append(element, i, length);
                        if (allowTrim && ignoreTrailing && element.charAt(length - 1) <= ' ' && whitespaceRangeStart < element.charAt(length - 1)) {
                            appender.updateWhitespace();
                        }
                    }
                    return isElementQuoted;
                } else if (ch == escapeChar && inputNotEscaped && escapeEscape != '\0' && escapeUnquoted) {
                    // not reachable: escapeChar is already handled by the branch above
                    appender.append(escapeEscape);
                } else if (ch == delimiter || ch < maxTrigger && quotationTriggers[ch]) {
                    appendQuoted(i, allowTrim, element);
                    return true;
                }
                appender.append(ch);
            }
        }
    } else {
        for (; i < length; i++) {
            ch = element.charAt(i);
            if (ch == quoteChar || (ch == multiDelimiter[0] && matchMultiDelimiter(element, i + 1)) || ch == escapeChar || (ch < maxTrigger && quotationTriggers[ch])) {
                // flush the plain characters seen so far
                appender.append(element, start, i);
                start = i + 1;
                if (ch == quoteChar || ch == escapeChar) {
                    if (quoteElement(i, element)) {
                        // a delimiter/trigger follows: the remainder must be quoted
                        appendQuoted(i, allowTrim, element);
                        return true;
                    } else if (escapeUnquoted) {
                        // escapes are applied, but the value stays unquoted
                        appendQuoted(i, allowTrim, element);
                    } else {
                        appender.append(element, i, length);
                        if (allowTrim && ignoreTrailing && element.charAt(length - 1) <= ' ' && whitespaceRangeStart < element.charAt(length - 1)) {
                            appender.updateWhitespace();
                        }
                    }
                    return isElementQuoted;
                } else if (ch == escapeChar && inputNotEscaped && escapeEscape != '\0' && escapeUnquoted) {
                    // not reachable: escapeChar is already handled by the branch above
                    appender.append(escapeEscape);
                } else if ((ch == multiDelimiter[0] && matchMultiDelimiter(element, i + 1)) || ch < maxTrigger && quotationTriggers[ch]) {
                    appendQuoted(i, allowTrim, element);
                    return true;
                }
                appender.append(ch);
            }
        }
    }

    // no special character found: write the value as is
    appender.append(element, start, i);
    if (allowTrim && ch <= ' ' && ignoreTrailing && whitespaceRangeStart < ch) {
        appender.updateWhitespace();
    }
    return isElementQuoted;
}
/**
 * Appends {@code element}, from index {@code start} onwards, in its quoted form. When the input
 * is not already escaped, every quote character is preceded by the escape character and every
 * escape character is preceded by the escape-of-escape character (unless that is {@code '\0'}).
 * The surrounding quote characters themselves are not written here.
 *
 * @param start     index of the first character of {@code element} to write
 * @param allowTrim whether trailing whitespace handling applies to this value
 * @param element   the value to write
 */
private void appendQuoted(int start, boolean allowTrim, String element) {
    final int end = element.length();
    int pending = start; // first character not yet copied to the appender
    int pos = start;
    char last = '\0';

    while (pos < end) {
        last = element.charAt(pos);
        final boolean isQuote = last == quoteChar;
        final boolean isEscape = last == escapeChar;
        if (isQuote || isEscape || last == newLine) {
            // copy the run of ordinary characters that precedes this one
            appender.append(element, pending, pos);
            pending = pos + 1;
            if (inputNotEscaped) {
                if (isQuote) {
                    appender.append(escapeChar);
                } else if (isEscape && escapeEscape != '\0') {
                    appender.append(escapeEscape);
                }
            }
            appender.append(last);
        }
        pos++;
    }

    appender.append(element, pending, pos);

    final boolean endsInWhitespace = last <= ' ' && whitespaceRangeStart < last;
    if (allowTrim && ignoreTrailing && endsInWhitespace) {
        appender.updateWhitespace();
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy