com.univocity.parsers.csv.CsvFormat Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of univocity-parsers Show documentation
Show all versions of univocity-parsers Show documentation
univocity's open source parsers for processing different text formats using a consistent API
The newest version!
/*******************************************************************************
* Copyright 2014 Univocity Software Pty Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package com.univocity.parsers.csv;
import com.univocity.parsers.common.*;
import java.util.*;
/**
* The CSV format configuration. In addition to the default configuration in {@link Format}, the CSV format defines:
*
*
* - delimiter (defaults to ','): the field delimiter character. Used to separate individual fields in a CSV record (where the record is usually a line of text with multiple fields).
*
e.g. the value a , b is parsed as [ a ][ b ]
* - quote (defaults to '"'): character used for escaping values where the field delimiter is part of the value.
*
e.g. the value " a , b " is parsed as [ a , b ] (instead of [ a ][ b ]
* - quoteEscape (defaults to '"'): character used for escaping the quote character inside an already quoted value
*
e.g. the value " "" a , b "" " is parsed as [ " a , b " ] (instead of [ " a ][ b " ] or [ "" a , b "" ])
* - charToEscapeQuoteEscaping (defaults to '\0' - undefined): character used for escaping the escape for the quote character
*
e.g. if the quoteEscape and charToEscapeQuoteEscaping are set to '\', the value " \\\" a , b \\\" " is parsed as [ \" a , b \" ]
*
*
* @author Univocity Software Pty Ltd - [email protected]
* @see com.univocity.parsers.common.Format
*/
public class CsvFormat extends Format {
private char quote = '"';
private char quoteEscape = '"';
private String delimiter = ",";
private Character charToEscapeQuoteEscaping = null;
/**
* Returns the character used for escaping values where the field delimiter is part of the value. Defaults to '"'
*
* @return the quote character
*/
public char getQuote() {
return quote;
}
/**
* Defines the character used for escaping values where the field delimiter is part of the value. Defaults to '"'
*
* @param quote the quote character
*/
public void setQuote(char quote) {
this.quote = quote;
}
/**
* Identifies whether or not a given character is used for escaping values where the field delimiter is part of the value
*
* @param ch the character to be verified
*
* @return true if the given character is the character used for escaping values, false otherwise
*/
public boolean isQuote(char ch) {
return this.quote == ch;
}
/**
* Returns the character used for escaping quotes inside an already quoted value. Defaults to '"'
*
* @return the quote escape character
*/
public char getQuoteEscape() {
return quoteEscape;
}
/**
* Defines the character used for escaping quotes inside an already quoted value. Defaults to '"'
*
* @param quoteEscape the quote escape character
*/
public void setQuoteEscape(char quoteEscape) {
this.quoteEscape = quoteEscape;
}
/**
* Identifies whether or not a given character is used for escaping quotes inside an already quoted value.
*
* @param ch the character to be verified
*
* @return true if the given character is the quote escape character, false otherwise
*/
public boolean isQuoteEscape(char ch) {
return this.quoteEscape == ch;
}
/**
* Returns the field delimiter character. Defaults to ','
*
* @return the field delimiter character
*/
public char getDelimiter() {
if (delimiter.length() > 1) {
throw new UnsupportedOperationException("Delimiter '" + delimiter + "' has more than one character. Use method getDelimiterString()");
}
return delimiter.charAt(0);
}
/**
* Returns the field delimiter sequence.
*
* @return the field delimiter as a {@code String}.
*/
public String getDelimiterString() {
return delimiter;
}
/**
* Defines the field delimiter character. Defaults to ','
*
* @param delimiter the field delimiter character
*/
public void setDelimiter(char delimiter) {
this.delimiter = String.valueOf(delimiter);
}
/**
* Defines the field delimiter as a sequence of characters. Defaults to ','
*
* @param delimiter the field delimiter sequence.
*/
public void setDelimiter(String delimiter) {
if (delimiter == null) {
throw new IllegalArgumentException("Delimiter cannot be null");
}
if (delimiter.isEmpty()) {
throw new IllegalArgumentException("Delimiter cannot be empty");
}
this.delimiter = delimiter;
}
/**
* Identifies whether or not a given character represents a field delimiter
*
* @param ch the character to be verified
*
* @return true if the given character is the field delimiter character, false otherwise
*/
public boolean isDelimiter(char ch) {
if (delimiter.length() > 1) {
throw new UnsupportedOperationException("Delimiter '" + delimiter + "' has more than one character. Use method isDelimiter(String)");
}
return this.delimiter.charAt(0) == ch;
}
/**
* Identifies whether or not a given character represents a field delimiter
*
* @param sequence the character sequence to be verified
*
* @return true if the given sequence is the field delimiter character sequence, false otherwise
*/
public boolean isDelimiter(String sequence) {
return this.delimiter.equals(sequence);
}
/**
* Returns the character used to escape the character used for escaping quotes defined by {@link #getQuoteEscape()}.
* For example, if the quote escape is set to '\', and the quoted value ends with: \", as in the following example:
*
*
* [ " a\\", b ]
*
*
* Then:
*
* - If the character to escape the '\' is undefined, the record won't be parsed. The parser will read characters: [a],[\],["],[,],[ ],[b] and throw an error because it cannot find a closing quote
* - If the character to escape the '\' is defined as '\', the record will be read with 2 values: [a\] and [b]
*
* Defaults to '\0' (undefined)
*
* @return the character to escape the character used for escaping quotes defined
*/
public final char getCharToEscapeQuoteEscaping() {
if (charToEscapeQuoteEscaping == null) { //not provided by the user
if (quote == quoteEscape) {
return '\0'; //not required
} else {
return quoteEscape;
}
}
return charToEscapeQuoteEscaping;
}
/**
* Defines the character used to escape the character used for escaping quotes defined by {@link #getQuoteEscape()}.
* For example, if the quote escape is set to '\', and the quoted value ends with: \", as in the following example:
*
*
* [ " a\\", b ]
*
*
* Then:
*
* - If the character to escape the '\' is undefined, the record won't be parsed. The parser will read characters: [a],[\],["],[,],[ ],[b] and throw an error because it cannot find a closing quote
* - If the character to escape the '\' is defined as '\', the record will be read with 2 values: [a\] and [b]
*
* Defaults to '\0' (undefined)
*
* @param charToEscapeQuoteEscaping the character to escape the character used for escaping quotes defined
*/
public final void setCharToEscapeQuoteEscaping(char charToEscapeQuoteEscaping) {
this.charToEscapeQuoteEscaping = charToEscapeQuoteEscaping;
}
/**
* Identifies whether or not a given character is used to escape the character used for escaping quotes defined by {@link #getQuoteEscape()}.
*
* @param ch the character to be verified
*
* @return true if the given character is used to escape the quote escape character, false otherwise
*/
public final boolean isCharToEscapeQuoteEscaping(char ch) {
char current = getCharToEscapeQuoteEscaping();
return current != '\0' && current == ch;
}
@Override
protected TreeMap getConfiguration() {
TreeMap out = new TreeMap();
out.put("Quote character", quote);
out.put("Quote escape character", quoteEscape);
out.put("Quote escape escape character", charToEscapeQuoteEscaping);
out.put("Field delimiter", delimiter);
return out;
}
@Override
public final CsvFormat clone() {
return (CsvFormat) super.clone();
}
}