com.fasterxml.jackson.dataformat.csv.CsvSchema Maven / Gradle / Ivy
Show all versions of jackson-dataformat-csv Show documentation
package com.fasterxml.jackson.dataformat.csv;
import java.util.*;
import java.util.function.UnaryOperator;
import com.fasterxml.jackson.core.FormatSchema;
/**
* Simple {@link FormatSchema} sub-type that defines properties of
* a CSV document to read or write.
* Properties supported currently are:
*
* - {@code columns} (List of ColumnDef) [default: empty List]: Ordered list of columns (which may be empty, see below).
* Each column has name (mandatory) as well as type (optional; if not
* defined, defaults to "String").
* Note that
*
* - {@code useHeader} (boolean) [default: false]: whether the first line of physical document defines
* column names (true) or not (false): if enabled, parser will take
* first-line values to define column names; and generator will output
* column names as the first line
*
* - {@code quoteChar} (char) [default: double-quote ('")]: character used for quoting values
* that contain quote characters or linefeeds.
*
* - {@code columnSeparator} (char) [default: comma (',')]: character used to separate values.
* Other commonly used values include tab ('\t') and pipe ('|')
*
* - {@code arrayElementSeparator} (String) [default: semicolon (";")]: string used to separate array elements.
*
* - {@code lineSeparator} (String) [default: "\n"]: character used to separate data rows.
* Only used by generator; parser accepts three standard linefeeds ("\r", "\r\n", "\n").
*
* - {@code escapeChar} (int) [default: -1 meaning "none"]: character, if any, used to
* escape values. Most commonly defined as backslash ('\'). Only used by parser;
* generator only uses quoting, including doubling up of quotes to indicate quote char
* itself.
*
* - {@code skipFirstDataRow} (boolean) [default: false]: whether the first data line (either
* first line of the document, if useHeader=false, or second, if useHeader=true)
* should be completely ignored by parser. Needed to support CSV-like file formats
* that include additional non-data content before real data begins (specifically
* some database dumps do this)
*
* - {@code nullValue} (String) [default: "" (empty String)]: When asked to write Java `null`,
* this String value will be used instead.
* With 2.6, value will also be recognized during value reads.
*
* - {@code strictHeaders} (boolean) [default: false] (added in Jackson 2.7): whether names of
* columns defined in the schema MUST match with actual declaration from
* the header row (if header row handling enabled): if true, they must be and
* an exception if thrown if order differs: if false, no verification is performed.
*
* - {@code allowComments} (boolean) [default: false]: whether lines that start with character "#"
* are processed as comment lines and skipped/ignored.
*
* - {@code anyProperty} (String] [default: none]: if "any properties" (properties for
* 'extra' columns; ones not specified in schema) are enabled, they are mapped to
* this name: leaving it as {@code null} disables use of
* "any properties" (and they are either ignored, or an exception
* is thrown, depending on other settings); setting it to a non-null
* String value will expose all extra properties under one specified name.
* Most often used with Jackson {@code @JsonAnySetter} annotation.
*
* Note that schemas without any columns are legal, but if no columns
* are added, behavior of parser/generator is usually different and
* content will be exposed as logical Arrays instead of Objects.
*
* There are 4 ways to create CsvSchema
instances:
*
* - Manually build one, using {@link Builder}
*
* - Modify existing schema (using
withXxx
methods
* or {@link #rebuild} for creating {@link Builder})
*
* - Create schema based on a POJO definition (Class), using
* {@link CsvMapper} methods like {@link CsvMapper#schemaFor(java.lang.Class)}.
*
* - Request that {@link CsvParser} reads schema from the first line:
* enable "useHeader" property for the initial schema, and let parser
* read column names from the document itself.
*
*
*/
public class CsvSchema
implements FormatSchema,
Iterable,
java.io.Serializable // since 2.5
{
private static final long serialVersionUID = 1L; // 2.5
/*
/**********************************************************************
/* Constants, feature flags
/**********************************************************************
*/
protected final static int ENCODING_FEATURE_USE_HEADER = 0x0001;
protected final static int ENCODING_FEATURE_SKIP_FIRST_DATA_ROW = 0x0002;
protected final static int ENCODING_FEATURE_ALLOW_COMMENTS = 0x0004;
protected final static int ENCODING_FEATURE_REORDER_COLUMNS = 0x0008;
protected final static int ENCODING_FEATURE_STRICT_HEADERS = 0x0010;
protected final static int DEFAULT_ENCODING_FEATURES = 0;
protected final static char[] NO_CHARS = new char[0];
/*
/**********************************************************************
/* Constants, default settings
/**********************************************************************
*/
/**
* Default separator for column values is comma (hence "Comma-Separated Values")
*/
public final static char DEFAULT_COLUMN_SEPARATOR = ',';
/**
* Default separator for array elements within a column value is
* semicolon.
*/
public final static String DEFAULT_ARRAY_ELEMENT_SEPARATOR = ";";
/**
* Marker for the case where no array element separator is used
*/
public final static String NO_ARRAY_ELEMENT_SEPARATOR = "";
/**
* By default no "any properties" (properties for 'extra' columns; ones
* not specified in schema) are used, so null
is used as marker.
*
* @since 2.7
*/
public final static String DEFAULT_ANY_PROPERTY_NAME = null;
public final static char DEFAULT_QUOTE_CHAR = '"';
/**
* By default, nulls are written as empty Strings (""); and no coercion
* is performed from any String (higher level databind may, however,
* coerce Strings into Java nulls).
* To use automatic coercion on reading, null value must be set explicitly
* to empty String ("").
*
* NOTE: before 2.6, this value default to empty char[]
; changed
* to Java null in 2.6.
*/
public final static char[] DEFAULT_NULL_VALUE = null;
/**
* By default, no escape character is used -- this is denoted by
* int value that does not map to a valid character
*/
public final static int DEFAULT_ESCAPE_CHAR = -1;
public final static char[] DEFAULT_LINEFEED = "\n".toCharArray();
/*
/**********************************************************************
/* Constants, other
/**********************************************************************
*/
protected final static Column[] NO_COLUMNS = new Column[0];
/*
/**********************************************************************
/* Helper classes
/**********************************************************************
*/
/**
* Enumeration that defines optional type indicators that can be passed
* with schema. If used type is used to determine type of
* {@link com.fasterxml.jackson.core.JsonToken}
* that column values are exposed as.
*/
public enum ColumnType
{
/**
* Default type if not explicitly defined; value will
* be presented as VALUE_STRING
by parser,
* that is, no type-inference is performed, and value is
* not trimmed.
*
* Note that this type allows coercion into array, if higher
* level application calls
* {@link com.fasterxml.jackson.core.JsonParser#isExpectedStartArrayToken},
* unlike more explicit types.
*/
STRING,
/**
* Value is considered to be a String, except that tokens
* "null", "true" and "false" are recognized as matching
* tokens and reported as such;
* and values are trimmed (leading/trailing white space)
*/
STRING_OR_LITERAL,
/**
* Value should be a number, but literals "null", "true" and "false"
* are also understood, and an empty String is considered null.
* Values are also trimmed (leading/trailing white space)
* Other non-numeric Strings may cause parsing exception.
*/
NUMBER,
/**
* Value is taken to be a number (if it matches valid JSON number
* formatting rules), literal (null, true or false) or String,
* depending on best match.
* Values are also trimmed (leading/trailing white space)
*/
NUMBER_OR_STRING,
/**
* Value is expected to be a boolean ("true", "false") String,
* or "null", or empty String (equivalent to null).
* Values are trimmed (leading/trailing white space).
* Values other than indicated above may result in an exception.
*
* @since 2.5
*/
BOOLEAN,
/**
* Value will be a multi-value sequence, separated by array element
* separator. Element type itself may be any scalar type (that is, number
* or String) and will not be optimized.
* Separator may be overridden on per-column basis.
*
* Note that this type is used for generic concept of multiple values, and
* not specifically to match Java arrays: data-binding may match such columns
* to {@link java.util.Collection}s as well, or even other types as necessary.
*
* @since 2.5
*/
ARRAY,
;
}
/**
* Representation of info for a single column
*/
public static class Column implements java.io.Serializable // since 2.4.3
{
private static final long serialVersionUID = 1L;
public final static Column PLACEHOLDER = new Column(0, "");
private final String _name;
private final int _index;
private final ColumnType _type;
/**
* NOTE: type changed from `char` to `java.lang.String` in 2.7
*
* @since 2.5
*/
private final String _arrayElementSeparator;
/**
* Value decorator used for this column, if any; {@code null} if none.
* Used to add decoration on serialization (writing) and remove decoration
* on deserialization (reading).
*
* @since 2.18
*/
private final CsvValueDecorator _valueDecorator;
/**
* Link to the next column within schema, if one exists;
* null for the last column.
*
* @since 2.6
*/
private final Column _next;
public Column(int index, String name) {
this(index, name, ColumnType.STRING, "");
}
public Column(int index, String name, ColumnType type) {
this(index, name, type, "");
}
public Column(int index, String name, ColumnType type, String arrayElementSep)
{
_index = index;
_name = name;
_type = type;
_arrayElementSeparator = _validArrayElementSeparator(arrayElementSep);
_valueDecorator = null;
_next = null;
}
public Column(Column src, Column next) {
this(src, src._index, src._valueDecorator, next);
}
protected Column(Column src, int index, Column next) {
this(src, index, src._valueDecorator, next);
}
/**
* @since 2.18
*/
protected Column(Column src, CsvValueDecorator valueDecorator) {
this(src, src._index, valueDecorator, src._next);
}
/**
* @since 2.18
*/
protected Column(Column src, int index, CsvValueDecorator valueDecorator,
Column next)
{
_index = index;
_name = src._name;
_type = src._type;
_arrayElementSeparator = src._arrayElementSeparator;
_valueDecorator = valueDecorator;
_next = next;
}
public Column withName(String newName) {
if (_name == newName) {
return this;
}
return new Column(_index, newName, _type, _arrayElementSeparator);
}
public Column withType(ColumnType newType) {
if (newType == _type) {
return this;
}
return new Column(_index, _name, newType, _arrayElementSeparator);
}
public Column withArrayElementSeparator(String separator) {
String sep = _validArrayElementSeparator(separator);
if (_arrayElementSeparator.equals(sep)) {
return this;
}
return new Column(_index, _name, _type, sep);
}
/**
* @since 2.18
*/
public Column withValueDecorator(CsvValueDecorator valueDecorator) {
if (valueDecorator == _valueDecorator) {
return this;
}
return new Column(this, valueDecorator);
}
public Column withNext(Column next) {
if (_next == next) {
return this;
}
return new Column(this, next);
}
/**
* @since 2.7
*/
public Column withNext(int index, Column next) {
if ((_index == index) && (_next == next)) {
return this;
}
return new Column(this, index, next);
}
public int getIndex() { return _index; }
public String getName() { return _name; }
public ColumnType getType() { return _type; }
public Column getNext() { return _next; }
/**
* Access that returns same as {@link #getNext} iff name of that
* column is same as given name
*/
public Column getNextWithName(String name) {
if (_next != null && name.equals(_next._name)) {
return _next;
}
return null;
}
public boolean hasName(String n) {
return (_name == n) || _name.equals(n);
}
/**
* @since 2.5
*/
public String getArrayElementSeparator() { return _arrayElementSeparator; }
/**
* @since 2.18
*/
public CsvValueDecorator getValueDecorator() { return _valueDecorator; }
public boolean isArray() {
return (_type == ColumnType.ARRAY);
}
}
/**
* Class used for building {@link CsvSchema} instances.
*/
public static class Builder
{
protected final ArrayList _columns = new ArrayList();
/**
* Bit-flag for general-purpose on/off features.
*
* @since 2.5
*/
protected int _encodingFeatures = DEFAULT_ENCODING_FEATURES;
protected char _columnSeparator = DEFAULT_COLUMN_SEPARATOR;
protected String _arrayElementSeparator = DEFAULT_ARRAY_ELEMENT_SEPARATOR;
/**
* If "any properties" (properties for 'extra' columns; ones
* not specified in schema) are enabled, they are mapped to
* this name: leaving it as {@code null} disables use of
* "any properties" (and they are either ignored, or an exception
* is thrown, depending on other settings); setting it to a non-null
* String value will expose all extra properties under one specified
* name.
*
* @since 2.7
*/
protected String _anyPropertyName = DEFAULT_ANY_PROPERTY_NAME;
// note: need to use int to allow -1 for 'none'
protected int _quoteChar = DEFAULT_QUOTE_CHAR;
// note: need to use int to allow -1 for 'none'
protected int _escapeChar = DEFAULT_ESCAPE_CHAR;
protected char[] _lineSeparator = DEFAULT_LINEFEED;
/**
* @since 2.5
*/
protected char[] _nullValue = DEFAULT_NULL_VALUE;
public Builder() { }
/**
* "Copy" constructor which creates builder that has settings of
* given source schema
*/
public Builder(CsvSchema src)
{
for (Column col : src._columns) {
_columns.add(col);
}
_encodingFeatures = src._features;
_columnSeparator = src._columnSeparator;
_arrayElementSeparator = src._arrayElementSeparator;
_quoteChar = src._quoteChar;
_escapeChar = src._escapeChar;
_lineSeparator = src._lineSeparator;
_nullValue = src._nullValue;
_anyPropertyName = src._anyPropertyName;
}
/**
* NOTE: does NOT check for duplicate column names so it is possibly to
* accidentally add duplicates.
*/
public Builder addColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name));
}
/**
* Add column with given name, and with changes to apply (as specified
* by second argument, {@code transformer}).
* NOTE: does NOT check for duplicate column names so it is possibly to
* accidentally add duplicates.
*
* @param name Name of column to add
* @param transformer Changes to apply to column definition
*
* @since 2.18
*/
public Builder addColumn(String name, UnaryOperator transformer) {
Column col = transformer.apply(new Column(_columns.size(), name));
return addColumn(col);
}
/**
* NOTE: does NOT check for duplicate column names so it is possibly to
* accidentally add duplicates.
*/
public Builder addColumn(String name, ColumnType type) {
int index = _columns.size();
return addColumn(new Column(index, name, type));
}
/**
* Add column with given name, and with changes to apply (as specified
* by second argument, {@code transformer}).
* NOTE: does NOT check for duplicate column names so it is possibly to
* accidentally add duplicates.
*
* @param name Name of column to add
* @param type Type of the column to add
* @param transformer Changes to apply to column definition
*
* @since 2.18
*/
public Builder addColumn(String name, ColumnType type,
UnaryOperator transformer) {
Column col = transformer.apply(new Column(_columns.size(), name, type));
return addColumn(col);
}
/**
* NOTE: does NOT check for duplicate column names so it is possibly to
* accidentally add duplicates.
*/
public Builder addColumn(Column c) {
_columns.add(c);
return this;
}
/**
* NOTE: does NOT check for duplicate column names so it is possibly to
* accidentally add duplicates.
*
* @since 2.9
*/
public Builder addColumns(Iterable cs) {
for (Column c : cs) {
_columns.add(c);
}
return this;
}
/**
* NOTE: does NOT check for duplicate column names so it is possibly to
* accidentally add duplicates.
*
* @since 2.9
*/
public Builder addColumns(Iterable names, ColumnType type) {
Builder result = this;
for (String name : names) {
result = addColumn(name, type);
}
return result;
}
/**
* NOTE: unlike many other add methods, this method DOES check for, and
* discard, possible duplicate columns: that is, if this builder already
* has a column with same name as column to be added, existing column
* is retained and new column ignored.
*
* @since 2.9
*/
public Builder addColumnsFrom(CsvSchema schema) {
Builder result = this;
for (Column col : schema) {
if (!hasColumn(col.getName())) {
result = result.addColumn(col);
}
}
return result;
}
public Builder addArrayColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.ARRAY, ""));
}
/**
* @since 2.7
*/
public Builder addArrayColumn(String name, String elementSeparator) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.ARRAY, elementSeparator));
}
public Builder addNumberColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.NUMBER));
}
public Builder addBooleanColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.BOOLEAN));
}
public Builder renameColumn(int index, String newName) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withName(newName));
return this;
}
public Builder replaceColumn(int index, Column c) {
_checkIndex(index);
_columns.set(index, c);
return this;
}
/**
* @since 2.16
*/
public Builder removeColumn(int index) {
_checkIndex(index);
_columns.remove(index);
return this;
}
/**
* Helper method called to drop the last collected column name if
* it is empty: called if {link CsvParser.Feature#ALLOW_TRAILING_COMMA}
* enabled to remove the last entry after being added initially.
*
* @since 2.11.2
*/
public void dropLastColumnIfEmpty() {
final int ix = _columns.size() - 1;
if (ix >= 0) {
if (_columns.get(ix).getName().isEmpty()) {
_columns.remove(ix);
}
}
}
public Builder setColumnType(int index, ColumnType type) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withType(type));
return this;
}
public Builder removeArrayElementSeparator(int index) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withArrayElementSeparator(""));
return this;
}
/**
* @since 2.7
*/
public Builder setArrayElementSeparator(int index, String sep) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withArrayElementSeparator(sep));
return this;
}
public Builder setAnyPropertyName(String name) {
_anyPropertyName = name;
return this;
}
public Builder clearColumns() {
_columns.clear();
return this;
}
public int size() {
return _columns.size();
}
public Iterator getColumns() {
return _columns.iterator();
}
/**
*
* NOTE: this method requires linear scan over existing columns
* so it may be more efficient to use other types of lookups if
* available (for example, {@link CsvSchema#column(String)} has a
* hash lookup to use).
*
* @since 2.9
*/
public boolean hasColumn(String name) {
for (int i = 0, end = _columns.size(); i < end; ++i) {
if (_columns.get(i).getName().equals(name)) {
return true;
}
}
return false;
}
/**
* Method for specifying whether Schema should indicate that
* a header line (first row that contains column names) is to be
* used for reading and writing or not.
*/
public Builder setUseHeader(boolean b) {
_feature(ENCODING_FEATURE_USE_HEADER, b);
return this;
}
/**
* Use in combination with setUseHeader. When use header flag is
* is set, this setting will reorder the columns defined in this
* schema to match the order set by the header.
*
* @param b Enable / Disable this setting
* @return This Builder instance
*
* @since 2.7
*/
public Builder setReorderColumns(boolean b) {
_feature(ENCODING_FEATURE_REORDER_COLUMNS, b);
return this;
}
/**
* Use in combination with {@link #setUseHeader}. When `strict-headers`
* is set, encoder will ensure the headers are in the order
* of the schema; if order differs, an exception is thrown.
*
* @param b Enable / Disable this setting
* @return This Builder instance
*
* @since 2.7
*/
public Builder setStrictHeaders(boolean b) {
_feature(ENCODING_FEATURE_STRICT_HEADERS, b);
return this;
}
/**
* Method for specifying whether Schema should indicate that
* the first line that is not a header (if header handling enabled)
* should be skipped in its entirety.
*/
public Builder setSkipFirstDataRow(boolean b) {
_feature(ENCODING_FEATURE_SKIP_FIRST_DATA_ROW, b);
return this;
}
/**
* Method for specifying whether Schema should indicate that
* "hash comments" (lines where the first non-whitespace character
* is '#') are allowed; if so, they will be skipped without processing.
*
* @since 2.5
*/
public Builder setAllowComments(boolean b) {
_feature(ENCODING_FEATURE_ALLOW_COMMENTS, b);
return this;
}
protected final void _feature(int feature, boolean state) {
_encodingFeatures = state ? (_encodingFeatures | feature) : (_encodingFeatures & ~feature);
}
/**
* Method for specifying character used to separate column
* values.
* Default is comma (',').
*/
public Builder setColumnSeparator(char c) {
_columnSeparator = c;
return this;
}
/**
* Method for specifying character used to separate array element
* values.
* Default value is semicolon (";")
*
* @since 2.7
*/
public Builder setArrayElementSeparator(String separator) {
_arrayElementSeparator = _validArrayElementSeparator(separator);
return this;
}
/**
* @since 2.7
*/
public Builder disableArrayElementSeparator() {
_arrayElementSeparator = NO_ARRAY_ELEMENT_SEPARATOR;
return this;
}
/**
* Method for specifying character used for optional quoting
* of values.
* Default is double-quote ('"').
*/
public Builder setQuoteChar(char c) {
_quoteChar = c;
return this;
}
/**
* @since 2.4
*/
public Builder disableQuoteChar() {
_quoteChar = -1;
return this;
}
/**
* Method for specifying character used for optional escaping
* of characters in quoted String values.
* Default is "not used", meaning that no escaping used.
*/
public Builder setEscapeChar(char c) {
_escapeChar = c;
return this;
}
/**
* Method for specifying that no escape character is to be used
* with CSV documents this schema defines.
*/
public Builder disableEscapeChar() {
_escapeChar = -1;
return this;
}
public Builder setLineSeparator(String lf) {
_lineSeparator = lf.toCharArray();
return this;
}
public Builder setLineSeparator(char lf) {
_lineSeparator = new char[] { lf };
return this;
}
public Builder setNullValue(String nvl) {
return setNullValue((nvl == null) ? null : nvl.toCharArray());
}
public Builder setNullValue(char[] nvl) {
_nullValue = nvl;
return this;
}
public CsvSchema build()
{
Column[] cols = _columns.toArray(new Column[_columns.size()]);
return new CsvSchema(cols, _encodingFeatures,
_columnSeparator, _quoteChar, _escapeChar,
_lineSeparator, _arrayElementSeparator,
_nullValue, _anyPropertyName);
}
protected void _checkIndex(int index) {
if (index < 0 || index >= _columns.size()) {
throw new IllegalArgumentException("Illegal index "+index+"; only got "+_columns.size()+" columns");
}
}
}
/*
/**********************************************************************
/* Configuration, construction
/**********************************************************************
*/
/**
* Column definitions, needed for optional header and/or mapping
* of field names to column positions.
*/
protected final Column[] _columns;
protected final Map _columnsByName;
/**
* Bitflag for general-purpose on/off features.
*
* @since 2.5
*/
protected int _features = DEFAULT_ENCODING_FEATURES;
protected final char _columnSeparator;
protected final String _arrayElementSeparator;
protected final int _quoteChar;
protected final int _escapeChar;
protected final char[] _lineSeparator;
/**
* @since 2.5
*/
protected final char[] _nullValue;
protected transient String _nullValueAsString;
/**
* If "any properties" (properties for 'extra' columns; ones
* not specified in schema) are enabled, they are mapped to
* this name: leaving it as null
disables use of
* "any properties" (and they are either ignored, or an exception
* is thrown, depending on other settings); setting it to a non-null
* String value will expose all extra properties under one specified
* name.
*
* @since 2.7
*/
protected final String _anyPropertyName;
/**
* @since 2.7
*/
public CsvSchema(Column[] columns, int features,
char columnSeparator, int quoteChar, int escapeChar,
char[] lineSeparator, String arrayElementSeparator,
char[] nullValue, String anyPropertyName)
{
if (columns == null) {
columns = NO_COLUMNS;
} else {
columns = _link(columns);
}
_columns = columns;
_features = features;
_columnSeparator = columnSeparator;
_arrayElementSeparator = arrayElementSeparator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
_lineSeparator = lineSeparator;
_nullValue = nullValue;
_anyPropertyName = anyPropertyName;
// and then we may need to create a mapping
if (_columns.length == 0) {
_columnsByName = Collections.emptyMap();
} else {
_columnsByName = new LinkedHashMap<>(4 + _columns.length);
for (Column c : _columns) {
_columnsByName.put(c.getName(), c);
}
}
}
/**
* Copy constructor used for creating variants using
* withXxx()
methods.
*/
protected CsvSchema(Column[] columns, int features,
char columnSeparator, int quoteChar, int escapeChar,
char[] lineSeparator, String arrayElementSeparator,
char[] nullValue,
Map columnsByName, String anyPropertyName)
{
_columns = columns;
_features = features;
_columnSeparator = columnSeparator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
_lineSeparator = lineSeparator;
_arrayElementSeparator = arrayElementSeparator;
_nullValue = nullValue;
_columnsByName = columnsByName;
_anyPropertyName = anyPropertyName;
}
/**
* Copy constructor used for creating variants using
* sortedBy()
methods.
*/
protected CsvSchema(CsvSchema base, Column[] columns)
{
_columns = _link(columns);
_features = base._features;
_columnSeparator = base._columnSeparator;
_quoteChar = base._quoteChar;
_escapeChar = base._escapeChar;
_lineSeparator = base._lineSeparator;
_arrayElementSeparator = base._arrayElementSeparator;
_nullValue = base._nullValue;
_anyPropertyName = base._anyPropertyName;
// and then we may need to create a mapping
if (_columns.length == 0) {
_columnsByName = Collections.emptyMap();
} else {
_columnsByName = new LinkedHashMap<>(4 + _columns.length);
for (Column c : _columns) {
_columnsByName.put(c.getName(), c);
}
}
}
/**
* Copy constructor used for creating variants for on/off features
*
* @since 2.5
*/
protected CsvSchema(CsvSchema base, int features) {
_columns = base._columns;
_features = features;
_columnSeparator = base._columnSeparator;
_quoteChar = base._quoteChar;
_escapeChar = base._escapeChar;
_lineSeparator = base._lineSeparator;
_arrayElementSeparator = base._arrayElementSeparator;
_nullValue = base._nullValue;
_anyPropertyName = base._anyPropertyName;
_columnsByName = base._columnsByName;
}
/**
* Helper method used for chaining columns together using next-linkage,
* as well as ensuring that indexes are correct.
*/
private static Column[] _link(Column[] orig)
{
int i = orig.length;
Column[] result = new Column[i];
Column prev = null;
for (; --i >= 0; ) {
Column curr = orig[i].withNext(i, prev);
result[i] = curr;
prev = curr;
}
return result;
}
public static Builder builder() {
return new Builder();
}
/**
* Accessor for creating a "default" CSV schema instance, with following
* settings:
*
* - Does NOT use header line
*
* - Uses double quotes ('"') for quoting of field values (if necessary)
*
* - Uses comma (',') as the field separator
*
* - Uses Unix linefeed ('\n') as row separator
*
* - Does NOT use any escape characters
*
* - Does NOT have any columns defined
*
*
*/
public static CsvSchema emptySchema() {
return builder().build();
}
/**
* Helper method for constructing Builder that can be used to create modified
* schema.
*/
public Builder rebuild() {
return new Builder(this);
}
/*
/**********************************************************************
/* Mutant factories
/**********************************************************************
*/
public CsvSchema withUseHeader(boolean state) {
return _withFeature(ENCODING_FEATURE_USE_HEADER, state);
}
/**
* Returns a clone of this instance by changing or setting the
* column reordering flag
*
* @param state New value for setting
* @return A copy of itself, ensuring the setting for
* the column reordering feature.
* @since 2.7
*/
public CsvSchema withColumnReordering(boolean state) {
return _withFeature(ENCODING_FEATURE_REORDER_COLUMNS, state);
}
/**
* Returns a clone of this instance by changing or setting the
* strict headers flag
*
* @param state New value for setting
* @return A copy of itself, ensuring the setting for
* the strict headers feature.
* @since 2.7
*/
public CsvSchema withStrictHeaders(boolean state) {
return _withFeature(ENCODING_FEATURE_STRICT_HEADERS, state);
}
/**
* Helper method for constructing and returning schema instance that
* is similar to this one, except that it will be using header line.
*/
public CsvSchema withHeader() {
return _withFeature(ENCODING_FEATURE_USE_HEADER, true);
}
/**
* Helper method for construcing and returning schema instance that
* is similar to this one, except that it will not be using header line.
*/
public CsvSchema withoutHeader() {
return _withFeature(ENCODING_FEATURE_USE_HEADER, false);
}
public CsvSchema withSkipFirstDataRow(boolean state) {
return _withFeature(ENCODING_FEATURE_SKIP_FIRST_DATA_ROW, state);
}
/**
* Method to indicate whether "hash comments" are allowed
* for document described by this schema.
*
* @since 2.5
*/
public CsvSchema withAllowComments(boolean state) {
return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, state);
}
/**
* Method to indicate that "hash comments" ARE allowed
* for document described by this schema.
*
* @since 2.5
*/
public CsvSchema withComments() {
return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, true);
}
/**
* Method to indicate that "hash comments" are NOT allowed for document
* described by this schema.
*
* @since 2.5
*/
public CsvSchema withoutComments() {
return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, false);
}
protected CsvSchema _withFeature(int feature, boolean state) {
int newFeatures = state ? (_features | feature) : (_features & ~feature);
return (newFeatures == _features) ? this : new CsvSchema(this, newFeatures);
}
public CsvSchema withColumnSeparator(char sep) {
return (_columnSeparator == sep) ? this :
new CsvSchema(_columns, _features,
sep, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
public CsvSchema withQuoteChar(char c) {
return (_quoteChar == c) ? this :
new CsvSchema(_columns, _features,
_columnSeparator, c, _escapeChar, _lineSeparator,_arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
public CsvSchema withoutQuoteChar() {
return (_quoteChar == -1) ? this :
new CsvSchema(_columns, _features,
_columnSeparator, -1, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
public CsvSchema withEscapeChar(char c) {
return (_escapeChar == c) ? this
: new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, c, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
public CsvSchema withoutEscapeChar() {
return (_escapeChar == -1) ? this
: new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, -1, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* @since 2.7
*/
public CsvSchema withArrayElementSeparator(String separator) {
String sep = separator == null ? "" : separator;
return (_arrayElementSeparator.equals(sep)) ? this : new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator, separator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* @since 2.5
*/
public CsvSchema withoutArrayElementSeparator() {
return (_arrayElementSeparator.isEmpty()) ? this
: new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator, "",
_nullValue, _columnsByName, _anyPropertyName);
}
public CsvSchema withLineSeparator(String sep) {
return new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, sep.toCharArray(),
_arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName);
}
/**
* @since 2.5
*/
public CsvSchema withNullValue(String nvl) {
return new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator,
_arrayElementSeparator,
(nvl == null) ? null : nvl.toCharArray(),
_columnsByName, _anyPropertyName);
}
public CsvSchema withoutColumns() {
return new CsvSchema(NO_COLUMNS, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* Mutant factory method that will try to combine columns of this schema with those
* from `toAppend`, starting with columns of this instance, and ignoring
* duplicates (if any) from argument `toAppend`.
* All settings aside from column sets are copied from `this` instance.
*
* As with all `withXxx()` methods this method never modifies `this` but either
* returns it unmodified (if no new columns found from `toAppend`), or constructs
* a new instance and returns that.
*
* @return Either this schema (if nothing changed), or newly constructed {@link CsvSchema}
* with appended columns.
*
* @since 2.9
*/
public CsvSchema withColumnsFrom(CsvSchema toAppend) {
int addCount = toAppend.size();
if (addCount == 0) {
return this;
}
Builder b = rebuild();
for (int i = 0; i < addCount; ++i) {
Column col = toAppend.column(i);
if (column(col.getName()) == null) {
b.addColumn(col);
}
}
return b.build();
}
/**
* Mutant factory method that will try to replace specified column with
* changed definition (but same name), leaving other columns as-is.
*
* As with all `withXxx()` methods this method never modifies `this` but either
* returns it unmodified (if no change to column), or constructs
* a new schema instance and returns that.
*
* @param columnName Name of column to replace
* @param transformer Transformation to apply to the column
*
* @return Either this schema (if column did not change), or newly constructed {@link CsvSchema}
* with changed column
*
* @since 2.18
*/
public CsvSchema withColumn(String columnName, UnaryOperator transformer) {
Column old = column(columnName);
if (old == null) {
throw new IllegalArgumentException("No column '"+columnName+"' in CsvSchema (known columns: "
+getColumnNames()+")");
}
Column newColumn = transformer.apply(old);
if (newColumn == old) {
return this;
}
return _withColumn(old.getIndex(), newColumn);
}
/**
* Mutant factory method that will try to replace specified column with
* changed definition (but same name), leaving other columns as-is.
*
* As with all `withXxx()` methods this method never modifies `this` but either
* returns it unmodified (if no change to column), or constructs
* a new schema instance and returns that.
*
* @param columnIndex Index of column to replace
* @param transformer Transformation to apply to the column
*
* @return Either this schema (if column did not change), or newly constructed {@link CsvSchema}
* with changed column
*
* @since 2.18
*/
public CsvSchema withColumn(int columnIndex, UnaryOperator transformer) {
if (columnIndex < 0 || columnIndex >= size()) {
throw new IllegalArgumentException("Illegal index "+columnIndex+"; `CsvSchema` has "+size()+" columns");
}
Column old = _columns[columnIndex];
Column newColumn = transformer.apply(old);
if (newColumn == old) {
return this;
}
return _withColumn(old.getIndex(), newColumn);
}
/**
* @since 2.18
*/
protected CsvSchema _withColumn(int ix, Column toReplace) {
Objects.requireNonNull(toReplace);
if (ix < 0 || ix >= size()) {
throw new IllegalArgumentException("Illegal index for column '"+toReplace.getName()+"': "
+ix+" (column count: "+size()+")");
}
return rebuild()
.replaceColumn(ix, toReplace)
.build();
}
/**
* @since 2.7
*/
public CsvSchema withAnyPropertyName(String name) {
return new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, name);
}
/**
* Mutant factory method that will construct a new instance in which columns
* are sorted based on names given as argument. Columns not listed in argument
* will be sorted after those within list, using existing ordering.
*
* For example, schema that has columns:
*
"a", "d", "c", "b"
*
* ordered with schema.sortedBy("a", "b");
* would result instance that columns in order:
*"a", "b", "d", "c"
*
*
* @since 2.4
*/
public CsvSchema sortedBy(String... columnNames)
{
LinkedHashMap map = new LinkedHashMap<>();
for (String colName : columnNames) {
Column col = _columnsByName.get(colName);
if (col != null) {
map.put(col.getName(), col);
}
}
for (Column col : _columns) {
map.put(col.getName(), col);
}
return new CsvSchema(this, map.values().toArray(new Column[map.size()]));
}
/**
* Mutant factory method that will construct a new instance in which columns
* are sorted using given {@link Comparator} over column names.
*
* @since 2.4
*/
public CsvSchema sortedBy(Comparator cmp) {
TreeMap map = new TreeMap<>(cmp);
for (Column col : _columns) {
map.put(col.getName(), col);
}
return new CsvSchema(this, map.values().toArray(new Column[map.size()]));
}
/*
/**********************************************************************
/* Public API, FormatSchema
/**********************************************************************
*/
@Override
public String getSchemaType() {
return "CSV";
}
/*
/**********************************************************************
/* Public API, extended, properties
/**********************************************************************
*/
public boolean usesHeader() { return (_features & ENCODING_FEATURE_USE_HEADER) != 0; }
public boolean reordersColumns() { return (_features & ENCODING_FEATURE_REORDER_COLUMNS) != 0; }
public boolean skipsFirstDataRow() { return (_features & ENCODING_FEATURE_SKIP_FIRST_DATA_ROW) != 0; }
public boolean allowsComments() { return (_features & ENCODING_FEATURE_ALLOW_COMMENTS) != 0; }
public boolean strictHeaders() { return (_features & ENCODING_FEATURE_STRICT_HEADERS) != 0; }
public char getColumnSeparator() { return _columnSeparator; }
public String getArrayElementSeparator() { return _arrayElementSeparator; }
public int getQuoteChar() { return _quoteChar; }
public int getEscapeChar() { return _escapeChar; }
public char[] getLineSeparator() { return _lineSeparator; }
/**
* @return Null value defined, as char array, if one is defined to be recognized; Java null
* if not.
*
* @since 2.5
*/
public char[] getNullValue() { return _nullValue; }
/**
* Same as {@link #getNullValue()} except that undefined null value (one that remains as null
,
* or explicitly set as such) will be returned as empty char[]
*
* @since 2.6
*/
public char[] getNullValueOrEmpty() {
if (_nullValue == null) {
return NO_CHARS;
}
return _nullValue;
}
/**
* @since 2.6
*/
public String getNullValueString() {
String str = _nullValueAsString;
if (str == null) {
if (_nullValue == null) {
return null;
}
str = (_nullValue.length == 0) ? "" : new String(_nullValue);
_nullValueAsString = str;
}
return str;
}
public boolean usesQuoteChar() { return _quoteChar >= 0; }
public boolean usesEscapeChar() { return _escapeChar >= 0; }
/**
* @since 2.5
*/
public boolean hasArrayElementSeparator() { return !_arrayElementSeparator.isEmpty(); }
/**
* @since 2.7
*/
public String getAnyPropertyName() { return _anyPropertyName; }
/*
/**********************************************************************
/* Public API, extended; column access
/**********************************************************************
*/
@Override
public Iterator iterator() {
return Arrays.asList(_columns).iterator();
}
/**
* Accessor for finding out how many columns this schema defines.
*
* @return Number of columns this schema defines
*/
public int size() { return _columns.length; }
/**
* Accessor for column at specified index (0-based); index having to be within
*
* 0 <= index < size()
*
*/
public Column column(int index) {
return _columns[index];
}
/**
* Method for finding index of a named column within this schema.
*
* @param name Name of column to find
* @return Index of the specified column, if one exists; {@code -1} if not
*
* @since 2.18
*/
public int columnIndex(String name) {
Column col = column(name);
return (col == null) ? -1 : col.getIndex();
}
/**
* @since 2.6
*/
public String columnName(int index) {
return _columns[index].getName();
}
public Column column(String name) {
return _columnsByName.get(name);
}
/**
* Optimized variant where a hint is given as to likely index of the column
* name.
*
* @since 2.6
*/
public Column column(String name, int probableIndex) {
if (probableIndex < _columns.length) {
Column col = _columns[probableIndex];
if (col.hasName(name)) {
return col;
}
}
return _columnsByName.get(name);
}
/**
* Accessor for getting names of included columns, in the order they are
* included in the schema.
*
* @since 2.14
*/
public List getColumnNames() {
return (List) getColumnNames(new ArrayList(_columns.length));
}
/**
* Accessor for getting names of included columns, added in given
* {@code Collection}.
*
* @since 2.14
*/
public Collection getColumnNames(Collection names) {
final int len = _columns.length;
for (int i = 0; i < len; ++i) {
names.add(_columns[i].getName());
}
return names;
}
/**
* Method for getting description of column definitions in
* developer-readable form
*/
public String getColumnDesc()
{
StringBuilder sb = new StringBuilder(100);
sb.append('[');
for (Column col : _columns) {
if (sb.length() > 1) {
sb.append(',');
}
sb.append('"');
sb.append(col.getName());
sb.append('"');
}
sb.append(']');
return sb.toString();
}
/*
/**********************************************************************
/* Other overrides
/**********************************************************************
*/
@Override
public String toString()
{
StringBuilder sb = new StringBuilder(150);
sb.append("[CsvSchema: ")
.append("columns=[");
boolean first = true;
for (Column col : _columns) {
if (first) {
first = false;
} else {
sb.append(',');
}
sb.append('"');
sb.append(col.getName());
sb.append("\"/");
sb.append(col.getType());
}
sb.append(']');
sb.append(", header? ").append(usesHeader());
sb.append(", skipFirst? ").append(skipsFirstDataRow());
sb.append(", comments? ").append(allowsComments());
sb.append(", any-properties? ");
String anyProp = getAnyPropertyName();
if (anyProp == null) {
sb.append("N/A");
} else {
sb.append("as '").append(anyProp).append("'");
}
sb.append(']');
return sb.toString();
}
/*
/**********************************************************************
/* Helper methods
/**********************************************************************
*/
protected static String _validArrayElementSeparator(String sep) {
if (sep == null || sep.isEmpty()) {
return NO_ARRAY_ELEMENT_SEPARATOR;
}
return sep;
}
}