All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.jackson.dataformat.csv.CsvSchema Maven / Gradle / Ivy

The newest version!

package com.fasterxml.jackson.dataformat.csv;

import java.util.*;
import java.util.function.UnaryOperator;

import com.fasterxml.jackson.core.FormatSchema;

/**
 * Simple {@link FormatSchema} sub-type that defines properties of
 * a CSV document to read or write.
 * Properties supported currently are:
 *
    *
  • {@code columns} (List of ColumnDef) [default: empty List]: Ordered list of columns (which may be empty, see below). * Each column has name (mandatory) as well as type (optional; if not * defined, defaults to "String"). * Note that *
  • *
  • {@code useHeader} (boolean) [default: false]: whether the first line of physical document defines * column names (true) or not (false): if enabled, parser will take * first-line values to define column names; and generator will output * column names as the first line *
  • *
  • {@code quoteChar} (char) [default: double-quote ('")]: character used for quoting values * that contain quote characters or linefeeds. *
  • *
  • {@code columnSeparator} (char) [default: comma (',')]: character used to separate values. * Other commonly used values include tab ('\t') and pipe ('|') *
  • *
  • {@code arrayElementSeparator} (String) [default: semicolon (";")]: string used to separate array elements. *
  • *
  • {@code lineSeparator} (String) [default: "\n"]: character used to separate data rows. * Only used by generator; parser accepts three standard linefeeds ("\r", "\r\n", "\n"). *
  • *
  • {@code escapeChar} (int) [default: -1 meaning "none"]: character, if any, used to * escape values. Most commonly defined as backslash ('\'). Only used by parser; * generator only uses quoting, including doubling up of quotes to indicate quote char * itself. *
  • *
  • {@code skipFirstDataRow} (boolean) [default: false]: whether the first data line (either * first line of the document, if useHeader=false, or second, if useHeader=true) * should be completely ignored by parser. Needed to support CSV-like file formats * that include additional non-data content before real data begins (specifically * some database dumps do this) *
  • *
  • {@code nullValue} (String) [default: "" (empty String)]: When asked to write Java `null`, * this String value will be used instead.
    * With 2.6, value will also be recognized during value reads. *
  • *
  • {@code strictHeaders} (boolean) [default: false] (added in Jackson 2.7): whether names of * columns defined in the schema MUST match with actual declaration from * the header row (if header row handling enabled): if true, they must match and * an exception is thrown if order differs: if false, no verification is performed. *
  • *
  • {@code allowComments} (boolean) [default: false]: whether lines that start with character "#" * are processed as comment lines and skipped/ignored. *
  • *
  • {@code anyProperty} (String) [default: none]: if "any properties" (properties for * 'extra' columns; ones not specified in schema) are enabled, they are mapped to * this name: leaving it as {@code null} disables use of * "any properties" (and they are either ignored, or an exception * is thrown, depending on other settings); setting it to a non-null * String value will expose all extra properties under one specified name. * Most often used with Jackson {@code @JsonAnySetter} annotation. *

    * Note that schemas without any columns are legal, but if no columns * are added, behavior of parser/generator is usually different and * content will be exposed as logical Arrays instead of Objects. *

    * There are 4 ways to create CsvSchema instances: *

      *
    • Manually build one, using {@link Builder} *
    • *
    • Modify existing schema (using withXxx methods * or {@link #rebuild} for creating {@link Builder}) *
    • *
    • Create schema based on a POJO definition (Class), using * {@link CsvMapper} methods like {@link CsvMapper#schemaFor(java.lang.Class)}. *
    • *
    • Request that {@link CsvParser} reads schema from the first line: * enable "useHeader" property for the initial schema, and let parser * read column names from the document itself. *
    • *
    */ public class CsvSchema implements FormatSchema, Iterable, java.io.Serializable // since 2.5 { private static final long serialVersionUID = 1L; // 2.5 /* /********************************************************************** /* Constants, feature flags /********************************************************************** */ protected final static int ENCODING_FEATURE_USE_HEADER = 0x0001; protected final static int ENCODING_FEATURE_SKIP_FIRST_DATA_ROW = 0x0002; protected final static int ENCODING_FEATURE_ALLOW_COMMENTS = 0x0004; protected final static int ENCODING_FEATURE_REORDER_COLUMNS = 0x0008; protected final static int ENCODING_FEATURE_STRICT_HEADERS = 0x0010; protected final static int DEFAULT_ENCODING_FEATURES = 0; protected final static char[] NO_CHARS = new char[0]; /* /********************************************************************** /* Constants, default settings /********************************************************************** */ /** * Default separator for column values is comma (hence "Comma-Separated Values") */ public final static char DEFAULT_COLUMN_SEPARATOR = ','; /** * Default separator for array elements within a column value is * semicolon. */ public final static String DEFAULT_ARRAY_ELEMENT_SEPARATOR = ";"; /** * Marker for the case where no array element separator is used */ public final static String NO_ARRAY_ELEMENT_SEPARATOR = ""; /** * By default no "any properties" (properties for 'extra' columns; ones * not specified in schema) are used, so null is used as marker. * * @since 2.7 */ public final static String DEFAULT_ANY_PROPERTY_NAME = null; public final static char DEFAULT_QUOTE_CHAR = '"'; /** * By default, nulls are written as empty Strings (""); and no coercion * is performed from any String (higher level databind may, however, * coerce Strings into Java nulls). * To use automatic coercion on reading, null value must be set explicitly * to empty String (""). *

    * NOTE: before 2.6, this value default to empty char[]; changed * to Java null in 2.6. */ public final static char[] DEFAULT_NULL_VALUE = null; /** * By default, no escape character is used -- this is denoted by * int value that does not map to a valid character */ public final static int DEFAULT_ESCAPE_CHAR = -1; public final static char[] DEFAULT_LINEFEED = "\n".toCharArray(); /* /********************************************************************** /* Constants, other /********************************************************************** */ protected final static Column[] NO_COLUMNS = new Column[0]; /* /********************************************************************** /* Helper classes /********************************************************************** */ /** * Enumeration that defines optional type indicators that can be passed * with schema. If used type is used to determine type of * {@link com.fasterxml.jackson.core.JsonToken} * that column values are exposed as. */ public enum ColumnType { /** * Default type if not explicitly defined; value will * be presented as VALUE_STRING by parser, * that is, no type-inference is performed, and value is * not trimmed. *

    * Note that this type allows coercion into array, if higher * level application calls * {@link com.fasterxml.jackson.core.JsonParser#isExpectedStartArrayToken}, * unlike more explicit types. */ STRING, /** * Value is considered to be a String, except that tokens * "null", "true" and "false" are recognized as matching * tokens and reported as such; * and values are trimmed (leading/trailing white space) */ STRING_OR_LITERAL, /** * Value should be a number, but literals "null", "true" and "false" * are also understood, and an empty String is considered null. * Values are also trimmed (leading/trailing white space) * Other non-numeric Strings may cause parsing exception. */ NUMBER, /** * Value is taken to be a number (if it matches valid JSON number * formatting rules), literal (null, true or false) or String, * depending on best match. * Values are also trimmed (leading/trailing white space) */ NUMBER_OR_STRING, /** * Value is expected to be a boolean ("true", "false") String, * or "null", or empty String (equivalent to null). * Values are trimmed (leading/trailing white space). * Values other than indicated above may result in an exception. * * @since 2.5 */ BOOLEAN, /** * Value will be a multi-value sequence, separated by array element * separator. Element type itself may be any scalar type (that is, number * or String) and will not be optimized. * Separator may be overridden on per-column basis. *

    * Note that this type is used for generic concept of multiple values, and * not specifically to match Java arrays: data-binding may match such columns * to {@link java.util.Collection}s as well, or even other types as necessary. * * @since 2.5 */ ARRAY, ; } /** * Representation of info for a single column */ public static class Column implements java.io.Serializable // since 2.4.3 { private static final long serialVersionUID = 1L; public final static Column PLACEHOLDER = new Column(0, ""); private final String _name; private final int _index; private final ColumnType _type; /** * NOTE: type changed from `char` to `java.lang.String` in 2.7 * * @since 2.5 */ private final String _arrayElementSeparator; /** * Value decorator used for this column, if any; {@code null} if none. * Used to add decoration on serialization (writing) and remove decoration * on deserialization (reading). * * @since 2.18 */ private final CsvValueDecorator _valueDecorator; /** * Link to the next column within schema, if one exists; * null for the last column. 
* * @since 2.6 */ private final Column _next; public Column(int index, String name) { this(index, name, ColumnType.STRING, ""); } public Column(int index, String name, ColumnType type) { this(index, name, type, ""); } public Column(int index, String name, ColumnType type, String arrayElementSep) { _index = index; _name = name; _type = type; _arrayElementSeparator = _validArrayElementSeparator(arrayElementSep); _valueDecorator = null; _next = null; } public Column(Column src, Column next) { this(src, src._index, src._valueDecorator, next); } protected Column(Column src, int index, Column next) { this(src, index, src._valueDecorator, next); } /** * @since 2.18 */ protected Column(Column src, CsvValueDecorator valueDecorator) { this(src, src._index, valueDecorator, src._next); } /** * @since 2.18 */ protected Column(Column src, int index, CsvValueDecorator valueDecorator, Column next) { _index = index; _name = src._name; _type = src._type; _arrayElementSeparator = src._arrayElementSeparator; _valueDecorator = valueDecorator; _next = next; } public Column withName(String newName) { if (_name == newName) { return this; } return new Column(_index, newName, _type, _arrayElementSeparator); } public Column withType(ColumnType newType) { if (newType == _type) { return this; } return new Column(_index, _name, newType, _arrayElementSeparator); } public Column withArrayElementSeparator(String separator) { String sep = _validArrayElementSeparator(separator); if (_arrayElementSeparator.equals(sep)) { return this; } return new Column(_index, _name, _type, sep); } /** * @since 2.18 */ public Column withValueDecorator(CsvValueDecorator valueDecorator) { if (valueDecorator == _valueDecorator) { return this; } return new Column(this, valueDecorator); } public Column withNext(Column next) { if (_next == next) { return this; } return new Column(this, next); } /** * @since 2.7 */ public Column withNext(int index, Column next) { if ((_index == index) && (_next == next)) { return this; } 
return new Column(this, index, next); } public int getIndex() { return _index; } public String getName() { return _name; } public ColumnType getType() { return _type; } public Column getNext() { return _next; } /** * Access that returns same as {@link #getNext} iff name of that * column is same as given name */ public Column getNextWithName(String name) { if (_next != null && name.equals(_next._name)) { return _next; } return null; } public boolean hasName(String n) { return (_name == n) || _name.equals(n); } /** * @since 2.5 */ public String getArrayElementSeparator() { return _arrayElementSeparator; } /** * @since 2.18 */ public CsvValueDecorator getValueDecorator() { return _valueDecorator; } public boolean isArray() { return (_type == ColumnType.ARRAY); } } /** * Class used for building {@link CsvSchema} instances. */ public static class Builder { protected final ArrayList _columns = new ArrayList(); /** * Bit-flag for general-purpose on/off features. * * @since 2.5 */ protected int _encodingFeatures = DEFAULT_ENCODING_FEATURES; protected char _columnSeparator = DEFAULT_COLUMN_SEPARATOR; protected String _arrayElementSeparator = DEFAULT_ARRAY_ELEMENT_SEPARATOR; /** * If "any properties" (properties for 'extra' columns; ones * not specified in schema) are enabled, they are mapped to * this name: leaving it as {@code null} disables use of * "any properties" (and they are either ignored, or an exception * is thrown, depending on other settings); setting it to a non-null * String value will expose all extra properties under one specified * name. 
* * @since 2.7 */ protected String _anyPropertyName = DEFAULT_ANY_PROPERTY_NAME; // note: need to use int to allow -1 for 'none' protected int _quoteChar = DEFAULT_QUOTE_CHAR; // note: need to use int to allow -1 for 'none' protected int _escapeChar = DEFAULT_ESCAPE_CHAR; protected char[] _lineSeparator = DEFAULT_LINEFEED; /** * @since 2.5 */ protected char[] _nullValue = DEFAULT_NULL_VALUE; public Builder() { } /** * "Copy" constructor which creates builder that has settings of * given source schema */ public Builder(CsvSchema src) { for (Column col : src._columns) { _columns.add(col); } _encodingFeatures = src._features; _columnSeparator = src._columnSeparator; _arrayElementSeparator = src._arrayElementSeparator; _quoteChar = src._quoteChar; _escapeChar = src._escapeChar; _lineSeparator = src._lineSeparator; _nullValue = src._nullValue; _anyPropertyName = src._anyPropertyName; } /** * NOTE: does NOT check for duplicate column names so it is possibly to * accidentally add duplicates. */ public Builder addColumn(String name) { int index = _columns.size(); return addColumn(new Column(index, name)); } /** * Add column with given name, and with changes to apply (as specified * by second argument, {@code transformer}). * NOTE: does NOT check for duplicate column names so it is possibly to * accidentally add duplicates. * * @param name Name of column to add * @param transformer Changes to apply to column definition * * @since 2.18 */ public Builder addColumn(String name, UnaryOperator transformer) { Column col = transformer.apply(new Column(_columns.size(), name)); return addColumn(col); } /** * NOTE: does NOT check for duplicate column names so it is possibly to * accidentally add duplicates. */ public Builder addColumn(String name, ColumnType type) { int index = _columns.size(); return addColumn(new Column(index, name, type)); } /** * Add column with given name, and with changes to apply (as specified * by second argument, {@code transformer}). 
* NOTE: does NOT check for duplicate column names so it is possibly to * accidentally add duplicates. * * @param name Name of column to add * @param type Type of the column to add * @param transformer Changes to apply to column definition * * @since 2.18 */ public Builder addColumn(String name, ColumnType type, UnaryOperator transformer) { Column col = transformer.apply(new Column(_columns.size(), name, type)); return addColumn(col); } /** * NOTE: does NOT check for duplicate column names so it is possibly to * accidentally add duplicates. */ public Builder addColumn(Column c) { _columns.add(c); return this; } /** * NOTE: does NOT check for duplicate column names so it is possibly to * accidentally add duplicates. * * @since 2.9 */ public Builder addColumns(Iterable cs) { for (Column c : cs) { _columns.add(c); } return this; } /** * NOTE: does NOT check for duplicate column names so it is possibly to * accidentally add duplicates. * * @since 2.9 */ public Builder addColumns(Iterable names, ColumnType type) { Builder result = this; for (String name : names) { result = addColumn(name, type); } return result; } /** * NOTE: unlike many other add methods, this method DOES check for, and * discard, possible duplicate columns: that is, if this builder already * has a column with same name as column to be added, existing column * is retained and new column ignored. 
* * @since 2.9 */ public Builder addColumnsFrom(CsvSchema schema) { Builder result = this; for (Column col : schema) { if (!hasColumn(col.getName())) { result = result.addColumn(col); } } return result; } public Builder addArrayColumn(String name) { int index = _columns.size(); return addColumn(new Column(index, name, ColumnType.ARRAY, "")); } /** * @since 2.7 */ public Builder addArrayColumn(String name, String elementSeparator) { int index = _columns.size(); return addColumn(new Column(index, name, ColumnType.ARRAY, elementSeparator)); } public Builder addNumberColumn(String name) { int index = _columns.size(); return addColumn(new Column(index, name, ColumnType.NUMBER)); } public Builder addBooleanColumn(String name) { int index = _columns.size(); return addColumn(new Column(index, name, ColumnType.BOOLEAN)); } public Builder renameColumn(int index, String newName) { _checkIndex(index); _columns.set(index, _columns.get(index).withName(newName)); return this; } public Builder replaceColumn(int index, Column c) { _checkIndex(index); _columns.set(index, c); return this; } /** * @since 2.16 */ public Builder removeColumn(int index) { _checkIndex(index); _columns.remove(index); return this; } /** * Helper method called to drop the last collected column name if * it is empty: called if {link CsvParser.Feature#ALLOW_TRAILING_COMMA} * enabled to remove the last entry after being added initially. 
* * @since 2.11.2 */ public void dropLastColumnIfEmpty() { final int ix = _columns.size() - 1; if (ix >= 0) { if (_columns.get(ix).getName().isEmpty()) { _columns.remove(ix); } } } public Builder setColumnType(int index, ColumnType type) { _checkIndex(index); _columns.set(index, _columns.get(index).withType(type)); return this; } public Builder removeArrayElementSeparator(int index) { _checkIndex(index); _columns.set(index, _columns.get(index).withArrayElementSeparator("")); return this; } /** * @since 2.7 */ public Builder setArrayElementSeparator(int index, String sep) { _checkIndex(index); _columns.set(index, _columns.get(index).withArrayElementSeparator(sep)); return this; } public Builder setAnyPropertyName(String name) { _anyPropertyName = name; return this; } public Builder clearColumns() { _columns.clear(); return this; } public int size() { return _columns.size(); } public Iterator getColumns() { return _columns.iterator(); } /** *

    * NOTE: this method requires linear scan over existing columns * so it may be more efficient to use other types of lookups if * available (for example, {@link CsvSchema#column(String)} has a * hash lookup to use). * * @since 2.9 */ public boolean hasColumn(String name) { for (int i = 0, end = _columns.size(); i < end; ++i) { if (_columns.get(i).getName().equals(name)) { return true; } } return false; } /** * Method for specifying whether Schema should indicate that * a header line (first row that contains column names) is to be * used for reading and writing or not. */ public Builder setUseHeader(boolean b) { _feature(ENCODING_FEATURE_USE_HEADER, b); return this; } /** * Use in combination with setUseHeader. When use header flag is * is set, this setting will reorder the columns defined in this * schema to match the order set by the header. * * @param b Enable / Disable this setting * @return This Builder instance * * @since 2.7 */ public Builder setReorderColumns(boolean b) { _feature(ENCODING_FEATURE_REORDER_COLUMNS, b); return this; } /** * Use in combination with {@link #setUseHeader}. When `strict-headers` * is set, encoder will ensure the headers are in the order * of the schema; if order differs, an exception is thrown. * * @param b Enable / Disable this setting * @return This Builder instance * * @since 2.7 */ public Builder setStrictHeaders(boolean b) { _feature(ENCODING_FEATURE_STRICT_HEADERS, b); return this; } /** * Method for specifying whether Schema should indicate that * the first line that is not a header (if header handling enabled) * should be skipped in its entirety. */ public Builder setSkipFirstDataRow(boolean b) { _feature(ENCODING_FEATURE_SKIP_FIRST_DATA_ROW, b); return this; } /** * Method for specifying whether Schema should indicate that * "hash comments" (lines where the first non-whitespace character * is '#') are allowed; if so, they will be skipped without processing. 
* * @since 2.5 */ public Builder setAllowComments(boolean b) { _feature(ENCODING_FEATURE_ALLOW_COMMENTS, b); return this; } protected final void _feature(int feature, boolean state) { _encodingFeatures = state ? (_encodingFeatures | feature) : (_encodingFeatures & ~feature); } /** * Method for specifying character used to separate column * values. * Default is comma (','). */ public Builder setColumnSeparator(char c) { _columnSeparator = c; return this; } /** * Method for specifying character used to separate array element * values. * Default value is semicolon (";") * * @since 2.7 */ public Builder setArrayElementSeparator(String separator) { _arrayElementSeparator = _validArrayElementSeparator(separator); return this; } /** * @since 2.7 */ public Builder disableArrayElementSeparator() { _arrayElementSeparator = NO_ARRAY_ELEMENT_SEPARATOR; return this; } /** * Method for specifying character used for optional quoting * of values. * Default is double-quote ('"'). */ public Builder setQuoteChar(char c) { _quoteChar = c; return this; } /** * @since 2.4 */ public Builder disableQuoteChar() { _quoteChar = -1; return this; } /** * Method for specifying character used for optional escaping * of characters in quoted String values. * Default is "not used", meaning that no escaping used. */ public Builder setEscapeChar(char c) { _escapeChar = c; return this; } /** * Method for specifying that no escape character is to be used * with CSV documents this schema defines. */ public Builder disableEscapeChar() { _escapeChar = -1; return this; } public Builder setLineSeparator(String lf) { _lineSeparator = lf.toCharArray(); return this; } public Builder setLineSeparator(char lf) { _lineSeparator = new char[] { lf }; return this; } public Builder setNullValue(String nvl) { return setNullValue((nvl == null) ? 
null : nvl.toCharArray()); } public Builder setNullValue(char[] nvl) { _nullValue = nvl; return this; } public CsvSchema build() { Column[] cols = _columns.toArray(new Column[_columns.size()]); return new CsvSchema(cols, _encodingFeatures, _columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator, _nullValue, _anyPropertyName); } protected void _checkIndex(int index) { if (index < 0 || index >= _columns.size()) { throw new IllegalArgumentException("Illegal index "+index+"; only got "+_columns.size()+" columns"); } } } /* /********************************************************************** /* Configuration, construction /********************************************************************** */ /** * Column definitions, needed for optional header and/or mapping * of field names to column positions. */ protected final Column[] _columns; protected final Map _columnsByName; /** * Bitflag for general-purpose on/off features. * * @since 2.5 */ protected int _features = DEFAULT_ENCODING_FEATURES; protected final char _columnSeparator; protected final String _arrayElementSeparator; protected final int _quoteChar; protected final int _escapeChar; protected final char[] _lineSeparator; /** * @since 2.5 */ protected final char[] _nullValue; protected transient String _nullValueAsString; /** * If "any properties" (properties for 'extra' columns; ones * not specified in schema) are enabled, they are mapped to * this name: leaving it as null disables use of * "any properties" (and they are either ignored, or an exception * is thrown, depending on other settings); setting it to a non-null * String value will expose all extra properties under one specified * name. 
* * @since 2.7 */ protected final String _anyPropertyName; /** * @since 2.7 */ public CsvSchema(Column[] columns, int features, char columnSeparator, int quoteChar, int escapeChar, char[] lineSeparator, String arrayElementSeparator, char[] nullValue, String anyPropertyName) { if (columns == null) { columns = NO_COLUMNS; } else { columns = _link(columns); } _columns = columns; _features = features; _columnSeparator = columnSeparator; _arrayElementSeparator = arrayElementSeparator; _quoteChar = quoteChar; _escapeChar = escapeChar; _lineSeparator = lineSeparator; _nullValue = nullValue; _anyPropertyName = anyPropertyName; // and then we may need to create a mapping if (_columns.length == 0) { _columnsByName = Collections.emptyMap(); } else { _columnsByName = new LinkedHashMap<>(4 + _columns.length); for (Column c : _columns) { _columnsByName.put(c.getName(), c); } } } /** * Copy constructor used for creating variants using * withXxx() methods. */ protected CsvSchema(Column[] columns, int features, char columnSeparator, int quoteChar, int escapeChar, char[] lineSeparator, String arrayElementSeparator, char[] nullValue, Map columnsByName, String anyPropertyName) { _columns = columns; _features = features; _columnSeparator = columnSeparator; _quoteChar = quoteChar; _escapeChar = escapeChar; _lineSeparator = lineSeparator; _arrayElementSeparator = arrayElementSeparator; _nullValue = nullValue; _columnsByName = columnsByName; _anyPropertyName = anyPropertyName; } /** * Copy constructor used for creating variants using * sortedBy() methods. 
*/ protected CsvSchema(CsvSchema base, Column[] columns) { _columns = _link(columns); _features = base._features; _columnSeparator = base._columnSeparator; _quoteChar = base._quoteChar; _escapeChar = base._escapeChar; _lineSeparator = base._lineSeparator; _arrayElementSeparator = base._arrayElementSeparator; _nullValue = base._nullValue; _anyPropertyName = base._anyPropertyName; // and then we may need to create a mapping if (_columns.length == 0) { _columnsByName = Collections.emptyMap(); } else { _columnsByName = new LinkedHashMap<>(4 + _columns.length); for (Column c : _columns) { _columnsByName.put(c.getName(), c); } } } /** * Copy constructor used for creating variants for on/off features * * @since 2.5 */ protected CsvSchema(CsvSchema base, int features) { _columns = base._columns; _features = features; _columnSeparator = base._columnSeparator; _quoteChar = base._quoteChar; _escapeChar = base._escapeChar; _lineSeparator = base._lineSeparator; _arrayElementSeparator = base._arrayElementSeparator; _nullValue = base._nullValue; _anyPropertyName = base._anyPropertyName; _columnsByName = base._columnsByName; } /** * Helper method used for chaining columns together using next-linkage, * as well as ensuring that indexes are correct. */ private static Column[] _link(Column[] orig) { int i = orig.length; Column[] result = new Column[i]; Column prev = null; for (; --i >= 0; ) { Column curr = orig[i].withNext(i, prev); result[i] = curr; prev = curr; } return result; } public static Builder builder() { return new Builder(); } /** * Accessor for creating a "default" CSV schema instance, with following * settings: *

      *
    • Does NOT use header line *
    • *
    • Uses double quotes ('"') for quoting of field values (if necessary) *
    • *
    • Uses comma (',') as the field separator *
    • *
    • Uses Unix linefeed ('\n') as row separator *
    • *
    • Does NOT use any escape characters *
    • *
    • Does NOT have any columns defined *
    • *
    */
    public static CsvSchema emptySchema() {
        final Builder defaults = builder();
        return defaults.build();
    }

    /**
     * Helper method for constructing Builder that can be used to create modified
     * schema.
     */
    public Builder rebuild() {
        return new Builder(this);
    }

    /*
    /**********************************************************************
    /* Mutant factories
    /**********************************************************************
     */

    /**
     * Returns a schema in which the "use header" feature has given state.
     */
    public CsvSchema withUseHeader(boolean state) {
        return _withFeature(ENCODING_FEATURE_USE_HEADER, state);
    }

    /**
     * Returns a clone of this instance by changing or setting the
     * column reordering flag
     *
     * @param state New value for setting
     * @return A copy of itself, ensuring the setting for
     *   the column reordering feature.
     * @since 2.7
     */
    public CsvSchema withColumnReordering(boolean state) {
        return _withFeature(ENCODING_FEATURE_REORDER_COLUMNS, state);
    }

    /**
     * Returns a clone of this instance by changing or setting the
     * strict headers flag
     *
     * @param state New value for setting
     * @return A copy of itself, ensuring the setting for
     *   the strict headers feature.
     * @since 2.7
     */
    public CsvSchema withStrictHeaders(boolean state) {
        return _withFeature(ENCODING_FEATURE_STRICT_HEADERS, state);
    }

    /**
     * Helper method for constructing and returning schema instance that
     * is similar to this one, except that it will be using header line.
     */
    public CsvSchema withHeader() {
        return _withFeature(ENCODING_FEATURE_USE_HEADER, true);
    }

    /**
     * Helper method for constructing and returning schema instance that
     * is similar to this one, except that it will not be using header line.
     */
    public CsvSchema withoutHeader() {
        return _withFeature(ENCODING_FEATURE_USE_HEADER, false);
    }

    /**
     * Returns a schema in which the "skip first data row" feature has
     * given state.
     */
    public CsvSchema withSkipFirstDataRow(boolean state) {
        return _withFeature(ENCODING_FEATURE_SKIP_FIRST_DATA_ROW, state);
    }

    /**
     * Method to indicate whether "hash comments" are allowed
     * for document described by this schema.
* * @since 2.5 */ public CsvSchema withAllowComments(boolean state) { return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, state); } /** * Method to indicate that "hash comments" ARE allowed * for document described by this schema. * * @since 2.5 */ public CsvSchema withComments() { return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, true); } /** * Method to indicate that "hash comments" are NOT allowed for document * described by this schema. * * @since 2.5 */ public CsvSchema withoutComments() { return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, false); } protected CsvSchema _withFeature(int feature, boolean state) { int newFeatures = state ? (_features | feature) : (_features & ~feature); return (newFeatures == _features) ? this : new CsvSchema(this, newFeatures); } public CsvSchema withColumnSeparator(char sep) { return (_columnSeparator == sep) ? this : new CsvSchema(_columns, _features, sep, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName); } public CsvSchema withQuoteChar(char c) { return (_quoteChar == c) ? this : new CsvSchema(_columns, _features, _columnSeparator, c, _escapeChar, _lineSeparator,_arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName); } public CsvSchema withoutQuoteChar() { return (_quoteChar == -1) ? this : new CsvSchema(_columns, _features, _columnSeparator, -1, _escapeChar, _lineSeparator, _arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName); } public CsvSchema withEscapeChar(char c) { return (_escapeChar == c) ? this : new CsvSchema(_columns, _features, _columnSeparator, _quoteChar, c, _lineSeparator, _arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName); } public CsvSchema withoutEscapeChar() { return (_escapeChar == -1) ? 
this : new CsvSchema(_columns, _features, _columnSeparator, _quoteChar, -1, _lineSeparator, _arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName); } /** * @since 2.7 */ public CsvSchema withArrayElementSeparator(String separator) { String sep = separator == null ? "" : separator; return (_arrayElementSeparator.equals(sep)) ? this : new CsvSchema(_columns, _features, _columnSeparator, _quoteChar, _escapeChar, _lineSeparator, separator, _nullValue, _columnsByName, _anyPropertyName); } /** * @since 2.5 */ public CsvSchema withoutArrayElementSeparator() { return (_arrayElementSeparator.isEmpty()) ? this : new CsvSchema(_columns, _features, _columnSeparator, _quoteChar, _escapeChar, _lineSeparator, "", _nullValue, _columnsByName, _anyPropertyName); } public CsvSchema withLineSeparator(String sep) { return new CsvSchema(_columns, _features, _columnSeparator, _quoteChar, _escapeChar, sep.toCharArray(), _arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName); } /** * @since 2.5 */ public CsvSchema withNullValue(String nvl) { return new CsvSchema(_columns, _features, _columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator, (nvl == null) ? null : nvl.toCharArray(), _columnsByName, _anyPropertyName); } public CsvSchema withoutColumns() { return new CsvSchema(NO_COLUMNS, _features, _columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName); } /** * Mutant factory method that will try to combine columns of this schema with those * from `toAppend`, starting with columns of this instance, and ignoring * duplicates (if any) from argument `toAppend`. * All settings aside from column sets are copied from `this` instance. *

    * As with all `withXxx()` methods this method never modifies `this` but either * returns it unmodified (if no new columns found from `toAppend`), or constructs * a new instance and returns that. * * @return Either this schema (if nothing changed), or newly constructed {@link CsvSchema} * with appended columns. * * @since 2.9 */ public CsvSchema withColumnsFrom(CsvSchema toAppend) { int addCount = toAppend.size(); if (addCount == 0) { return this; } Builder b = rebuild(); for (int i = 0; i < addCount; ++i) { Column col = toAppend.column(i); if (column(col.getName()) == null) { b.addColumn(col); } } return b.build(); } /** * Mutant factory method that will try to replace specified column with * changed definition (but same name), leaving other columns as-is. *

    * As with all `withXxx()` methods this method never modifies `this` but either * returns it unmodified (if no change to column), or constructs * a new schema instance and returns that. * * @param columnName Name of column to replace * @param transformer Transformation to apply to the column * * @return Either this schema (if column did not change), or newly constructed {@link CsvSchema} * with changed column * * @since 2.18 */ public CsvSchema withColumn(String columnName, UnaryOperator transformer) { Column old = column(columnName); if (old == null) { throw new IllegalArgumentException("No column '"+columnName+"' in CsvSchema (known columns: " +getColumnNames()+")"); } Column newColumn = transformer.apply(old); if (newColumn == old) { return this; } return _withColumn(old.getIndex(), newColumn); } /** * Mutant factory method that will try to replace specified column with * changed definition (but same name), leaving other columns as-is. *

    * As with all `withXxx()` methods this method never modifies `this` but either * returns it unmodified (if no change to column), or constructs * a new schema instance and returns that. * * @param columnIndex Index of column to replace * @param transformer Transformation to apply to the column * * @return Either this schema (if column did not change), or newly constructed {@link CsvSchema} * with changed column * * @since 2.18 */ public CsvSchema withColumn(int columnIndex, UnaryOperator transformer) { if (columnIndex < 0 || columnIndex >= size()) { throw new IllegalArgumentException("Illegal index "+columnIndex+"; `CsvSchema` has "+size()+" columns"); } Column old = _columns[columnIndex]; Column newColumn = transformer.apply(old); if (newColumn == old) { return this; } return _withColumn(old.getIndex(), newColumn); } /** * @since 2.18 */ protected CsvSchema _withColumn(int ix, Column toReplace) { Objects.requireNonNull(toReplace); if (ix < 0 || ix >= size()) { throw new IllegalArgumentException("Illegal index for column '"+toReplace.getName()+"': " +ix+" (column count: "+size()+")"); } return rebuild() .replaceColumn(ix, toReplace) .build(); } /** * @since 2.7 */ public CsvSchema withAnyPropertyName(String name) { return new CsvSchema(_columns, _features, _columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator, _nullValue, _columnsByName, name); } /** * Mutant factory method that will construct a new instance in which columns * are sorted based on names given as argument. Columns not listed in argument * will be sorted after those within list, using existing ordering. *

    * For example, schema that has columns: *

    "a", "d", "c", "b"
         *
    * ordered with schema.sortedBy("a", "b"); * would result in an instance that has columns in order: *
    "a", "b", "d", "c"
         *
    * * @since 2.4 */ public CsvSchema sortedBy(String... columnNames) { LinkedHashMap map = new LinkedHashMap<>(); for (String colName : columnNames) { Column col = _columnsByName.get(colName); if (col != null) { map.put(col.getName(), col); } } for (Column col : _columns) { map.put(col.getName(), col); } return new CsvSchema(this, map.values().toArray(new Column[map.size()])); } /** * Mutant factory method that will construct a new instance in which columns * are sorted using given {@link Comparator} over column names. * * @since 2.4 */ public CsvSchema sortedBy(Comparator cmp) { TreeMap map = new TreeMap<>(cmp); for (Column col : _columns) { map.put(col.getName(), col); } return new CsvSchema(this, map.values().toArray(new Column[map.size()])); } /* /********************************************************************** /* Public API, FormatSchema /********************************************************************** */ @Override public String getSchemaType() { return "CSV"; } /* /********************************************************************** /* Public API, extended, properties /********************************************************************** */ public boolean usesHeader() { return (_features & ENCODING_FEATURE_USE_HEADER) != 0; } public boolean reordersColumns() { return (_features & ENCODING_FEATURE_REORDER_COLUMNS) != 0; } public boolean skipsFirstDataRow() { return (_features & ENCODING_FEATURE_SKIP_FIRST_DATA_ROW) != 0; } public boolean allowsComments() { return (_features & ENCODING_FEATURE_ALLOW_COMMENTS) != 0; } public boolean strictHeaders() { return (_features & ENCODING_FEATURE_STRICT_HEADERS) != 0; } public char getColumnSeparator() { return _columnSeparator; } public String getArrayElementSeparator() { return _arrayElementSeparator; } public int getQuoteChar() { return _quoteChar; } public int getEscapeChar() { return _escapeChar; } public char[] getLineSeparator() { return _lineSeparator; } /** * @return Null value defined, as char 
array, if one is defined to be recognized; Java null * if not. * * @since 2.5 */ public char[] getNullValue() { return _nullValue; } /** * Same as {@link #getNullValue()} except that undefined null value (one that remains as null, * or explicitly set as such) will be returned as empty char[] * * @since 2.6 */ public char[] getNullValueOrEmpty() { if (_nullValue == null) { return NO_CHARS; } return _nullValue; } /** * @since 2.6 */ public String getNullValueString() { String str = _nullValueAsString; if (str == null) { if (_nullValue == null) { return null; } str = (_nullValue.length == 0) ? "" : new String(_nullValue); _nullValueAsString = str; } return str; } public boolean usesQuoteChar() { return _quoteChar >= 0; } public boolean usesEscapeChar() { return _escapeChar >= 0; } /** * @since 2.5 */ public boolean hasArrayElementSeparator() { return !_arrayElementSeparator.isEmpty(); } /** * @since 2.7 */ public String getAnyPropertyName() { return _anyPropertyName; } /* /********************************************************************** /* Public API, extended; column access /********************************************************************** */ @Override public Iterator iterator() { return Arrays.asList(_columns).iterator(); } /** * Accessor for finding out how many columns this schema defines. * * @return Number of columns this schema defines */ public int size() { return _columns.length; } /** * Accessor for column at specified index (0-based); index having to be within *
         *    0 <= index < size()
         *
    */
    public Column column(int index) {
        return _columns[index];
    }

    /**
     * Method for finding index of a named column within this schema.
     *
     * @param name Name of column to find
     * @return Index of the specified column, if one exists; {@code -1} if not
     *
     * @since 2.18
     */
    public int columnIndex(String name) {
        Column col = column(name);
        return (col == null) ? -1 : col.getIndex();
    }

    /**
     * Accessor for name of the column at specified index (0-based); index
     * must be within {@code [0, size())}.
     *
     * @since 2.6
     */
    public String columnName(int index) {
        return _columns[index].getName();
    }

    /**
     * Accessor for finding a column by name.
     *
     * @return Column with given name, if one exists; {@code null} if not
     */
    public Column column(String name) {
        return _columnsByName.get(name);
    }

    /**
     * Optimized variant where a hint is given as to likely index of the column
     * name.
     *
     * @param name Name of column to find
     * @param probableIndex Index at which the column is expected; a value outside
     *    of {@code [0, size())} is ignored and lookup is done by name only
     *
     * @return Column with given name, if one exists; {@code null} if not
     *
     * @since 2.6
     */
    public Column column(String name, int probableIndex) {
        // The index is only a hint: a negative (or too large) value must fall back
        // to the lookup by name instead of failing on array access
        if (probableIndex >= 0 && probableIndex < _columns.length) {
            Column col = _columns[probableIndex];
            if (col.hasName(name)) {
                return col;
            }
        }
        return _columnsByName.get(name);
    }

    /**
     * Accessor for getting names of included columns, in the order they are
     * included in the schema.
     *
     * @since 2.14
     */
    public List<String> getColumnNames() {
        return (List<String>) getColumnNames(new ArrayList<String>(_columns.length));
    }

    /**
     * Accessor for getting names of included columns, added in given
     * {@code Collection}.
* * @since 2.14 */
    public Collection<String> getColumnNames(Collection<String> names) {
        for (Column col : _columns) {
            names.add(col.getName());
        }
        return names;
    }

    /**
     * Method for getting description of column definitions in
     * developer-readable form
     *
     * @return Quoted column names, comma-separated, within square brackets
     */
    public String getColumnDesc() {
        StringBuilder sb = new StringBuilder(100);
        sb.append('[');
        for (Column col : _columns) {
            // Length above 1 means something beyond the opening bracket was written
            if (sb.length() > 1) {
                sb.append(',');
            }
            sb.append('"');
            sb.append(col.getName());
            sb.append('"');
        }
        sb.append(']');
        return sb.toString();
    }

    /*
    /**********************************************************************
    /* Other overrides
    /**********************************************************************
     */

    /**
     * Developer-readable description: column names with types, followed by
     * header, skip-first-row, comment and any-property settings.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(150);
        sb.append("[CsvSchema: ")
            .append("columns=[");
        boolean first = true;
        for (Column col : _columns) {
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append('"');
            sb.append(col.getName());
            sb.append("\"/");
            sb.append(col.getType());
        }
        sb.append(']');
        sb.append(", header? ").append(usesHeader());
        sb.append(", skipFirst? ").append(skipsFirstDataRow());
        sb.append(", comments? ").append(allowsComments());
        sb.append(", any-properties? ");
        String anyProp = getAnyPropertyName();
        if (anyProp == null) {
            sb.append("N/A");
        } else {
            sb.append("as '").append(anyProp).append("'");
        }

        sb.append(']');
        return sb.toString();
    }

    /*
    /**********************************************************************
    /* Helper methods
    /**********************************************************************
     */

    /**
     * Normalizes an array element separator: {@code null} and empty String
     * both map to {@code NO_ARRAY_ELEMENT_SEPARATOR}.
     */
    protected static String _validArrayElementSeparator(String sep) {
        if (sep == null || sep.isEmpty()) {
            return NO_ARRAY_ELEMENT_SEPARATOR;
        }
        return sep;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy