
com.fasterxml.jackson.dataformat.csv.CsvSchema Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jackson-dataformat-csv Show documentation
Show all versions of jackson-dataformat-csv Show documentation
Support for reading and writing CSV-encoded data via Jackson
abstractions.
The newest version!
package com.fasterxml.jackson.dataformat.csv;
import java.util.*;
import org.codehaus.jackson.FormatSchema;
/**
* Simple {@link FormatSchema} sub-type that defines properties of
* a CSV document to read or write.
* Properties supported currently are:
*
* - columns (List of ColumnDef) [default: empty List]: Ordered list of columns (which may be empty, see below).
* Each column has name (mandatory) as well as type (optional; if not
* defined, defaults to "String").
* Note that the list may legally be left empty; see the remarks on
* column-less schemas below.
*
* - useHeader (boolean) [default: false]: whether the first line of physical document defines
* column names (true) or not (false): if enabled, parser will take
* first-line values to define column names; and generator will output
* column names as the first line
*
* - quoteChar (char) [default: double-quote ('"')]: character used for quoting values
* that contain quote characters or linefeeds.
*
* - columnSeparator (char) [default: comma (',')]: character used to separate values.
* Other commonly used values include tab ('\t') and pipe ('|')
*
* - lineSeparator (String) [default: "\n"]: character sequence used to separate data rows.
* Only used by generator; parser accepts three standard linefeeds ("\r", "\r\n", "\n").
*
* - escapeChar (int) [default: -1 meaning "none"]: character, if any, used to
* escape values. Most commonly defined as backslash ('\'). Only used by parser;
* generator only uses quoting, including doubling up of quotes to indicate quote char
* itself.
*
* - skipFirstDataRow (boolean) [default: false]: whether the first data line (either
* first line of the document, if useHeader=false, or second, if useHeader=true)
* should be completely ignored by parser. Needed to support CSV-like file formats
* that include additional non-data content before real data begins (specifically
* some database dumps do this)
*
*
*
* Note that schemas without any columns are legal, but if no columns
* are added, behavior of parser/generator is usually different and
* content will be exposed as logical Arrays instead of Objects.
*
* There are 4 ways to create <code>CsvSchema</code> instances:
*
* - Manually build one, using {@link Builder}
*
* - Modify existing schema (using <code>withXxx</code> methods
* or {@link #rebuild} for creating {@link Builder})
*
* - Create schema based on a POJO definition (Class), using
* {@link CsvMapper} methods like {@link CsvMapper#schemaFor(java.lang.Class)}.
*
* - Request that {@link CsvParser} reads schema from the first line:
* enable "useHeader" property for the initial schema, and let parser
* read column names from the document itself.
*
*
*
* @since 1.9
*/
public class CsvSchema
    implements FormatSchema,
        Iterable<CsvSchema.Column>
{
    /*
    /**********************************************************************
    /* Constants
    /**********************************************************************
     */

    /** Shared empty column array, used when a schema has no columns. */
    protected final static Column[] NO_COLUMNS = new Column[0];

    public final static char DEFAULT_COLUMN_SEPARATOR = ',';

    public final static char DEFAULT_QUOTE_CHAR = '"';

    /**
     * By default, no escape character is used -- this is denoted by
     * int value that does not map to a valid character
     */
    public final static int DEFAULT_ESCAPE_CHAR = -1;

    public final static char[] DEFAULT_LINEFEED = "\n".toCharArray();

    /**
     * By default we do NOT expect the first line to be header.
     */
    public final static boolean DEFAULT_USE_HEADER = false;

    public final static boolean DEFAULT_SKIP_FIRST_DATA_ROW = false;

    /*
    /**********************************************************************
    /* Helper classes
    /**********************************************************************
     */

    /**
     * Enumeration that defines optional type indicators that can be passed
     * with schema. If used, the type determines the kind of {@code JsonToken}
     * that column values are exposed as.
     */
    public enum ColumnType
    {
        /**
         * Default type if not explicitly defined; value will
         * be presented as <code>VALUE_STRING</code> by parser,
         * that is, no type-inference is performed, and value is
         * not trimmed.
         */
        STRING,

        /**
         * Value is considered to be a String, except that tokens
         * "null", "true" and "false" are recognized as matching
         * tokens and reported as such;
         * and values are trimmed (leading/trailing white space)
         */
        STRING_OR_LITERAL,

        /**
         * Value should be a number, but literals "null", "true" and "false"
         * are also understood, and an empty String is considered null.
         * Values are also trimmed (leading/trailing white space)
         * Other non-numeric Strings will cause parsing exception.
         */
        NUMBER,

        /**
         * Value is taken to be a number (if it matches valid JSON number
         * formatting rules), literal (null, true or false) or String,
         * depending on best match.
         * Values are also trimmed (leading/trailing white space)
         */
        NUMBER_OR_STRING
        ;
    }

    /**
     * Immutable definition of a single column: its position, its name and
     * its {@link ColumnType}. The "mutators" ({@link #withName},
     * {@link #withType}) return new instances and leave this one unchanged.
     */
    public static class Column
    {
        private final String _name;
        private final int _index;
        private final ColumnType _type;

        /**
         * Creates a column of type {@link ColumnType#STRING}.
         */
        public Column(int index, String name) {
            this(index, name, ColumnType.STRING);
        }

        public Column(int index, String name, ColumnType type)
        {
            _index = index;
            _name = name;
            _type = type;
        }

        /** Returns a copy of this column that differs only by name. */
        public Column withName(String newName) {
            return new Column(_index, newName, _type);
        }

        /** Returns a copy of this column that differs only by type. */
        public Column withType(ColumnType newType) {
            return new Column(_index, _name, newType);
        }

        public int getIndex() { return _index; }
        public String getName() { return _name; }
        public ColumnType getType() { return _type; }
    }

    /**
     * Class used for building {@link CsvSchema} instances.
     */
    public static class Builder
    {
        protected final ArrayList<Column> _columns = new ArrayList<Column>();

        protected boolean _useHeader = DEFAULT_USE_HEADER;

        protected boolean _skipFirstDataRow = DEFAULT_SKIP_FIRST_DATA_ROW;

        protected char _columnSeparator = DEFAULT_COLUMN_SEPARATOR;

        protected char _quoteChar = DEFAULT_QUOTE_CHAR;

        // note: need to use int to allow -1 for 'none'
        // (must default to DEFAULT_ESCAPE_CHAR, not the quote char: a freshly
        // built schema is documented as using no escape character)
        protected int _escapeChar = DEFAULT_ESCAPE_CHAR;

        protected char[] _lineSeparator = DEFAULT_LINEFEED;

        public Builder() { }

        /**
         * "Copy" constructor which creates builder that has settings of
         * given source schema
         */
        public Builder(CsvSchema src)
        {
            for (Column col : src._columns) {
                _columns.add(col);
            }
            _useHeader = src._useHeader;
            _columnSeparator = src._columnSeparator;
            _quoteChar = src._quoteChar;
            _escapeChar = src._escapeChar;
            _lineSeparator = src._lineSeparator;
            _skipFirstDataRow = src._skipFirstDataRow;
        }

        /**
         * Appends a {@link ColumnType#STRING} column with given name; its
         * index is the number of columns added so far.
         */
        public Builder addColumn(String name) {
            int index = _columns.size();
            return addColumn(new Column(index, name));
        }

        /**
         * Appends a column with given name and type; its index is the number
         * of columns added so far.
         */
        public Builder addColumn(String name, ColumnType type) {
            int index = _columns.size();
            return addColumn(new Column(index, name, type));
        }

        /**
         * Appends given column as-is; the index stored in the column is
         * not checked against its position in this builder.
         */
        public Builder addColumn(Column c) {
            _columns.add(c);
            return this;
        }

        /**
         * Replaces the column at given position.
         *
         * @throws IllegalArgumentException if index is out of range
         */
        public void replaceColumn(int index, Column c) {
            _checkIndex(index);
            _columns.set(index, c);
        }

        /**
         * Renames the column at given position.
         *
         * @throws IllegalArgumentException if index is out of range
         */
        public void renameColumn(int index, String newName) {
            _checkIndex(index);
            _columns.set(index, _columns.get(index).withName(newName));
        }

        /**
         * Changes type of the column at given position.
         *
         * @throws IllegalArgumentException if index is out of range
         */
        public void setColumnType(int index, ColumnType type) {
            _checkIndex(index);
            _columns.set(index, _columns.get(index).withType(type));
        }

        public Builder clearColumns() {
            _columns.clear();
            return this;
        }

        /** Number of columns added so far. */
        public int size() {
            return _columns.size();
        }

        public Iterator<Column> getColumns() {
            return _columns.iterator();
        }

        /**
         * Method for specifying whether Schema should indicate that
         * a header line (first row that contains column names) is to be
         * used for reading and writing or not.
         */
        public Builder setUseHeader(boolean b) {
            _useHeader = b;
            return this;
        }

        public Builder setSkipFirstDataRow(boolean b) {
            _skipFirstDataRow = b;
            return this;
        }

        /**
         * Method for specifying character used to separate column
         * values.
         * Default is comma (',').
         */
        public Builder setColumnSeparator(char c) {
            _columnSeparator = c;
            return this;
        }

        /**
         * Method for specifying character used for optional quoting
         * of values.
         * Default is double-quote ('"').
         */
        public Builder setQuoteChar(char c) {
            _quoteChar = c;
            return this;
        }

        /**
         * Method for specifying character used for optional escaping
         * of characters in quoted String values.
         * Default is "not used", meaning that no escaping used.
         */
        public Builder setEscapeChar(char c) {
            _escapeChar = (int) c;
            return this;
        }

        /**
         * Method for specifying that no escape character is to be used
         * with CSV documents this schema defines.
         */
        public Builder disableEscapeChar() {
            _escapeChar = -1;
            return this;
        }

        public Builder setLineSeparator(String lf) {
            _lineSeparator = lf.toCharArray();
            return this;
        }

        public Builder setLineSeparator(char lf) {
            _lineSeparator = new char[] { lf };
            return this;
        }

        /**
         * Creates a schema from the current settings; columns are copied
         * into a fresh array so later builder changes do not affect it.
         */
        public CsvSchema build()
        {
            Column[] cols = _columns.toArray(new Column[_columns.size()]);
            return new CsvSchema(cols,
                    _useHeader, _skipFirstDataRow,
                    _columnSeparator, _quoteChar, _escapeChar, _lineSeparator);
        }

        protected void _checkIndex(int index) {
            if (index < 0 || index >= _columns.size()) {
                throw new IllegalArgumentException("Illegal index "+index+"; only got "+_columns.size()+" columns");
            }
        }
    }

    /*
    /**********************************************************************
    /* Configuration, construction
    /**********************************************************************
     */

    /**
     * Column definitions, needed for optional header and/or mapping
     * of field names to column positions.
     */
    protected final Column[] _columns;

    /** Lookup of columns by name; empty when there are no columns. */
    protected final Map<String,Column> _columnsByName;

    protected final boolean _useHeader;

    protected final boolean _skipFirstDataRow;

    protected final char _columnSeparator;

    protected final char _quoteChar;

    /** Escape character, or -1 to indicate that none is used. */
    protected final int _escapeChar;

    protected final char[] _lineSeparator;

    /**
     * Main constructor; builds the by-name lookup from given columns.
     *
     * @param columns Column definitions; <code>null</code> is treated as "no columns"
     * @param escapeChar Escape character, or -1 for none
     */
    public CsvSchema(Column[] columns,
            boolean useHeader, boolean skipFirstDataRow,
            char columnSeparator, char quoteChar, int escapeChar,
            char[] lineSeparator)
    {
        if (columns == null) {
            columns = NO_COLUMNS;
        }
        _columns = columns;
        _useHeader = useHeader;
        _skipFirstDataRow = skipFirstDataRow;
        _columnSeparator = columnSeparator;
        _quoteChar = quoteChar;
        _escapeChar = escapeChar;
        _lineSeparator = lineSeparator;

        // and then we may need to create a mapping
        if (_columns.length == 0) {
            _columnsByName = Collections.emptyMap();
        } else {
            _columnsByName = new HashMap<String,Column>(4 + _columns.length);
            for (Column c : _columns) {
                _columnsByName.put(c.getName(), c);
            }
        }
    }

    /**
     * Copy constructor used for creating variants using
     * <code>withXxx()</code> methods. The given by-name map is stored
     * as-is (not rebuilt), so it must match <code>columns</code>.
     */
    protected CsvSchema(Column[] columns,
            boolean useHeader, boolean skipFirstDataRow,
            char columnSeparator, char quoteChar, int escapeChar,
            char[] lineSeparator,
            Map<String,Column> columnsByName)
    {
        _columns = columns;
        _useHeader = useHeader;
        _skipFirstDataRow = skipFirstDataRow;
        _columnSeparator = columnSeparator;
        _quoteChar = quoteChar;
        _escapeChar = escapeChar;
        _lineSeparator = lineSeparator;
        _columnsByName = columnsByName;
    }

    public static Builder builder() {
        return new Builder();
    }

    /**
     * Accessor for creating a "default" CSV schema instance, with following
     * settings:
     *
     * - Does NOT use header line
     *
     * - Uses double quotes ('"') for quoting of field values (if necessary)
     *
     * - Uses comma (',') as the field separator
     *
     * - Uses Unix linefeed ('\n') as row separator
     *
     * - Does NOT use any escape characters
     *
     * - Does NOT have any columns defined
     *
     */
    public static CsvSchema emptySchema() {
        return builder().build();
    }

    /**
     * Helper method for constructing Builder that can be used to create modified
     * schema.
     */
    public Builder rebuild() {
        return new Builder(this);
    }

    /**
     * Returns a schema with given header setting; returns this instance
     * if the setting is unchanged.
     */
    public CsvSchema withUseHeader(boolean state) {
        return (_useHeader == state) ? this
                : new CsvSchema(_columns, state, _skipFirstDataRow,
                        _columnSeparator, _quoteChar,
                        _escapeChar, _lineSeparator, _columnsByName);
    }

    /**
     * Helper method for constructing and returning schema instance that
     * is similar to this one, except that it will be using header line.
     */
    public CsvSchema withHeader() {
        return withUseHeader(true);
    }

    /**
     * Helper method for constructing and returning schema instance that
     * is similar to this one, except that it will not be using header line.
     */
    public CsvSchema withoutHeader() {
        return withUseHeader(false);
    }

    /**
     * Returns a schema with given "skip first data row" setting; returns
     * this instance if the setting is unchanged.
     */
    public CsvSchema withSkipFirstDataRow(boolean state) {
        return (_skipFirstDataRow == state) ? this
                : new CsvSchema(_columns, _useHeader, state,
                        _columnSeparator, _quoteChar,
                        _escapeChar, _lineSeparator, _columnsByName);
    }

    /**
     * Returns a schema with given column separator; returns this instance
     * if the separator is unchanged.
     */
    public CsvSchema withColumnSeparator(char sep) {
        return (_columnSeparator == sep) ? this :
            new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                    sep, _quoteChar, _escapeChar, _lineSeparator, _columnsByName);
    }

    /**
     * Returns a schema with given quote character; returns this instance
     * if the character is unchanged.
     */
    public CsvSchema withQuoteChar(char c) {
        return (_quoteChar == c) ? this :
            new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                    _columnSeparator, c, _escapeChar, _lineSeparator, _columnsByName);
    }

    /**
     * Returns a schema with given escape character; returns this instance
     * if the character is unchanged.
     */
    public CsvSchema withEscapeChar(char c) {
        return (_escapeChar == c) ? this
                : new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                        _columnSeparator, _quoteChar, c, _lineSeparator, _columnsByName);
    }

    /**
     * Returns a schema that uses no escape character; returns this instance
     * if none is in use already.
     */
    public CsvSchema withoutEscapeChar() {
        return (_escapeChar == -1) ? this
                : new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                        _columnSeparator, _quoteChar, -1, _lineSeparator, _columnsByName);
    }

    /**
     * Returns a new schema with given line separator (always a new
     * instance; no check is made against the current separator).
     */
    public CsvSchema withLineSeparator(String sep) {
        return new CsvSchema(_columns, _useHeader, _skipFirstDataRow,
                _columnSeparator, _quoteChar, _escapeChar, sep.toCharArray(), _columnsByName);
    }

    /**
     * Returns a new schema with the same settings but no columns.
     */
    public CsvSchema withoutColumns() {
        // by-name lookup must be emptied along with the column array, otherwise
        // column(String) would keep resolving columns that no longer exist
        return new CsvSchema(NO_COLUMNS, _useHeader, _skipFirstDataRow,
                _columnSeparator, _quoteChar, _escapeChar, _lineSeparator,
                Collections.<String,Column>emptyMap());
    }

    /*
    /**********************************************************************
    /* Public API, FormatSchema
    /**********************************************************************
     */

    @Override
    public String getSchemaType() {
        return "CSV";
    }

    /*
    /**********************************************************************
    /* Public API, extended, properties
    /**********************************************************************
     */

    public boolean useHeader() { return _useHeader; }
    public boolean skipFirstDataRow() { return _skipFirstDataRow; }
    public char getColumnSeparator() { return _columnSeparator; }
    public char getQuoteChar() { return _quoteChar; }
    /** Returns the escape character, or -1 if none is used. */
    public int getEscapeChar() { return _escapeChar; }
    /** Returns the internal separator array itself (not a copy). */
    public char[] getLineSeparator() { return _lineSeparator; }

    /*
    /**********************************************************************
    /* Public API, extended; column access
    /**********************************************************************
     */

    @Override
    public Iterator<Column> iterator() {
        return Arrays.asList(_columns).iterator();
    }

    /** Number of columns defined by this schema. */
    public int size() { return _columns.length; }

    /**
     * Returns the column at given position; no explicit range check is
     * done, so an invalid index fails with the array access itself.
     */
    public Column column(int index) {
        return _columns[index];
    }

    /**
     * Returns the column with given name, or <code>null</code> if the
     * by-name lookup has no entry for it.
     */
    public Column column(String name) {
        return _columnsByName.get(name);
    }

    /**
     * Method for getting description of column definitions in
     * developer-readable form: quoted column names, comma-separated,
     * enclosed in brackets (<code>[]</code> if there are no columns).
     */
    public String getColumnDesc()
    {
        StringBuilder sb = new StringBuilder(100);
        // opening bracket is written unconditionally so that an empty
        // schema yields a balanced "[]"
        sb.append('[');
        boolean first = true;
        for (Column col : _columns) {
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append('"');
            sb.append(col.getName());
            sb.append('"');
        }
        sb.append(']');
        return sb.toString();
    }

    /*
    /**********************************************************************
    /* Other
    /**********************************************************************
     */

    /**
     * Developer-readable description listing each column as
     * <code>"name"/TYPE</code>.
     */
    @Override
    public String toString()
    {
        StringBuilder sb = new StringBuilder(150);
        sb.append("[CsvSchema: ")
            .append("columns=");
        // opening bracket is written unconditionally so that brackets stay
        // balanced when there are no columns
        sb.append('[');
        boolean first = true;
        for (Column col : _columns) {
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append('"');
            sb.append(col.getName());
            sb.append("\"/");
            sb.append(col.getType());
        }
        sb.append(']');
        sb.append(']');
        return sb.toString();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy