// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.apache.metamodel.csv.CsvConfiguration (Maven / Gradle / Ivy)

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.metamodel.csv;

import java.io.Serializable;
import java.util.List;

import org.apache.metamodel.schema.naming.ColumnNamingStrategies;
import org.apache.metamodel.schema.naming.ColumnNamingStrategy;
import org.apache.metamodel.util.BaseObject;
import org.apache.metamodel.util.FileHelper;

/**
 * Represents the configuration for reading/parsing CSV files.
 *
 * Instances are immutable: every setting is supplied through one of the
 * constructors and stored in a final field. The shorter constructors delegate
 * to the longer ones, filling in the defaults declared as constants on this
 * class.
 */
public final class CsvConfiguration extends BaseObject implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * The value is '\\uFFFF', the "not a character" value which should not
     * occur in any valid Unicode string. This special char can be used to
     * disable either quote chars or escape chars.
     */
    public static final char NOT_A_CHAR = '\uFFFF';

    /**
     * Column name line number that, going by its name, signals that the file
     * has no line holding column names.
     */
    public static final int NO_COLUMN_NAME_LINE = 0;

    /** Column name line number used when none is specified (the first line). */
    public static final int DEFAULT_COLUMN_NAME_LINE = 1;

    /** Separator char used when none is specified. */
    public static final char DEFAULT_SEPARATOR_CHAR = ',';

    /** Quote char used when none is specified. */
    public static final char DEFAULT_QUOTE_CHAR = '"';

    /** Escape char used when none is specified. */
    public static final char DEFAULT_ESCAPE_CHAR = '\\';

    private final int columnNameLineNumber;
    private final String encoding;
    private final char separatorChar;
    private final char quoteChar;
    private final char escapeChar;
    private final boolean failOnInconsistentRowLength;
    private final boolean multilineValues;
    // May be null; getColumnNamingStrategy() substitutes the default strategy.
    private final ColumnNamingStrategy columnNamingStrategy;

    /**
     * Creates a configuration that uses the default value for every setting.
     */
    public CsvConfiguration() {
        this(DEFAULT_COLUMN_NAME_LINE);
    }

    /**
     * Creates a configuration with the default encoding, separator, quote and
     * escape chars.
     *
     * @param columnNameLineNumber
     *            the line number (1 based) from which to get the names of the
     *            columns
     */
    public CsvConfiguration(int columnNameLineNumber) {
        this(columnNameLineNumber, FileHelper.DEFAULT_ENCODING, DEFAULT_SEPARATOR_CHAR, DEFAULT_QUOTE_CHAR,
                DEFAULT_ESCAPE_CHAR);
    }

    /**
     * Creates a configuration with the default encoding, separator, quote and
     * escape chars, but with explicit row handling flags.
     *
     * @param columnNameLineNumber
     *            the line number (1 based) from which to get the names of the
     *            columns
     * @param failOnInconsistentRowLength
     *            whether to fail on lines with an inconsistent amount of
     *            columns
     * @param multilineValues
     *            whether values are allowed to span multiple lines
     */
    public CsvConfiguration(int columnNameLineNumber, boolean failOnInconsistentRowLength, boolean multilineValues) {
        this(columnNameLineNumber, FileHelper.DEFAULT_ENCODING, DEFAULT_SEPARATOR_CHAR, DEFAULT_QUOTE_CHAR,
                DEFAULT_ESCAPE_CHAR, failOnInconsistentRowLength, multilineValues);
    }

    /**
     * Creates a configuration that does not fail on inconsistent row lengths
     * and that allows multiline values.
     *
     * @param columnNameLineNumber
     *            the line number (1 based) from which to get the names of the
     *            columns
     * @param encoding
     *            the text encoding of the file
     * @param separatorChar
     *            the char separating values
     * @param quoteChar
     *            the char encapsulating values
     * @param escapeChar
     *            the char used for escaping inside values
     */
    public CsvConfiguration(int columnNameLineNumber, String encoding, char separatorChar, char quoteChar,
            char escapeChar) {
        this(columnNameLineNumber, encoding, separatorChar, quoteChar, escapeChar, false);
    }

    /**
     * Creates a configuration that allows multiline values.
     *
     * @param columnNameLineNumber
     *            the line number (1 based) from which to get the names of the
     *            columns
     * @param encoding
     *            the text encoding of the file
     * @param separatorChar
     *            the char separating values
     * @param quoteChar
     *            the char encapsulating values
     * @param escapeChar
     *            the char used for escaping inside values
     * @param failOnInconsistentRowLength
     *            whether to fail on lines with an inconsistent amount of
     *            columns
     */
    public CsvConfiguration(int columnNameLineNumber, String encoding, char separatorChar, char quoteChar,
            char escapeChar, boolean failOnInconsistentRowLength) {
        this(columnNameLineNumber, encoding, separatorChar, quoteChar, escapeChar, failOnInconsistentRowLength, true);
    }

    /**
     * Creates a configuration without an explicit column naming strategy;
     * {@link #getColumnNamingStrategy()} will then return the default
     * strategy.
     *
     * @param columnNameLineNumber
     *            the line number (1 based) from which to get the names of the
     *            columns
     * @param encoding
     *            the text encoding of the file
     * @param separatorChar
     *            the char separating values
     * @param quoteChar
     *            the char encapsulating values
     * @param escapeChar
     *            the char used for escaping inside values
     * @param failOnInconsistentRowLength
     *            whether to fail on lines with an inconsistent amount of
     *            columns
     * @param multilineValues
     *            whether values are allowed to span multiple lines
     */
    public CsvConfiguration(int columnNameLineNumber, String encoding, char separatorChar, char quoteChar,
            char escapeChar, boolean failOnInconsistentRowLength, boolean multilineValues) {
        this(columnNameLineNumber, null, encoding, separatorChar, quoteChar, escapeChar, failOnInconsistentRowLength,
                multilineValues);
    }

    /**
     * Creates a configuration with every setting given explicitly. All other
     * constructors end up delegating to this one.
     *
     * @param columnNameLineNumber
     *            the line number (1 based) from which to get the names of the
     *            columns
     * @param columnNamingStrategy
     *            the strategy for naming columns, or null to use the default
     *            strategy
     * @param encoding
     *            the text encoding of the file
     * @param separatorChar
     *            the char separating values
     * @param quoteChar
     *            the char encapsulating values
     * @param escapeChar
     *            the char used for escaping inside values
     * @param failOnInconsistentRowLength
     *            whether to fail on lines with an inconsistent amount of
     *            columns
     * @param multilineValues
     *            whether values are allowed to span multiple lines
     */
    public CsvConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy, String encoding,
            char separatorChar, char quoteChar, char escapeChar, boolean failOnInconsistentRowLength,
            boolean multilineValues) {
        this.columnNameLineNumber = columnNameLineNumber;
        this.encoding = encoding;
        this.separatorChar = separatorChar;
        this.quoteChar = quoteChar;
        this.escapeChar = escapeChar;
        this.failOnInconsistentRowLength = failOnInconsistentRowLength;
        this.multilineValues = multilineValues;
        this.columnNamingStrategy = columnNamingStrategy;
    }

    /**
     * Gets a {@link ColumnNamingStrategy} to use if needed.
     *
     * @return the strategy given at construction time, or
     *         {@link ColumnNamingStrategies#defaultStrategy()} if none was
     *         given
     */
    public ColumnNamingStrategy getColumnNamingStrategy() {
        if (columnNamingStrategy == null) {
            return ColumnNamingStrategies.defaultStrategy();
        }
        return columnNamingStrategy;
    }

    /**
     * Determines whether to fail (by throwing an
     * {@link InconsistentRowLengthException}) if a line in the CSV file has
     * inconsistent amounts of columns.
     * 
     * If set to false (default) MetaModel will gracefully fill in missing null
     * values in or ignore additional values in a line.
     * 
     * @return a boolean indicating whether to fail or gracefully compensate for
     *         inconsistent lines in the CSV files.
     */
    public boolean isFailOnInconsistentRowLength() {
        return failOnInconsistentRowLength;
    }

    /**
     * Determines whether the CSV files read using this configuration should be
     * allowed to have multiline values in them.
     * 
     * @return true if multiline values are allowed (the default of the
     *         constructors that do not take this flag), false otherwise
     */
    public boolean isMultilineValues() {
        return multilineValues;
    }

    /**
     * The line number (1 based) from which to get the names of the columns.
     * 
     * @return the line number (1 based)
     */
    public int getColumnNameLineNumber() {
        return columnNameLineNumber;
    }

    /**
     * Gets the file encoding to use for reading the file.
     * 
     * @return the text encoding of the file.
     */
    public String getEncoding() {
        return encoding;
    }

    /**
     * Gets the separator char (typically comma or semicolon) for separating
     * values.
     * 
     * @return the separator char
     */
    public char getSeparatorChar() {
        return separatorChar;
    }

    /**
     * Gets the quote char, used for encapsulating values.
     * 
     * @return the quote char
     */
    public char getQuoteChar() {
        return quoteChar;
    }

    /**
     * Gets the escape char, used for escaping eg. quote chars inside values.
     * 
     * @return the escape char
     */
    public char getEscapeChar() {
        return escapeChar;
    }

    /**
     * Contributes the settings that make up the identity of this
     * configuration.
     *
     * @param identifiers
     *            the list to which the identifying values are appended
     */
    @Override
    protected void decorateIdentity(List<Object> identifiers) {
        identifiers.add(columnNameLineNumber);
        identifiers.add(encoding);
        identifiers.add(separatorChar);
        identifiers.add(quoteChar);
        identifiers.add(escapeChar);
        identifiers.add(failOnInconsistentRowLength);
        // Configurations that differ only in multiline handling parse files
        // differently, so the flag has to be part of the identity.
        identifiers.add(multilineValues);
        // NOTE(review): columnNamingStrategy is left out of the identity, as
        // before. It is not visible from this class whether strategy
        // implementations define value-based equality - confirm before adding.
    }

    @Override
    public String toString() {
        return "CsvConfiguration[columnNameLineNumber=" + columnNameLineNumber + ", encoding=" + encoding
                + ", separatorChar=" + separatorChar + ", quoteChar=" + quoteChar + ", escapeChar=" + escapeChar
                + ", failOnInconsistentRowLength=" + failOnInconsistentRowLength + ", multilineValues="
                + multilineValues + "]";
    }
}