All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.univocity.parsers.common.Format Maven / Gradle / Ivy

Go to download

univocity's open source parsers for processing different text formats using a consistent API

There is a newer version: 2.9.1
Show newest version
/*******************************************************************************
 * Copyright 2014 uniVocity Software Pty Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package com.univocity.parsers.common;

import java.util.Map.Entry;
import java.util.*;

/**
 * This is the parent class for all configuration classes that define a text format.
 *
 * <p>By default, all parsers and writers have to handle, at least, the following format definitions:
 *
 * <ul>
 *  <li><b>lineSeparator:</b> the 1-2 character sequence that indicates the end of a line.
 *      Newline sequences are different across operating systems. Typically:
 *      <ul>
 *          <li>Windows uses carriage return and line feed: <i>\r\n</i></li>
 *          <li>Linux/Unix uses line feed only: <i>\n</i></li>
 *          <li>MacOS uses carriage return only: <i>\r</i></li>
 *      </ul>
 *      <i>{@link Format#lineSeparator} defaults to the system line separator</i>
 *  </li>
 *  <li><b>normalizedNewline:</b> a single character used to represent the end of a line uniformly
 *      in any parsed content. It has the following implications:
 *      <ul>
 *          <li>When <i>reading</i> a text-based input, the sequence of characters defined in
 *              {@link Format#lineSeparator} will be replaced by this character.</li>
 *          <li>When <i>writing</i> to a text-based output, this character will be replaced by the
 *              sequence of characters defined in {@link Format#lineSeparator}.</li>
 *      </ul>
 *      <p><i>{@link Format#normalizedNewline} defaults to '\n'.</i>
 *  </li>
 *  <li><b>comment:</b> a character that, if found in the beginning of a line of text, represents
 *      a comment in any text-based input supported by uniVocity-parsers.
 *      <p><i>{@link Format#comment} defaults to '#'.</i>
 *  </li>
 * </ul>
 *
 * @see com.univocity.parsers.csv.CsvFormat
 * @see com.univocity.parsers.fixed.FixedWidthFormat
 *
 * @author uniVocity Software Pty Ltd
 */
public abstract class Format {

	private static final String systemLineSeparatorString;
	private static final char[] systemLineSeparator;

	static {
		// Fall back to "\n" if the JVM does not expose the "line.separator" property.
		String lineSeparator = System.getProperty("line.separator");
		systemLineSeparatorString = lineSeparator == null ? "\n" : lineSeparator;
		systemLineSeparator = systemLineSeparatorString.toCharArray();
	}

	private String lineSeparatorString;
	private char[] lineSeparator;
	private char normalizedNewline = '\n';
	private char comment = '#';

	/**
	 * Initializes the format with the system's line separator sequence.
	 */
	protected Format() {
		// Each instance gets its own copy so the shared static array is never exposed.
		this.lineSeparator = systemLineSeparator.clone();
		this.lineSeparatorString = systemLineSeparatorString;
	}

	/**
	 * Returns the current line separator character sequence, which can contain 1 to 2 characters.
	 * Defaults to the system's line separator sequence (usually '\r\n' in Windows, '\r' in MacOS,
	 * and '\n' in Linux/Unix).
	 *
	 * @return a copy of the sequence of 1 to 2 characters that identifies the end of a line
	 */
	public char[] getLineSeparator() {
		return lineSeparator.clone();
	}

	/**
	 * Returns the current line separator sequence as a String of 1 to 2 characters.
	 * Defaults to the system's line separator sequence (usually "\r\n" in Windows, "\r" in MacOS,
	 * and "\n" in Linux/Unix).
	 *
	 * @return the sequence of 1 to 2 characters that identifies the end of a line
	 */
	public String getLineSeparatorString() {
		return lineSeparatorString;
	}

	/**
	 * Defines the line separator sequence that should be used for parsing and writing.
	 *
	 * @param lineSeparator a sequence of 1 to 2 characters that identifies the end of a line
	 * @throws IllegalArgumentException if the given String is {@code null}, empty, or longer than 2 characters
	 */
	public void setLineSeparator(String lineSeparator) {
		if (lineSeparator == null || lineSeparator.isEmpty()) {
			throw new IllegalArgumentException("Line separator cannot be empty");
		}
		setLineSeparator(lineSeparator.toCharArray());
	}

	/**
	 * Defines the line separator sequence that should be used for parsing and writing.
	 * The given array is copied; later changes to it do not affect this format.
	 *
	 * @param lineSeparator a sequence of 1 to 2 characters that identifies the end of a line
	 * @throws IllegalArgumentException if the given array is {@code null}, empty, or longer than 2 characters
	 */
	public void setLineSeparator(char[] lineSeparator) {
		if (lineSeparator == null || lineSeparator.length == 0) {
			throw new IllegalArgumentException("Invalid line separator. Expected 1 to 2 characters");
		}
		if (lineSeparator.length > 2) {
			throw new IllegalArgumentException("Invalid line separator. Up to 2 characters are expected. Got " + lineSeparator.length + " characters.");
		}
		// Defensive copy: keeps the array and the cached String consistent even if the
		// caller modifies its own array afterwards (mirrors the copy made in getLineSeparator()).
		this.lineSeparator = lineSeparator.clone();
		this.lineSeparatorString = new String(this.lineSeparator);
	}

	/**
	 * Returns the normalized newline character, which is automatically replaced by
	 * {@link Format#lineSeparator} when reading/writing. Defaults to '\n'.
	 *
	 * @return the normalized newline character
	 */
	public char getNormalizedNewline() {
		return normalizedNewline;
	}

	/**
	 * Sets the normalized newline character, which is automatically replaced by
	 * {@link Format#lineSeparator} when reading/writing.
	 *
	 * @param normalizedNewline a single character used to represent a line separator.
	 */
	public void setNormalizedNewline(char normalizedNewline) {
		this.normalizedNewline = normalizedNewline;
	}

	/**
	 * Compares the given character against the {@link Format#normalizedNewline} character.
	 *
	 * @param ch the character to be verified
	 * @return true if the given character is the normalized newline character, false otherwise
	 */
	public boolean isNewLine(char ch) {
		return this.normalizedNewline == ch;
	}

	/**
	 * Returns the character that represents a line comment. Defaults to '#'.
	 * <p>Set it to '\0' to disable comment skipping.
	 *
	 * @return the comment character
	 */
	public char getComment() {
		return comment;
	}

	/**
	 * Defines the character that represents a line comment when found in the beginning of a line
	 * of text. Defaults to '#'.
	 * <p>Use '\0' to disable comment skipping.
	 *
	 * @param comment the comment character
	 */
	public void setComment(char comment) {
		this.comment = comment;
	}

	/**
	 * Identifies whether or not a given character represents a comment.
	 *
	 * @param ch the character to be verified
	 * @return true if the given character is the comment character, false otherwise
	 */
	public boolean isComment(char ch) {
		return this.comment == ch;
	}

	/**
	 * Produces a printable representation of a configuration value for {@link #toString()}.
	 * The characters '\n', '\r', '\t' and '\0' are rendered as their escape sequences (also inside
	 * Strings), and values that are blank after trimming are wrapped in single quotes.
	 *
	 * @param value the configuration value to format
	 * @return the printable representation of the given value
	 */
	private String getFormattedValue(Object value) {
		if (value instanceof Character) {
			char ch = (Character) value;
			switch (ch) {
				case '\n':
					return "\\n";
				case '\r':
					return "\\r";
				case '\t':
					return "\\t";
				case '\0':
					return "\\0";
				default:
					return value.toString();
			}
		}
		if (value instanceof String) {
			// Escape each character individually so control characters become visible.
			String s = (String) value;
			StringBuilder tmp = new StringBuilder();
			for (int i = 0; i < s.length(); i++) {
				tmp.append(getFormattedValue(s.charAt(i)));
			}
			value = tmp.toString();
		}
		String text = String.valueOf(value);
		if (text.trim().isEmpty()) {
			// Quote blank values so that they remain visible in the output.
			return "'" + text + "'";
		}
		return text;
	}

	/**
	 * Describes this format: the simple class name followed by one {@code key=value} line per
	 * configuration entry, sorted by key. The entries provided by {@link #getConfiguration()}
	 * are combined with the comment character and line separator settings of this class.
	 *
	 * @return a textual description of the format configuration
	 */
	@Override
	public final String toString() {
		StringBuilder out = new StringBuilder();
		out.append(getClass().getSimpleName()).append(':');

		// The map returned by the subclass is extended in place with the common settings.
		TreeMap<String, Object> config = getConfiguration();
		config.put("Comment character", comment);
		config.put("Line separator sequence", lineSeparatorString);
		config.put("Line separator (normalized)", normalizedNewline);

		for (Entry<String, Object> e : config.entrySet()) {
			out.append("\n\t\t");
			out.append(e.getKey()).append('=').append(getFormattedValue(e.getValue()));
		}
		return out.toString();
	}

	/**
	 * Returns the format-specific settings to be displayed by {@link #toString()}, keyed by a
	 * descriptive label. The returned map must be non-null and modifiable, as
	 * {@link #toString()} adds the settings defined in this class to it.
	 *
	 * @return a map of setting descriptions to their current values
	 */
	protected abstract TreeMap<String, Object> getConfiguration();
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy