
com.univocity.api.entity.text.TextFormat Maven / Gradle / Ivy
Show all versions of univocity-api Show documentation
/*******************************************************************************
* Copyright (c) 2014 uniVocity Software Pty Ltd. All rights reserved.
* This file is subject to the terms and conditions defined in file
* 'LICENSE.txt', which is part of this source code package.
******************************************************************************/
package com.univocity.api.entity.text;
import java.util.*;
import com.univocity.api.common.*;
import com.univocity.api.entity.*;
/**
* This is the parent class for all configuration classes that define a file format in uniVocity.
*
* By default, all plain-text based entities in uniVocity require the following format definitions:
*
*
* - lineSeparator: the 1-2 character sequence that indicates the end of a line. Newline sequences are different across operating systems. Typically:
*
* - Windows uses carriage return and line feed: \r\n
* - Linux/Unix uses line feed only: \n
* - MacOS uses carriage return only: \r
*
* {@link #lineSeparator} defaults to the system line separator.
*
* - normalizedNewLine: a single character used to represent the end of a line uniformly in any parsed content. It has the following implications:
*
* - When reading a text-based input, the sequence of characters defined in {@link #lineSeparator} will be replaced by this character.
* - When writing to a text-based output, this character will be replaced by the sequence of characters defined in {@link #lineSeparator}.
*
* {@link #normalizedNewline} defaults to '\n'.
*
* - comment: a character that, if found in the beginning of a line of text, represents comment in any text-based input supported by uniVocity.
*
{@link #comment} defaults to '#'.
*
*
* @see com.univocity.api.entity.text.csv.CsvFormat
* @see com.univocity.api.entity.text.fixed.FixedWidthFormat
* @see com.univocity.api.entity.text.TextEntityConfiguration
*
* @author uniVocity Software Pty Ltd - [email protected]
*
*/
public class TextFormat extends Configuration {
private static final char[] systemLineSeparator;
static {
String systemLineSeparatorString = System.getProperty("line.separator");
if (systemLineSeparatorString == null) {
systemLineSeparatorString = "\n";
}
systemLineSeparator = systemLineSeparatorString.toCharArray();
}
private char[] lineSeparator;
private Character normalizedNewline;
private Character comment;
protected TextFormat() {
}
/**
* Returns the current line separator character sequence, which can contain 1 to 2 characters. Defaults to the system's line separator sequence (usually '\r\n' in Windows, '\r' in MacOS, and '\n' in Linux/Unix).
* @return the sequence of 1 to 2 characters that identifies the end of a line
*/
public final char[] getLineSeparator() {
if (lineSeparator == null) {
return systemLineSeparator.clone();
}
return lineSeparator.clone();
}
/**
* Returns the current line separator sequence as a String of 1 to 2 characters. Defaults to the system's line separator sequence (usually "\r\n" in Windows, "\r" in MacOS, and "\n" in Linux/Unix).
* @return the sequence of 1 to 2 characters that identifies the end of a line
*/
public final String getLineSeparatorString() {
return new String(getLineSeparator());
}
/**
* Identifies whether a given character sequence matches the {@link #lineSeparator} sequence.
* @param string the character sequence to be matched
* @return {@code true} if the given character sequence matches the {@link #lineSeparator}, otherwise {@code false}
*/
public final boolean isLineSeparator(String string) {
return getLineSeparatorString().equals(string);
}
/**
* Identifies whether a given character sequence matches the {@link #lineSeparator} sequence.
* @param chars the character sequence to be matched
* @return {@code true} if the given character sequence matches the {@link #lineSeparator}, otherwise {@code false}
*/
public final boolean isLineSeparator(char[] chars) {
if (chars == null) {
return false;
}
return Arrays.equals(getLineSeparator(), chars);
}
/**
* Defines the line separator sequence that should be used for parsing and writing.
* @param lineSeparator a sequence of 1 to 2 characters that identifies the end of a line
*/
public final void setLineSeparator(String lineSeparator) {
Args.notEmpty(this.lineSeparator, "Line separator");
setLineSeparator(lineSeparator.toCharArray());
}
/**
* Defines the line separator sequence that should be used for parsing and writing.
* @param lineSeparator a sequence of 1 to 2 characters that identifies the end of a line
*/
public final void setLineSeparator(char[] lineSeparator) {
Args.notEmpty(this.lineSeparator, "Line separator");
if (lineSeparator.length > 2) {
throw new IllegalArgumentException("Invalid line separator. Up to 2 characters are expected. Got " + lineSeparator.length + " characters.");
}
this.lineSeparator = lineSeparator;
}
/**
* Returns the normalized newline character, which is automatically replaced by {@link #lineSeparator} when reading/writing. Defaults to '\n'.
* @return the normalized newline character
*/
public final char getNormalizedNewline() {
if (normalizedNewline == null) {
return '\n';
}
return normalizedNewline;
}
/**
* Sets the normalized newline character, which is automatically replaced by {@link #lineSeparator} when reading/writing
* @param normalizedNewline a single character used to represent a line separator.
*/
public final void setNormalizedNewline(char normalizedNewline) {
this.normalizedNewline = normalizedNewline;
}
/**
* Compares the given character against the {@link #normalizedNewline} character.
* @param ch the character to be verified
* @return {@code true} if the given character is the normalized newline character, otherwise {@code false}
*/
public final boolean isNormalizedNewLine(char ch) {
return this.getNormalizedNewline() == ch;
}
/**
* Returns the character that represents a line comment. Defaults to '#'.
* Set it to '\0' to disable comment skipping.
* @return the comment character
*/
public final char getComment() {
if (comment == null) {
return '#';
}
return comment;
}
/**
* Defines the character that represents a line comment when found in the beginning of a line of text. Defaults to '#'
*
Use '\0' to disable comment skipping.
* @param comment the comment character
*/
public void setComment(char comment) {
this.comment = comment;
}
/**
* Identifies whether a given character represents a comment.
* @param ch the character to be verified
* @return {@code true} if the given character is the comment character, otherwise {@code false}
*/
public boolean isComment(char ch) {
return this.getComment() == ch;
}
/**
* {@inheritDoc}
*/
@Override
protected void copyDefaultsFrom(Configuration defaultConfig) {
TextFormat defaults = (TextFormat) defaultConfig;
if (comment == null) {
comment = defaults.getComment();
}
if (lineSeparator == null) {
lineSeparator = defaults.getLineSeparator();
}
if (normalizedNewline == null) {
normalizedNewline = defaults.getNormalizedNewline();
}
}
}