com.univocity.api.entity.text.TextFormat Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of univocity-api Show documentation
uniVocity Data Integration's Public API
There is a newer version: 1.0.6
/*******************************************************************************
 * Copyright (c) 2014 uniVocity Software Pty Ltd. All rights reserved.
 * This file is subject to the terms and conditions defined in file
 * 'LICENSE.txt', which is part of this source code package.
 ******************************************************************************/
package com.univocity.api.entity.text;

import java.util.*;

import com.univocity.api.common.*;
import com.univocity.api.entity.*;

/**
 * This is the parent class for all configuration classes that define a file format in uniVocity.
 *
 * <p>By default, all plain-text based entities in uniVocity require the following format definitions:</p>
 *
 * <ul>
 *  <li><b>lineSeparator:</b> the 1-2 character sequence that indicates the end of a line.
 *   Newline sequences are different across operating systems. Typically:
 *   <ul>
 *    <li>Windows uses carriage return and line feed: {@code \r\n}</li>
 *    <li>Linux/Unix uses line feed only: {@code \n}</li>
 *    <li>MacOS uses carriage return only: {@code \r}</li>
 *   </ul>
 *   {@link #getLineSeparator()} defaults to the system line separator.
 *  </li>
 *  <li><b>normalizedNewline:</b> a single character used to represent the end of a line uniformly in any parsed content.
 *   It has the following implications:
 *   <ul>
 *    <li>When reading a text-based input, the sequence of characters defined in {@link #getLineSeparator()} will be replaced by this character.</li>
 *    <li>When writing to a text-based output, this character will be replaced by the sequence of characters defined in {@link #getLineSeparator()}.</li>
 *   </ul>
 *   {@link #getNormalizedNewline()} defaults to {@code '\n'}.
 *  </li>
 *  <li><b>comment:</b> a character that, if found in the beginning of a line of text, represents a comment in any text-based input supported by uniVocity.
 *   {@link #getComment()} defaults to {@code '#'}.
 *  </li>
 * </ul>
 *
 * <p>Each setting is stored as a nullable field: {@code null} means "not explicitly configured", in which case
 * the getters fall back to the built-in default and {@link #copyDefaultsFrom(Configuration)} may fill the value in.</p>
 *
 * @see com.univocity.api.entity.text.csv.CsvFormat
 * @see com.univocity.api.entity.text.fixed.FixedWidthFormat
 * @see com.univocity.api.entity.text.TextEntityConfiguration
 *
 * @author uniVocity Software Pty Ltd
 *
 */
public class TextFormat extends Configuration {

	/** Line separator of the running platform, resolved once; falls back to "\n" when the system property is absent. */
	private static final char[] systemLineSeparator;

	static {
		String systemLineSeparatorString = System.getProperty("line.separator");
		if (systemLineSeparatorString == null) {
			systemLineSeparatorString = "\n";
		}
		systemLineSeparator = systemLineSeparatorString.toCharArray();
	}

	// null in any of these fields means "not explicitly set": getters then return the built-in default.
	private char[] lineSeparator;
	private Character normalizedNewline;
	private Character comment;

	/**
	 * Creates a format with no explicit settings; every getter returns its default until a setter is called.
	 */
	protected TextFormat() {

	}

	/**
	 * Returns the current line separator character sequence, which can contain 1 to 2 characters. Defaults to the system's line separator sequence (usually '\r\n' in Windows, '\r' in MacOS, and '\n' in Linux/Unix).
	 * The returned array is a copy; modifying it does not affect this configuration.
	 * @return the sequence of 1 to 2 characters that identifies the end of a line
	 */
	public final char[] getLineSeparator() {
		if (lineSeparator == null) {
			return systemLineSeparator.clone();
		}
		return lineSeparator.clone();
	}

	/**
	 * Returns the current line separator sequence as a String of 1 to 2 characters. Defaults to the system's line separator sequence (usually "\r\n" in Windows, "\r" in MacOS, and "\n" in Linux/Unix).
	 * @return the sequence of 1 to 2 characters that identifies the end of a line
	 */
	public final String getLineSeparatorString() {
		return new String(getLineSeparator());
	}

	/**
	 * Identifies whether a given character sequence matches the line separator sequence returned by {@link #getLineSeparator()}.
	 * @param string the character sequence to be matched; {@code null} never matches
	 * @return {@code true} if the given character sequence matches the line separator, otherwise {@code false}
	 */
	public final boolean isLineSeparator(String string) {
		return getLineSeparatorString().equals(string);
	}

	/**
	 * Identifies whether a given character sequence matches the line separator sequence returned by {@link #getLineSeparator()}.
	 * @param chars the character sequence to be matched; {@code null} never matches
	 * @return {@code true} if the given character sequence matches the line separator, otherwise {@code false}
	 */
	public final boolean isLineSeparator(char[] chars) {
		if (chars == null) {
			return false;
		}
		return Arrays.equals(getLineSeparator(), chars);
	}

	/**
	 * Defines the line separator sequence that should be used for parsing and writing.
	 * Validation is delegated to {@link #setLineSeparator(char[])}.
	 * @param lineSeparator a sequence of 1 to 2 characters that identifies the end of a line
	 * @throws IllegalArgumentException if the given sequence has more than 2 characters
	 */
	public final void setLineSeparator(String lineSeparator) {
		// The conditional expression has type char[], so this resolves to the char[] overload
		// and a null String is handed to the same validation as a null array.
		setLineSeparator(lineSeparator == null ? null : lineSeparator.toCharArray());
	}

	/**
	 * Defines the line separator sequence that should be used for parsing and writing.
	 * The given array is copied, so later changes to it do not affect this configuration.
	 * @param lineSeparator a sequence of 1 to 2 characters that identifies the end of a line
	 * @throws IllegalArgumentException if the given sequence has more than 2 characters
	 */
	public final void setLineSeparator(char[] lineSeparator) {
		// Validate the argument being assigned, not the current value of the field.
		// NOTE(review): Args.notEmpty is expected to reject null/empty input - confirm against Args.
		Args.notEmpty(lineSeparator, "Line separator");
		if (lineSeparator.length > 2) {
			throw new IllegalArgumentException("Invalid line separator. Up to 2 characters are expected. Got " + lineSeparator.length + " characters.");
		}
		// Defensive copy, mirroring the copies handed out by getLineSeparator().
		this.lineSeparator = lineSeparator.clone();
	}

	/**
	 * Returns the normalized newline character, which is automatically replaced by the line separator sequence when reading/writing. Defaults to '\n'.
	 * @return the normalized newline character
	 */
	public final char getNormalizedNewline() {
		if (normalizedNewline == null) {
			return '\n';
		}
		return normalizedNewline;
	}

	/**
	 * Sets the normalized newline character, which is automatically replaced by the line separator sequence when reading/writing.
	 * @param normalizedNewline a single character used to represent a line separator.
	 */
	public final void setNormalizedNewline(char normalizedNewline) {
		this.normalizedNewline = normalizedNewline;
	}

	/**
	 * Compares the given character against the normalized newline character returned by {@link #getNormalizedNewline()}.
	 * @param  ch the character to be verified
	 * @return {@code true} if the given character is the normalized newline character, otherwise {@code false}
	 */
	public final boolean isNormalizedNewLine(char ch) {
		return this.getNormalizedNewline() == ch;
	}

	/**
	 * Returns the character that represents a line comment. Defaults to '#'.
	 *  Set it to '\0' to disable comment skipping.
	 * @return the comment character
	 */
	public final char getComment() {
		if (comment == null) {
			return '#';
		}
		return comment;
	}

	/**
	 * Defines the character that represents a line comment when found in the beginning of a line of text. Defaults to '#'.
	 *  Use '\0' to disable comment skipping.
	 * @param comment the comment character
	 */
	public void setComment(char comment) {
		this.comment = comment;
	}

	/**
	 * Identifies whether a given character represents a comment.
	 * @param ch the character to be verified
	 * @return {@code true} if the given character is the comment character, otherwise {@code false}
	 */
	public boolean isComment(char ch) {
		return this.getComment() == ch;
	}

	/**
	 * {@inheritDoc}
	 *
	 * Only settings that were not explicitly configured on this instance (i.e. still {@code null}) are
	 * taken from the given defaults. The given configuration is cast to {@link TextFormat}.
	 */
	@Override
	protected void copyDefaultsFrom(Configuration defaultConfig) {
		TextFormat defaults = (TextFormat) defaultConfig;
		if (comment == null) {
			comment = defaults.getComment();
		}

		if (lineSeparator == null) {
			// getLineSeparator() already returns a copy, so no array is shared with the defaults.
			lineSeparator = defaults.getLineSeparator();
		}

		if (normalizedNewline == null) {
			normalizedNewline = defaults.getNormalizedNewline();
		}
	}

}