All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nerd4j.csv.parser.CSVParserFactory Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * #%L
 * Nerd4j CSV
 * %%
 * Copyright (C) 2013 Nerd4j
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as 
 * published by the Free Software Foundation, either version 3 of the 
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU General Lesser Public 
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-3.0.html>.
 * #L%
 */
package org.nerd4j.csv.parser;

import java.io.Reader;

import org.nerd4j.csv.RemarkableASCII;
import org.nerd4j.csv.exception.CSVConfigurationException;



/**
 * Factory of {@link CSVParser}.
 *
 * <p>
 * Translates a {@link CSVParserMetadata} configuration into a lookup table
 * that maps every character of the ASCII space to a {@link CharacterClass},
 * and creates parsers that use that table.
 * </p>
 *
 * <p>
 * The table is built once, in the constructor. Every parser returned by
 * {@link #create(Reader)} is handed the same array instance.
 * </p>
 *
 * @author Nerd4j Team
 */
public final class CSVParserFactory
{

    /**
     * Defines for each character in the ASCII space
     * the related {@link CharacterClass} used while
     * parsing.
     */
    private final int[] asciiCharClasses;

    /**
     * Lazy quotes flag as read from the configuration: when set, quotes
     * found in unquoted fields are handled as {@link CharacterClass#NORMAL}
     * without being escaped.
     */
    private final boolean lazyQuotes;


    /* ******************** */
    /* *** CONSTRUCTORS *** */
    /* ******************** */


    /**
     * Default constructor.
     * Builds the factory from a freshly created {@link CSVParserMetadata}.
     */
    public CSVParserFactory()
    {

        this( new CSVParserMetadata() );

    }

    /**
     * Constructor with parameters.
     *
     * <p>
     * Character classes are registered in this order: characters to ignore,
     * characters to ignore around fields, quote, escape, field separator
     * and finally the record separator(s). If the same character is
     * configured for more than one role, the role registered last wins
     * because it overwrites the previous entry in the table.
     * </p>
     *
     * @param configuration the configuration to use to build the parsers.
     * @throws NullPointerException if {@code configuration} is {@code null}.
     * @throws CSVConfigurationException if a mandatory character (quote,
     *         field separator, record separator) is {@code null} or if any
     *         configured character falls outside the ASCII table.
     */
    public CSVParserFactory( CSVParserMetadata configuration )
    {

        /* Fail fast with a readable message instead of an anonymous NPE. */
        if( configuration == null )
        {
            throw new NullPointerException( "The parser configuration must not be null" );
        }

        this.lazyQuotes = configuration.isLazyQuotes();

        /*
         * A new int array is zero filled and CharacterClass.NORMAL is 0,
         * so every character not registered below is treated as NORMAL.
         */
        this.asciiCharClasses = new int[ RemarkableASCII.ASCII_TABLE_SIZE ];

        for( char toIgnore : configuration.getCharsToIgnore() )
        {
            addCharClass( toIgnore, CharacterClass.TO_IGNORE, "TO IGNORE", false );
        }

        for( char toIgnoreAround : configuration.getCharsToIgnoreAroundFields() )
        {
            addCharClass( toIgnoreAround, CharacterClass.TO_IGNORE_AROUND_FIELDS, "TO IGNORE AROUND FIELDS", false );
        }

        addCharClass( configuration.getQuoteChar(), CharacterClass.QUOTE, "QUOTE", true );
        addCharClass( configuration.getEscapeChar(), CharacterClass.ESCAPE, "ESCAPE", false );
        addCharClass( configuration.getFieldSeparator(), CharacterClass.FIELD_SEPARATOR, "FIELD SEPARATOR", true );

        /* A second record separator switches to the two character form. */
        if( configuration.getRecordSeparator2() != null )
        {
            addCharClass( configuration.getRecordSeparator1(), CharacterClass.RECORD_SEPARATOR_1, "RECORD SEPARATOR 1", true );
            addCharClass( configuration.getRecordSeparator2(), CharacterClass.RECORD_SEPARATOR_2, "RECORD SEPARATOR 2", true );
        }
        else
        {
            addCharClass( configuration.getRecordSeparator1(), CharacterClass.RECORD_SEPARATOR, "RECORD SEPARATOR", true );
        }

    }


    /* ************************ */
    /* *** PUBLIC INTERFACE *** */
    /* ************************ */


    /**
     * Create a new {@link CSVParser} to read data from given {@link Reader}.
     *
     * @param reader csv data source.
     * @return a new parser bound to the given reader.
     */
    public CSVParser create( final Reader reader )
    {

        /*
         * We don't need a buffered reader, the parser
         * already handles its reading buffer.
         */
        return new CSVParserImpl( reader, asciiCharClasses, lazyQuotes );

    }


    /* ***************** */
    /*  PRIVATE METHODS  */
    /* ***************** */


    /**
     * Registers the given character class for the given character,
     * throwing an exception if the value is inconsistent.
     *
     * <p>
     * A {@code null} character is silently skipped unless it is mandatory.
     * </p>
     *
     * @param character character to check.
     * @param charClass character class to add.
     * @param className character class name, used in error messages.
     * @param mandatory tells if such character is mandatory.
     * @throws CSVConfigurationException if a mandatory character is
     *         {@code null} or the character is outside the ASCII table.
     */
    private void addCharClass( final Character character, final int charClass,
                               final String className, final boolean mandatory )
    {

        if( character == null )
        {
            if( mandatory )
            {
                throw new CSVConfigurationException( "The " + className + " character is mandatory and cant be null, check the configuration" );
            }

            return;
        }

        final char charValue = character.charValue();

        /* A char is never negative, so only the upper bound has to be checked. */
        if( charValue >= RemarkableASCII.ASCII_TABLE_SIZE )
        {
            throw new CSVConfigurationException( "Invalid " + className + " character, it must belong to ASCII space" );
        }

        asciiCharClasses[charValue] = charClass;

    }


    /* *************** */
    /*  INNER CLASSES  */
    /* *************** */


    /**
     * Enumerates all character classes accepted by the parser (state machine).
     * Previously was an enum but we discovered that switching on constants
     * is actually faster.
     */
    static interface CharacterClass
    {

        /** Simple normal character without special meanings. */
        public static final int NORMAL = 0;

        /** Quoting character, used to enclose field into quotes if needed. */
        public static final int QUOTE = 2;

        /** Escape character, working like Java escape character '\'. */
        public static final int ESCAPE = 1;

        /** Character used to separate fields. */
        public static final int FIELD_SEPARATOR = 3;

        /** Character used to separate records. */
        public static final int RECORD_SEPARATOR = 4;

        /** First of two characters used to separate records. */
        public static final int RECORD_SEPARATOR_1 = 5;

        /** Second of two characters used to separate records. */
        public static final int RECORD_SEPARATOR_2 = 6;

        /** Character to be ignored during parsing. */
        public static final int TO_IGNORE = 7;

        /** Character to be ignored only if on heading or trailing of a field. */
        public static final int TO_IGNORE_AROUND_FIELDS = 8;

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy