org.nerd4j.csv.reader.CSVReaderFactoryImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of nerd4j-csv Show documentation
Show all versions of nerd4j-csv Show documentation
CSV manipulation library.
/*
* #%L
* Nerd4j CSV
* %%
* Copyright (C) 2013 Nerd4j
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Lesser Public License for more details.
*
* You should have received a copy of the GNU General Lesser Public
* License along with this program. If not, see
* .
* #L%
*/
package org.nerd4j.csv.reader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import org.nerd4j.csv.exception.CSVConfigurationException;
import org.nerd4j.csv.exception.CSVToModelBindingException;
import org.nerd4j.csv.exception.MalformedCSVException;
import org.nerd4j.csv.field.CSVField;
import org.nerd4j.csv.field.CSVFieldMetadata;
import org.nerd4j.csv.parser.CSVParser;
import org.nerd4j.csv.reader.binding.CSVToModelBinder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Reference implementation for the {@link CSVReaderFactory} interface.
*
* @param type of the data model returned by the reader.
*
* @author Nerd4j Team
*/
public final class CSVReaderFactoryImpl implements CSVReaderFactory
{
/** Internal logging system. */
private static final Logger logger = LoggerFactory.getLogger( CSVReaderFactoryImpl.class );
/** The {@code Factory} for the reader meta-data model. */
private final CSVReaderMetadataFactory metadataFactory;
// /** The configuration to use for creating and configuring the {@link CSVReader}s. */
// private final CSVReaderMetadata configuration;
// /** Factory used to build CSV source parsers. */
// private CSVParserFactory parserFactory;
//
// /** Factory used to create builders which know how to populate the returned data model. */
// private CSVToModelBinderFactory binderFactory;
/**
* Constructor with parameters.
*
* @param metadataFactory the {@code Factory} of meta-data that describes how to build the CSV readers.
*/
public CSVReaderFactoryImpl( final CSVReaderMetadataFactory metadataFactory )
{
super();
if( metadataFactory == null )
throw new CSVConfigurationException( "The CSV reader configuration is mandatory" );
this.metadataFactory = metadataFactory;
// this.binderFactory = configuration.getModelBinderFactory();
// this.parserFactory = new CSVParserFactory( configuration.getParserConfiguration() );
}
/* ******************* */
/* INTERFACE METHODS */
/* ******************* */
/**
* {@inheritDoc}
*/
@Override
public CSVReader getCSVReader( File file )
throws FileNotFoundException, IOException, CSVToModelBindingException
{
return getCSVReader( new FileReader(file) );
}
/**
* {@inheritDoc}
*/
@Override
public CSVReader getCSVReader( InputStream is )
throws IOException, CSVToModelBindingException
{
return getCSVReader( new InputStreamReader(is) );
}
/**
* {@inheritDoc}
*/
@Override
public CSVReader getCSVReader( Reader reader )
throws IOException, CSVToModelBindingException
{
/*
* First of all we create a new meta-data model.
* If the framework was not well configured the
* system can break ad this point.
*/
final CSVReaderMetadata metadata = metadataFactory.getCSVReaderMetadata();
/* We create the parser able to parse the given CSV source. */
final CSVParser csvParser = metadata.getParserFactory().create( reader );
/* As second step we read the CSV source header if requested. */
final String[] csvHeader = readHeaderIfNeeded( csvParser, metadata );
/* Than we create the column mapping needed to elaborate the CSV source fields. */
final Integer[] columnMapping = buildColumnMapping( csvHeader, metadata );
/* We create the fields needed to elaborate the CSV source columns. */
final CSVField[] fields = buildFields( columnMapping, metadata );
/* We create the model binder needed to bind the source data to the output model. */
final CSVToModelBinder modelBinder = metadata.getModelBinderFactory().getCSVToModelBinder( metadata, columnMapping );
/* Finally we create the CSV reader. */
final CSVReader csvReader =
new CSVReaderImpl( csvParser, csvHeader, fields, modelBinder, metadata.isAcceptIncompleteRecords() );
return csvReader;
}
/* ***************** */
/* PRIVATE METHODS */
/* ***************** */
/**
* If the flag "readHeader" in the configuration is true
* reads the first row and returns an array of {@link String}s
* representing the CSV source header.
*
* @param csvParser the CSV source parser.
* @param metadata the meta-data model to read configuration from.
* @return the CSV source header or null
.
* @throws IOException if the CSV parser fails to read the source.
*/
private String[] readHeaderIfNeeded( CSVParser csvParser, CSVReaderMetadata metadata )
throws IOException
{
if( metadata.isReadHeader() )
{
final CSVHeaderReader csvHeaderReader = new CSVHeaderReader( csvParser );
final String[] header = csvHeaderReader.readHeader();
if( header == null || header.length < 1 )
{
logger.error( "Inconsistent CSV format, header expected but not present." );
throw new MalformedCSVException( "The CSV source was expected to have an header but was empty." );
}
return header;
}
return null;
}
/**
* Builds the fields needed to elaborate the CSV source fields.
*
* @param columnMapping the mapping between the CSV source and the configuration.
* @param metadata the meta-data model to read configuration from.
* @return the fields needed to elaborate the CSV source fields.
*/
@SuppressWarnings("unchecked")
private CSVField[] buildFields( Integer[] columnMapping, CSVReaderMetadata metadata )
{
logger.debug( "Going to get field processors from the configuration." );
if( columnMapping.length < 1 )
/* The array of fields has always at least one element. */
return new CSVField[1];
final CSVField[] fields = new CSVField[ columnMapping.length ];
final CSVFieldMetadata[] fieldConfs = metadata.getFieldConfigurations();
for( int i = 0; i < fields.length; ++i )
if( columnMapping[i] != null )
fields[i] = fieldConfs[columnMapping[i]].getField();
return fields;
}
/**
* Returns an array that associates each column of the source
* CSV with the corresponding index in the configuration.
*
*
* This process is performed by reading the CSV source header.
* If the header is null
all the information
* will be taken from the configuration.
*
*
*
* If the configuration is smaller than the source CSV some
* entries in the array may be null
.
*
*
*
* If the configuration is greater than the source CSV the
* inconsistent configurations will be ignored.
*
*
* @param header the CSV source header if any.
* @param metadata the meta-data model to read configuration from.
* @return the column index re-mapping.
*/
private Integer[] buildColumnMapping( String[] header, CSVReaderMetadata metadata )
{
logger.debug( "Going to get the column mapping by parsing the configuration." );
final CSVFieldMetadata[] fieldConfs = metadata.getFieldConfigurations();
if( fieldConfs == null || fieldConfs.length < 1 )
return new Integer[0];
/* We check the consistency of the configuration and the source CSV. */
final boolean useColumnNames = metadata.isUseColumnNames();
if( useColumnNames && header == null )
{
if( metadata.isReadHeader() )
logger.error( "The flag 'useColumnNames' is true but there is no header to match. Check the CSV source to be not empty" );
else
logger.error( "The configuration 'useColumnNames=true' and 'readHeader=false' are inconsistent. It is not possible to use column names without any matching header." );
throw new CSVConfigurationException( "Unable to use column names without any matching header" );
}
if( metadata.isUseColumnNames() )
{
/*
* If the flag 'readHeader' in the configuration is true
* we expect the CSV source to have an header and we need
* to match the information in the configuration with the
* one in the header.
*/
return buildColumnMappingUsingColumnNames( fieldConfs, header );
}
else
{
/*
* If the flag 'readHeader' in the configuration is false
* we the only available information is the one in the
* configuration so we use it to build the processors.
*/
return buildColumnMappingUsingIndexes( fieldConfs );
}
}
/**
* Builds the column mapping needed to elaborate the CSV source
* using the column indexes to identify the fields.
*
* @param fieldConfs array of field configurations.
* @return the column mapping needed to elaborate the CSV source fields.
*/
private Integer[] buildColumnMappingUsingIndexes( final CSVFieldMetadata[] fieldConfs )
{
int[] indexes = new int[fieldConfs.length];
for( int i = 0; i < fieldConfs.length; ++i )
{
try{
indexes[i] = Integer.parseInt( fieldConfs[i].getColumnName() );
}catch( Exception ex )
{
logger.error( "Unable to get column index", ex );
throw new CSVConfigurationException( fieldConfs[i].getColumnName() + " is not a valid column index", ex );
}
}
int maxIndex = -1;
for( int i = 0; i < indexes.length; ++i )
if( indexes[i] > maxIndex )
maxIndex = indexes[i];
final Integer[] columnMapping = new Integer[maxIndex + 1];
int columnIndex;
for( int i = 0; i < fieldConfs.length; ++i )
{
columnIndex = indexes[i];
if( columnMapping[columnIndex] != null )
throw new CSVConfigurationException( "Multiple configuration for the same column " + columnIndex );
columnMapping[columnIndex] = i;
}
return columnMapping;
}
/**
* Builds the column mapping needed to elaborate the CSV source
* using the column names to identify the fields.
*
* @param fieldConfs array of field configurations.
* @param header the CSV source header.
* @return the column mapping needed to elaborate the CSV source fields.
*/
private Integer[] buildColumnMappingUsingColumnNames( final CSVFieldMetadata[] fieldConfs, final String[] header )
{
/* We create a map that allows to find the processor by column name. */
String columnName = null;
final Map namedColumnMap = new HashMap();
for( int i = 0; i < fieldConfs.length; ++i )
{
columnName = fieldConfs[i].getColumnName();
if( namedColumnMap.containsKey(columnName) )
throw new CSVConfigurationException( "Multiple configuration for the same column " + columnName );
namedColumnMap.put( columnName, i );
}
final Integer[] columnMapping = new Integer[header.length];
/* We fill the processors by name. */
for( int i = 0; i < columnMapping.length; ++i )
columnMapping[i] = namedColumnMap.get( header[i] );
return columnMapping;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy