All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.fastnate.data.csv.AbstractCsvReader Maven / Gradle / Ivy

There is a newer version: 1.5.0
Show newest version
package org.fastnate.data.csv;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.Charsets;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.supercsv.comment.CommentMatches;
import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;

import lombok.extern.slf4j.Slf4j;

/**
 * Base class for reading a csv file into an object. Useful for constructing arbitrary object types - not only entities.
 *
 * @author Andreas Penski
 * @param 
 *            the row type
 */
@Slf4j
public abstract class AbstractCsvReader {

	private static boolean isNotEmpty(final List values) {
		return values.size() > 1 || values.size() == 1 && StringUtils.isNotBlank(values.get(0));
	}

	private final List importFiles;

	/**
	 * Creates a new instance of {@link AbstractCsvReader}.
	 *
	 * @param importPath
	 *            the path to a file or to a directory that contains *.csv files
	 */
	public AbstractCsvReader(final File importPath) {
		this.importFiles = new ArrayList<>();
		if (!importPath.exists()) {
			throw new IllegalArgumentException("Path not found: " + importPath.getAbsolutePath());
		}
		if (importPath.isDirectory()) {
			this.importFiles.addAll(Arrays.asList(importPath.listFiles((FilenameFilter) new SuffixFileFilter(".csv"))));
		} else {
			this.importFiles.add(importPath);
		}
	}

	/**
	 * Builds one or more entities from the given row.
	 *
	 * @param row
	 *            contains the mapping from the header names to the current row data
	 * @return the list of entities from that row
	 */
	protected abstract Collection createEntities(final Map row);

	/**
	 * Defines the default encoding for CSV files, if it can't be determined from the BOM.
	 *
	 * @return the default encoding
	 */
	protected Charset getDefaultEncoding() {
		return Charsets.UTF_8;
	}

	/**
	 * Opens a CSV file.
	 *
	 * If the given file ends with "gz", then the file is decompressed before using a {@link GZIPInputStream}.
	 *
	 * @param importFile
	 *            the csv file
	 * @return a list reader
	 * @throws IOException
	 *             on io exception
	 */
	@SuppressWarnings("resource")
	protected CsvListReader openCsvListReader(final File importFile) throws IOException {
		// Open file
		InputStream fileStream = new FileInputStream(importFile);

		// Check for compressed file
		if (importFile.getName().toLowerCase().endsWith(".gz")) {
			fileStream = new GZIPInputStream(fileStream);
		}

		// Guess the encoding
		final BOMInputStream inputStream = new BOMInputStream(fileStream, false, ByteOrderMark.UTF_8,
				ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE);
		final String charset;
		if (inputStream.hasBOM()) {
			charset = inputStream.getBOMCharsetName();
			log.info("BOM detected. Using {} as encoding", charset);
		} else {
			charset = getDefaultEncoding().toString();
			log.info("No BOM detected. Assuming {} as encoding", charset);
		}
		final Reader reader = new InputStreamReader(inputStream, charset);
		return new CsvListReader(reader, new CsvPreference.Builder(CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE)
				.skipComments(new CommentMatches("(//|/\\*|#|;).*")).build());
	}

	/**
	 * Reads the import files.
	 *
	 * @return collection of constructed entities
	 * @throws IOException
	 *             on error
	 */
	protected Collection readImportFiles() throws IOException {
		final Collection entities = new ArrayList<>();
		for (final File importFile : this.importFiles) {
			log.info("Reading entities from {}...", importFile);

			try (final CsvListReader csvList = openCsvListReader(importFile)) {
				final String[] header = csvList.getHeader(true);
				if (ArrayUtils.isEmpty(header)) {
					log.error("Ignoring {}, as no header was found", importFile);
					continue;
				}
				final Map columnNames = new HashMap<>();
				for (int i = 0; i < header.length; i++) {
					if (header[i] != null) {
						columnNames.put(header[i], i);
					}
				}
				for (List values; (values = csvList.read()) != null;) {
					if (isNotEmpty(values)) {
						final Map row = new HashMap<>();
						for (final Map.Entry column : columnNames.entrySet()) {
							if (column.getValue() < values.size()) {
								row.put(column.getKey(), values.get(column.getValue()));
							}
						}
						entities.addAll(createEntities(row));
					}
				}
			}
		}
		return entities;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy