
com.databasesandlife.util.CsvParser Maven / Gradle / Ivy
package com.databasesandlife.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import com.google.gdata.util.io.base.UnicodeReader;
/**
* Parses CSV files.
*
* The CSV file is assumed to have a first line containing the column headings.
* Does not handle quotes in fields (e.g. as generated by Excel).
* Field names are case-sensitive.
* Files have a default character set (by default UTF-8) which can be changed by calling {@link #setDefaultCharset},
* however if the file has a Unicode BOM then this is accepted in preference to the default charset.
*
*
* <h3>Usage</h3>
* Create an object and set attributes such as the field-separator, list of acceptable columns, etc.
* Then either call parseAndCallHandler or parseToListOfMaps.
*
*
* <pre>{@code
* CsvLineHandler myHandler = new CsvLineHandler() {
*     public void processCsvLine(Map<String,String> line) { .. }
* };
* CsvParser csvParser = new CsvParser();
* csvParser.setDesiredFields("abc","def"); // field set in file must be this set
* csvParser.setNonEmptyFields("abc"); // all of these fields must have non-empty values
* csvParser.parseAndCallHandler(myHandler, aFile);
* csvParser.parseAndCallHandler(myHandler, aReader);
* csvParser.parseAndCallHandler(myHandler, aClass); // reads "aClass.csv" from classloader
* List<Map<String,String>> contents = csvParser.parseToListOfMaps(aFile);
* }</pre>
* <h3>Glossary</h3>
* <ul>
* <li>Field - name of column</li>
* <li>Column index - e.g. 0 is the left-most column</li>
* <li>Line - a row of data or header</li>
* </ul>
*
*
* @author This source is copyright Adrian Smith and licensed under the LGPL 3.
* @see Project on GitHub
*/
@SuppressWarnings("serial")
public class CsvParser {
/**
 * Callback that receives the lines of a CSV file one at a time.
 * An implementation is passed to the parser's {@code parseAndCallHandler} methods.
 */
public interface CsvLineHandler {
    /**
     * Handles a single line of CSV data.
     *
     * @param line the values of the line (presumably keyed by field name; confirm against the parser);
     *             this object can be re-used between calls to reduce GC, so extract values from it
     *             but do not store the object anywhere
     * @throws MalformedCsvException if the implementation rejects the line
     */
    void processCsvLine(Map<String, String> line) throws MalformedCsvException;
}
/**
 * Reports that CSV input is not valid.
 * This is a checked exception on purpose: it is always possible that a CSV is invalid,
 * so callers must handle that case.
 */
public static class MalformedCsvException extends Exception {

    /**
     * Creates the exception with a description of the problem.
     *
     * @param message text describing what is wrong with the CSV
     */
    public MalformedCsvException(String message) {
        super(message);
    }

    /**
     * Creates the exception as a wrapper around an underlying failure.
     *
     * @param cause the throwable that led to this exception
     */
    public MalformedCsvException(Throwable cause) {
        super(cause);
    }
}
protected class ArrayOfMapsLineHandler implements CsvLineHandler {
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy