org.cristalise.dsl.csv.CSVGroovyParser.groovy Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cristalise-dsl Show documentation
Show all versions of cristalise-dsl Show documentation
CRISTAL-iSE Domain Specific Language module for bootstrapping and functional testing
The newest version!
/**
* This file is part of the CRISTAL-iSE kernel.
* Copyright (c) 2001-2015 The CRISTAL Consortium. All rights reserved.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*
* http://www.fsf.org/licensing/licenses/lgpl.html
*/
package org.cristalise.dsl.csv
import java.time.LocalDate
import java.time.format.DateTimeParseException
import org.apache.commons.lang3.math.NumberUtils
import org.apache.commons.lang3.time.DateUtils
import com.opencsv.CSVParser
import com.opencsv.CSVParserBuilder
import com.opencsv.CSVReader
import com.opencsv.CSVReaderBuilder
import groovy.transform.CompileStatic;
import groovy.util.logging.Slf4j
/**
 * Reads CSV input through OpenCSV and hands each data row to a Closure, either as the raw
 * String[] (when no header is configured) or as a nested Map built from the header.
 *
 * Each header column is a list of names (one per header row). A name ending with '[n]'
 * denotes the n-th element of a repeating section, stored as a List of Maps.
 *
 * Supported options (see setDefaultOptions() for the defaults):
 * headerRowCount, skipLeftCols, skipRightCols, trimHeader, trimData, strictQuotes, skipRows,
 * useStringOnly, separatorChar, quoteChar, escapeChar, dateFormatter, header.
 */
@Slf4j @CompileStatic
class CSVGroovyParser implements TabularGroovyParser {

    /** Effective options, populated by setDefaultOptions() in the constructor. */
    Map options = [:]

    /** OpenCSV reader wrapping the Reader given to the constructor. */
    CSVReader reader = null

    /**
     * @param ioReader source of the CSV text; it is NOT closed by this class
     * @param opts user supplied options; missing entries are replaced by defaults
     */
    public CSVGroovyParser(Reader ioReader, Map opts) {
        setDefaultOptions(opts)
        reader = getCSVReader(ioReader)
    }

    /**
     * Overrides the number of rows that getHeader() reads as header.
     *
     * @param rowCount number of header rows
     */
    @Override
    public void setHeaderRowCount(int rowCount) {
        options.headerRowCount = rowCount
    }

    /**
     * Supplies an external header, so eachRow() does not read it from the input.
     *
     * @param h one list of names per column
     */
    @Override
    public void setHeader(List<List<String>> h) {
        options.header = h
    }

    /**
     * Builds the OpenCSV reader from the separatorChar, quoteChar, escapeChar, strictQuotes
     * and skipRows options.
     *
     * @param ioReader source of the CSV text
     * @return the configured CSVReader
     */
    private CSVReader getCSVReader(Reader ioReader) {
        def parser = new CSVParserBuilder()
            .withSeparator(options.separatorChar as char)
            .withQuoteChar(options.quoteChar as char)
            .withEscapeChar(options.escapeChar as char)
            .withStrictQuotes(options.strictQuotes as boolean)
            .build()

        return new CSVReaderBuilder(ioReader)
            .withCSVParser(parser)
            .withSkipLines(options.skipRows as int)
            .build()
    }

    /**
     * Copies the user options into the options field, filling in a default for every
     * entry that was not specified.
     *
     * @param opts user supplied options, null is treated as an empty Map
     */
    private void setDefaultOptions(Map opts) {
        Map given = (opts != null) ? opts : [:]

        options.headerRowCount = given.headerRowCount != null ? given.headerRowCount : 1
        options.skipLeftCols   = given.skipLeftCols   != null ? given.skipLeftCols   : 0
        options.skipRightCols  = given.skipRightCols  != null ? given.skipRightCols  : 0
        options.trimHeader     = given.trimHeader     != null ? given.trimHeader     : true
        options.trimData       = given.trimData       != null ? given.trimData       : false
        options.strictQuotes   = given.strictQuotes   != null ? given.strictQuotes   : CSVParser.DEFAULT_STRICT_QUOTES
        options.skipRows       = given.skipRows       != null ? given.skipRows       : CSVReader.DEFAULT_SKIP_LINES
        options.useStringOnly  = given.useStringOnly  != null ? given.useStringOnly  : false

        options.separatorChar = charOption(given.separatorChar, CSVParser.DEFAULT_SEPARATOR)
        options.quoteChar     = charOption(given.quoteChar,     CSVParser.DEFAULT_QUOTE_CHARACTER)
        options.escapeChar    = charOption(given.escapeChar,    CSVParser.DEFAULT_ESCAPE_CHARACTER)

        options.dateFormatter = given.dateFormatter ? given.dateFormatter : ''
        options.header        = given.header ? given.header : []
    }

    /**
     * Selects the value of a character option. A Character is always honoured, even '\0',
     * which Groovy truth evaluates to false and would otherwise be replaced by the default.
     * Any other value is used only when it is 'true' according to Groovy truth (so null and
     * the empty String fall back to the default).
     *
     * @param given the value supplied by the user, can be null
     * @param fallback the default character
     * @return the user value or the default
     */
    private static Object charOption(Object given, char fallback) {
        return (given || given instanceof Character) ? given : fallback
    }

    /**
     * Reads headerRowCount rows from the reader and converts them into one list of names
     * per column, ignoring skipLeftCols columns on the left and skipRightCols on the right.
     * An empty cell inherits the last non-empty name seen in the same header row.
     * Names are trimmed when the trimHeader option is set.
     *
     * @return the list of name paths, one per (not skipped) column
     */
    @Override
    public List<List<String>> getHeader() {
        int rowCount      = options.headerRowCount as int
        int skipLeftCols  = options.skipLeftCols as int
        int skipRightCols = options.skipRightCols as int
        //header names are controlled by trimHeader, trimData only applies to the data cells
        boolean trim      = (boolean)options.trimHeader

        assert rowCount > 0, "row count for header must be grater than zero"

        log.debug "getHeader() - rowCount: $rowCount, skipLeftCols: $skipLeftCols, skipRightCols: $skipRightCols"

        List<String[]> headerRows = []
        Integer size = null

        //read header lines into a list of arrays, check size of each rows
        for (int i = 0; i < rowCount; i++) {
            String[] headerRow = reader.readNext()

            assert headerRow, "$i. row in header is null or zero size"
            log.debug "getHeader() - headerRows[$i] size: ${headerRow.length}"

            //compare current size with the previous
            if (size != null) {
                assert headerRow.length == size, "$i. header row size is not equal with the previous. All header rows must have the same size"
            }

            size = headerRow.length
            headerRows.add(headerRow)
        }

        int endCol = size - skipRightCols

        //fail early instead of indexing outside of the header rows
        assert skipLeftCols >= 0 && skipRightCols >= 0 && skipLeftCols < endCol, "skipLeftCols:$skipLeftCols and skipRightCols:$skipRightCols leave no column in header of size $size"

        List<String> currentNames = []
        List<List<String>> header = []

        //construct the path of each header, make sure to skip columns if needed
        for (int col = skipLeftCols; col < endCol; col++) {
            List<String> path = []

            for (int row = 0; row < rowCount; row++) {
                String name = headerRows[row][col]

                //if not null/empty take this string otherwise use the buffer of currentNames
                if (name) { currentNames[row] = trim ? name.trim() : name }

                name = currentNames[row]

                //if not null/empty append it to the list
                if (name) { path.add(name) }
            }

            log.info "getHeader() - header[${col-skipLeftCols}] = $path"
            header.add(path)
        }

        return header
    }

    /**
     * Cast the string value to a real type. The order of checks is: null/empty gives null,
     * useStringOnly gives the (optionally trimmed) string, the text 'null' gives null,
     * numbers are converted to Integer, BigInteger, BigDecimal or whatever
     * NumberUtils.createNumber() returns, and finally the value is parsed as LocalDate
     * using the dateFormatter option if set. If nothing matches the string is returned.
     *
     * @param value in string format
     * @return the value converted to its real type
     */
    private Object getValue(String value) {
        boolean trim = (boolean)options.trimData

        if (!value) {
            //value is null or empty string
            return null
        }
        else if ((boolean)options.useStringOnly) {
            return trim ? value.trim() : value
        }
        else if (value == 'null') {
            return null
        }
        else if (NumberUtils.isCreatable(value)) {
            if (value.isInteger()) {
                return value.trim() as Integer
            }
            else if (value.isBigInteger()) {
                return value.trim() as BigInteger
            }
            else if (value.isBigDecimal()) {
                return value.trim() as BigDecimal
            }
            else {
                //hex or type qualified literals (e.g. 0x1F, 12L) cannot be cast with 'as Number'
                try {
                    return NumberUtils.createNumber(value.trim())
                }
                catch (NumberFormatException e) {
                    return trim ? value.trim() : value
                }
            }
        }
        else {
            try {
                if((String)options.dateFormatter) {
                    return LocalDate.parse(value.trim(), (String)options.dateFormatter)
                }
                else {
                    return LocalDate.parse(value.trim())
                }
            }
            catch (DateTimeParseException e) {
                //not a date, keep it as string
                return trim ? value.trim() : value
            }
        }
    }

    /**
     * Recursively process the list of names to build the nested Maps and Lists
     *
     * @param map the Map to be updated for the current level of names
     * @param names List of String for one column e.g. ['contacts', 'address[0]', 'purpose']
     * @param value Is a String as returned by OpenCSV
     */
    private void convertNamesToMaps(Map map, List<String> names, String value) {
        String name = names.head()
        List<String> namesTail = names.tail()
        int index = -1

        //Dealing with repeating section, so handle it as List of Maps
        if(name.contains('[') && name.endsWith(']')) {
            int i = name.indexOf('[')
            index = name.substring(i+1,name.size()-1) as int
            name = name.substring(0, i)
        }

        log.debug "$name index:$index names:$names value:'$value'"

        if(namesTail) {
            //Names are not fully converted to maps and lists yet
            if(index == -1) {
                if(!map[name]) { map[name] = [:] } //init Map

                convertNamesToMaps((Map)map[name], namesTail, value)
            }
            else {
                //Dealing with repeating section, so handle it as List of Maps
                if(!map[name]) { map[name] = [] } //init List
                if(!((List)map[name])[index]) { ((List)map[name])[index] = [:] } //init Map in the List

                convertNamesToMaps((Map)((List)map[name])[index], namesTail, value)
            }
        }
        else {
            //last name in the path: store the converted value
            map[name] = getValue(value)
            log.debug "map[$name] = ${map[name]}"
        }
    }

    /**
     * Calls the closure for every remaining row of the reader. Without header
     * (headerRowCount is zero and no external header was set) the closure receives the
     * String[] returned by OpenCSV, otherwise it receives the nested Map built from the
     * header and the non-empty cells. The second argument is the zero based row index.
     *
     * @param cl the closure called with two arguments: the row and its index
     */
    @Override
    public void eachRow(Closure cl) {
        String[] nextLine;

        int headerRowCount = options.headerRowCount as int
        int skipLeftCols   = options.skipLeftCols as int
        int skipRightCols  = options.skipRightCols as int

        List<List<String>> header = options.header as List
        int rowIndex = 0

        //CSV has no header
        if (!headerRowCount && !header) {
            log.warn "No header was specified so reverting to original openCsv behaviour"

            while ((nextLine = reader.readNext()) != null) {
                cl(nextLine, rowIndex++)
            }
        }
        else {
            if (!header) {
                header = getHeader()
            }
            else {
                log.debug "external header: $header"
            }

            assert header, "no header is availalle"

            while ((nextLine = reader.readNext()) != null) {
                assert header.size() == nextLine.length - (skipLeftCols + skipRightCols), "Header size must be equal with the size of data line"

                Map map = [:]

                //header is a List of Lists
                for (int i = 0; i < header.size(); i++) {
                    String value = nextLine[i+skipLeftCols]

                    //empty cells do not create any entry in the Map
                    if (value) { convertNamesToMaps(map, header[i], value) }
                }

                cl(map, rowIndex++)
            }
        }
    }

    /**
     * Category method. Reads the header of the CSV file; the file is closed before returning.
     *
     * @param self the CSV file
     * @param options see the options listed for the class
     * @return the list of name paths, one per column
     */
    public static List<List<String>> csvHeader(File self, Map options = [:]) {
        Reader fileReader = new FileReader(self)

        try {
            return new CSVGroovyParser(fileReader, options).getHeader()
        }
        finally {
            //release the file handle even if the header is invalid
            fileReader.close()
        }
    }

    /**
     * Category method. Reads the header of the CSV text.
     *
     * @param self the CSV text
     * @param options see the options listed for the class
     * @return the list of name paths, one per column
     */
    public static List<List<String>> csvHeader(String self, Map options = [:]) {
        return new CSVGroovyParser(new StringReader(self), options).getHeader()
    }

    /**
     * Category method. Calls the closure for each row of the CSV file; the file is closed
     * when all rows were processed or when an exception was thrown.
     *
     * @param self the CSV file
     * @param options see the options listed for the class
     * @param cl the closure called with two arguments: the row and its index
     */
    public static void csvEachRow(File self, Map options = [:], Closure cl) {
        Reader fileReader = new FileReader(self)

        try {
            new CSVGroovyParser(fileReader, options).eachRow(cl)
        }
        finally {
            //release the file handle even if the closure or the parsing fails
            fileReader.close()
        }
    }

    /**
     * Category method. Calls the closure for each row of the CSV text.
     *
     * @param self the CSV text
     * @param options see the options listed for the class
     * @param cl the closure called with two arguments: the row and its index
     */
    public static void csvEachRow(String self, Map options = [:], Closure cl) {
        new CSVGroovyParser(new StringReader(self), options).eachRow(cl)
    }

    /**
     * @deprecated use {@link #csvEachRow(File, Closure)} to avoid name collision with
     * ExcelGroovyParser when it is used as groovy Category or Extension.
     * @param file the CSV file, parsed with one header row, trimmed data and strings only
     * @param cl the closure called with two arguments: the row and its index
     * @throws IOException
     */
    @Deprecated
    public static void parse(final File file, Closure cl) throws IOException {
        csvEachRow(file, [headerRowCount: 1, trimData: true, useStringOnly: true], cl)
    }

    /**
     * @deprecated use {@link #csvEachRow(String, Closure)} to avoid name collision with
     * ExcelGroovyParser when it is used as groovy Category or Extension.
     * @param string the CSV text, parsed with one header row, trimmed data and strings only
     * @param cl the closure called with two arguments: the row and its index
     * @throws IOException
     */
    @Deprecated
    public static void parse(final String string, Closure cl) throws IOException {
        csvEachRow(string, [headerRowCount: 1, trimData: true, useStringOnly: true], cl)
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy