org.milyn.csv.CSVReader Maven / Gradle / Ivy
/*
Milyn - Copyright (C) 2006
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License (version 2.1) as published by the Free Software
Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Lesser General Public License for more details:
http://www.gnu.org/licenses/lgpl.txt
*/
package org.milyn.csv;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.xml.XMLConstants;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.milyn.cdr.annotation.ConfigParam;
import org.milyn.cdr.SmooksConfigurationException;
import org.milyn.container.ExecutionContext;
import org.milyn.xml.SmooksXMLReader;
import org.milyn.delivery.VisitorAppender;
import org.milyn.delivery.VisitorConfigMap;
import org.milyn.delivery.ordering.Consumer;
import org.milyn.delivery.sax.SAXVisitAfter;
import org.milyn.delivery.sax.SAXElement;
import org.milyn.delivery.dom.DOMVisitAfter;
import org.milyn.javabean.Bean;
import org.milyn.javabean.repository.BeanRepositoryManager;
import org.milyn.javabean.repository.BeanRepository;
import org.milyn.SmooksException;
import org.milyn.expression.MVELExpressionEvaluator;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.helpers.AttributesImpl;
import org.w3c.dom.Element;
/**
* CSV Reader.
*
* This CSV Reader can be plugged into Smooks (for example) in order to convert a
* CSV based message stream into a stream of SAX events to be consumed by the DOMBuilder.
*
* Configuration
* To maintain a single binding instance in memory:
*
* <?xml version="1.0"?>
* <smooks-resource-list xmlns="http://www.milyn.org/xsd/smooks-1.1.xsd" xmlns:csv="http://www.milyn.org/xsd/smooks/csv-1.2.xsd">
*
* <csv:reader fields="" separator="" quote="" skipLines="" rootElementName="" recordElementName="">
* <csv:singleBinding beanId="" class="" />
* </csv:reader>
*
* </smooks-resource-list>
*
*
* To maintain a {@link List} of binding instances in memory:
*
* <?xml version="1.0"?>
* <smooks-resource-list xmlns="http://www.milyn.org/xsd/smooks-1.1.xsd" xmlns:csv="http://www.milyn.org/xsd/smooks/csv-1.2.xsd">
*
* <csv:reader fields="" separator="" quote="" skipLines="" rootElementName="" recordElementName="">
* <csv:listBinding beanId="" class="" />
* </csv:reader>
*
* </smooks-resource-list>
*
*
* To maintain a {@link Map} of binding instances in memory:
*
* <?xml version="1.0"?>
* <smooks-resource-list xmlns="http://www.milyn.org/xsd/smooks-1.1.xsd" xmlns:csv="http://www.milyn.org/xsd/smooks/csv-1.2.xsd">
*
* <csv:reader fields="" separator="" quote="" skipLines="" rootElementName="" recordElementName="">
* <csv:mapBinding beanId="" class="" keyField="" />
* </csv:reader>
*
* </smooks-resource-list>
*
* Ignoring Fields
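* To ignore a field in a CSV record set, just insert the string "$ignore$" for that field in the fields attribute.
* Use "$ignore$N" (e.g. "$ignore$3") to ignore N consecutive fields, or "$ignore$+" to ignore all remaining fields of the record.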
*
* Simple Java Bindings
* A simple java binding can be configured on the reader configuration. This allows quick binding configuration where the
* CSV records map cleanly to the target bean. For more complex bindings, use the Java Binding Framework.
*
* Example Usage
* So the following configuration could be used to parse a CSV stream into
* a stream of SAX events:
*
* <csv:reader fields="name,address,$ignore$,item,quantity" />
*
* Within Smooks, the stream of SAX events generated by the "Acme-Order-List" message (and this parser) will generate
* an event stream equivalent to the following:
* <csv-set>
* <csv-record>
* <name>Tom Fennelly</name>
* <address>Ireland</address>
* <item>V1234</item>
* <quantity>3</quantity>
* </csv-record>
* <csv-record>
* <name>Joe Bloggs</name>
* <address>England</address>
* <item>D9123</item>
* <quantity>7</quantity>
* </csv-record>
* </csv-set>
*
* Other profile based transformations can then be used to transform the CSV records in accordance with the requirements
* of the consuming entities.
*
* @author tfennelly
*/
public class CSVReader implements SmooksXMLReader, VisitorAppender {

    private static final Log logger = LogFactory.getLog(CSVReader.class);

    /** Shared empty attribute set; none of the generated elements carry attributes. */
    private static final Attributes EMPTY_ATTRIBS = new AttributesImpl();

    /**
     * Prefix marking a "fields" entry as an ignore directive: "$ignore$" (skip one column),
     * "$ignore$N" (skip N columns) or "$ignore$+" (skip the rest of the record).
     */
    private static final String IGNORE_FIELD = "$ignore$";

    /** Bean id under which the per-record bean is registered for MAP bindings. */
    private static final String RECORD_BEAN = "csvRecordBean";

    private static final char[] INDENT_LF = new char[] {'\n'};
    private static final char[] INDENT_1 = new char[] {'\t'};
    private static final char[] INDENT_2 = new char[] {'\t', '\t'};

    private ContentHandler contentHandler;

    private ExecutionContext execContext;

    @ConfigParam(name = "fields")
    private String[] csvFields;

    @ConfigParam(defaultVal = ",")
    private char separator;

    @ConfigParam(name = "quote-char", defaultVal = "\"")
    private char quoteChar;

    @ConfigParam(name = "skip-line-count", defaultVal = "0")
    private int skipLines;

    @ConfigParam(defaultVal = "UTF-8")
    private Charset encoding;

    @ConfigParam(defaultVal="csv-set")
    private String rootElementName;

    @ConfigParam(defaultVal="csv-record")
    private String recordElementName;

    @ConfigParam(defaultVal="false")
    private boolean indent;

    @ConfigParam(use = ConfigParam.Use.OPTIONAL)
    private String bindBeanId;

    @ConfigParam(use = ConfigParam.Use.OPTIONAL)
    private Class bindBeanClass;

    @ConfigParam(use = ConfigParam.Use.OPTIONAL)
    private CSVBindingType bindingType;

    @ConfigParam(use = ConfigParam.Use.OPTIONAL)
    private String bindMapKeyField;

    /**
     * Registers the bean binding visitors for the configured simple Java binding.
     * <p>
     * Nothing is registered unless both a bind bean id and a bind bean class are configured.
     * A LIST binding collects the record beans in an {@link ArrayList}, a MAP binding collects
     * them in a {@link LinkedHashMap} keyed by the configured key field, and any other
     * (or no) binding type binds a single record bean.
     *
     * @param visitorMap The visitor map to which the binding visitors are added.
     * @throws SmooksConfigurationException If a MAP binding has no key field configured, or the
     * key field is not one of the configured CSV fields.
     */
    public void addVisitors(VisitorConfigMap visitorMap) {
        if(bindBeanId == null || bindBeanClass == null) {
            return;
        }

        if(bindingType == CSVBindingType.LIST) {
            Bean listBean = new Bean(ArrayList.class, bindBeanId, "$document");
            Bean recordBean = listBean.newBean(bindBeanClass, recordElementName);

            listBean.bindTo(recordBean);
            addFieldBindings(recordBean);

            listBean.addVisitors(visitorMap);
        } else if(bindingType == CSVBindingType.MAP) {
            if(bindMapKeyField == null) {
                throw new SmooksConfigurationException("CSV 'MAP' Binding must specify a 'keyField' property on the binding configuration.");
            }
            assertValidFieldName(bindMapKeyField);

            Bean mapBean = new Bean(LinkedHashMap.class, bindBeanId, "$document");
            Bean recordBean = new Bean(bindBeanClass, RECORD_BEAN, recordElementName);
            MapBindingWiringVisitor wiringVisitor = new MapBindingWiringVisitor(bindMapKeyField, bindBeanId);

            addFieldBindings(recordBean);

            mapBean.addVisitors(visitorMap);
            recordBean.addVisitors(visitorMap);
            // The wiring visitor puts each completed record bean into the map bean.
            visitorMap.addVisitor(wiringVisitor, recordElementName, null, false);
        } else {
            Bean recordBean = new Bean(bindBeanClass, bindBeanId, recordElementName);

            addFieldBindings(recordBean);
            recordBean.addVisitors(visitorMap);
        }
    }

    /**
     * Binds every named CSV field onto the same-named property of the supplied bean.
     * <p>
     * Every ignore directive ("$ignore$", "$ignore$N", "$ignore$+") is skipped, which keeps
     * this consistent with {@link #parse(InputSource)}: no element is ever generated for them.
     *
     * @param bean The record bean to bind the fields to.
     */
    private void addFieldBindings(Bean bean) {
        for(String field : csvFields) {
            if(!field.startsWith(IGNORE_FIELD)) {
                bean.bindTo(field, recordElementName + "/" + field);
            }
        }
    }

    /* (non-Javadoc)
     * @see org.milyn.xml.SmooksXMLReader#setExecutionContext(org.milyn.container.ExecutionContext)
     */
    public void setExecutionContext(ExecutionContext request) {
        this.execContext = request;
    }

    /**
     * Parses the CSV source and reports it to the content handler as a stream of SAX events:
     * a root element containing one record element per CSV record, each containing one
     * element per named (non-ignored) field.
     * <p>
     * Records that do not have enough columns to populate every named field are logged
     * as corrupt and skipped. The content handler and execution context are cleared when
     * parsing finishes (successfully or not), so both must be set again before the next parse.
     *
     * @param csvInputSource The CSV source. Its character stream is used if present, otherwise
     * its byte stream is decoded with the configured encoding.
     * @throws IOException If reading the CSV stream fails.
     * @throws SAXException If the content handler rejects an event.
     * @throws IllegalStateException If the content handler or execution context is not set.
     * @throws NumberFormatException If an "$ignore$N" directive has a non-numeric suffix.
     */
    public void parse(InputSource csvInputSource) throws IOException, SAXException {
        if(contentHandler == null) {
            throw new IllegalStateException("'contentHandler' not set. Cannot parse CSV stream.");
        }
        if(execContext == null) {
            throw new IllegalStateException("'execContext' not set. Cannot parse CSV stream.");
        }

        try {
            // Get a reader for the CSV source...
            Reader csvStreamReader = csvInputSource.getCharacterStream();
            if(csvStreamReader == null) {
                csvStreamReader = new InputStreamReader(csvInputSource.getByteStream(), encoding);
            }

            // Create the CSV line reader. It is deliberately not closed here: closing it would
            // close the underlying stream, which was supplied by (and belongs to) the caller.
            au.com.bytecode.opencsv.CSVReader csvLineReader = new au.com.bytecode.opencsv.CSVReader(csvStreamReader, separator, quoteChar, skipLines);

            // Start the document and add the root "csv-set" element...
            contentHandler.startDocument();
            contentHandler.startElement(XMLConstants.NULL_NS_URI, rootElementName, "", EMPTY_ATTRIBS);

            // Output each of the CSV line entries...
            int lineNumber = 0;
            // Resolved on the first record, so a malformed ignore directive is only reported
            // once there is actually a record to parse.
            int requiredColumns = -1;
            String[] csvRecord;
            while ((csvRecord = csvLineReader.readNext()) != null) {
                lineNumber++; // First line is line "1"

                if(requiredColumns < 0) {
                    requiredColumns = getRequiredColumnsCount();
                }

                if(csvRecord.length < requiredColumns) {
                    logger.warn("[CORRUPT-CSV] CSV line #" + lineNumber + " invalid [" + Arrays.asList(csvRecord) + "]. The line should contain at least " + requiredColumns + " fields, as required by the configured CSV fields " + Arrays.asList(csvFields) + ", but contains " + csvRecord.length + " fields. Ignoring!!");
                    continue;
                }

                writeRecord(csvRecord);
            }

            if(indent) {
                contentHandler.characters(INDENT_LF, 0, 1);
            }

            // Close out the "csv-set" root element and end the document..
            contentHandler.endElement(XMLConstants.NULL_NS_URI, rootElementName, "");
            contentHandler.endDocument();
        } finally {
            // Reset per-parse state; both must be supplied again before the next parse.
            contentHandler = null;
            execContext = null;
        }
    }

    /**
     * Emits the SAX events for a single CSV record: the record element wrapping one element
     * per named field, in configured field order.
     *
     * @param csvRecord The record columns. Must have at least {@link #getRequiredColumnsCount()} entries.
     * @throws SAXException If the content handler rejects an event.
     */
    private void writeRecord(String[] csvRecord) throws SAXException {
        writeIndent(INDENT_1);
        contentHandler.startElement(XMLConstants.NULL_NS_URI, recordElementName, "", EMPTY_ATTRIBS);

        int column = 0;
        for(String fieldName : csvFields) {
            if(fieldName.startsWith(IGNORE_FIELD)) {
                int toSkip = parseIgnoreFieldDirective(fieldName);
                if(toSkip == Integer.MAX_VALUE) {
                    // "$ignore$+": the rest of the record is ignored.
                    break;
                }
                column += toSkip;
                continue;
            }

            String value = csvRecord[column];

            writeIndent(INDENT_2);
            contentHandler.startElement(XMLConstants.NULL_NS_URI, fieldName, "", EMPTY_ATTRIBS);
            contentHandler.characters(value.toCharArray(), 0, value.length());
            contentHandler.endElement(XMLConstants.NULL_NS_URI, fieldName, "");

            column++;
        }

        writeIndent(INDENT_1);
        contentHandler.endElement(XMLConstants.NULL_NS_URI, recordElementName, "");
    }

    /**
     * Emits a line feed followed by the supplied indent characters, but only when
     * indenting is enabled.
     *
     * @param indentChars The indent characters to emit after the line feed.
     * @throws SAXException If the content handler rejects an event.
     */
    private void writeIndent(char[] indentChars) throws SAXException {
        if(indent) {
            contentHandler.characters(INDENT_LF, 0, 1);
            contentHandler.characters(indentChars, 0, indentChars.length);
        }
    }

    /**
     * Resolves the number of columns an ignore directive skips.
     *
     * @param field A field entry starting with "$ignore$".
     * @return 1 for a plain "$ignore$", {@link Integer#MAX_VALUE} for "$ignore$+" (ignore all
     * remaining columns), otherwise the number following the prefix.
     * @throws NumberFormatException If the suffix is neither empty, "+", nor an integer.
     */
    private int parseIgnoreFieldDirective(String field) {
        String op = field.substring(IGNORE_FIELD.length());

        if(op.length() == 0) {
            return 1;
        }
        if("+".equals(op)) {
            return Integer.MAX_VALUE;
        }
        return Integer.parseInt(op);
    }

    /**
     * Computes the minimum number of columns a record needs in order to be parsed.
     * <p>
     * This is the larger of (a) the number of named fields and (b) the position of the last
     * named field that is actually read, once the columns skipped by the preceding ignore
     * directives are counted. Checking (b) is what stops a short record from being indexed
     * past its end; it assumes skip counts are not negative. Named fields after an
     * "$ignore$+" are never read and so only contribute to (a).
     *
     * @return The minimum required column count.
     * @throws NumberFormatException If an "$ignore$N" directive has a non-numeric suffix.
     */
    private int getRequiredColumnsCount() {
        int namedFields = 0;
        int highestColumn = -1;
        int column = 0;
        boolean restIgnored = false;

        for(String field : csvFields) {
            if(!field.startsWith(IGNORE_FIELD)) {
                namedFields++;
                if(!restIgnored) {
                    highestColumn = Math.max(highestColumn, column);
                    column++;
                }
            } else if(!restIgnored) {
                int toSkip = parseIgnoreFieldDirective(field);
                if(toSkip == Integer.MAX_VALUE) {
                    restIgnored = true;
                } else {
                    column += toSkip;
                }
            }
        }

        return Math.max(namedFields, highestColumn + 1);
    }

    public void setContentHandler(ContentHandler contentHandler) {
        this.contentHandler = contentHandler;
    }

    public ContentHandler getContentHandler() {
        return contentHandler;
    }

    /**
     * Asserts that the supplied name is one of the configured CSV fields.
     *
     * @param field The field name to check.
     * @throws SmooksConfigurationException If the name is not a configured field.
     */
    private void assertValidFieldName(String field) {
        for(String csvField : csvFields) {
            if(csvField.equals(field)) {
                return;
            }
        }

        throw new SmooksConfigurationException("Invalid field name '" + field + "'. Valid names: " + Arrays.asList(csvFields) + ".");
    }

    /****************************************************************************
     *
     * The following methods are currently unimplemented...
     *
     ****************************************************************************/

    public void parse(String systemId) throws IOException, SAXException {
        throw new UnsupportedOperationException("Operation not supports by this reader.");
    }

    public boolean getFeature(String name) throws SAXNotRecognizedException,
            SAXNotSupportedException {
        return false;
    }

    public void setFeature(String name, boolean value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
    }

    public DTDHandler getDTDHandler() {
        return null;
    }

    public void setDTDHandler(DTDHandler arg0) {
    }

    public EntityResolver getEntityResolver() {
        return null;
    }

    public void setEntityResolver(EntityResolver arg0) {
    }

    public ErrorHandler getErrorHandler() {
        return null;
    }

    public void setErrorHandler(ErrorHandler arg0) {
    }

    public Object getProperty(String name) throws SAXNotRecognizedException,
            SAXNotSupportedException {
        return null;
    }

    public void setProperty(String name, Object value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
    }

    /**
     * Visitor that, after each record element, puts the record bean into the map bean
     * under the key extracted from the record bean's key field.
     * <p>
     * Static because it needs no state from the enclosing reader instance.
     */
    private static class MapBindingWiringVisitor implements DOMVisitAfter, SAXVisitAfter, Consumer {

        private final MVELExpressionEvaluator keyExtractor = new MVELExpressionEvaluator();
        private final String mapBindingKey;

        /**
         * @param bindKeyField Name of the record bean property supplying the map key.
         * @param mapBindingKey Bean id of the map the records are put into.
         */
        private MapBindingWiringVisitor(String bindKeyField, String mapBindingKey) {
            keyExtractor.setExpression(RECORD_BEAN + "." + bindKeyField);
            this.mapBindingKey = mapBindingKey;
        }

        public void visitAfter(Element element, ExecutionContext executionContext) throws SmooksException {
            wireObject(executionContext);
        }

        public void visitAfter(SAXElement element, ExecutionContext executionContext) throws SmooksException, IOException {
            wireObject(executionContext);
        }

        /**
         * Evaluates the key expression against the bean map of the execution's bean
         * repository and stores the current record bean in the map bean under that key.
         */
        @SuppressWarnings("unchecked") // The bean repository API exposes the beans as raw Maps.
        private void wireObject(ExecutionContext executionContext) {
            BeanRepository repository = BeanRepositoryManager.getBeanRepository(executionContext);
            Map beanMap = repository.getBeanMap();
            Object key = keyExtractor.getValue(beanMap);
            Map map = (Map) repository.getBean(mapBindingKey);
            Object record = repository.getBean(RECORD_BEAN);

            map.put(key, record);
        }

        /**
         * @return true if the string form of the supplied object occurs in the key expression.
         */
        public boolean consumes(Object object) {
            return keyExtractor.getExpression().indexOf(object.toString()) != -1;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy