org.databene.document.csv.CSVLineIterator Maven / Gradle / Ivy
Go to download
'databene webdecs' is an open source software library for
WEB Data Extraction, Conversion and Scripting, written by Volker Bergmann.
The newest version!
/*
* (c) Copyright 2007-2011 by Volker Bergmann. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, is permitted under the terms of the
* GNU General Public License.
*
* For redistributing this software or a derivative work under a license other
* than the GPL-compatible Free Software License as defined by the Free
* Software Foundation or approved by OSI, you must first obtain a commercial
* license to this software product from Volker Bergmann.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
* REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
* HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.databene.document.csv;
import org.databene.commons.IOUtil;
import org.databene.commons.CollectionUtil;
import org.databene.commons.SystemInfo;
import org.databene.webdecs.DataContainer;
import org.databene.webdecs.DataIterator;
import java.io.*;
import java.util.List;
import java.util.ArrayList;
/**
* Gives access to content of a CSV file by String arrays
* that represent the CSV rows as specified in RFC 4180.
*
* @author Volker Bergmann
*/
public class CSVLineIterator implements DataIterator {
/** The default separator to use */
public static final char DEFAULT_SEPARATOR = ',';
private String stringRep;
private CSVTokenizer tokenizer;
private String[] nextLine;
private boolean ignoreEmptyLines;
private int lineCount;
private boolean eol;
// constructors ----------------------------------------------------------------------------------------------------
/**
* Creates a parser that reads from a uri
* @param uri the URL to read from
* @throws IOException if uri access failed
*/
public CSVLineIterator(String uri) throws IOException {
this(uri, DEFAULT_SEPARATOR);
}
/**
* Creates a parser that reads from a uri
* @param uri the URL to read from
* @param separator
* @throws IOException
*/
public CSVLineIterator(String uri, char separator) throws IOException {
this(uri, separator, false);
}
public CSVLineIterator(String uri, char separator, String encoding) throws IOException {
this(uri, separator, false, encoding);
}
public CSVLineIterator(String uri, char separator, boolean ignoreEmptyLines) throws IOException {
this(uri, separator, ignoreEmptyLines, SystemInfo.getFileEncoding());
}
public CSVLineIterator(String uri, char separator, boolean ignoreEmptyLines, String encoding) throws IOException {
this(IOUtil.getReaderForURI(uri, encoding), separator, ignoreEmptyLines);
this.stringRep = uri;
}
/**
* Creates a parser that reads from a reader and used a special separator character
* @param reader the reader to use
* @param separator the separator character
*/
public CSVLineIterator(Reader reader, char separator) throws IOException {
this(reader, separator, false);
}
public CSVLineIterator(Reader reader, char separator, boolean ignoreEmptyLines) throws IOException {
this.tokenizer = new CSVTokenizer(reader, separator);
this.ignoreEmptyLines = ignoreEmptyLines;
this.nextLine = parseNextLine();
this.lineCount = 0;
this.eol = false;
this.stringRep = reader.toString();
}
// interface -------------------------------------------------------------------------------------------------------
public Class getType() {
return String[].class;
}
/**
* Parses a CSV row into an array of Strings
* @return an array of Strings that represents a CSV row
*/
public synchronized DataContainer next(DataContainer wrapper) {
if (nextLine == null)
return null;
try {
String[] result = nextLine;
if (tokenizer != null) {
nextLine = parseNextLine();
lineCount++;
} else
nextLine = null;
return wrapper.setData(result);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
/**
* Closes the source
*/
public synchronized void close() {
if (tokenizer != null)
tokenizer.close();
tokenizer = null;
nextLine = null;
}
public synchronized int lineCount() {
return lineCount;
}
public static void process(String uri, char separator, String encoding, boolean ignoreEmptyLines, CSVLineHandler lineHandler) throws IOException {
CSVLineIterator iterator = null;
try {
iterator = new CSVLineIterator(uri, separator, ignoreEmptyLines, encoding);
DataContainer row = new DataContainer();
while ((row = iterator.next(row)) != null)
lineHandler.handle(row.getData());
} finally {
if (iterator != null)
iterator.close();
}
}
// private helpers -------------------------------------------------------------------------------------------------
private String[] parseNextLine() throws IOException {
if (tokenizer == null)
return null;
List list;
CSVTokenType tokenType;
do {
list = new ArrayList();
while ((tokenType = tokenizer.next()) == CSVTokenType.CELL) {
list.add(tokenizer.cell);
}
if (tokenType == CSVTokenType.EOF)
close();
} while (tokenType != CSVTokenType.EOF && (ignoreEmptyLines && list.size() == 0));
if (list.size() > 0) {
eol = (tokenType == CSVTokenType.EOL);
return CollectionUtil.toArray(list, String.class);
}
if (eol && !ignoreEmptyLines)
return new String[0];
else
return null;
}
@Override
public String toString() {
return getClass().getSimpleName() + "[" + stringRep + "]";
}
}