
org.metafacture.csv.CsvDecoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of metafacture-csv Show documentation
Show all versions of metafacture-csv Show documentation
Modules for processing comma-separated values
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.metafacture.csv;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;
import com.opencsv.CSVReader;
/**
* Decodes lines of CSV files. First line is interpreted as header.
*
* @author Markus Michael Geipel
* @author Fabian Steeg (fsteeg)
*
*/
@Description("Decodes lines of CSV files. First line is interpreted as header.")
@In(String.class)
@Out(StreamReceiver.class)
@FluxCommand("decode-csv")
public final class CsvDecoder extends DefaultObjectPipe {
private static final char DEFAULT_SEP = ',';
private final char separator;
private String[] header = new String[0];
private int count;
private boolean hasHeader;
/**
* @param separator to split lines
*/
public CsvDecoder(final char separator) {
super();
this.separator = separator;
}
public CsvDecoder() {
super();
this.separator = DEFAULT_SEP;
}
@Override
public void process(final String string) {
assert !isClosed();
final String[] parts = parseCsv(string);
if(hasHeader){
if(header.length==0){
header = parts;
}else if(parts.length==header.length){
getReceiver().startRecord(String.valueOf(++count));
for (int i = 0; i < parts.length; ++i) {
getReceiver().literal(header[i], parts[i]);
}
getReceiver().endRecord();
}else{
throw new IllegalArgumentException(
String.format(
"wrong number of columns (expected %s, was %s) in input line: %s",
header.length, parts.length, string));
}
}else{
getReceiver().startRecord(String.valueOf(++count));
for (int i = 0; i < parts.length; ++i) {
getReceiver().literal(String.valueOf(i), parts[i]);
}
getReceiver().endRecord();
}
}
private String[] parseCsv(final String string) {
String[] parts = new String[0];
try {
final CSVReader reader = new CSVReader(new StringReader(string),
separator);
final List lines = reader.readAll();
if (lines.size() > 0) {
parts = lines.get(0);
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
return parts;
}
public void setHasHeader(final boolean hasHeader) {
this.hasHeader = hasHeader;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy