All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.metafacture.csv.CsvDecoder Maven / Gradle / Ivy

There is a newer version: 6.2.0
Show newest version
/*
 * Copyright 2013, 2014 Deutsche Nationalbibliothek
 *
 * Licensed under the Apache License, Version 2.0 the "License";
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.metafacture.csv;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultObjectPipe;

import com.opencsv.CSVReader;

/**
 * Decodes lines of CSV files. First line is interpreted as header.
 *
 * @author Markus Michael Geipel
 * @author Fabian Steeg (fsteeg)
 *
 */
@Description("Decodes lines of CSV files. First line is interpreted as header.")
@In(String.class)
@Out(StreamReceiver.class)
@FluxCommand("decode-csv")
public final class CsvDecoder extends DefaultObjectPipe  {

	private static final char DEFAULT_SEP = ',';
	private final char separator;

	private String[] header = new String[0];
	private int count;
	private boolean hasHeader;

	/**
	 * @param separator to split lines
	 */
	public CsvDecoder(final char separator) {
		super();
		this.separator = separator;
	}

	public CsvDecoder() {
		super();
		this.separator = DEFAULT_SEP;
	}

	@Override
	public void process(final String string) {
		assert !isClosed();
		final String[] parts = parseCsv(string);
		if(hasHeader){
			if(header.length==0){
				header = parts;
			}else if(parts.length==header.length){
				getReceiver().startRecord(String.valueOf(++count));
				for (int i = 0; i < parts.length; ++i) {
					getReceiver().literal(header[i], parts[i]);
				}
				getReceiver().endRecord();
			}else{
				throw new IllegalArgumentException(
						String.format(
								"wrong number of columns (expected %s, was %s) in input line: %s",
								header.length, parts.length, string));
			}
		}else{
			getReceiver().startRecord(String.valueOf(++count));
			for (int i = 0; i < parts.length; ++i) {
				getReceiver().literal(String.valueOf(i), parts[i]);
			}
			getReceiver().endRecord();
		}
	}

	private String[] parseCsv(final String string) {
		String[] parts = new String[0];
		try {
			final CSVReader reader = new CSVReader(new StringReader(string),
					separator);
			final List lines = reader.readAll();
			if (lines.size() > 0) {
				parts = lines.get(0);
			}
			reader.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return parts;
	}

	public void setHasHeader(final boolean hasHeader) {
		this.hasHeader = hasHeader;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy