All Downloads are FREE. Search and download functionalities are using the official Maven repository.

it.uniroma2.art.sheet2rdf.sheet.CSVSheetManager Maven / Gradle / Ivy

There is a newer version: 6.0.6
Show newest version
package it.uniroma2.art.sheet2rdf.sheet;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class CSVSheetManager implements SheetManager {
	
	/** Delimiters tried, in order, when none of the predefined CSV formats passes the format test. */
	private static final char[] popularAlternativeDelimiters = {'|', ';'};
	
	/** Records parsed from the CSV file; the first record is read as the header row. */
	private List<CSVRecord> records;

	/**
	 * Parses the given CSV file and keeps all of its records in memory.
	 * <p>
	 * The CSV format is guessed by {@link #detectFormat(File)}. If the file cannot be read,
	 * the stack trace is printed and the manager is left with an empty record list instead
	 * of a {@code null} one, so that accessors do not fail with a NullPointerException.
	 *
	 * @param file the CSV file to load
	 */
	public CSVSheetManager(File file){
		try {
			CSVFormat format = detectFormat(file);
			Reader in = new FileReader(file);
			try {
				CSVParser parser = format.parse(in);
				records = parser.getRecords();
			} finally {
				// always release the file handle, even when parsing fails
				in.close();
			}
		} catch (IOException e) {
			e.printStackTrace();
			// only fall back when nothing was parsed (a failure in close() must not discard records)
			if (records == null) {
				records = Collections.emptyList();
			}
		}
	}

	/**
	 * Picks the first format that passes {@code testFormat} for the given file.
	 * The predefined formats are tried first, then custom formats built on the popular
	 * alternative delimiters; {@link CSVFormat#DEFAULT} is the fallback when nothing matches.
	 *
	 * @param file the CSV file to probe
	 * @return the format to parse the file with, never {@code null}
	 * @throws IOException if the file cannot be opened or read while probing
	 */
	private static CSVFormat detectFormat(File file) throws IOException {
		CSVFormat[] standardFormats = {
			CSVFormat.MYSQL, CSVFormat.RFC4180, CSVFormat.TDF, CSVFormat.EXCEL, CSVFormat.DEFAULT
		};
		for (CSVFormat candidate : standardFormats){
			if (testFormat(file, candidate)){
				return candidate;
			}
		}
		for (char delim : popularAlternativeDelimiters){
			CSVFormat customFormat = CSVFormat.newFormat(delim).withEscape('\\').withQuote('"').withRecordSeparator('\n');
			if (testFormat(file, customFormat)){
				return customFormat;
			}
		}
		return CSVFormat.DEFAULT;
	}
	
	/**
	 * Returns the values of the first record, which is treated as the header row.
	 *
	 * @param includeDuplicate if {@code true} every header cell is returned, repeated values
	 *        included; if {@code false} only the first occurrence of each value is kept
	 * @return the headers in column order; empty when there are no records
	 */
	@Override
	public List<String> getHeaders(boolean includeDuplicate) {
		List<String> headers = new ArrayList<String>();
		if (records == null){
			return headers;
		}
		Iterator<CSVRecord> it = records.iterator();
		if (it.hasNext()){
			for (String header : it.next()){
				// a value already in the list is added again only when duplicates are requested
				if (includeDuplicate || !headers.contains(header)){
					headers.add(header);
				}
			}
		}
		return headers;
	}

	@Override
	public ArrayList> getDataTable() {
		int rows = getDataSheetRowCount();
		int columns = getDataSheetColumnCount();
		ArrayList> table = new ArrayList>();
		for (int r = 1; r < rows; r++){
			ArrayList arrayListRow = new ArrayList();
			for (int c = 0; c < columns; c++){
				String data = getCellValueInDataSheet(r, c); 
				arrayListRow.add(data);
			}
			table.add(arrayListRow);
		}
		return table;
	}

	/**
	 * Returns the value of a single cell of the parsed records.
	 * Row 0 is the first record, i.e. the one that getHeaders reads as the header row.
	 *
	 * @param row index of the record in the parsed record list
	 * @param column index of the value inside that record
	 * @return the cell value
	 */
	@Override
	public String getCellValueInDataSheet(int row, int column) {
		return records.get(row).get(column);
	}

	/**
	 * Returns the prefix-namespace mapping, which is always empty here because no
	 * prefix mapping sheet is defined for CSV input.
	 *
	 * @return a new, empty map
	 */
	@Override
	public Map getPrefixNamespaceMapping() {
		Map noMappings = new HashMap();
		return noMappings;
	}

	/**
	 * Tells whether the given header spans multiple adjacent columns: the first column
	 * carrying {@code headerValue} is located and the column right after it is checked
	 * for the same header.
	 *
	 * @param headerValue the header to look for
	 * @return {@code true} if the column following the first occurrence of
	 *         {@code headerValue} has the same header; {@code false} otherwise, including
	 *         when the header is absent or is the last column
	 */
	@Override
	public boolean isMultipleHeader(String headerValue) {
		if (headerValue == null){
			return false;
		}
		List<String> headers = getHeaders(true);
		int first = headers.indexOf(headerValue);
		if (first < 0 || first + 1 >= headers.size()){
			return false;
		}
		return headerValue.equals(headers.get(first + 1));
	}

	/**
	 * Tells whether a prefix mapping sheet exists.
	 *
	 * @return always {@code false} in this implementation
	 */
	@Override
	public boolean existsPrefixMappingSheet() {
		return false;
	}

	/**
	 * Returns the number of columns of the data sheet, taken from the first record.
	 *
	 * @return the size of the first record, or 0 when there are no records
	 */
	@Override
	public int getDataSheetColumnCount() {
		if (records == null || records.isEmpty()){
			// nothing was parsed: report no columns instead of failing on records.get(0)
			return 0;
		}
		return records.get(0).size();
	}

	/**
	 * Returns the number of parsed records. The first record (the one getHeaders reads
	 * as header row) is included in the count.
	 *
	 * @return the size of the record list
	 */
	@Override
	public int getDataSheetRowCount() {
		return records.size();
	}

	/**
	 * Returns the number of columns of the prefix mapping sheet.
	 *
	 * @return always 0 in this implementation (existsPrefixMappingSheet() returns false)
	 */
	@Override
	public int getPrefixSheetColumnCount() {
		return 0;
	}

	/**
	 * Returns the number of rows of the prefix mapping sheet.
	 *
	 * @return always 0 in this implementation (existsPrefixMappingSheet() returns false)
	 */
	@Override
	public int getPrefixSheetRowCount() {
		return 0;
	}
	
	/**
	* Makes sure the given format has the correct delimiter and quotation set for the file.
	* Checks the first row and makes sure that all the other rows have the same number of
	* columns, and that there are at least 2 columns.
	*
	* @param file CSV file to be checked
	* @param format format to be verified
	* @return whether the file looks valid under the given format
	* @throws IOException if the file cannot be opened or read
	*/
	private static boolean testFormat(File file, CSVFormat format) throws IOException {
		boolean valid = true;
		Reader reader = new FileReader(file);
		
		int MIN_COLUMNS = 2;
		int MAX_ROWS = 50;
		
		CSVParser parser = format.parse(reader);
		List records = parser.getRecords();

//		System.out.println("Rows: " + records.size());
		if (records.size() == 0)
			valid = false;
		
		//get number of headers columns (min 2)
//		System.out.println("Columns 1st row: " + records.get(0).size());
		int headCols = records.get(0).size();
		if (headCols < MIN_COLUMNS)
			valid = false;
		
		int checkRows = MAX_ROWS;
		if (records.size() < checkRows)
			checkRows = records.size();
		//check if every row has the same columns of header (limit to first 50 rows if there are more than that)
		for (int i=1; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy