it.uniroma2.art.sheet2rdf.sheet.XLSSheetManager Maven / Gradle / Ivy
package it.uniroma2.art.sheet2rdf.sheet;
import it.uniroma2.art.coda.converters.commons.DateTimeUtils;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class XLSSheetManager implements SheetManager {
private static final String PREFIX_MAPPING_SHEET_NAME = "prefix_mapping";
private Workbook workbook;
private Sheet dataSheet, prefixMappingSheet;
public XLSSheetManager(File file){
try {
workbook = WorkbookFactory.create(file);
dataSheet = workbook.getSheetAt(0);
prefixMappingSheet = workbook.getSheet(PREFIX_MAPPING_SHEET_NAME);
} catch (InvalidFormatException | IOException e) {
e.printStackTrace();
}
}
@Override
public List getHeaders(boolean includeDuplicate){
ArrayList headers = new ArrayList<>();
Row headerRow = dataSheet.getRow(0);
int nCol = getDataSheetColumnCount();
for (int i = 0; i < nCol; i++){
Cell headerCell = headerRow.getCell(i);
String header = headerCell.getStringCellValue().trim();
if (!headers.contains(header))//if the headers is not yet in the list, add it
headers.add(header);
else { //else add it only if includeDuplicate = true
if (includeDuplicate)
headers.add(header);
}
}
return headers;
}
@Override
public ArrayList> getDataTable(){
int rows = getDataSheetRowCount();
int columns = getDataSheetColumnCount();
ArrayList> table = new ArrayList<>();
for (int r = 1; r < rows; r++){
ArrayList arrayListRow = new ArrayList<>();
for (int c = 0; c < columns; c++){
String data = getCellValueInDataSheet(r, c);
arrayListRow.add(data);
}
table.add(arrayListRow);
}
return table;
}
@Override
public String getCellValueInDataSheet(int row, int column){
String value = "";
Row r = dataSheet.getRow(row);
if (r != null){
Cell cell = r.getCell(column);
if (cell != null){
if (cell.getCellType() == cell.CELL_TYPE_NUMERIC) {
if (DateUtil.isCellDateFormatted(cell)) { //data
SimpleDateFormat sdf = new SimpleDateFormat(DateTimeUtils.DATE_PATTERN_ISO_8601);
value = sdf.format(cell.getDateCellValue());
} else { //number
value = cell.getNumericCellValue() + "";
}
} else if (cell.getCellType() == cell.CELL_TYPE_STRING) { //String
value = cell.getStringCellValue();
}
}
}
return value;
}
@Override
public Map getPrefixNamespaceMapping(){
Map map = new HashMap<>();
if (prefixMappingSheet != null){
int nRow = getPrefixSheetRowCount();
for (int i = 0; i < nRow; i++){
Row row = prefixMappingSheet.getRow(i);
Cell prefCell = row.getCell(0);
Cell nsCell = row.getCell(1);
if (prefCell != null && nsCell != null){
String pref = prefCell.getStringCellValue();
String ns = nsCell.getStringCellValue();
map.put(pref, ns);
}
}
}
return map;
}
@Override
public boolean isMultipleHeader(String headerValue) {
Row headerRow = dataSheet.getRow(0);
int nCol = getDataSheetColumnCount();
int idxHeaderValue = 0;
//find the column index of the headerValue
for (int i = 0; i < nCol; i++){
Cell headerCell = headerRow.getCell(i);
String h = headerCell.getStringCellValue().trim();
if (h.equals(headerValue)){
idxHeaderValue = i;
break;
}
}
//check if following column has the same headerValue
Cell followingHeader = headerRow.getCell(idxHeaderValue+1);
if (followingHeader != null) {//check necessary cause the header could be the last one
return followingHeader.getStringCellValue().trim().equals(headerValue);
}
else
return false;
}
@Override
public boolean existsPrefixMappingSheet(){
return (workbook.getSheet("prefix_mapping") != null);
}
@Override
public int getDataSheetColumnCount(){
int col;
int lastCellNum = dataSheet.getRow(0).getLastCellNum();
//if a cell has been removed simply selecting and then deleting it,
//getLastCellNum still considers the empty cell and alters the count
//so, starting from the last cell, check backward the last not-empty cell
for (col=lastCellNum-1; col>0; col--){
Cell lastCell = dataSheet.getRow(0).getCell(col);
if (lastCell.getCellType() == lastCell.CELL_TYPE_STRING && !lastCell.getStringCellValue().trim().equals("")) {
break;
}
}
return col+1; //+1 cause the index of getCell is 0-based
}
@Override
public int getDataSheetRowCount(){
int rowCount;
int tempRow = dataSheet.getPhysicalNumberOfRows();
//if every cell of a row has been removed simply selecting and then deleting it,
//getPhysicalNumberOfRows still considers the empty cell and alters the count
//so, starting from the last row, check backward the last not-empty row
int col = getDataSheetColumnCount();
emptyRowSearch: for (rowCount=tempRow-1; rowCount>=0; rowCount--){
Row row = dataSheet.getRow(rowCount);
if (row == null) continue; //prevents NullPointerException for wrong getPhysicalNumberOfRows result
//check if the row is empty
for (int i=0; i =0; rowCount--){
Row row = prefixMappingSheet.getRow(rowCount);
//check if the row is empty
for (int i=0; i