
prerna.poi.main.helper.excel.ExcelWorkbookFilePreProcessor Maven / Gradle / Ivy
The newest version!
package prerna.poi.main.helper.excel;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import prerna.util.Constants;
import prerna.util.Utility;
public class ExcelWorkbookFilePreProcessor {
private static final Logger classLogger = LogManager.getLogger(ExcelWorkbookFilePreProcessor.class);
private Workbook workbook = null;
private FileInputStream sourceFile = null;
private String fileLocation = null;
private String password = null;
private Map sheetProcessor = null;
@Deprecated
public void parse(String fileLocation) {
parse(fileLocation, null);
}
public void parse(String fileLocation, String password) {
this.fileLocation = fileLocation;
this.password = password;
createParser();
}
/**
* Opens the workbook
*/
private void createParser() {
try {
sourceFile = new FileInputStream(Utility.normalizePath(fileLocation));
try {
workbook = WorkbookFactory.create(sourceFile, this.password);
} catch (EncryptedDocumentException e) {
classLogger.error(Constants.STACKTRACE, e);
}
sheetProcessor = new HashMap();
} catch (FileNotFoundException e) {
classLogger.error(Constants.STACKTRACE, e);
} catch (IOException e) {
classLogger.error(Constants.STACKTRACE, e);
}
}
/**
* Loop through all the sheets to determine the ranges of tables
*/
public void determineTableRanges() {
int numSheets = workbook.getNumberOfSheets();
for(int sheetIndex = 0; sheetIndex < numSheets; sheetIndex++) {
determineTableRanges(workbook.getSheetAt(sheetIndex));
}
}
/**
* Determine ranges in a specific sheet
* @param sheet
*/
private void determineTableRanges(Sheet sheet) {
ExcelSheetPreProcessor sProcessor = new ExcelSheetPreProcessor(sheet);
sProcessor.determineSheetRanges();
sheetProcessor.put(sheet.getSheetName(), sProcessor);
}
public Map getSheetProcessors() {
if(this.sheetProcessor == null) {
throw new IllegalArgumentException("Must run determineTableRanges method to initialize pre processing of excel file");
}
return this.sheetProcessor;
}
/**
* Clears the parser and requires you to start the parsing from scratch
*/
public void clear() {
try {
if(sourceFile != null) {
sourceFile.close();
}
} catch (IOException e) {
classLogger.error(Constants.STACKTRACE, e);
} catch (Exception e) {
classLogger.error(Constants.STACKTRACE, e);
}
}
/**
* Get the sheets in order
* @return
*/
public List getSheetNames() {
int numSheets = this.workbook.getNumberOfSheets();
List sheets = new ArrayList(numSheets);
for(int i = 0; i < numSheets; i++) {
sheets.add(this.workbook.getSheetName(i));
}
return sheets;
}
/////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
// public static void main(String[] args) {
// String fileLocation = "C:\\Users\\SEMOSS\\Desktop\\shifted.xlsx";
//
// ExcelWorkbookFilePreProcessor processor = new ExcelWorkbookFilePreProcessor();
// processor.parse(fileLocation);
// processor.determineTableRanges();
// Map sheetProcessors = processor.getSheetProcessors();
// for(String sheet : sheetProcessors.keySet()) {
// ExcelSheetPreProcessor sProcessor = sheetProcessors.get(sheet);
//
// {
// List blocks = sProcessor.getAllBlocks();
// System.out.println("Streaming approach for types");
// for(int i = 0; i < blocks.size(); i++) {
// ExcelBlock block = blocks.get(i);
// List blockRanges = block.getRanges();
// for(int j = 0; j < blockRanges.size(); j++) {
// ExcelRange r = blockRanges.get(j);
// System.out.println("Found range = " + r.getRangeSyntax());
//
// System.out.println("Predicted range with headers " + Arrays.toString(sProcessor.getRangeHeaders(r)));
// System.out.println("Predicted types for range");
// Object[][] rangeTypes = block.getRangeTypes(r);
// for(Object[] p : rangeTypes) {
// System.out.println(Arrays.toString(p));
// }
// }
// }
// }
//
// System.out.println();
// System.out.println();
//
// {
// System.out.println("Brute force method for types");
// List blocks = sProcessor.getAllBlocks();
// for(int i = 0; i < blocks.size(); i++) {
// ExcelBlock block = blocks.get(i);
// List blockRanges = block.getRanges();
// for(int j = 0; j < blockRanges.size(); j++) {
// ExcelRange r = blockRanges.get(j);
// System.out.println("Getting prediciton for range = " + r.getRangeSyntax());
// Object[][] prediction = ExcelParsing.predictTypes(sProcessor.getSheet(), r.getRangeSyntax());
// for(Object[] p : prediction) {
// System.out.println(Arrays.toString(p));
// }
// }
// }
// }
// }
// }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy