All Downloads are FREE. Search and download functionalities are using the official Maven repository.

prerna.poi.main.helper.excel.ExcelSheetPreProcessor Maven / Gradle / Ivy

The newest version!
package prerna.poi.main.helper.excel;

import java.util.List;
import java.util.Vector;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;

import prerna.algorithm.api.SemossDataType;
import prerna.date.SemossDate;
import prerna.om.HeadersException;

public class ExcelSheetPreProcessor {

	private static final Logger classLogger = LogManager.getLogger(ExcelSheetPreProcessor.class);

	private Sheet sheet;
	private String sheetName;
	
	// contain a list of all the blocks within this sheet
	private List allBlocks = new Vector();
	
	public ExcelSheetPreProcessor(Sheet sheet) {
		this.sheet = sheet;
		this.sheetName = sheet.getSheetName();
	}
	
	public Sheet getSheet() {
		return this.sheet;
	}
	
	public List getAllBlocks() {
		return this.allBlocks;
	}
	
	public String[] getRangeHeaders(ExcelRange range) {
		int[] rangeIndices = range.getIndices();
		// need to get the first row
		
		int startCol = rangeIndices[0]-1;
		int startRow = rangeIndices[1]-1;
		int endCol = rangeIndices[2];

		Row headerRow = sheet.getRow(startRow);
		String[] curHeaders = new String[endCol - startCol];
		
		int counter = 0;
		for(int i = startCol; i < endCol; i++) {
			curHeaders[counter] = ExcelParsing.getCell(headerRow.getCell(i)) + "";
			counter++;
		}
		
		return curHeaders;
	}
	
	public String[] getCleanedRangeHeaders(ExcelRange range) {
		String[] oHeaders = getRangeHeaders(range);
		
		// grab the headerChecker
		HeadersException headerChecker = HeadersException.getInstance();
		List newUniqueCleanHeaders = new Vector();
		
		int numCols = oHeaders.length;
		for(int colIdx = 0; colIdx < numCols; colIdx++) {
			String origHeader = oHeaders[colIdx];
			if(origHeader.trim().isEmpty()) {
				origHeader = "BLANK_HEADER";
			}
			String newHeader = headerChecker.recursivelyFixHeaders(origHeader, newUniqueCleanHeaders);
			
			// now update the unique headers, as this will be used to match duplications
			newUniqueCleanHeaders.add(newHeader);
		}
		
		return newUniqueCleanHeaders.toArray(new String[]{});
	}
	
	/**
	 * Determine table ranges within a specific sheet
	 */
	public void determineSheetRanges() {
		int startRow = sheet.getFirstRowNum();
		int lastRow = sheet.getLastRowNum();
		
		ExcelBlock thisBlock = new ExcelBlock();
		
		classLogger.info("Processing " + sheetName + " from rows " + startRow + " to " + lastRow);
		for(int rowIndex = startRow; rowIndex <= lastRow; rowIndex++) {
			Row thisRow = sheet.getRow(rowIndex);
			
			// if i have a null row then we have a new block
			if(thisRow == null) {
				if(!thisBlock.isEmpty()) {
					// add to the list of blocks
					allBlocks.add(thisBlock);
					// create a new block
					thisBlock = new ExcelBlock();
				}
				// continue to the next row
				continue;
			}
			
			int startCol = thisRow.getFirstCellNum();
			int lastCol = thisRow.getLastCellNum();
			
			// sometimes, we can have an empty row
			// treat this as being a null row as well
			if(lastCol <= 0) {
				if(thisBlock.numIndicesInBlock() > 1) {
					// add to the list of blocks
					allBlocks.add(thisBlock);
					// create a new block
					thisBlock = new ExcelBlock();
				}
				// continue to the next row
				continue;
			}
			
			// we want to keep track
			// if we are at the first column
			boolean initStartCol = true;
			int filledInColumns = 0;
			
			// loop through the row and add to the current block
			for(int colIndex = startCol; colIndex <= lastCol; colIndex++) {
				Cell thisCell = thisRow.getCell(colIndex);
				Object cellValue = ExcelParsing.getCell(thisCell);
				// if the cell is empty
				if(cellValue == null || cellValue.toString().trim().isEmpty()) {
					// ignore
					continue;
				} else {
					if(initStartCol) {
						thisBlock.addStartColumnIndex(colIndex);
						initStartCol = false;
					}
					
					// add column to row + type metadata
					String additionalFormatting = null;
					if(cellValue instanceof SemossDate) {
						additionalFormatting = ((SemossDate) cellValue).getPattern();
					}
					
					SemossDataType cellType = ExcelParsing.getTypeByCast(cellValue);
					thisBlock.addColumnToRowIndexWithData(colIndex, rowIndex+1, cellType, additionalFormatting);
					filledInColumns++;
				}
			}
			
			// now see if the block is the same as the last one or not
			if(filledInColumns > 0) {
				// add the total number of columns that have values
				thisBlock.addTotalColumnsInRowStats(filledInColumns);
				// add the row index that has data
				thisBlock.addRowIndexContainingData(rowIndex);
				// set the max column
				thisBlock.trySetLastColMaxIndex(lastCol);
			} else if(!thisBlock.isEmpty()) {
				tryMergeBlocks(thisBlock);
				// create a new block
				thisBlock = new ExcelBlock();
			}
		}
		
		// we gotta add the last block into the list
		if(thisBlock.numIndicesInBlock() > 1) {
			this.allBlocks.add(thisBlock);
		}
	}
	
	private void tryMergeBlocks(ExcelBlock thisBlock) {
		if(!allBlocks.isEmpty()) {
			// see if this was the same as the last block
			// or add it to the list of blocks
			ExcelBlock lastBlock = allBlocks.get(allBlocks.size()-1);
			if(lastBlock.sameAs(thisBlock)) {
				lastBlock.merge(thisBlock);
			} else {
				allBlocks.add(thisBlock);
			}
		} else {
			// add our first block
			allBlocks.add(thisBlock);
		}
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy