/*
 * NOTE(review): the two lines of download-site boilerplate below were plain text
 * before the package declaration, which does not compile; preserved as a comment.
 * Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price: only $1.
 * You can buy this project and download/modify it as often as you want.
 */
package prerna.poi.main.helper;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import com.univocity.parsers.csv.CsvFormat;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import cern.colt.Arrays;
import prerna.algorithm.api.SemossDataType;
import prerna.poi.main.HeadersException;
import prerna.test.TestUtilityMethods;
import prerna.util.ArrayUtilityMethods;
import prerna.util.Utility;
public class CSVFileHelper {
// number of rows sampled when predicting the data type of each column
private static final int NUM_ROWS_TO_PREDICT_TYPES = 1000;

private CsvParser parser = null;
private CsvParserSettings settings = null;
private char delimiter = ',';
private FileReader sourceFile = null;
private String fileLocation = null;

// we need to keep two sets of headers
// we will keep the headers as is within the physical file
private String [] allCsvHeaders = null;
// ... that is all good and all, but when we have duplicates, it
// messes things up. to reduce complexity elsewhere, we will just
// create a new unique csv headers list to store the values
// this will in essence become the new "physical names" for each
// column
// NOTE: typed as List<String> - the raw List previously declared here breaks
// the String for-each loops and toArray(new String[]{}) calls that consume it
private List<String> newUniqueCSVHeaders = null;
// keep track of integer with values s.t. we can easily reset to get all the values
// without getting an error when there are duplicate headers within the univocity api
// this will literally be [0,1,2,3,...,n] where n = number of columns - 1
private Integer [] headerIntegerArray = null;
// keep track of the current headers being used
private String [] currHeaders = null;
/*
 * THIS IS REALLY ANNOYING
 * In thick client, need to know if the last column is
 * the path to the prop file location for csv upload
 */
private boolean propFileExists = false;
// univocity api caps max columns / chars per column for security reasons;
// we raise both limits well above the defaults
private int maxColumns = 1_000_000;
private int maxCharsPerColumn = 1_000_000;
/**
 * Parse the new file passed
 * Builds a fresh settings object and starts a parser on the file; the header
 * row is consumed as part of parser creation.
 * @param fileLocation The String location of the fileName
 * @throws NullPointerException if fileLocation is null
 */
public void parse(String fileLocation) {
    // fail fast with a clear message instead of an anonymous NPE deep inside
    // NewLinePredictor / new File(null) later on
    if(fileLocation == null) {
        throw new NullPointerException("fileLocation must not be null");
    }
    this.fileLocation = fileLocation;
    makeSettings();
    createParser();
}
/**
 * Generate a new settings object to parse based on a set delimiter
 * Null and empty cells are both normalized to "", empty lines are skipped,
 * and the api's default column/char limits are raised.
 */
private void makeSettings() {
    CsvParserSettings newSettings = new CsvParserSettings();
    // treat missing and empty values identically
    newSettings.setNullValue("");
    newSettings.setEmptyValue("");
    newSettings.setSkipEmptyLines(true);
    // the format carries the delimiter and the line separator detected from the file
    CsvFormat format = newSettings.getFormat();
    format.setDelimiter(delimiter);
    format.setLineSeparator(NewLinePredictor.predict(this.fileLocation));
    // override the api's security-capped default limits
    newSettings.setMaxColumns(maxColumns);
    newSettings.setMaxCharsPerColumn(maxCharsPerColumn);
    this.settings = newSettings;
}
/**
 * Creates the parser
 * Opens the file, begins parsing, and caches the header row via
 * collectHeaders() the first time a parser is created for this file.
 */
private void createParser() {
    parser = new CsvParser(settings);
    try {
        sourceFile = new FileReader(new File(fileLocation));
        parser.beginParsing(sourceFile);
        // collectHeaders() is a no-op after the first call, but on the first
        // parse it consumes the header row so duplicate headers can be mapped
        // to unique names
        collectHeaders();
    } catch (FileNotFoundException e) {
        // NOTE(review): failure is only logged, leaving the helper in a state
        // where getNextRow() will fail later - confirm callers rely on this
        // best-effort behavior before changing it to rethrow
        e.printStackTrace();
    }
}
/**
 * Get the first row of the headers
 * Reads the header row once (subsequent calls are no-ops), trims each header,
 * substitutes "BLANK_HEADER" for empty ones, and builds a parallel list of
 * unique "physical" names plus the [0..n-1] index array used for resets.
 * When propFileExists is set, the last column is excluded (thick-client prop
 * file path, not data).
 */
public void collectHeaders() {
    if(allCsvHeaders == null) {
        allCsvHeaders = getNextRow();
        if(allCsvHeaders == null) {
            // empty file: previously this fell through to a NullPointerException
            // on allCsvHeaders.length; normalize to zero columns instead
            allCsvHeaders = new String[0];
            newUniqueCSVHeaders = new Vector<String>(0);
            headerIntegerArray = new Integer[0];
            return;
        }
        // need to keep track and make sure our headers are good
        int numCols = allCsvHeaders.length;
        /*
         * THIS IS REALLY ANNOYING
         * In thick client, need to know if the last column is
         * the path to the prop file location for csv upload
         */
        if(propFileExists) {
            numCols--;
        }
        newUniqueCSVHeaders = new Vector<String>(numCols);
        // create the integer array s.t. we can reset the value to get in the future
        headerIntegerArray = new Integer[numCols];
        // grab the headerChecker
        HeadersException headerChecker = HeadersException.getInstance();
        for(int colIdx = 0; colIdx < numCols; colIdx++) {
            // just trim all the headers
            allCsvHeaders[colIdx] = allCsvHeaders[colIdx].trim();
            String origHeader = allCsvHeaders[colIdx];
            // already trimmed above, so a plain isEmpty check suffices
            if(origHeader.isEmpty()) {
                origHeader = "BLANK_HEADER";
            }
            String newHeader = headerChecker.recursivelyFixHeaders(origHeader, newUniqueCSVHeaders);
            // now update the unique headers, as this will be used to match duplications
            newUniqueCSVHeaders.add(newHeader);
            // fill in integer array
            headerIntegerArray[colIdx] = colIdx;
        }
    }
}
/**
 * Return the headers for the parser
 * If a column subset was selected via parseColumns, that selection is
 * returned; otherwise the full set of clean unique headers.
 * @return the headers currently in effect
 */
public String[] getHeaders() {
    // a subset was explicitly selected - hand that back
    if(this.currHeaders != null) {
        return this.currHeaders;
    }
    // otherwise make sure the header row has been read, then return the clean headers
    collectHeaders();
    return this.newUniqueCSVHeaders.toArray(new String[]{});
}
/**
 * In the thick client, the last csv column may hold the path to the prop file
 * used for upload; when true, collectHeaders() excludes that last column.
 * @param propFileExist true if the final column is a prop file path, not data
 */
public void setUsingPropFile(boolean propFileExist) {
this.propFileExists = propFileExist;
}
/**
 * Get all the headers used in the csv file
 * This is the clean version of the csv headers
 * NOTE: parse() must have been called first - the backing list is only
 * populated once the header row has been read, and this will NPE otherwise
 * @return every clean unique header, regardless of any column selection
 */
public String[] getAllCSVHeaders() {
return this.newUniqueCSVHeaders.toArray(new String[]{});
}
/**
 * Set a limit on which columns you want to be parsed
 * Selection is done by index (not name) so duplicate header names resolve
 * correctly. The parser is reset so the selection takes effect immediately.
 * @param columns The String[] containing the headers you want
 * @throws IllegalArgumentException if a requested column is not a known header
 */
public void parseColumns(String[] columns) {
    // rebuild settings so any previous column selection is discarded
    makeSettings();
    // must use index for when there are duplicate values
    Integer[] values = new Integer[columns.length];
    for(int colIdx = 0; colIdx < columns.length; colIdx++) {
        int index = newUniqueCSVHeaders.indexOf(columns[colIdx]);
        if(index < 0) {
            // previously a -1 slipped through to selectIndexes and produced a
            // confusing univocity error; fail with the offending column name
            throw new IllegalArgumentException("Column '" + columns[colIdx]
                    + "' does not exist in the csv headers");
        }
        values[colIdx] = index;
    }
    settings.selectIndexes(values);
    currHeaders = columns;
    reset(false);
}
/**
 * Get the next row of the file
 * @return the next parsed record as a String[], or null when there are no more rows
 */
public String[] getNextRow() {
return parser.parseNext();
}
/**
 * Reset to start the parser from the beginning of the file
 * @param removeCurrHeaders when true, any column selection made via
 *        parseColumns is cleared and all columns are selected again
 */
public void reset(boolean removeCurrHeaders) {
    clear();
    if(removeCurrHeaders) {
        currHeaders = null;
        // setting the indices to be all the headers
        // BUGFIX: this must happen BEFORE the parser is recreated - the field
        // selection is read when beginParsing runs, so mutating the settings
        // after createParser() had no effect on the parser just created
        settings.selectIndexes(headerIntegerArray);
    }
    createParser();
    // this is to get the header row
    getNextRow();
}
/**
 * Clears the parser and requires you to start the parsing from scratch
 * Each resource is released independently so a failure in one does not skip
 * the other (previously an IOException from close() skipped stopParsing()).
 */
public void clear() {
    if(parser != null) {
        try {
            parser.stopParsing();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    if(sourceFile != null) {
        try {
            sourceFile.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Set the delimiter used to split each record
 * @param charAt the delimiter character (defaults to ',')
 */
public void setDelimiter(char charAt) {
this.delimiter = charAt;
}

/**
 * Get the delimiter used to split each record
 * @return the current delimiter character
 */
public char getDelimiter() {
return this.delimiter;
}
/**
 * Get the file location
 * @return String with the file location passed into {@link #parse(String)}
 */
public String getFileLocation() {
return this.fileLocation;
}
/**
 * Reorder the requested headers to match their order of appearance in the file
 * @param headersToGet the headers the caller wants
 * @return the requested headers in csv column order; headers that do not exist
 *         in the file are dropped (previously they left trailing nulls)
 */
public String[] orderHeadersToGet(String[] headersToGet) {
    String[] orderedHeaders = new String[headersToGet.length];
    int counter = 0;
    // walk the clean headers in file order so the subset keeps that order
    for(String header : this.newUniqueCSVHeaders) {
        if(ArrayUtilityMethods.arrayContainsValue(headersToGet, header)) {
            orderedHeaders[counter] = header;
            counter++;
        }
    }
    // a requested header was not found - truncate rather than return trailing nulls
    // (fully qualified since cern.colt.Arrays is imported at the top of the file)
    if(counter < orderedHeaders.length) {
        return java.util.Arrays.copyOf(orderedHeaders, counter);
    }
    return orderedHeaders;
}
/**
* Loop through all the data to see what the data types are for each column
* @return
*/
public Object[][] predictTypes() {
int numCols = newUniqueCSVHeaders.size();
Object[][] predictedTypes = new Object[numCols][3];
List