
// prerna.om.HeadersException - Maven / Gradle / Ivy
// The newest version!
package prerna.om;
import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;
import java.util.regex.Pattern;
import prerna.util.Constants;
import prerna.util.Utility;
public class HeadersException {
/*
* Object to clear the headers and determine any exceptions that are invalid for loading
*
* Its a singleton since we need to read the giant list of values that are saved in RDF_Map which
* I do not want to do multiple times
*/
// the singleton object
private static HeadersException singleton;
// the list of prohibited words read through the RDF_MAP
// we will store everything in upper case format
private static Set prohibitedHeaders = new HashSet();
public final static String DUP_HEADERS_KEY = "DUPLICATE_HEADERS";
public final static String ILLEGAL_HEADERS_KEY = "ILLEGAL_HEADERS";
public final static String ILLEGAL_CHARACTER_KEY = "ILLEGAL_CHARACTER_KEY";
public final static String ILLEGAL_START_CHARACTER_KEY = "ILLEGAL_START_CHARACTER_KEY";
// the constructor
// responsible for loading in the prohibited headers
// requires DIHelper
private HeadersException() {
// grab the giant string from helper
try {
String prohibitedHeadersStr = Utility.getDIHelperProperty(Constants.PROBHIBITED_HEADERS);
// the string is comma delimited
String[] words = prohibitedHeadersStr.split(",");
for(String word : words) {
// keep everything upper case for simplicity in comparisons
prohibitedHeaders.add(word.toUpperCase());
}
} catch(Exception e) {
System.err.println("DIHelper is not loaded. THIS SHOULD ONLY BE THE CASE DURING TESTING!");
}
}
// singleton access point
public static HeadersException getInstance() {
if(singleton == null) {
singleton = new HeadersException();
}
return singleton;
}
/**
* This will compare the headers from the file and report all issues to the user if present
* @param headers The String[] containing the headers to test
* @return boolean true/false if headers are good
* if headers are not good, it just throws an exception at the end
* which needs to be caught and sent to the FE
* @throws IOException
*/
public boolean compareHeaders(String fileName, String[] headers) throws IOException {
headers = upperCaseAllHeaders(headers);
// instantiate errorMessage objects
boolean foundError = false;
StringBuilder errorMessage = new StringBuilder();
errorMessage.append("FILE ERROR : " + Utility.getOriginalFileName(fileName) + "
");
// two tests
// first one is if we have duplicate headers
// second one is if we have illegal headers - based on some sql terms i found online...
// for optimization to run through the headers only once, I combined the two tests
Map> comparisons = runAllComparisons(headers);
// duplicate headers will store which headers are duplicated
Set duplicateHeaders = comparisons.get(DUP_HEADERS_KEY);
// illegal headers will store which headers are illegal
Set illegalHeaders = comparisons.get(ILLEGAL_HEADERS_KEY);
// illegal characters will store headers which have any of the following: %+;@
Set illCharacterHeaders = comparisons.get(ILLEGAL_CHARACTER_KEY);
// illegal start characters will store headers which do not start with a digit
Set illegalStartHeaders = comparisons.get(ILLEGAL_START_CHARACTER_KEY);
if(!duplicateHeaders.isEmpty()) {
foundError = true;
errorMessage.append("
");
errorMessage.append("ERROR - Duplicate Column Names:
");
int dupCounter = 1;
for(String dupHeader : duplicateHeaders) {
errorMessage.append(dupCounter + ") " + dupHeader + "
");
dupCounter++;
}
}
if(!illegalHeaders.isEmpty()) {
foundError = true;
errorMessage.append("
");
errorMessage.append("ERROR - Prohibited Column Names:
");
// cause i'm ill son
int illCounter = 1;
for(String illHeader : illegalHeaders) {
errorMessage.append(illCounter + ") " + illHeader + "
");
illCounter++;
}
}
if(!illCharacterHeaders.isEmpty()) {
foundError = true;
errorMessage.append("
");
errorMessage.append("ERROR - Column name can't contain any of the following characters: %+;@
");
// cause i'm ill son
int illCounter = 1;
for(String illHeader : illCharacterHeaders) {
errorMessage.append(illCounter + ") " + illHeader + "
");
illCounter++;
}
}
if(!illegalStartHeaders.isEmpty()) {
foundError = true;
errorMessage.append("
");
errorMessage.append("ERROR - Column name must start with a letter
");
// cause i'm ill son
int illCounter = 1;
for(String illHeader : illCharacterHeaders) {
errorMessage.append(illCounter + ") " + illHeader + "
");
illCounter++;
}
}
if(foundError) {
throw new IOException(errorMessage.toString());
}
return true;
}
public Map> runAllComparisons(String[] headers) {
// this method is just a combination of finding duplicate headers
// and finding illegal headers
Map> returnComparisonsMap = new Hashtable>();
// this is a bit messier in terms of implementation, but it only requires
// us to go through the data once, instead of iterating once to find duplicate
// headers and another time to find illegal headers
// store duplicate values.. and make it an ordered set
Set duplicateHeaders = new TreeSet();
// store the illegal headers... make it an ordered set
Set illegalHeaders = new TreeSet();
// keep a list of the headers current seen
Set currHeadersProcessed = new HashSet();
// store the illegal headers... make it an ordered set
Set illConcatHeaders = new TreeSet();
// store the headers that start with non-letters... make it an ordered set
Set illealStartHeaders = new TreeSet();
int size = headers.length;
for(int headIdx = 0; headIdx < size; headIdx++) {
String thisHeader = headers[headIdx];
// THIS IS THE PORTION OF CODE FOR DUPLICATE HEADERS
if(currHeadersProcessed.contains(thisHeader)) {
// we found a duplicate value!
duplicateHeaders.add(thisHeader);
} else {
// add it to the set to see if we run into it again
currHeadersProcessed.add(thisHeader);
}
// END DUPLICATE HEADERS
// THIS IS THE PORTION OF CODE FOR ILLEGAL HEADERS
if(prohibitedHeaders.contains(thisHeader)) {
// we found an illegal value!
illegalHeaders.add(thisHeader);
}
// END ILLEGAL HEADERS
// THIS IS THE PORTION OF CODE FOR ILLEGAL CHARACTERS
if(containsIllegalCharacter(thisHeader)) {
// we found an illegal value!
illConcatHeaders.add(thisHeader);
}
// END ILLEGAL CONCATENATIONS
if(isIllegalStartCharacter(thisHeader)) {
illealStartHeaders.add(thisHeader);
}
}
returnComparisonsMap.put(DUP_HEADERS_KEY, duplicateHeaders);
returnComparisonsMap.put(ILLEGAL_HEADERS_KEY, illegalHeaders);
returnComparisonsMap.put(ILLEGAL_CHARACTER_KEY, illConcatHeaders);
returnComparisonsMap.put(ILLEGAL_START_CHARACTER_KEY, illealStartHeaders);
return returnComparisonsMap;
}
public String[] upperCaseAllHeaders(String[] headers) {
int size = headers.length;
for(int headIdx = 0; headIdx < size; headIdx++) {
headers[headIdx] = headers[headIdx].toUpperCase();
}
return headers;
}
public boolean isDuplicated(String checkHeader, String[] allHeaders) {
checkHeader = checkHeader.toUpperCase();
for(String currHeaders : allHeaders) {
if(currHeaders == null) {
continue;
}
if(checkHeader.equals(currHeaders.toUpperCase())) {
return true;
}
}
return false;
}
public boolean isDuplicated(String checkHeader, String[] allHeaders, int ignoreIndex) {
checkHeader = checkHeader.toUpperCase();
for(int colIdx = 0; colIdx < allHeaders.length; colIdx++) {
if(colIdx == ignoreIndex) {
continue;
}
String currHeaders = allHeaders[colIdx];
if(currHeaders == null) {
continue;
}
if(checkHeader.equals(currHeaders.toUpperCase())) {
return true;
}
}
return false;
}
public boolean isIllegalHeader(String checkHeader) {
checkHeader = checkHeader.toUpperCase();
if(prohibitedHeaders.contains(checkHeader)) {
return true;
}
return false;
}
public boolean containsIllegalCharacter(String checkHeader) {
// match any character not alpha, numeric, or underscore AND
// match 2 or more consecutive underscores AND
// match if starts with underscore AND
// match if ends with underscore
Pattern p = Pattern.compile("[^a-zA-Z0-9-_]|_{2,}|^_|_$|-");
boolean hasIllegalChar = p.matcher(checkHeader).find();
return hasIllegalChar;
}
public String removeIllegalCharacters(String checkHeader) {
checkHeader = checkHeader.trim();
checkHeader = checkHeader.replace("+", "");
checkHeader = checkHeader.replace("@", "");
checkHeader = checkHeader.replace("%", "");
checkHeader = checkHeader.replace(";", "");
checkHeader = checkHeader.replaceAll("[^a-zA-Z0-9]", "_");
// need to replace 2 "__" with a single "_"
while(checkHeader.contains("__")) {
checkHeader = checkHeader.replace("__", "_");
}
if(checkHeader.startsWith("_")) {
checkHeader = checkHeader.substring(1, checkHeader.length());
}
if(checkHeader.endsWith("_")) {
checkHeader = checkHeader.substring(0, checkHeader.length()-1);
}
return checkHeader;
}
public boolean isIllegalStartCharacter(String checkHeader) {
if(checkHeader.length() > 0) {
char start = checkHeader.charAt(0);
if(!Character.isLetter(start)) {
return true;
}
}
return false;
}
public String appendLetterAtBeginning(String origHeader) {
return "A" + origHeader;
}
public String recursivelyFixHeaders(String origHeader, List currCleanHeaders) {
boolean isAltered = false;
/*
* For the following 3 checks
* Just perform a single fix within each block
* And let the recursion deal with having to fix an issue that is arising
* due to a previous fix
* i.e. you made a header no longer illegal but now it is a duplicate, recursion of
* this method will deal with that
*/
// first, clean illegal characters
if(containsIllegalCharacter(origHeader)) {
origHeader = removeIllegalCharacters(origHeader);
isAltered = true;
}
// second, check if header is some kind of reserved word
if(isIllegalHeader(origHeader)) {
origHeader = appendNumOntoHeader(origHeader);
isAltered = true;
}
// third, check if header starts with a digit
if(isIllegalStartCharacter(origHeader)) {
origHeader = appendLetterAtBeginning(origHeader);
isAltered = true;
}
// final, check for duplications
for(String currHead : currCleanHeaders) {
if(origHeader.equalsIgnoreCase(currHead)) {
origHeader = appendNumOntoHeader(origHeader);
isAltered = true;
break;
}
}
// if we did alter the string at any point
// we need to continue and re-run these checks again
// until we have gone through without altering the string
// and return the string
if(isAltered) {
origHeader = recursivelyFixHeaders(origHeader, currCleanHeaders);
}
return origHeader;
}
public String recursivelyFixHeaders(String origHeader, String[] currCleanHeaders) {
boolean isAltered = false;
/*
* For the following 3 checks
* Just perform a single fix within each block
* And let the recursion deal with having to fix an issue that is arising
* due to a previous fix
* i.e. you made a header no longer illegal but now it is a duplicate, recurssion of
* this method will deal with that
*/
// first, clean illegal characters
if(containsIllegalCharacter(origHeader)) {
origHeader = removeIllegalCharacters(origHeader);
isAltered = true;
}
// second, check if header is some kind of reserved word
if(isIllegalHeader(origHeader)) {
origHeader = appendNumOntoHeader(origHeader);
isAltered = true;
}
// third, check if header starts with a digit
if(isIllegalStartCharacter(origHeader)) {
origHeader = appendLetterAtBeginning(origHeader);
isAltered = true;
}
// final, check for duplications
for(String currHead : currCleanHeaders) {
if(origHeader.equalsIgnoreCase(currHead)) {
origHeader = appendNumOntoHeader(origHeader);
isAltered = true;
break;
}
}
// if we did alter the string at any point
// we need to continue and re-run these checks again
// until we have gone through without altering the string
// and return the string
if(isAltered) {
origHeader = recursivelyFixHeaders(origHeader, currCleanHeaders);
}
return origHeader;
}
public String appendNumOntoHeader(String origHeader) {
int num = 0;
if(origHeader.matches(".*_\\d+")) {
String strNumbers = origHeader.substring(origHeader.lastIndexOf("_") + 1, origHeader.length());
num = Integer.parseInt(strNumbers);
// remove the existing appendage of the number
origHeader = origHeader.substring(0, origHeader.lastIndexOf("_"));
}
origHeader = origHeader + "_" + (++num);
return origHeader;
}
public String[] cleanAndMatchColumnNumbers(String header1, String header2, List otherColumns) {
if(header1.equalsIgnoreCase(header2)) {
throw new IllegalArgumentException("Cannot match the header to itself");
}
header1 = recursivelyFixHeaders(header1, otherColumns);
header2 = recursivelyFixHeaders(header2, otherColumns);
int header1Num = 0;
int header2Num = 0;
if(header1.matches(".*_\\d+")) {
String strNumbers = header1.substring(header1.lastIndexOf("_") + 1, header1.length());
header1Num = Integer.parseInt(strNumbers);
}
if(header2.matches(".*_\\d+")) {
String strNumbers = header2.substring(header2.lastIndexOf("_") + 1, header2.length());
header2Num = Integer.parseInt(strNumbers);
}
boolean hasAltered = false;
if(header1Num != header2Num) {
// we have to do another alteration
// which requires to perform another check for uniqueness
hasAltered = true;
// make them match
int maxNum = Math.max(header1Num, header2Num);
if(maxNum == header1Num) {
// update the header2 to be the larger
String origHeader2 = header2.substring(0, header2.lastIndexOf("_"));
header2 = origHeader2 + "_" + maxNum;
} else {
// update the header1 to be the larger
String origHeader1 = header1.substring(0, header1.lastIndexOf("_"));
header1 = origHeader1 + "_" + maxNum;
}
}
if(hasAltered) {
// gotta run through the routine again
return cleanAndMatchColumnNumbers(header1, header2, otherColumns);
}
return new String[]{header1, header2};
}
/**
* Takes an array of headers and validates each header against itself
* and returns the clean new header list.
*
* @param headers
* @return
*/
public String[] getCleanHeaders(String[] headers) {
int numCols = headers.length;
List newUniqueHeaders = new Vector(numCols);
for(int colIdx = 0; colIdx < numCols; colIdx++) {
String origHeader = headers[colIdx];
// validate header against other clean headers
String newHeader = recursivelyFixHeaders(origHeader, newUniqueHeaders);
// add it to the unique headers list so it can be used to validate others
newUniqueHeaders.add(newHeader);
}
return newUniqueHeaders.toArray(new String[newUniqueHeaders.size()]);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy