/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * prerna.om.HeadersException Maven / Gradle / Ivy
 *
 * The newest version!
 */
package prerna.om;

import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;
import java.util.regex.Pattern;

import prerna.util.Constants;
import prerna.util.Utility;

public class HeadersException {

	/*
	 * Object to clear the headers and determine any exceptions that are invalid for loading
	 * 
	 * Its a singleton since we need to read the giant list of values that are saved in RDF_Map which 
	 * I do not want to do multiple times
	 */
	
	// the singleton object
	private static HeadersException singleton;
	
	// the list of prohibited words read through the RDF_MAP
	// we will store everything in upper case format
	private static Set prohibitedHeaders = new HashSet();
	
	public final static String DUP_HEADERS_KEY = "DUPLICATE_HEADERS";
	public final static String ILLEGAL_HEADERS_KEY = "ILLEGAL_HEADERS";
	public final static String ILLEGAL_CHARACTER_KEY = "ILLEGAL_CHARACTER_KEY";
	public final static String ILLEGAL_START_CHARACTER_KEY = "ILLEGAL_START_CHARACTER_KEY";

	// the constructor
	// responsible for loading in the prohibited headers
	// requires DIHelper
	private HeadersException() {
		// grab the giant string from helper
		try {
			String prohibitedHeadersStr = Utility.getDIHelperProperty(Constants.PROBHIBITED_HEADERS);
			// the string is comma delimited
			String[] words = prohibitedHeadersStr.split(",");
			for(String word : words) {
				// keep everything upper case for simplicity in comparisons
				prohibitedHeaders.add(word.toUpperCase());
			}
		} catch(Exception e) {
			System.err.println("DIHelper is not loaded. THIS SHOULD ONLY BE THE CASE DURING TESTING!");
		}
	}
	
	// singleton access point
	public static HeadersException getInstance() {
		if(singleton == null) {
			singleton = new HeadersException();
		}
		return singleton;
	}
	
	/**
	 * This will compare the headers from the file and report all issues to the user if present
	 * @param headers				The String[] containing the headers to test
	 * @return						boolean true/false if headers are good
	 * 								if headers are not good, it just throws an exception at the end
	 * 								which needs to be caught and sent to the FE
	 * @throws IOException 
	 */
	public boolean compareHeaders(String fileName, String[] headers) throws IOException {
		headers = upperCaseAllHeaders(headers);
		// instantiate errorMessage objects
		boolean foundError = false;
		StringBuilder errorMessage = new StringBuilder();
		errorMessage.append("FILE ERROR : " + Utility.getOriginalFileName(fileName) + "
"); // two tests // first one is if we have duplicate headers // second one is if we have illegal headers - based on some sql terms i found online... // for optimization to run through the headers only once, I combined the two tests Map> comparisons = runAllComparisons(headers); // duplicate headers will store which headers are duplicated Set duplicateHeaders = comparisons.get(DUP_HEADERS_KEY); // illegal headers will store which headers are illegal Set illegalHeaders = comparisons.get(ILLEGAL_HEADERS_KEY); // illegal characters will store headers which have any of the following: %+;@ Set illCharacterHeaders = comparisons.get(ILLEGAL_CHARACTER_KEY); // illegal start characters will store headers which do not start with a digit Set illegalStartHeaders = comparisons.get(ILLEGAL_START_CHARACTER_KEY); if(!duplicateHeaders.isEmpty()) { foundError = true; errorMessage.append("
"); errorMessage.append("ERROR - Duplicate Column Names:
"); int dupCounter = 1; for(String dupHeader : duplicateHeaders) { errorMessage.append(dupCounter + ") " + dupHeader + "
"); dupCounter++; } } if(!illegalHeaders.isEmpty()) { foundError = true; errorMessage.append("
"); errorMessage.append("ERROR - Prohibited Column Names:
"); // cause i'm ill son int illCounter = 1; for(String illHeader : illegalHeaders) { errorMessage.append(illCounter + ") " + illHeader + "
"); illCounter++; } } if(!illCharacterHeaders.isEmpty()) { foundError = true; errorMessage.append("
"); errorMessage.append("ERROR - Column name can't contain any of the following characters: %+;@
"); // cause i'm ill son int illCounter = 1; for(String illHeader : illCharacterHeaders) { errorMessage.append(illCounter + ") " + illHeader + "
"); illCounter++; } } if(!illegalStartHeaders.isEmpty()) { foundError = true; errorMessage.append("
"); errorMessage.append("ERROR - Column name must start with a letter
"); // cause i'm ill son int illCounter = 1; for(String illHeader : illCharacterHeaders) { errorMessage.append(illCounter + ") " + illHeader + "
"); illCounter++; } } if(foundError) { throw new IOException(errorMessage.toString()); } return true; } public Map> runAllComparisons(String[] headers) { // this method is just a combination of finding duplicate headers // and finding illegal headers Map> returnComparisonsMap = new Hashtable>(); // this is a bit messier in terms of implementation, but it only requires // us to go through the data once, instead of iterating once to find duplicate // headers and another time to find illegal headers // store duplicate values.. and make it an ordered set Set duplicateHeaders = new TreeSet(); // store the illegal headers... make it an ordered set Set illegalHeaders = new TreeSet(); // keep a list of the headers current seen Set currHeadersProcessed = new HashSet(); // store the illegal headers... make it an ordered set Set illConcatHeaders = new TreeSet(); // store the headers that start with non-letters... make it an ordered set Set illealStartHeaders = new TreeSet(); int size = headers.length; for(int headIdx = 0; headIdx < size; headIdx++) { String thisHeader = headers[headIdx]; // THIS IS THE PORTION OF CODE FOR DUPLICATE HEADERS if(currHeadersProcessed.contains(thisHeader)) { // we found a duplicate value! duplicateHeaders.add(thisHeader); } else { // add it to the set to see if we run into it again currHeadersProcessed.add(thisHeader); } // END DUPLICATE HEADERS // THIS IS THE PORTION OF CODE FOR ILLEGAL HEADERS if(prohibitedHeaders.contains(thisHeader)) { // we found an illegal value! illegalHeaders.add(thisHeader); } // END ILLEGAL HEADERS // THIS IS THE PORTION OF CODE FOR ILLEGAL CHARACTERS if(containsIllegalCharacter(thisHeader)) { // we found an illegal value! 
illConcatHeaders.add(thisHeader); } // END ILLEGAL CONCATENATIONS if(isIllegalStartCharacter(thisHeader)) { illealStartHeaders.add(thisHeader); } } returnComparisonsMap.put(DUP_HEADERS_KEY, duplicateHeaders); returnComparisonsMap.put(ILLEGAL_HEADERS_KEY, illegalHeaders); returnComparisonsMap.put(ILLEGAL_CHARACTER_KEY, illConcatHeaders); returnComparisonsMap.put(ILLEGAL_START_CHARACTER_KEY, illealStartHeaders); return returnComparisonsMap; } public String[] upperCaseAllHeaders(String[] headers) { int size = headers.length; for(int headIdx = 0; headIdx < size; headIdx++) { headers[headIdx] = headers[headIdx].toUpperCase(); } return headers; } public boolean isDuplicated(String checkHeader, String[] allHeaders) { checkHeader = checkHeader.toUpperCase(); for(String currHeaders : allHeaders) { if(currHeaders == null) { continue; } if(checkHeader.equals(currHeaders.toUpperCase())) { return true; } } return false; } public boolean isDuplicated(String checkHeader, String[] allHeaders, int ignoreIndex) { checkHeader = checkHeader.toUpperCase(); for(int colIdx = 0; colIdx < allHeaders.length; colIdx++) { if(colIdx == ignoreIndex) { continue; } String currHeaders = allHeaders[colIdx]; if(currHeaders == null) { continue; } if(checkHeader.equals(currHeaders.toUpperCase())) { return true; } } return false; } public boolean isIllegalHeader(String checkHeader) { checkHeader = checkHeader.toUpperCase(); if(prohibitedHeaders.contains(checkHeader)) { return true; } return false; } public boolean containsIllegalCharacter(String checkHeader) { // match any character not alpha, numeric, or underscore AND // match 2 or more consecutive underscores AND // match if starts with underscore AND // match if ends with underscore Pattern p = Pattern.compile("[^a-zA-Z0-9-_]|_{2,}|^_|_$|-"); boolean hasIllegalChar = p.matcher(checkHeader).find(); return hasIllegalChar; } public String removeIllegalCharacters(String checkHeader) { checkHeader = checkHeader.trim(); checkHeader = 
checkHeader.replace("+", ""); checkHeader = checkHeader.replace("@", ""); checkHeader = checkHeader.replace("%", ""); checkHeader = checkHeader.replace(";", ""); checkHeader = checkHeader.replaceAll("[^a-zA-Z0-9]", "_"); // need to replace 2 "__" with a single "_" while(checkHeader.contains("__")) { checkHeader = checkHeader.replace("__", "_"); } if(checkHeader.startsWith("_")) { checkHeader = checkHeader.substring(1, checkHeader.length()); } if(checkHeader.endsWith("_")) { checkHeader = checkHeader.substring(0, checkHeader.length()-1); } return checkHeader; } public boolean isIllegalStartCharacter(String checkHeader) { if(checkHeader.length() > 0) { char start = checkHeader.charAt(0); if(!Character.isLetter(start)) { return true; } } return false; } public String appendLetterAtBeginning(String origHeader) { return "A" + origHeader; } public String recursivelyFixHeaders(String origHeader, List currCleanHeaders) { boolean isAltered = false; /* * For the following 3 checks * Just perform a single fix within each block * And let the recursion deal with having to fix an issue that is arising * due to a previous fix * i.e. 
you made a header no longer illegal but now it is a duplicate, recursion of * this method will deal with that */ // first, clean illegal characters if(containsIllegalCharacter(origHeader)) { origHeader = removeIllegalCharacters(origHeader); isAltered = true; } // second, check if header is some kind of reserved word if(isIllegalHeader(origHeader)) { origHeader = appendNumOntoHeader(origHeader); isAltered = true; } // third, check if header starts with a digit if(isIllegalStartCharacter(origHeader)) { origHeader = appendLetterAtBeginning(origHeader); isAltered = true; } // final, check for duplications for(String currHead : currCleanHeaders) { if(origHeader.equalsIgnoreCase(currHead)) { origHeader = appendNumOntoHeader(origHeader); isAltered = true; break; } } // if we did alter the string at any point // we need to continue and re-run these checks again // until we have gone through without altering the string // and return the string if(isAltered) { origHeader = recursivelyFixHeaders(origHeader, currCleanHeaders); } return origHeader; } public String recursivelyFixHeaders(String origHeader, String[] currCleanHeaders) { boolean isAltered = false; /* * For the following 3 checks * Just perform a single fix within each block * And let the recursion deal with having to fix an issue that is arising * due to a previous fix * i.e. 
you made a header no longer illegal but now it is a duplicate, recurssion of * this method will deal with that */ // first, clean illegal characters if(containsIllegalCharacter(origHeader)) { origHeader = removeIllegalCharacters(origHeader); isAltered = true; } // second, check if header is some kind of reserved word if(isIllegalHeader(origHeader)) { origHeader = appendNumOntoHeader(origHeader); isAltered = true; } // third, check if header starts with a digit if(isIllegalStartCharacter(origHeader)) { origHeader = appendLetterAtBeginning(origHeader); isAltered = true; } // final, check for duplications for(String currHead : currCleanHeaders) { if(origHeader.equalsIgnoreCase(currHead)) { origHeader = appendNumOntoHeader(origHeader); isAltered = true; break; } } // if we did alter the string at any point // we need to continue and re-run these checks again // until we have gone through without altering the string // and return the string if(isAltered) { origHeader = recursivelyFixHeaders(origHeader, currCleanHeaders); } return origHeader; } public String appendNumOntoHeader(String origHeader) { int num = 0; if(origHeader.matches(".*_\\d+")) { String strNumbers = origHeader.substring(origHeader.lastIndexOf("_") + 1, origHeader.length()); num = Integer.parseInt(strNumbers); // remove the existing appendage of the number origHeader = origHeader.substring(0, origHeader.lastIndexOf("_")); } origHeader = origHeader + "_" + (++num); return origHeader; } public String[] cleanAndMatchColumnNumbers(String header1, String header2, List otherColumns) { if(header1.equalsIgnoreCase(header2)) { throw new IllegalArgumentException("Cannot match the header to itself"); } header1 = recursivelyFixHeaders(header1, otherColumns); header2 = recursivelyFixHeaders(header2, otherColumns); int header1Num = 0; int header2Num = 0; if(header1.matches(".*_\\d+")) { String strNumbers = header1.substring(header1.lastIndexOf("_") + 1, header1.length()); header1Num = Integer.parseInt(strNumbers); } 
if(header2.matches(".*_\\d+")) { String strNumbers = header2.substring(header2.lastIndexOf("_") + 1, header2.length()); header2Num = Integer.parseInt(strNumbers); } boolean hasAltered = false; if(header1Num != header2Num) { // we have to do another alteration // which requires to perform another check for uniqueness hasAltered = true; // make them match int maxNum = Math.max(header1Num, header2Num); if(maxNum == header1Num) { // update the header2 to be the larger String origHeader2 = header2.substring(0, header2.lastIndexOf("_")); header2 = origHeader2 + "_" + maxNum; } else { // update the header1 to be the larger String origHeader1 = header1.substring(0, header1.lastIndexOf("_")); header1 = origHeader1 + "_" + maxNum; } } if(hasAltered) { // gotta run through the routine again return cleanAndMatchColumnNumbers(header1, header2, otherColumns); } return new String[]{header1, header2}; } /** * Takes an array of headers and validates each header against itself * and returns the clean new header list. * * @param headers * @return */ public String[] getCleanHeaders(String[] headers) { int numCols = headers.length; List newUniqueHeaders = new Vector(numCols); for(int colIdx = 0; colIdx < numCols; colIdx++) { String origHeader = headers[colIdx]; // validate header against other clean headers String newHeader = recursivelyFixHeaders(origHeader, newUniqueHeaders); // add it to the unique headers list so it can be used to validate others newUniqueHeaders.add(newHeader); } return newUniqueHeaders.toArray(new String[newUniqueHeaders.size()]); } }




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy