All downloads are free. Search and download functionality uses the official Maven repository.

prerna.poi.main.helper.FileHelperUtil Maven / Gradle / Ivy

The newest version!
package prerna.poi.main.helper;

import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Map;

import com.google.re2j.Matcher;
import com.google.re2j.Pattern;

import prerna.algorithm.api.SemossDataType;

/**
 * Static helpers for turning per-column data type predictions into the
 * maps/arrays used downstream and for choosing a single date format out of
 * several observed candidates.
 */
public class FileHelperUtil {

	/**
	 * Pattern letters whose width may legitimately vary between one and two
	 * characters for the same logical format (e.g. {@code M} vs {@code MM}).
	 */
	private static final String FLEXIBLE_WIDTH_CHARS = "MdHhms";

	private FileHelperUtil() {
		// static utility class - not meant to be instantiated
	}
	
	/**
	 * Convenience method to get the 2 maps we usually use within CsvQueryStruct
	 * @param headers		The column headers; {@code predictions[i]} is read for every {@code headers[i]}
	 * @param predictions	One row per header: index 0 is cast to {@link SemossDataType},
	 * 						index 1 is the optional additional type information (may be null)
	 * @return				A 2 element array: index 0 maps header -> data type string,
	 * 						index 1 maps header -> additional type string (only for headers
	 * 						whose prediction carried a non-null second element)
	 */
	public static Map<String, String>[] generateDataTypeMapsFromPrediction(String[] headers, Object[][] predictions) {
		Map<String, String> dataTypeMap = new LinkedHashMap<>();
		Map<String, String> additionalDataTypeMap = new LinkedHashMap<>();
		
		int numHeaders = headers.length;
		for(int i = 0; i < numHeaders; i++) {
			Object[] pred = predictions[i];
			SemossDataType type = (SemossDataType) pred[0];
			dataTypeMap.put(headers[i], type.toString());
			if(pred[1] != null) {
				additionalDataTypeMap.put(headers[i], String.valueOf(pred[1]));
			}
		}
		
		// generic arrays cannot be created directly; both elements are Map<String, String>
		@SuppressWarnings("unchecked")
		Map<String, String>[] retArray = new Map[] { dataTypeMap, additionalDataTypeMap };
		return retArray;
	}
	
	/**
	 * Convenience method to just return the data types
	 * @param predictions	One row per column; index 0 of each row is cast to {@link SemossDataType}
	 * @return				The string form of each predicted type, in the same order as the input
	 */
	public static String[] generateDataTypeArrayFromPrediction(Object[][] predictions) {
		int numHeaders = predictions.length;
		String[] returnTypes = new String[numHeaders];
		
		for(int i = 0; i < numHeaders; i++) {
			Object[] pred = predictions[i];
			SemossDataType type = (SemossDataType) pred[0];
			returnTypes[i] = type.toString();
		}
		return returnTypes;
	}
	
	/**
	 * Determine date additional formatting
	 * <p>
	 * Note: for DATE and TIMESTAMP types with more than one candidate format,
	 * the passed in map is modified in place by {@link #reconcileDateFormats(Map)}.
	 * 
	 * @param type				The data type, returned unchanged as element 0
	 * @param formatTracker		Candidate format -> number of occurrences
	 * @return					A 2 element array: index 0 is the type, index 1 is the chosen
	 * 							format, or null when the tracker is null or empty
	 */
	public static Object[] determineDateFormatting(SemossDataType type, Map<String, Integer> formatTracker) {
		Object[] result = new Object[2];
		result[0] = type;
		
		// nothing was tracked - leave the additional format as null
		// instead of letting Collections.max throw on an empty collection
		if(formatTracker == null || formatTracker.isEmpty()) {
			return result;
		}
		
		if(formatTracker.size() == 1) {
			result[1] = formatTracker.keySet().iterator().next();
			return result;
		}
		
		// trying to figure out the best match for the format
		// taking into consideration formats that are basically the same
		// but may contain 2 value (i.e. 11th day) vs 1 value (i.e. 1st day)
		// which matches to different patterns
		if(type == SemossDataType.DATE || type == SemossDataType.TIMESTAMP) {
			reconcileDateFormats(formatTracker);
		}
		
		// now just choose the most occurring one
		String mostOccurringFormat = Collections.max(formatTracker.entrySet(), Comparator.comparingInt(Map.Entry::getValue)).getKey();
		result[1] = mostOccurringFormat;
		return result;
	}
	
	/**
	 * Try to reconcile different date formats
	 * <p>
	 * Two formats are considered equivalent when they only differ in whether
	 * one of the letters M, d, H, h, m, s appears once or twice in a row
	 * (e.g. {@code M/d/yyyy} and {@code MM/dd/yyyy}). Equivalent formats are
	 * merged in place: both entries are removed and the longer format is
	 * re-inserted with the sum of the two counts (when both have the same
	 * length, the one appearing later in the map's iteration order is kept).
	 * 
	 * @param formats	Format -> number of occurrences; modified in place
	 */
	public static void reconcileDateFormats(Map<String, Integer> formats) {
		if(formats == null) {
			return;
		}
		
		// every merge changes the key set, so rescan from scratch
		// until a full pass finds no equivalent pair
		// each merge removes one entry, so this always terminates
		while(formats.size() > 1 && mergeFirstEquivalentPair(formats)) {
			// keep merging
		}
	}
	
	/**
	 * Find the first pair of equivalent formats (in iteration order) and merge them.
	 * @param formats	Format -> number of occurrences; modified in place when a pair is found
	 * @return			true if a pair was merged, false if no two formats are equivalent
	 */
	private static boolean mergeFirstEquivalentPair(Map<String, Integer> formats) {
		String[] formatPatterns = formats.keySet().toArray(new String[0]);
		int numFormats = formatPatterns.length;
		
		for(int i = 0; i < numFormats - 1; i++) {
			String thisFormat = formatPatterns[i];
			Pattern pattern = Pattern.compile(toLenientRegex(thisFormat));
			
			for(int j = i+1; j < numFormats; j++) {
				String otherFormat = formatPatterns[j];
				// require the whole format to match - a substring hit would
				// wrongly merge e.g. "M/d/yy" into "M/d/yyyy"
				Matcher matcher = pattern.matcher(otherFormat);
				if(!matcher.matches()) {
					continue;
				}
				
				// they are equivalent
				String largerFormat = thisFormat.length() > otherFormat.length() ? thisFormat : otherFormat;
				int c1 = formats.remove(thisFormat);
				int c2 = formats.remove(otherFormat);
				formats.put(largerFormat, c1+c2);
				return true;
			}
		}
		return false;
	}
	
	/**
	 * Build a regex that matches the given format and any variant of it where a
	 * run of one or two of the letters M, d, H, h, m, s has the other width.
	 * Everything else (including runs of three or more of those letters, such as
	 * {@code MMM}) is quoted so it only matches itself.
	 * @param format	The date format string
	 * @return			The regex source to compile
	 */
	private static String toLenientRegex(String format) {
		StringBuilder regex = new StringBuilder();
		StringBuilder literal = new StringBuilder();
		int len = format.length();
		int start = 0;
		
		while(start < len) {
			char c = format.charAt(start);
			// find the end of the run of identical characters
			int end = start + 1;
			while(end < len && format.charAt(end) == c) {
				end++;
			}
			
			if(FLEXIBLE_WIDTH_CHARS.indexOf(c) >= 0 && (end - start) <= 2) {
				// flush the pending literal text before the flexible piece
				if(literal.length() > 0) {
					regex.append(Pattern.quote(literal.toString()));
					literal.setLength(0);
				}
				regex.append(c).append("{1,2}");
			} else {
				literal.append(format, start, end);
			}
			start = end;
		}
		
		if(literal.length() > 0) {
			regex.append(Pattern.quote(literal.toString()));
		}
		return regex.toString();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy