/*
 * prerna.reactor.database.upload.PredictMetamodelReactor
 *
 * Part of the SEMOSS artifact (semoss), newest published version,
 * available for Maven / Gradle / Ivy.
 */
package prerna.reactor.database.upload;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import prerna.algorithm.api.SemossDataType;
import prerna.poi.main.helper.CSVFileHelper;
import prerna.reactor.AbstractReactor;
import prerna.reactor.masterdatabase.util.GenerateMetamodelLayout;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.ArrayUtilityMethods;
import prerna.util.Constants;
import prerna.util.UploadInputUtility;

public class PredictMetamodelReactor extends AbstractReactor {
	
	protected static final String DIR_SEPARATOR = java.nio.file.FileSystems.getDefault().getSeparator();

	public PredictMetamodelReactor() {
		this.keysToGet = new String[] { UploadInputUtility.FILE_PATH, UploadInputUtility.SPACE, UploadInputUtility.DELIMITER, UploadInputUtility.ROW_COUNT };
	}
	
	@Override
	public NounMetadata execute() {
		organizeKeys();
		// get csv file path
		String filePath = UploadInputUtility.getFilePath(this.store, this.insight);
		if(!new File(filePath).exists()) {
			throw new IllegalArgumentException("Unable to locate file");
		}
		// get delimiter
		String delimiter = UploadInputUtility.getDelimiter(this.store);
		char delim = delimiter.charAt(0);

		// set csv file helper
		CSVFileHelper helper = new CSVFileHelper();
		helper.setDelimiter(delim);
		helper.parse(filePath);

		return new NounMetadata(autoGenerateMetaModel(helper), PixelDataType.MAP);
	}

	/**
	 * predict the meta model
	 */
	private Map autoGenerateMetaModel(CSVFileHelper helper) {
		// return map with file metamodel
		Map fileMetaModelData = new HashMap();
		String[] columnHeaders = helper.getHeaders();
		Map dataTypeMap = new LinkedHashMap();
		Map additionalDataTypeMap = new LinkedHashMap();

		// predict datatypes and additional types
		Object[][] dataTypes = helper.predictTypes();
		int size = columnHeaders.length;
		for (int colIdx = 0; colIdx < size; colIdx++) {
			Object[] prediction = dataTypes[colIdx];
			dataTypeMap.put(columnHeaders[colIdx], (SemossDataType) prediction[0]);
			if (prediction[1] != null) {
				additionalDataTypeMap.put(columnHeaders[colIdx], (String) prediction[1]);
			}
		}

		// get data from csv to predict types
		List data = new ArrayList<>(500);
		String[] cells = null;
		int count = 1;
		// predict meta model from limit row count
		int limit = 500;
		// get end row count
		boolean getEndRowCount = UploadInputUtility.getRowCount(this.store);
		while ((cells = helper.getNextRow()) != null) {
			if (count <= limit) {
				data.add(cells);
				count++;
			} else {
				// if we need to get total number of rows from csv continue
				if (getEndRowCount) {
					count++;
				} else {
					break;
				}
			}

		}
		int endRow = count;

		fileMetaModelData.put("startCount", 2);
		if (getEndRowCount) {
			fileMetaModelData.put("endCount", endRow);
		}
		fileMetaModelData.put("dataTypes", dataTypeMap);
		fileMetaModelData.put("additionalDataTypes", additionalDataTypeMap);
		// store auto modified header names
		fileMetaModelData.put("headerModifications", helper.getChangedHeaders());

		Map> matches = new HashMap<>(columnHeaders.length);
		Map columnPropMap = new HashMap<>(columnHeaders.length);
		for (String header : columnHeaders) {
			columnPropMap.put(header, false);
		}

		for (int i = 0; i < columnHeaders.length; i++) {
			runAllComparisons(columnHeaders, i, matches, columnPropMap, dataTypeMap, data);
		}

		// Format metamodel data
		Map propFileData = new HashMap<>();
		List> relationMapList = new ArrayList<>();
		Map> nodePropMap = new HashMap<>();

		for (String subject : matches.keySet()) {
			Set set = matches.get(subject);
			for (String object : set) {
				SemossDataType datatype = dataTypeMap.get(object);
				if (datatype == SemossDataType.STRING) {
					Map relMap = new HashMap<>();
					String relName = subject + "_" + object;
					relMap.put(Constants.FROM_TABLE, subject);
					relMap.put(Constants.TO_TABLE, object);
					relMap.put(Constants.REL_NAME, relName);
					relationMapList.add(relMap);
				} else {
					List properties = new ArrayList<>();
					if (nodePropMap.containsKey(subject)) {
						properties = nodePropMap.get(subject);
					}
					properties.add(object);
					nodePropMap.put(subject, properties);
				}
			}
		}

		propFileData.put(Constants.RELATION, relationMapList);
		propFileData.put(Constants.NODE_PROP, nodePropMap);
		// position tables in metamodel to be spaced and not overlap
		Map> nodePositionMap = GenerateMetamodelLayout.generateMetamodelPredictionLayout(nodePropMap, relationMapList);
		propFileData.put(Constants.POSITION_PROP, nodePositionMap);

		fileMetaModelData.putAll(propFileData);
		// get file location and file name
		String filePath = helper.getFileLocation();
		String file = filePath.substring(filePath.lastIndexOf(DIR_SEPARATOR) + DIR_SEPARATOR.length(), filePath.lastIndexOf("."));
		try {
			file = file.substring(0, file.indexOf("_____UNIQUE"));
		} catch (Exception e) {
			// just in case that fails, this shouldn't because if its a filename
			// it should have a "."
			file = filePath.substring(filePath.lastIndexOf(DIR_SEPARATOR) + DIR_SEPARATOR.length(), filePath.lastIndexOf("."));
		}

		// store file path and file name to send to FE
		fileMetaModelData.put("fileLocation", filePath);
		fileMetaModelData.put("fileName", file);
		helper.clear();
		return fileMetaModelData;
	}

	/**
	 * 
	 * @param columnHeaders
	 *            - the column headers in the csv
	 * @param firstColIndex
	 *            - the column which we are comparing to other columns
	 * @param matches
	 * @param columnPropMap
	 * @param dataTypeMap
	 * @param data
	 */
	private void runAllComparisons(String[] columnHeaders, int firstColIndex, Map> matches, Map columnPropMap, Map dataTypeMap, List data) {
		for(int i = 0; i < columnHeaders.length; i++) {
			//don't compare a column to itself
			if(i == firstColIndex) continue;

			String firstColumn = columnHeaders[firstColIndex];
			String secondColumn = columnHeaders[i];

			//need to make sure second column does not have first column as a a property already
			if(!matches.containsKey(secondColumn) || !matches.get(secondColumn).contains(firstColumn)) {
				if(!columnPropMap.get(secondColumn) && compareCols(firstColIndex, i, data)) {
					//we have a match
					boolean useInverse = false;
					int firstColIndexInCSV = ArrayUtilityMethods.arrayContainsValueAtIndex(columnHeaders, firstColumn);
					int secondColIndexInCSV = ArrayUtilityMethods.arrayContainsValueAtIndex(columnHeaders, secondColumn);
					if(firstColIndexInCSV > secondColIndexInCSV) {
						//try to see if inverse order is better
						//but first, check to make sure the second column in not a double or date
						SemossDataType dataType = dataTypeMap.get(secondColumn);
						if(dataType == SemossDataType.STRING) {
							if(!columnPropMap.get(firstColumn) && compareCols(i, firstColIndex, data)) {
								//use reverse order
								useInverse = true;
								if(matches.containsKey(secondColumn)) {
									matches.get(secondColumn).add(firstColumn);
								} else {
									Set set = new HashSet(1);
									set.add(firstColumn);
									matches.put(secondColumn, set);
								}
								columnPropMap.put(firstColumn, true);
							}
						}
					}

					if(!useInverse) {
						if(matches.containsKey(firstColumn)) {
							matches.get(firstColumn).add(secondColumn);
						} else {
							Set set = new HashSet(1);
							set.add(secondColumn);
							matches.put(firstColumn, set);
						}
						columnPropMap.put(secondColumn, true);
					}
				}
			}
		}
	}

	/**
	 * 
	 * @return
	 */
	private boolean compareCols(int firstIndex, int secondIndex, List data) {
		Map values = new HashMap<>();
		for(Object[] row : data) {
			Object firstValue = row[firstIndex];
			Object secondValue = row[secondIndex];
			if(values.containsKey(firstValue)) {
				if(!values.get(firstValue).equals(secondValue)) {
					return false;
				}
			} else {
				values.put(firstValue, secondValue);
			}
		}
		return true;
	}
}