
prerna.reactor.database.upload.PredictMetamodelReactor

package prerna.reactor.database.upload;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import prerna.algorithm.api.SemossDataType;
import prerna.poi.main.helper.CSVFileHelper;
import prerna.reactor.AbstractReactor;
import prerna.reactor.masterdatabase.util.GenerateMetamodelLayout;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.ArrayUtilityMethods;
import prerna.util.Constants;
import prerna.util.UploadInputUtility;

public class PredictMetamodelReactor extends AbstractReactor {

	protected static final String DIR_SEPARATOR = java.nio.file.FileSystems.getDefault().getSeparator();

	public PredictMetamodelReactor() {
		this.keysToGet = new String[] { UploadInputUtility.FILE_PATH, UploadInputUtility.SPACE, 
				UploadInputUtility.DELIMITER, UploadInputUtility.ROW_COUNT };
	}

	@Override
	public NounMetadata execute() {
		organizeKeys();
		// get csv file path
		String filePath = UploadInputUtility.getFilePath(this.store, this.insight);
		if (!new File(filePath).exists()) {
			throw new IllegalArgumentException("Unable to locate file");
		}
		// get delimiter
		String delimiter = UploadInputUtility.getDelimiter(this.store);
		char delim = delimiter.charAt(0);
		// set csv file helper
		CSVFileHelper helper = new CSVFileHelper();
		helper.setDelimiter(delim);
		helper.parse(filePath);
		return new NounMetadata(autoGenerateMetaModel(helper), PixelDataType.MAP);
	}
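
	// Hypothetical usage note (an assumption, not taken from this listing): if the
	// platform derives the pixel name from the reactor class name and the key
	// constants above resolve to filePath, space, delimiter, and rowCount, an
	// invocation might look like:
	//   PredictMetamodel(filePath=["data/example.csv"], delimiter=[","], rowCount=[false]);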

	/**
	 * Predict the metamodel from the parsed csv file
	 */
	private Map<String, Object> autoGenerateMetaModel(CSVFileHelper helper) {
		// return map with the file metamodel
		Map<String, Object> fileMetaModelData = new HashMap<>();
		String[] columnHeaders = helper.getHeaders();
		Map<String, SemossDataType> dataTypeMap = new LinkedHashMap<>();
		Map<String, String> additionalDataTypeMap = new LinkedHashMap<>();
		// predict datatypes and additional types
		Object[][] dataTypes = helper.predictTypes();
		int size = columnHeaders.length;
		for (int colIdx = 0; colIdx < size; colIdx++) {
			Object[] prediction = dataTypes[colIdx];
			dataTypeMap.put(columnHeaders[colIdx], (SemossDataType) prediction[0]);
			if (prediction[1] != null) {
				additionalDataTypeMap.put(columnHeaders[colIdx], (String) prediction[1]);
			}
		}
		// collect sample rows from the csv to use for metamodel prediction
		List<String[]> data = new ArrayList<>(500);
		String[] cells = null;
		int count = 1;
		// only sample up to this many rows when predicting the metamodel
		int limit = 500;
		// whether the total row count of the csv is also requested
		boolean getEndRowCount = UploadInputUtility.getRowCount(this.store);
		while ((cells = helper.getNextRow()) != null) {
			if (count <= limit) {
				data.add(cells);
				count++;
			} else {
				// if the total number of rows is needed, keep counting past the limit
				if (getEndRowCount) {
					count++;
				} else {
					break;
				}
			}
		}
		int endRow = count;
		fileMetaModelData.put("startCount", 2);
		if (getEndRowCount) {
			fileMetaModelData.put("endCount", endRow);
		}
		fileMetaModelData.put("dataTypes", dataTypeMap);
		fileMetaModelData.put("additionalDataTypes", additionalDataTypeMap);
		// store auto modified header names
		fileMetaModelData.put("headerModifications", helper.getChangedHeaders());
		Map<String, Set<String>> matches = new HashMap<>(columnHeaders.length);
		Map<String, Boolean> columnPropMap = new HashMap<>(columnHeaders.length);
		for (String header : columnHeaders) {
			columnPropMap.put(header, false);
		}
		for (int i = 0; i < columnHeaders.length; i++) {
			runAllComparisons(columnHeaders, i, matches, columnPropMap, dataTypeMap, data);
		}
		// Format metamodel data
		Map<String, Object> propFileData = new HashMap<>();
		List
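
Below is a minimal, self-contained sketch of the type-prediction step the reactor runs before assembling the metamodel map. It is not part of the listing above: the class name and file path are illustrative assumptions, and only CSVFileHelper calls that appear in the source (setDelimiter, parse, getHeaders, predictTypes) are used.

import prerna.algorithm.api.SemossDataType;
import prerna.poi.main.helper.CSVFileHelper;

public class PredictTypesSketch {

	public static void main(String[] args) {
		// hypothetical csv location and delimiter
		CSVFileHelper helper = new CSVFileHelper();
		helper.setDelimiter(',');
		helper.parse("data/example.csv");

		// predictTypes() returns, per column, the predicted SemossDataType and an
		// optional additional type string (such as a date format), which may be null
		String[] headers = helper.getHeaders();
		Object[][] predictions = helper.predictTypes();
		for (int i = 0; i < headers.length; i++) {
			SemossDataType type = (SemossDataType) predictions[i][0];
			String additionalType = (String) predictions[i][1];
			System.out.println(headers[i] + " -> " + type
					+ (additionalType == null ? "" : " (" + additionalType + ")"));
		}
	}
}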