All downloads are free. Search and download functionality uses the official Maven repository.

prerna.reactor.algorithms.xray.GenerateXRayHashingReactor Maven / Gradle / Ivy

The newest version!
package prerna.reactor.algorithms.xray;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import prerna.auth.utils.SecurityEngineUtils;
import prerna.ds.r.RSyntaxHelper;
import prerna.engine.api.IDatabaseEngine;
import prerna.engine.api.IRawSelectWrapper;
import prerna.masterdatabase.utility.MasterDatabaseUtility;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.rdf.engine.wrappers.WrapperManager;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.AssetUtility;
import prerna.util.Constants;
import prerna.util.DIHelper;
import prerna.util.Utility;

/**
 * Reactor that generates x-ray hash files for one or more databases.
 * <p>
 * For every requested database the reactor queries the selectors returned by
 * {@link MasterDatabaseUtility#getSelectorsWithinDatabaseRDBMS(String)}, dumps the result
 * to a temporary {@code *_base.tsv} file, loads that file into R and calls the
 * {@code encode_instances} R function (sourced from {@code R/XRay/encode_instances.R})
 * to write the hash file into the x-ray corpus folder.
 * <p>
 * Two modes are supported:
 * <ul>
 * <li>column mode (default): one hash file per selector, named
 * {@code <databaseId>;<table>;<column>.tsv}</li>
 * <li>row comparison mode: one hash file per table, named
 * {@code <databaseId>;<table>;row_comparison.tsv}</li>
 * </ul>
 * The returned map lists every file that was considered together with a status of
 * {@code "existing"} (file was already present and override is off) or {@code "new"}.
 */
public class GenerateXRayHashingReactor extends AbstractRFrameReactor {

	public static final String CLASS_NAME = GenerateXRayHashingReactor.class.getName();
	public static final Logger classLogger = LogManager.getLogger(CLASS_NAME);

	public static final String FILES_KEY = "files";
	public static final String STATUS_KEY = "status";
	public static final String DATABASE_IDS_KEY = "databaseIds";

	public static final String ROW_MATCHING = "rowComparison";

	// absolute path (forward slashes) of the corpus folder, set by execute()
	private String folderPath;
	// optional database id -> selector filter list; kept as a raw Map so that the
	// package-visible getter keeps its original signature
	@SuppressWarnings("rawtypes")
	private Map configMap;
	
	public GenerateXRayHashingReactor() {
		this.keysToGet = new String[] {ReactorKeysEnum.FILE_PATH.getKey(), ReactorKeysEnum.SPACE.getKey(), 
				ReactorKeysEnum.DATABASE.getKey(), ReactorKeysEnum.OVERRIDE.getKey(), ReactorKeysEnum.CONFIG.getKey(),
				GenerateXRayMatchingReactor.ROW_MATCHING};
	}
	
	/**
	 * Generate (or reuse) the hash files for every requested database.
	 *
	 * @return a MAP noun holding the folder name, the space, the list of file names,
	 *         the per-file status ("existing" / "new") and the database ids
	 * @throws IllegalArgumentException if no database is supplied or the user cannot
	 *         view one of the requested databases
	 */
	@Override
	public NounMetadata execute() {
		// init 
		init();
		String[] reqPackages = new String[] {"data.table", "textreuse"};
		this.rJavaTranslator.checkPackages(reqPackages);

		// logger + organize keys
		Logger logger = this.getLogger(CLASS_NAME);
		organizeKeys();
		
		String space = this.keyValue.get(this.keysToGet[1]);
		// if security enables, you need proper permissions
		// this takes in the insight and does a user check that the user has access to perform the operations
		String baseFolder = AssetUtility.getAssetBasePath(this.insight, space, true);
		File baseF = new File(baseFolder);
		// isDirectory() is false both when the path is missing and when it is not a folder
		if(!baseF.isDirectory()) {
			baseF.mkdirs();
		}

		// specify the folder from the base
		String folderName = keyValue.get(keysToGet[0]);
		if(folderName == null || folderName.isEmpty()) {
			folderName = "xray_corpus";
		}
		this.folderPath = (baseFolder + "/" + folderName).replace('\\', '/');
		File xrayFolder = new File(this.folderPath);
		if(!xrayFolder.isDirectory()) {
			xrayFolder.mkdirs();
		}
		
		// every requested database must be viewable by the current user
		List<String> databaseIds = getDatabases();
		for(String dbId : databaseIds) {
			if(!SecurityEngineUtils.userCanViewEngine(this.insight.getUser(), dbId)) {
				throw new IllegalArgumentException("User does not have permission to view this database or database does not exist");
			}
		}
		
		// see if we are overriding the existing hash or not
		// Boolean.parseBoolean(null) is false, so a missing key means "no"
		boolean override = Boolean.parseBoolean(this.keyValue.get(ReactorKeysEnum.OVERRIDE.getKey()));
		boolean rowComparison = Boolean.parseBoolean(this.keyValue.get(GenerateXRayMatchingReactor.ROW_MATCHING));

		// get the config map
		this.configMap = getConfig();
		
		// source the packages and R script
		this.rJavaTranslator.executeEmptyR(RSyntaxHelper.loadPackages(reqPackages) + "; source(\"" 
				+ DIHelper.getInstance().getProperty(Constants.BASE_FOLDER).replace('\\', '/') 
				+ "/R/XRay/encode_instances.R\", local=TRUE);");
		
		Map<String, Object> returnMap = new HashMap<>();
		returnMap.put(this.keysToGet[0], folderName);
		returnMap.put(this.keysToGet[1], space);
		List<String> fileNames = new ArrayList<>();
		List<String> status = new ArrayList<>();
		returnMap.put(FILES_KEY, fileNames);
		returnMap.put(STATUS_KEY, status);
		returnMap.put(DATABASE_IDS_KEY, databaseIds);

		// go through and write the database
		for(String databaseId : databaseIds) {
			IDatabaseEngine engine = Utility.getDatabase(databaseId);
			Collection<String> pixelSelectors = MasterDatabaseUtility.getSelectorsWithinDatabaseRDBMS(databaseId);
			List<String> selectorFilters = getSelectorFilters(databaseId);
			
			if(rowComparison) {
				// need to group each table into a single query
				Map<String, SelectQueryStruct> tableToQs = new HashMap<>();
				for(String selector : pixelSelectors) {
					if(isFilteredOut(selectorFilters, selector)) {
						// ignore the selector
						continue;
					}
					
					// selectors look like TABLE__COLUMN; a bare selector is the table itself
					String tableName = selector.contains("__") ? selector.split("__")[0] : selector;
					
					SelectQueryStruct qs = tableToQs.get(tableName);
					if(qs == null) {
						qs = new SelectQueryStruct();
						tableToQs.put(tableName, qs);
					}
					qs.addSelector(new QueryColumnSelector(selector));
				}
				
				// now loop through all the QS we have aggregated and write the tables out
				for(Map.Entry<String, SelectQueryStruct> entry : tableToQs.entrySet()) {
					String table = entry.getKey();
					String outputFileName = databaseId + ";" + table + ";row_comparison.tsv";
					String outputFile = this.folderPath + "/" + Utility.normalizePath(outputFileName);
					
					// the file is reported whether it is reused or regenerated
					fileNames.add(outputFileName);
					if(!override && new File(outputFile).exists()) {
						logger.info("Hash already exists for " + Utility.cleanLogString(table));
						status.add("existing");
						continue;
					}
					status.add("new");

					// no separator so its all concat together
					generateHashFile(logger, engine, entry.getValue(), table, "", outputFile);
				}
			} else {
				for(String selector : pixelSelectors) {
					if(isFilteredOut(selectorFilters, selector)) {
						// ignore the selector
						continue;
					}
					
					// the file name encodes the source: databaseId;table;column.tsv
					String outputFileName = databaseId + ";";
					if(selector.contains("__")) {
						String[] split = selector.split("__");
						outputFileName += split[0] + ";" + split[1];
					} else {
						outputFileName += selector + ";default_node_value";
					}
					outputFileName += ".tsv";
					String outputFile = this.folderPath + "/" + Utility.normalizePath(outputFileName);
					
					// the file is reported whether it is reused or regenerated
					fileNames.add(outputFileName);
					if(!override && new File(outputFile).exists()) {
						logger.info("Hash already exists for " + Utility.cleanLogString(selector));
						status.add("existing");
						continue;
					}
					status.add("new");
					
					logger.info("Querying data for " + Utility.cleanLogString(selector));
					SelectQueryStruct qs = new SelectQueryStruct();
					qs.addSelector(new QueryColumnSelector(selector));
					// tab separated so the dump matches the delimiter used when it is read into R
					generateHashFile(logger, engine, qs, selector, "\t", outputFile);
				}
			}
		}
		
		NounMetadata noun = new NounMetadata(returnMap, PixelDataType.MAP);
		noun.addAdditionalReturn(NounMetadata.getSuccessNounMessage("Successfully stored hash files for databases"));
		return noun;
	}
	
	///////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////
	///////////////////////////////////////////////////////////////////////////////////
	
	/*
	 * Hash generation helpers
	 */
	
	/**
	 * Run the query, dump it to a temporary {@code <baseName>_base.tsv} file in the corpus
	 * folder, read that file into a randomly named R variable and call the R function
	 * {@code encode_instances} to write the hash to {@code outputFile}.
	 * <p>
	 * Failures are logged and not rethrown, so one failing selector/table does not stop
	 * the remaining ones. The temporary file is always deleted and the wrapper is always closed.
	 *
	 * @param logger     logger used for progress messages
	 * @param engine     database to query
	 * @param qs         query selecting the column(s) to hash
	 * @param baseName   selector or table name; used for the temp file name and log messages
	 * @param separator  separator handed to {@code Utility.writeResultToFile}
	 * @param outputFile full path of the hash file that R should write
	 */
	private void generateHashFile(Logger logger, IDatabaseEngine engine, SelectQueryStruct qs,
			String baseName, String separator, String outputFile) {
		IRawSelectWrapper wrapper = null;
		try {
			wrapper = WrapperManager.getInstance().getRawWrapper(engine, qs);
			File f = new File(this.folderPath + "/" + Utility.normalizePath(baseName) + "_base.tsv");
			try {
				// write file
				Utility.writeResultToFile(f.getAbsolutePath(), wrapper, separator);
				// read into R
				// NOTE(review): the random R variable is never removed from the R session - confirm whether cleanup is needed
				String randomFrame = Utility.getRandomString(6);
				this.rJavaTranslator.executeEmptyR(RSyntaxHelper.getFReadSyntax(randomFrame, f.getAbsolutePath(), "\t"));
				// run the script which also outputs the file
				// we care about the file name since we use that to split to know the source
				logger.info("Generating hash for " + Utility.cleanLogString(baseName));
				this.rJavaTranslator.executeEmptyR("encode_instances(" + randomFrame + ", \"" + outputFile + "\")");
				logger.info("Done generating hash");
			} finally {
				if(f.exists()) {
					f.delete();
				}
			}
		} catch (Exception e) {
			classLogger.error(Constants.STACKTRACE, e);
		} finally {
			if(wrapper != null) {
				try {
					wrapper.close();
				} catch(IOException e) {
					classLogger.error(Constants.STACKTRACE, e);
				}
			}
		}
	}
	
	/**
	 * Look up the selector filter list configured for a database.
	 *
	 * @param databaseId the database id used as key in the config map
	 * @return the configured list, or {@code null} when there is no config map or no
	 *         entry for this database
	 */
	@SuppressWarnings("unchecked")
	private List<String> getSelectorFilters(String databaseId) {
		if(this.configMap != null && this.configMap.containsKey(databaseId)) {
			return (List<String>) this.configMap.get(databaseId);
		}
		return null;
	}
	
	/**
	 * Decide whether a selector is excluded by the filter list.
	 * A {@code null} or empty filter list means every selector is included.
	 *
	 * @param selectorFilters the filter list for the database, may be {@code null}
	 * @param selector        the selector to test
	 * @return {@code true} when a non-empty filter list exists and does not contain the selector
	 */
	private static boolean isFilteredOut(List<String> selectorFilters, String selector) {
		return selectorFilters != null && !selectorFilters.isEmpty() && !selectorFilters.contains(selector);
	}
	
	/*
	 * Grab values from store
	 */
	
	/**
	 * Read the database ids passed under the DATABASE key.
	 *
	 * @return all string values stored under the DATABASE key
	 * @throws IllegalArgumentException if the DATABASE key was not provided
	 */
	private List<String> getDatabases() {
		GenRowStruct grs = this.store.getNoun(ReactorKeysEnum.DATABASE.getKey());
		if(grs == null) {
			throw new IllegalArgumentException("Must define the database(s) to generate the hash files for");
		}
		return grs.getAllStrValues();
	}

	/**
	 * Read the optional config passed under the CONFIG key.
	 *
	 * @return the value of the first CONFIG noun when its type is MAP, otherwise {@code null}
	 */
	@SuppressWarnings("rawtypes")
	private Map getConfig() {
		GenRowStruct grs = this.store.getNoun(ReactorKeysEnum.CONFIG.getKey());
		if(grs != null && !grs.isEmpty()) {
			NounMetadata value = grs.getNoun(0);
			if(value.getNounType() == PixelDataType.MAP) {
				return (Map) value.getValue();
			}
		}
		return null;
	}
	
	/*
	 * Getters for other reactors
	 */
	
	/**
	 * @return the corpus folder path computed by {@link #execute()}, or {@code null} before it ran
	 */
	String getFolderPath() {
		return this.folderPath;
	}
	
	/**
	 * @return the config map read by {@link #execute()}, or {@code null} when none was supplied
	 */
	@SuppressWarnings("rawtypes")
	Map getConfigMap() {
		return this.configMap;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy