All Downloads are FREE. Search and download functionalities are using the official Maven repository.

prerna.reactor.frame.r.analytics.RunSimilarityReactor Maven / Gradle / Ivy

The newest version!
package prerna.reactor.frame.r.analytics;

import java.util.ArrayList;
import java.util.List;

import org.apache.logging.log4j.Logger;

import prerna.algorithm.api.ITableDataFrame;
import prerna.ds.OwlTemporalEngineMeta;
import prerna.ds.r.RSyntaxHelper;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.ArrayUtilityMethods;
import prerna.util.Utility;
import prerna.util.usertracking.AnalyticsTrackerHelper;
import prerna.util.usertracking.UserTrackerFactory;

/**
 * This reactor determines the similarity between values in an instance column based on the selected attribute columns
 * The result is a new column added to the data frame with values between 0 and 1 for each row
 * Higher values indicate stronger similarity
 * The inputs to the reactor are: 
 * 1) the instance column
 * 2) the attribute columns
 */

public class RunSimilarityReactor extends AbstractRFrameReactor {
	
	private static final String CLASS_NAME = RunSimilarityReactor.class.getName();
	
	// inputs resolved at the start of execute()
	private String[] attributeNames;
	private List<String> attributeNamesList;
	private String instanceColumn;
	
	/**
	 * Registers the two keys this reactor reads its inputs from:
	 * index 0 is the instance column key, index 1 is the attributes key
	 */
	public RunSimilarityReactor() {
		this.keysToGet = new String[]{ReactorKeysEnum.INSTANCE_KEY.getKey(), ReactorKeysEnum.ATTRIBUTES.getKey()};
	}

	/**
	 * Runs the R similarity routine against the current frame and registers
	 * the resulting similarity column in the frame metadata
	 * @return the frame noun flagged with data and header changes, carrying a success message
	 */
	@Override
	public NounMetadata execute() {
		init();
		Logger logger = this.getLogger(CLASS_NAME);
		ITableDataFrame dataFrame = getFrame();
		String frameName = dataFrame.getName();
		dataFrame.setLogger(logger);
		OwlTemporalEngineMeta meta = this.getFrame().getMetaData();
		
		// figure out inputs
		this.instanceColumn = getInstanceColumn();
		this.attributeNamesList = getAttributes(instanceColumn);
		this.attributeNames = this.attributeNamesList.toArray(new String[0]);
		
		// determine the name for the new similarity column
		// append an increasing counter until the name does not collide with an existing header
		String[] allColNames = dataFrame.getColumnHeaders();
		int counter = 0;
		String newColName = this.instanceColumn + "_SIMILARITY";
		while (ArrayUtilityMethods.arrayContainsValue(allColNames, newColName)) {
			counter++;
			newColName = this.instanceColumn + "_SIMILARITY_" + counter;
		}
		
		// run the r similarity routine; it writes its result back to the frame variable in R
		logger.info("Start iterating through data to determine similarity");
		runRSimilarityAlgorithm(frameName, attributeNames, instanceColumn, newColName);
		logger.info("Done iterating through data to determine similarity");

		// track the usage of this analytics routine
		UserTrackerFactory.getInstance().trackAnalyticsWidget(
				this.insight, 
				dataFrame, 
				"SimilarityAlgorithm", 
				AnalyticsTrackerHelper.getHashInputs(this.store, this.keysToGet));
		
		// register the new column in the frame meta
		// the property is keyed as frameName__columnName
		String newColProperty = frameName + "__" + newColName;
		meta.addProperty(frameName, newColProperty);
		meta.setAliasToProperty(newColProperty, newColName);
		meta.setDataTypeToProperty(newColProperty, "DOUBLE");
		
		// return the frame itself, flagged as having both its data and its headers changed
		NounMetadata noun = new NounMetadata(dataFrame, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE, PixelOperationType.FRAME_HEADERS_CHANGE);
		noun.addAdditionalReturn(NounMetadata.getSuccessNounMessage("Similarity ran succesfully! See new \"" + newColName + "\" column in the grid."));
		return noun;
	}
	
	/**
	 * Run the similarity algorithm using an r script
	 * The generated script sources Similarity.R, calls GenerateSimilarityTable,
	 * and then removes the temporary result variable and the sourced functions
	 * @param frameName			the name of the frame variable in R
	 * @param attributeNames	the attribute columns passed to the routine as an R character vector
	 * @param instanceColumn	the instance column name, passed to the routine as a quoted string
	 * @param newColName		the name for the similarity column, passed to the routine as a quoted string
	 */
	private void runRSimilarityAlgorithm(String frameName, String[] attributeNames, String instanceColumn, String newColName) {
		
		// create a column vector to pass as an input into our R script
		String colVector = RSyntaxHelper.createStringRColVec(attributeNames);
		
		// temporary R variable holding the output of the routine
		// it is removed again at the end of the script
		String resultsFrameName = "ResultsTable" + Utility.getRandomString(10);
		
		// create a stringbuilder for our r syntax
		StringBuilder rsb = new StringBuilder();
		// source the r script that defines the similarity routine
		// R is given forward slashes regardless of the separators in the base folder
		String similarityScriptFilePath = getBaseFolder() + "\\R\\AnalyticsRoutineScripts\\Similarity.R";
		similarityScriptFilePath = similarityScriptFilePath.replace("\\", "/");
		rsb.append("source(\"" + similarityScriptFilePath + "\");");
		// R syntax for the routine: ResultsTableName <- GenerateSimilarityTable(frameName, "instanceColumn", c("col1", "col2"), "simColumnName")
		rsb.append(resultsFrameName + "<- GenerateSimilarityTable(" + frameName + ", " + "\"" + instanceColumn + "\"" + ", " + colVector + ", " + "\"" + newColName + "\"" + ");");
		// NOTE(review): presumably assigns the results back onto the frame variable as a data.table - confirm against RSyntaxHelper
		rsb.append(RSyntaxHelper.asDataTable(frameName, resultsFrameName));
		// garbage collection
		rsb.append("rm(" + resultsFrameName + ",CalculateSimilarity, CSimilarity,DefineRatios,"
				+ "FindCentroids,GenerateCountTable,GenerateLookupDT, "
				+ "GenerateSimilarityTable,GenerateWeightsTable,ScaleUniqueData); gc();");
		// run the script
		this.rJavaTranslator.runR(rsb.toString());
	}

	////////////////////////////////////////////////////////////
	////////////////////////////////////////////////////////////
	////////////////////////////////////////////////////////////
	
	/*
	 * Retrieving inputs
	 */
	
	/**
	 * Get the instance column
	 * Uses the first noun stored under the instance key when that key was passed,
	 * otherwise assumes the column is the zero index noun in the curRow
	 * @return the instance column name
	 */
	private String getInstanceColumn() {
		GenRowStruct instanceIndexGrs = this.store.getNoun(keysToGet[0]);
		NounMetadata instanceColumnNoun;
		if (instanceIndexGrs != null) {
			instanceColumnNoun = instanceIndexGrs.getNoun(0);
		} else {
			instanceColumnNoun = this.curRow.getNoun(0);
		}
		return (String) instanceColumnNoun.getValue();
	}

	/**
	 * Get the attribute columns
	 * Uses the nouns stored under the attributes key when that key was passed,
	 * otherwise uses every noun in the curRow after index 0
	 * The instance column itself is always left out of the result
	 * @param instanceColumn	the instance column name to exclude
	 * @return the attribute column names in the order they were passed
	 */
	private List<String> getAttributes(String instanceColumn) {
		List<String> retList = new ArrayList<String>();
		// see if defined as individual key
		GenRowStruct columnGrs = this.store.getNoun(keysToGet[1]);
		if (columnGrs != null) {
			for (NounMetadata noun : columnGrs.vector) {
				String attribute = noun.getValue().toString();
				if (!attribute.equals(instanceColumn)) {
					retList.add(attribute);
				}
			}
		} else {
			// index 0 is reserved for the instance column, so start at 1
			int rowLength = this.curRow.size();
			for (int i = 1; i < rowLength; i++) {
				String attribute = this.curRow.getNoun(i).getValue().toString();
				if (!attribute.equals(instanceColumn)) {
					retList.add(attribute);
				}
			}
		}
		return retList;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy