
prerna.reactor.frame.r.analytics.RunNumericalColumnSimilarityReactor Maven / Gradle / Ivy
The newest version!
package prerna.reactor.frame.r.analytics;
import java.util.List;
import java.util.Vector;
import org.apache.logging.log4j.Logger;
import prerna.ds.r.RDataTable;
import prerna.ds.r.RSyntaxHelper;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.reactor.task.constant.ConstantTaskCreationHelper;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.sablecc2.om.task.ITask;
import prerna.util.Utility;
public class RunNumericalColumnSimilarityReactor extends AbstractRFrameReactor {
// fully-qualified class name; passed to getLogger(...) in execute()
private static final String CLASS_NAME = RunNumericalColumnSimilarityReactor.class.getName();
// input key for the size of the sampled data used in the analysis; execute() falls back to "100" when it is not supplied
protected static final String SAMPLE_SIZE = "sampleSize";
// input key for the significance level used to run the analysis; execute() falls back to "0.05" when it is not supplied
protected static final String SIGNIFICANCE = "significance";
// input key for whether the script should also return non-similar columns; execute() falls back to "true" when it is not supplied
protected static final String SHOW_ALL = "showAll";
/**
* RunNumericalColumnSimilarity ( columns = [ "RottenTomatoes_Audience" , "RottenTomatoes_Critics", "Revenue_Domestic" , "Revenue_International", "MovieBudget" ] , panel = [ "0" ] , sampleSize = ["100"], significance = ["0.05"], showAll = ["TRUE"] );
*/
public RunNumericalColumnSimilarityReactor() {
this.keysToGet = new String[] { ReactorKeysEnum.COLUMNS.getKey(), ReactorKeysEnum.PANEL.getKey(), SAMPLE_SIZE, SIGNIFICANCE, SHOW_ALL };
}
@Override
public NounMetadata execute() {
init();
organizeKeys();
String[] packages = new String[] { "data.table" };
this.rJavaTranslator.checkPackages(packages);
Logger logger = this.getLogger(CLASS_NAME);
RDataTable dataFrame = (RDataTable) getFrame();
String frameName = dataFrame.getName();
dataFrame.setLogger(logger);
// get inputs
List numericalCols = getColumns();
String panelId = this.keyValue.get(this.keysToGet[1]);
String sampleSize = this.keyValue.get(this.keysToGet[2]);
if (sampleSize == null ) {
sampleSize = "100";
}
String significance = this.keyValue.get(this.keysToGet[3]);
if (significance == null) {
significance = "0.05";
}
String showAll = this.keyValue.get(this.keysToGet[4]);
if(showAll == null) {
showAll = "true";
}
// get number of cols
int numCols = numericalCols.size();
// make sure that at least two columns were provided
if(numCols < 2) {
String errorString = "Please select two or more numerical columns to run this algorithm";
logger.info(errorString);
throw new IllegalArgumentException(errorString);
}
// get the correlation data from the run r regression algorithm
logger.info("Start iterating through data to determine similarity");
String resultsList = runAlgorithm(frameName, numericalCols, sampleSize, significance, showAll);
logger.info("Done iterating through data to determine similarity");
// create the object to return to the FE
// there will always be three columns: New, Existing, and Similarity
int numRows = this.rJavaTranslator.getInt("nrow(" + resultsList + ")");
Object[][] retOutput = new Object[numRows][3];
// need to fill in the object with the data values
// retrieve data using getBulkDataRow
String[] heatMapHeaders = new String[] { "New", "Existing", "Similarity" };
String query = resultsList + "[" + 1 + ":" + numRows + "]";
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy