
prerna.reactor.algorithms.RunSimilarityReactor Maven / Gradle / Ivy
The newest version!
package prerna.reactor.algorithms;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.core.config.Configurator;
import prerna.algorithm.api.ITableDataFrame;
import prerna.algorithm.learning.util.Cluster;
import prerna.algorithm.learning.util.IClusterDistanceMode;
import prerna.algorithm.learning.util.IClusterDistanceMode.DistanceMeasure;
import prerna.reactor.frame.AbstractFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.ArrayUtilityMethods;
import prerna.util.usertracking.AnalyticsTrackerHelper;
import prerna.util.usertracking.UserTrackerFactory;
public class RunSimilarityReactor extends AbstractFrameReactor {
// Name handed to getLogger(...) in execute().
private static final String CLASS_NAME = RunSimilarityReactor.class.getName();
// Attribute column names returned by getAttributes(...); execute() copies them
// into a String[] via toArray(new String[0]), so the elements are Strings.
private List<String> attributeNamesList;
// Array form of attributeNamesList, populated in execute().
private String[] attributeNames;
// Position of instanceColumn within attributeNamesList (result of indexOf).
private int instanceIndex;
// Column whose instances are compared, read via getInstanceColumn().
private String instanceColumn;
// Distance measure keyed by attribute name; execute() fills in
// DistanceMeasure.MEAN for entries that are not already present.
private Map<String, DistanceMeasure> distanceMeasure;
/**
* RunSimilarity(instance = column, columns = attributeNamesList);
*/
public RunSimilarityReactor() {
this.keysToGet = new String[]{ReactorKeysEnum.INSTANCE_KEY.getKey(), ReactorKeysEnum.ATTRIBUTES.getKey()};
}
/**
 * Computes a similarity value per instance and merges it onto the frame as a
 * new column.
 * <p>
 * Steps performed here:
 * <ol>
 * <li>read the instance column and attribute list from the pixel inputs;</li>
 * <li>record which attributes the frame reports as numeric;</li>
 * <li>default the distance measure to {@code DistanceMeasure.MEAN} where none is set;</li>
 * <li>build a single {@link Cluster} from the frame and score each instance against it;</li>
 * <li>merge the scores back under {@code <instance>_SIMILARITY}, appending
 * {@code _<n>} if that header already exists.</li>
 * </ol>
 *
 * @return the frame wrapped in a {@link NounMetadata} flagged with
 *         {@code FRAME_DATA_CHANGE} and {@code FRAME_HEADERS_CHANGE}
 * @throws IllegalArgumentException if the instance column is not part of the
 *         attribute list returned by {@code getAttributes}
 */
@Override
public NounMetadata execute() {
    Logger logger = this.getLogger(CLASS_NAME);
    ITableDataFrame dataFrame = getFrame();
    dataFrame.setLogger(logger);
    AlgorithmSingleColStore results = new AlgorithmSingleColStore();

    // get inputs from pixel command
    this.instanceColumn = getInstanceColumn();
    this.attributeNamesList = getAttributes(instanceColumn);
    this.attributeNames = this.attributeNamesList.toArray(new String[0]);
    this.instanceIndex = attributeNamesList.indexOf(this.instanceColumn);
    // instanceIndex is used below as an array index; fail with a clear message
    // instead of an ArrayIndexOutOfBoundsException deeper in the algorithm
    if (this.instanceIndex < 0) {
        throw new IllegalArgumentException("Instance column '" + this.instanceColumn
                + "' is not among the attributes used for similarity");
    }

    // store which attributes are numeric
    boolean[] isNumeric = new boolean[this.attributeNames.length];
    for (int i = 0; i < this.attributeNames.length; i++) {
        isNumeric[i] = dataFrame.isNumeric(this.attributeNames[i]);
    }

    // set the type of distance measure to be used for each numerical
    // property - default is using mean
    if (this.distanceMeasure == null) {
        distanceMeasure = new HashMap<>();
        for (int i = 0; i < attributeNames.length; i++) {
            if (isNumeric[i]) {
                distanceMeasure.put(attributeNames[i], DistanceMeasure.MEAN);
            }
        }
    } else {
        // a map already exists: only fill in attributes that have no entry yet
        for (int i = 0; i < attributeNames.length; i++) {
            if (!distanceMeasure.containsKey(attributeNames[i])) {
                distanceMeasure.put(attributeNames[i], DistanceMeasure.MEAN);
            }
        }
    }

    Cluster cluster = new Cluster(attributeNames, isNumeric);
    cluster.setDistanceMode(distanceMeasure);

    logger.info("Start generating cluster center for similarity of instances");
    // logging is switched off while iterating the frame; the finally block
    // guarantees it is switched back on even if the iteration throws
    Configurator.setLevel(logger.getName(), Level.OFF);
    try {
        generateClusterCenters(dataFrame, cluster, isNumeric);
    } finally {
        Configurator.setLevel(logger.getName(), Level.INFO);
    }
    logger.info("Done generating cluster centers for similarity of instances");

    logger.info("Start generating similarity of instance to dataset center");
    // getSimilarityValuesForInstances turns the logger off itself, so the
    // level must be restored here on both the normal and the failure path
    try {
        getSimilarityValuesForInstances(dataFrame, cluster, results, isNumeric, logger);
    } finally {
        Configurator.setLevel(logger.getName(), Level.INFO);
    }
    logger.info("Done generating similarity of instance to dataset center");

    String[] allColNames = dataFrame.getColumnHeaders();
    String attributeName = attributeNames[instanceIndex];
    // to avoid adding columns with same name
    int counter = 0;
    String newColName = attributeName + "_SIMILARITY";
    while (ArrayUtilityMethods.arrayContainsValue(allColNames, newColName)) {
        counter++;
        newColName = attributeName + "_SIMILARITY_" + counter;
    }

    // merge data back onto the frame
    AlgorithmMergeHelper.mergeSimpleAlgResult(dataFrame, this.instanceColumn, newColName, "NUMBER", results);

    // track the analytics widget usage together with the hashed inputs
    UserTrackerFactory.getInstance().trackAnalyticsWidget(
            this.insight,
            dataFrame,
            "Similarity",
            AnalyticsTrackerHelper.getHashInputs(this.store, this.keysToGet));

    // return successful frame change to FE
    return new NounMetadata(dataFrame, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE, PixelOperationType.FRAME_HEADERS_CHANGE);
}
/**
 * Feeds every item produced by the frame's scaled unique iterator (keyed on
 * the instance column) into the given cluster.
 *
 * @param dataFrame frame to iterate over
 * @param cluster   cluster that receives each item via {@code addToCluster}
 * @param isNumeric per-attribute numeric flags, parallel to {@code attributeNames}
 */
private void generateClusterCenters(ITableDataFrame dataFrame, Cluster cluster, boolean[] isNumeric) {
    // NOTE(review): the iterator's type arguments were lost in the original text
    // ("Iterator>"); List<Object[]> is assumed here - confirm against
    // ITableDataFrame.scaledUniqueIterator and Cluster.addToCluster
    Iterator<List<Object[]>> it = dataFrame.scaledUniqueIterator(attributeNames[instanceIndex], attributeNamesList);
    while (it.hasNext()) {
        cluster.addToCluster(it.next(), attributeNames, isNumeric);
    }
}
public void getSimilarityValuesForInstances(
ITableDataFrame dataFrame,
Cluster cluster,
AlgorithmSingleColStore results,
boolean[] isNumeric,
Logger logger
) {
Configurator.setLevel(logger.getName(), Level.OFF);
int counter = 0;
Iterator> it = dataFrame.scaledUniqueIterator(attributeNames[instanceIndex], attributeNamesList);
while (it.hasNext()) {
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy