
prerna.reactor.algorithms.RunLOFReactor Maven / Gradle / Ivy
The newest version!
package prerna.reactor.algorithms;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.math3.special.Erf;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.core.config.Configurator;
import prerna.algorithm.api.ITableDataFrame;
import prerna.algorithm.learning.unsupervised.outliers.KDTree;
import prerna.algorithm.learning.util.DuplicationReconciliation;
import prerna.algorithm.learning.util.DuplicationReconciliation.ReconciliationMode;
import prerna.math.StatisticsUtilityMethods;
import prerna.reactor.frame.AbstractFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.ArrayUtilityMethods;
import prerna.util.usertracking.AnalyticsTrackerHelper;
import prerna.util.usertracking.UserTrackerFactory;
public class RunLOFReactor extends AbstractFrameReactor {
// Fully-qualified class name; passed to getLogger(...) in execute()
private static final String CLASS_NAME = RunLOFReactor.class.getName();
// Name of the input key carrying the neighborhood size (registered in keysToGet)
private static final String K_NEIGHBORS = "kNeighbors";
// Array form of attributeNamesList, filled in execute()
private String[] attributeNames;
// Column names returned by getColumns(); typed so that toArray(new String[] {}) yields a String[]
private List<String> attributeNamesList;
// Position within attributeNames that is exempt from the numeric-column check; set to 0 in execute()
private int instanceIndex;
// Column returned by getInstanceColumn(); used to count unique instances
private String instanceColumn;
// Unique instance count of instanceColumn as reported by the data frame
private int numInstances;
// attributeNames.length - 1
private int dimensions;
// Attribute name -> reconciliation strategy; populated with ReconciliationMode.MEAN entries when null
private Map<String, DuplicationReconciliation> dups;
// Sized to numInstances in execute(); NOTE(review): presumably one identifier per instance row - confirm against the rest of the file
private Object[] index;
private int k; // How many neighbors to examine?
public KDTree Tree; // Points in dataset are put into KDTree to help find nearest neighbors
private double[][] reachDistance; // This stores the reachDistance between 2 points. Not symmetrical!
private double[] kDistance; // This stores the k-Distance for each point in dataset
private double[] LRD; // This stores the LRD for each point in dataset
private double[] LOF; // This stores the LOF for each point in dataset
private double[] LOP; // This stores the LOP for each point in dataset
private double[][] dataFormatted; // This stores the formatted data from dataTable
/*
* RunLOF(instance = col, kNeighbors = 25, columns = col1, col2, ...);
*/
public RunLOFReactor() {
this.keysToGet = new String[]{ReactorKeysEnum.INSTANCE_KEY.getKey(), K_NEIGHBORS, ReactorKeysEnum.ATTRIBUTES.getKey()};
}
@Override
public NounMetadata execute() {
// TODO: need to throw an error saying parameters are required
this.instanceIndex = 0;
this.k = getKNeighborhoodSize();
this.instanceColumn = getInstanceColumn();
this.attributeNamesList = getColumns();
this.attributeNames = attributeNamesList.toArray(new String[] {});
if (dups == null) {
dups = new HashMap();
for (int i = 0; i < attributeNames.length; i++) {
dups.put(attributeNames[i], new DuplicationReconciliation(ReconciliationMode.MEAN));
}
}
// get number of rows and cols
Logger logger = this.getLogger(CLASS_NAME);
ITableDataFrame dataFrame = getFrame();
dataFrame.setLogger(logger);
// get number of rows and cols
this.numInstances = dataFrame.getUniqueInstanceCount(instanceColumn);
if (k > numInstances) {
throw new IllegalArgumentException("Number of unqiue instances: " + this.numInstances + ", is less than the selected K value: " + k + ".");
}
boolean[] isNumeric = new boolean[this.attributeNames.length];
for (int i = 0; i < this.attributeNames.length; i++) {
isNumeric[i] = dataFrame.isNumeric(this.attributeNames[i]);
if (i != instanceIndex && !isNumeric[i]) {
throw new IllegalArgumentException(
"All columns must be numbers! \n" + "Column " + attributeNames[i] + " is not all numbers!");
}
}
this.dimensions = this.attributeNames.length - 1;
// Initialize arrays
kDistance = new double[numInstances];
LRD = new double[numInstances];
LOF = new double[numInstances];
LOP = new double[numInstances];
reachDistance = new double[numInstances][numInstances];
index = new Object[numInstances];
dataFormatted = new double[numInstances][dimensions];
this.Tree = new KDTree(dimensions);
logger.info("Starting to process instances..");
Configurator.setLevel(logger.getName(), Level.OFF);
// This code flattens out instances, incase there are repeat appearances of an identifier
Iterator> it = dataFrame.scaledUniqueIterator(instanceColumn, attributeNamesList);
int numInstance = 0;
while (it.hasNext()) {
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy