All downloads are free. Search and download functionality uses the official Maven repository.

prerna.reactor.algorithms.dataquality.RunDataQualityReactor Maven / Gradle / Ivy

The newest version!
package prerna.reactor.algorithms.dataquality;

import java.util.List;
import java.util.Vector;

import prerna.algorithm.api.SemossDataType;
import prerna.ds.r.RDataTable;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.Constants;
import prerna.util.DIHelper;
import prerna.util.Utility;

/*
 * This is the class of DQ that performs data quality from the input information
 */
public class RunDataQualityReactor extends AbstractRFrameReactor {

	// Define rules because ReactorKeyEnum doesn't have default values for our purposes
	private static final String RULE_KEY = "rule";
	private static final String COLUMNS_KEY = "column";
	private static final String OPTIONS_KEY = "options";
	private static final String INPUT_TABLE_KEY = "inputTable";

	// This gets the basic format of the data. The information put in here will be in the format of the 
	// information we want to read in. I.E. assigning a var [INT]
	public RunDataQualityReactor() {
		this.keysToGet = new String[] {
				RULE_KEY, COLUMNS_KEY, OPTIONS_KEY, INPUT_TABLE_KEY
		};
	}

	@Override
	/*
	 * (non-Javadoc)
	 * @see prerna.sablecc2.reactor.IReactor#execute()
	 * 
	 * This function creates Meta Data on the variable. It takes in the value and also takes the pixel
	 * and assigns a type. We use this to create the variables to pass to R
	 */
	public NounMetadata execute() {
		// Initiate R
		init();
		organizeKeys();
		RDataTable frame = (RDataTable) getFrame();
		String rFrameVariable = frame.getName();
		String rule = getData(RULE_KEY);

		String column = getData(COLUMNS_KEY);
		// only apply char length to string cols
		if(rule.equals("Character Length")) {
			SemossDataType dt = frame.getMetaData().getHeaderTypeAsEnum(frame.getName() + "__" + column);
			if(!dt.equals(SemossDataType.STRING)) {
				throw new IllegalArgumentException("Character length rule only applies to STRING column");
			}
		}
		List optionsList = getOptions(OPTIONS_KEY);
		RDataTable inputTable = getInputTable();
		
		String retRVariableName = null;
		if(inputTable != null) {
			retRVariableName = inputTable.getName();
		} else {
			// did user define output table?
			retRVariableName = getInputTableName();
			// no, make one up
			if(retRVariableName == null) {
				retRVariableName = "dataQualityTable_" + Utility.getRandomString(5);
			}
		}
		
		StringBuilder str = new StringBuilder();
		str.append("list(");
		str.append("rule = \"").append(rule).append("\"");
		str.append(", col = \"").append(column).append("\"");
		str.append(", options = ");
		if (!optionsList.isEmpty()) {
			int optListSize = optionsList.size();
			str.append("c(");
			for(int i = 0; i < optListSize; i++) {
				str.append("\"").append(optionsList.get(i)).append("\"");
				if( (i + 1) != optListSize) {
					str.append(",");
				}
			}
			str.append(")");
		}
		else {
			str.append("NULL");
		}

		str.append(")");		
		////////  Variable that will be set to map of rules/ input of mission control //////
		StringBuilder inputString = new StringBuilder();
		String inputVariable = "inputRules_" + Utility.getRandomString(5);
		inputString.append(inputVariable + " <- " + str + ";"); 
		
		// will call the script with all the source calls 
		
		StringBuilder rScript = new StringBuilder();
		String base = DIHelper.getInstance().getProperty(Constants.BASE_FOLDER);
		String dqDirLoc = null;
		if(base.endsWith(DIR_SEPARATOR)) {
			dqDirLoc = base + "R" + DIR_SEPARATOR + "DQ" + DIR_SEPARATOR;
		} else {
			dqDirLoc = base + DIR_SEPARATOR + "R" + DIR_SEPARATOR + "DQ" + DIR_SEPARATOR;
		}
		dqDirLoc = dqDirLoc.replace("\\", "/");
		rScript.append("source(\"" + dqDirLoc + "sourceFile.R" + "\");");
		rScript.append("sourceFiles(\"" + dqDirLoc + "\");");
		
		if(inputTable == null) {
			rScript.append(retRVariableName).append(" <- data.table(Columns=character(), Errors=integer(), Valid=integer(), Total=integer(), Rules=character(), Description=character(), toColor = character());");
		}
		rScript.append(inputString.toString());
		
		rScript.append(retRVariableName).append(" <- missionControl(" + rFrameVariable + ", " + inputVariable + ", " + retRVariableName + ");");
		
		
		// R garbage collection
		rScript.append("source(\"" + dqDirLoc + "fileCleanup.R" + "\");");
		rScript.append("rm(" + inputVariable + ");");
//		System.out.println(rScript);

		// you will run this rScript
		this.rJavaTranslator.runR(rScript.toString());
		frame.recreateMeta();
		
		if(inputTable != null) {
			return new NounMetadata(inputTable, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE);
		}
		// make a new frame
		RDataTable newFrame = createNewFrameFromVariable(retRVariableName);
		NounMetadata noun = new NounMetadata(newFrame, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE, PixelOperationType.FRAME_HEADERS_CHANGE);
		this.insight.getVarStore().put(retRVariableName, noun);
		return noun;
	}

	private List getOptions(String key) {
		// instantiate var ruleList as a list of strings 
		List optionList = new Vector();
		// Class call to make grs to get the Noun of getRules
		GenRowStruct grs = this.store.getNoun(key);

		if(grs == null || grs.isEmpty()) {
			optionList.add("NULL");
			return optionList;
		}
		// Assign size to the length of grs
		int size = grs.size();
		// Iterate through the rule and add the value to the list
		for(int i = 0; i < size; i++) {
			optionList.add(grs.get(i).toString());
		}
		return optionList;
	}
	
	private String getData(String key) {
		GenRowStruct grs = this.store.getNoun(key);

		if(grs == null || grs.isEmpty()) {
			throw new IllegalArgumentException("Missing Necessary Value to Run");
		}

		return grs.get(0).toString();
	}
	
	private RDataTable getInputTable() {
		GenRowStruct grs = this.store.getNoun(INPUT_TABLE_KEY);
		if(grs == null || grs.isEmpty()) {
			return null;
		}
		NounMetadata noun = grs.getNoun(0);
		if(noun.getNounType() == PixelDataType.FRAME) {
			return (RDataTable) grs.get(0);
		}
		return null;
	}
	
	private String getInputTableName() {
		GenRowStruct grs = this.store.getNoun(INPUT_TABLE_KEY);
		if(grs == null || grs.isEmpty()) {
			return null;
		}
		NounMetadata noun = grs.getNoun(0);
		if(noun.getNounType() == PixelDataType.CONST_STRING) {
			return grs.get(0).toString();
		}
		return null;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy