All Downloads are FREE. Search and download functionalities are using the official Maven repository.

prerna.reactor.frame.r.UpdateMatchColumnValuesReactor Maven / Gradle / Ivy

The newest version!
package prerna.reactor.frame.r;

import java.util.List;
import java.util.Map;

import prerna.algorithm.api.SemossDataType;
import prerna.ds.OwlTemporalEngineMeta;
import prerna.ds.r.RDataTable;
import prerna.ds.r.RSyntaxHelper;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.Utility;
import prerna.util.usertracking.AnalyticsTrackerHelper;
import prerna.util.usertracking.UserTrackerFactory;

public class UpdateMatchColumnValuesReactor extends AbstractRFrameReactor {

	public static final String MATCHES = "matches";
	public static final String MATCHES_TABLE = "matchesTable";

	public UpdateMatchColumnValuesReactor() {
		this.keysToGet = new String[] { ReactorKeysEnum.COLUMN.getKey(), MATCHES_TABLE, MATCHES };
	}

	@Override
	public NounMetadata execute() {
		init();
		organizeKeys();
		String column = this.keyValue.get(this.keysToGet[0]);
		if(column == null | column.isEmpty()) {
			throw new IllegalArgumentException("Must pass in the column to run the update on");
		}
		String matchesTable = this.keyValue.get(this.keysToGet[1]);

		// check if packages are installed
		String[] packages = { "stringdist", "data.table" };
		this.rJavaTranslator.checkPackages(packages);
		
		StringBuilder rsb = new StringBuilder();
		String baseFolder = Utility.getBaseFolder().replace("\\", "/");
		String bestMatchScript = "source(\"" + baseFolder + "/R/Recommendations/advanced_federation_blend.r\");";
		bestMatchScript = bestMatchScript.replace("\\", "/");
		rsb.append(bestMatchScript);

		// get single column input
		String linkFrame = "link" + Utility.getRandomString(5);
		RDataTable frame = (RDataTable) getFrame();
		String frameName = frame.getName();
		String col1 = matchesTable + "col1";
		rsb.append(col1 + "<- as.character(" + frameName + "$" + column + ");");

		// iterate matches and create the link frame
		List allMatches = getInputList(MATCHES);
		if(allMatches == null || allMatches.isEmpty()) {
			throw new IllegalArgumentException("Must pass in matches to connect the 'current value' to the 'replacement value'");
		}
		// add all matches
		StringBuilder col1Builder = new StringBuilder();
		StringBuilder col2Builder = new StringBuilder();
		StringBuilder col3Builder = new StringBuilder();
		for (int i = 0; i < allMatches.size(); i++) {
			if (i != 0) {
				col1Builder.append(",");
				col2Builder.append(",");
				col3Builder.append(",");
			}
			String match = (String) allMatches.get(i);
			String[] matchList = match.split(" == ");
			if (matchList.length > 2) {
				throw new IllegalArgumentException("match seperator didnt work");
			}
			String column1 = matchList[0];
			String column2 = matchList[1];
			col1Builder.append("\"" + column1 + "\"");
			col2Builder.append("\"" + column2 + "\"");
			col3Builder.append("1");
		}
		// add all matches provided
		String script = linkFrame + " <- data.table(\"col1\"=c(" + col1Builder + "), \"col2\"=c(" + col2Builder	+ ")); ";
		rsb.append(script);
		// make link frame unique
		rsb.append(linkFrame + " <- unique(" + linkFrame + ");");

		// call the curate script
		String resultFrame = Utility.getRandomString(8);
		rsb.append(resultFrame + "<- curate(" + col1 + "," + linkFrame + ");");

		String tempColHeader = Utility.getRandomString(8);

		// make resultFrame a DT and update the header to a temp name
		rsb.append(resultFrame + " <- as.data.table(" + resultFrame + ");" + "names(" + resultFrame + ")<-\"" + tempColHeader + "\";");
		
		// add new temp name column to frame
		rsb.append(frameName + " <- cbind(" + frameName + "," + resultFrame + ");");
		
		// delete existing column from frame
		rsb.append(frameName + " <- " + frameName + "[,-c(\"" + column + "\")];");
		
		// update temp column name to the original column name
		rsb.append("colnames(" + frameName + ")[colnames(" + frameName + ")==\"" + tempColHeader + "\"] <- \"" + column + "\";");

		// get current frame data type
		OwlTemporalEngineMeta metaData = this.getFrame().getMetaData();
		Map typeMap = metaData.getHeaderToTypeMap();
		SemossDataType dataType = typeMap.get(column);
		// return data type to original state
		if (dataType == SemossDataType.DOUBLE ) {
			rsb.append(RSyntaxHelper.alterColumnType(frameName, column, SemossDataType.DOUBLE));
		} else if(dataType == SemossDataType.INT) {
			rsb.append(RSyntaxHelper.alterColumnType(frameName, column, SemossDataType.INT));
		}

		rsb.append("rm(" + resultFrame + "," + linkFrame + "," + col1 +  "," + matchesTable + ", best_match, best_match_nonzero, best_match_zero, blend, curate, self_match );");
		
		this.rJavaTranslator.runR(rsb.toString());
		this.addExecutedCode(rsb.toString());

		// NEW TRACKING
		UserTrackerFactory.getInstance().trackAnalyticsWidget(
				this.insight, 
				frame, 
				"UpdateSimilarColumnValues", 
				AnalyticsTrackerHelper.getHashInputs(this.store, this.keysToGet));
		NounMetadata retNoun = new NounMetadata(frame, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE);

		return retNoun;
	}

	private List getInputList(String key) {
		// see if defined as individual key
		GenRowStruct columnGrs = this.store.getNoun(key);
		if (columnGrs != null) {
			if (columnGrs.size() > 0) {
				List values = columnGrs.getAllStrValues();
				return values;
			}
		}
		// else, we assume it is values in the curRow
		List values = this.curRow.getAllStrValues();
		return values;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy