All downloads are free. The search and download functionality uses the official Maven repository.

prerna.reactor.frame.r.analytics.RunDocCosSimilarityReactor Maven / Gradle / Ivy

The newest version!
package prerna.reactor.frame.r.analytics;

import java.util.ArrayList;
import java.util.List;

import prerna.ds.OwlTemporalEngineMeta;
import prerna.ds.r.RDataTable;
import prerna.ds.r.RSyntaxHelper;
import prerna.query.interpreters.RInterpreter;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.query.querystruct.transform.QSAliasToPhysicalConverter;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.execptions.SemossPixelException;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.Utility;

/**
 * Reactor that runs a document cosine-similarity routine in R against the
 * current R frame.
 *
 * <p>
 * Inputs (see {@link #keysToGet}):
 * <ul>
 * <li>{@code INSTANCE_KEY} - name of the frame column passed to R as
 * {@code column}</li>
 * <li>{@code DESCRIPTION} - name of the frame column passed to R as
 * {@code description}</li>
 * <li>{@code OVERRIDE} - optional boolean; when true (the default) the result
 * replaces the current frame, otherwise a new frame is returned</li>
 * </ul>
 *
 * <p>
 * The R function {@code getDocumentCosineSimilarity} is invoked after sourcing
 * {@code R/AnalyticsRoutineScripts/DocumentSimilarity.R} from the base folder;
 * presumably that script defines it (the script itself is not part of this
 * class).
 */
public class RunDocCosSimilarityReactor extends AbstractRFrameReactor {

	private static final String CLASS_NAME = RunDocCosSimilarityReactor.class.getName();

	public RunDocCosSimilarityReactor() {
		this.keysToGet = new String[] { ReactorKeysEnum.INSTANCE_KEY.getKey(), ReactorKeysEnum.DESCRIPTION.getKey(),
				ReactorKeysEnum.OVERRIDE.getKey() };
	}

	/**
	 * Builds and runs the R script that computes the similarity frame, then
	 * wraps the resulting R variable in a new {@link RDataTable}.
	 *
	 * @return a FRAME noun holding the similarity table; when overriding, the
	 *         noun also carries the FRAME_DATA_CHANGE and FRAME_HEADERS_CHANGE
	 *         operation types and the table is set as the insight's data maker
	 * @throws SemossPixelException
	 *             if a required input is missing or the R routine did not
	 *             produce a result variable
	 */
	@Override
	public NounMetadata execute() {
		organizeKeys();
		init();
		String[] packages = new String[] { "lsa", "text2vec" };
		this.rJavaTranslator.checkPackages(packages);
		RDataTable rFrame = (RDataTable) this.getFrame();
		OwlTemporalEngineMeta meta = rFrame.getMetaData();
		String dataFrame = rFrame.getName();
		// both column names are concatenated into the R script below, so a
		// missing value must be rejected here instead of becoming "null" in R
		String instanceCol = requireInput(this.keyValue.get(ReactorKeysEnum.INSTANCE_KEY.getKey()),
				ReactorKeysEnum.INSTANCE_KEY.getKey());
		String description = requireInput(this.keyValue.get(ReactorKeysEnum.DESCRIPTION.getKey()),
				ReactorKeysEnum.DESCRIPTION.getKey());
		boolean override = overrideFrame();

		StringBuilder rsb = new StringBuilder();
		// check if there are filters on the frame. if so then need to run
		// algorithm on subsetted data
		String tempFrame = "DocSim" + Utility.getRandomString(5);
		if (!rFrame.getFrameFilters().isEmpty()) {
			SelectQueryStruct qs = new SelectQueryStruct();
			List<String> selectedCols = new ArrayList<>();
			selectedCols.add(instanceCol);
			selectedCols.add(description);
			for (String s : selectedCols) {
				qs.addSelector(new QueryColumnSelector(s));
			}
			qs.setImplicitFilters(rFrame.getFrameFilters());
			qs = QSAliasToPhysicalConverter.getPhysicalQs(qs, meta);
			RInterpreter interp = new RInterpreter();
			interp.setQueryStruct(qs);
			interp.setDataTableName(dataFrame);
			interp.setColDataTypes(meta.getHeaderToTypeMap());
			String query = interp.composeQuery();
			rsb.append(tempFrame + "<- {" + query + "};");
		} else {
			rsb.append(tempFrame + "<- " + dataFrame + ";");
		}

		// source the r script used for the document similarity routine;
		// R expects forward slashes in the path
		String similarityScriptFilePath = getBaseFolder() + "\\R\\AnalyticsRoutineScripts\\DocumentSimilarity.R";
		similarityScriptFilePath = similarityScriptFilePath.replace("\\", "/");
		rsb.append("source(\"" + similarityScriptFilePath + "\");");

		// create temp frame with column and description
		rsb.append(tempFrame + " <-  data.frame(description=" + tempFrame + "$" + description + ", column=" + tempFrame
				+ "$" + instanceCol + ");");
		// make columns as character
		rsb.append(tempFrame + "$column<- as.character(" + tempFrame + "$column);");
		rsb.append(tempFrame + "$description<- as.character(" + tempFrame + "$description);");

		// always compute into a fresh variable: if the result were written
		// straight into the existing frame name, the exists() check below
		// would pass even when the routine failed
		String resultVar = "SimFrame" + Utility.getRandomString(5);
		rsb.append(resultVar + "<- getDocumentCosineSimilarity(" + tempFrame + ");");
		rsb.append(RSyntaxHelper.asDataTable(resultVar, resultVar));

		// r clean up
		rsb.append("rm(getDocumentCosineSimilarity, " + tempFrame + ");");
		this.rJavaTranslator.runR(rsb.toString());

		// check if similarity frame exists
		String frameExists = "exists('" + resultVar + "')";
		if (!this.rJavaTranslator.getBoolean(frameExists)) {
			throw fatalError("Unable to generate document similarity");
		}

		// only now that the result is known to exist, move it over the
		// original frame variable when overriding
		String similarityFrame = resultVar;
		if (override) {
			this.rJavaTranslator.runR(dataFrame + "<- " + resultVar + "; rm(" + resultVar + ");");
			similarityFrame = dataFrame;
		}

		// create new R DataTable from results
		RDataTable returnTable = createNewFrameFromVariable(similarityFrame);
		NounMetadata retNoun = new NounMetadata(returnTable, PixelDataType.FRAME);
		// replace existing frame
		if (override) {
			this.insight.setDataMaker(returnTable);
			retNoun = new NounMetadata(returnTable, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE, PixelOperationType.FRAME_HEADERS_CHANGE);
		}

		return retNoun;
	}

	/**
	 * Reads the optional override flag from the noun store.
	 *
	 * @return the first value of the OVERRIDE noun when present, otherwise
	 *         {@code true}
	 */
	private boolean overrideFrame() {
		GenRowStruct overrideGrs = this.store.getNoun(ReactorKeysEnum.OVERRIDE.getKey());
		if (overrideGrs != null && !overrideGrs.isEmpty()) {
			return (boolean) overrideGrs.get(0);
		}
		// default is to override
		return true;
	}

	/**
	 * Returns {@code value} unchanged when it is non-empty, otherwise fails
	 * the pixel with a message naming the missing key.
	 *
	 * @param value
	 *            the input value read from the key/value map
	 * @param key
	 *            the reactor key the value was read under
	 * @return {@code value}
	 * @throws SemossPixelException
	 *             if {@code value} is null or empty
	 */
	private static String requireInput(String value, String key) {
		if (value == null || value.isEmpty()) {
			throw fatalError("Need to define the " + key + " input");
		}
		return value;
	}

	/**
	 * Builds an ERROR-typed pixel exception that stops the thread of
	 * execution.
	 *
	 * @param message
	 *            the error text carried by the exception's noun
	 * @return the exception, ready to be thrown by the caller
	 */
	private static SemossPixelException fatalError(String message) {
		NounMetadata error = new NounMetadata(message, PixelDataType.CONST_STRING, PixelOperationType.ERROR);
		SemossPixelException spe = new SemossPixelException(error);
		spe.setContinueThreadOfExecution(false);
		return spe;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy