All downloads are free. Search and download functionality uses the official Maven repository.

prerna.reactor.algorithms.RunSentimentAnalysisReactor Maven / Gradle / Ivy

The newest version!
package prerna.reactor.algorithms;

import java.util.Arrays;
import java.util.List;
import java.util.Vector;

import org.apache.logging.log4j.Logger;

import prerna.ds.r.RDataTable;
import prerna.ds.r.RSyntaxHelper;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.DIHelper;
import prerna.util.Utility;

/**
 * Reactor that runs sentiment analysis on a text column of an R frame.
 * <p>
 * The user supplies a string column; the reactor sources
 * {@code R/AnalyticsRoutineScripts/sentiment.R} from the configured
 * {@code BaseFolder}, calls its {@code score_sentiment} function on the
 * distinct values of that column and merges the resulting columns back into
 * the frame. Optionally a grouping column can be supplied to also obtain an
 * aggregate sentiment per group, and additional emotion columns can be
 * requested.
 */
public class RunSentimentAnalysisReactor extends AbstractRFrameReactor {

	private static final String SENTIMENT_COLUMN_KEY = "sentimentCol";
	private static final String GROUP_COLUMN_KEY = "groupCol";
	private static final String EMOTIONS_KEY = "addEmotionCols";

	protected static final String CLASS_NAME = RunSentimentAnalysisReactor.class.getName();

	public RunSentimentAnalysisReactor() {
		this.keysToGet = new String[] { SENTIMENT_COLUMN_KEY, GROUP_COLUMN_KEY , EMOTIONS_KEY , ReactorKeysEnum.PANEL.getKey() };
	}

	/**
	 * Scores the sentiment column in R and merges the result into the frame.
	 *
	 * @return a frame noun flagged with data and header changes, carrying a
	 *         success message as an additional return
	 * @throws IllegalArgumentException if no sentiment column was supplied or
	 *                                  if the R routine did not produce any rows
	 */
	@Override
	public NounMetadata execute() {
		// get inputs
		init();
		organizeKeys();
		Logger logger = this.getLogger(CLASS_NAME);
		String baseFolder = DIHelper.getInstance().getProperty("BaseFolder");
		String sentimentCol = this.keyValue.get(SENTIMENT_COLUMN_KEY);
		String groupCol = this.keyValue.get(GROUP_COLUMN_KEY);
		boolean addEmotionCols = addEmotionCols();

		// fail fast: without this check the literal "null" would be spliced into the R script
		if (sentimentCol == null || sentimentCol.isEmpty()) {
			throw new IllegalArgumentException("Need to define the column to run the sentiment analysis on");
		}

		RDataTable dataFrame = (RDataTable) getFrame();
		String frameName = dataFrame.getName();
		List<String> colHeaders = Arrays.asList(dataFrame.getColumnHeaders());
		// the aggregate column is optional
		boolean hasAgg = groupCol != null && !groupCol.isEmpty();
		// randomized names so the temporary R variables do not clash with existing ones
		String sentimentFrame = "sentimentFrame" + Utility.getRandomString(5);
		String isError = "sentimentError" + Utility.getRandomString(5);

		logger.info("Calculating column sentiment...");

		// check if packages are installed
		String[] packages = { "sentimentr" };
		this.rJavaTranslator.checkPackages(packages);

		// source the routine, build the input frame and score it
		this.rJavaTranslator.runR(buildScoringScript(baseFolder, frameName, sentimentFrame, sentimentCol,
				groupCol, hasAgg, addEmotionCols, isError));

		// the marker variable is only assigned when the scored frame has rows,
		// so its absence means the routine failed or produced nothing
		boolean failed = this.rJavaTranslator.getBoolean("!exists(\"" + isError + "\")");
		if (failed) {
			// do not leave the temporary frame behind in the R environment
			removeTempVariables(sentimentFrame, isError);
			throw new IllegalArgumentException("Sentiment could not be calculated");
		}

		// remove the columns from the current frame if they are about to be added
		// to avoid duplicates
		String[] newColHeaders = this.rJavaTranslator.getColumns(sentimentFrame);
		removeDuplicateColumns(frameName, colHeaders, newColHeaders);

		// merge the scores back to the frame based on the input cols and convert to data table
		this.rJavaTranslator.runR(buildMergeScript(frameName, sentimentFrame, sentimentCol, groupCol, hasAgg));

		// return the new frame
		RDataTable newTable = createNewFrameFromVariable(frameName);
		this.insight.setDataMaker(newTable);
		NounMetadata noun = new NounMetadata(newTable, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE, PixelOperationType.FRAME_HEADERS_CHANGE);
		this.insight.getVarStore().put(frameName, noun);

		// also return a success message and result
		noun.addAdditionalReturn(NounMetadata.getSuccessNounMessage("Sentiment analysis ran successfully!"));

		// garbage cleanup
		removeTempVariables(sentimentFrame, isError);

		// return all
		return noun;
	}

	/**
	 * Builds the R script that sources the sentiment routine, creates the
	 * de-duplicated input frame, scores it and sets the success marker.
	 */
	private static String buildScoringScript(String baseFolder, String frameName, String sentimentFrame,
			String sentimentCol, String groupCol, boolean hasAgg, boolean addEmotionCols, String isError) {
		StringBuilder rsb = new StringBuilder();

		// source the files
		rsb.append("source(\"" + baseFolder.replace("\\", "/") + "/R/AnalyticsRoutineScripts/sentiment.R\");");

		// let's first create the input frame
		if (!hasAgg) {
			rsb.append(sentimentFrame + " <- data.frame(" + sentimentCol + " = " + frameName + "$" + sentimentCol + ");");
		} else {
			rsb.append(sentimentFrame + " <- data.frame(" + groupCol + " = " + frameName + "$" + groupCol + " " + " , "
					+ sentimentCol + " = " + frameName + "$" + sentimentCol + ");");
		}

		// make sure to minimize number of rows with unique
		rsb.append(sentimentFrame + " <- unique(" + sentimentFrame + ");");

		// lets run the function
		rsb.append(sentimentFrame + " <- " + "score_sentiment(" + sentimentFrame);
		rsb.append(", review_col = \"" + sentimentCol + "\"");
		if (hasAgg) {
			rsb.append(", aggr_col = \"" + groupCol + "\"");
		} else {
			rsb.append(", aggr_col = NULL");
		}
		// R boolean literals are upper case
		rsb.append(", emtn = " + (addEmotionCols ? "TRUE" : "FALSE") + ");");

		// the marker is only created when the scored frame has rows
		rsb.append("if(nrow(" + sentimentFrame + ") > 0 ) {" + isError + "<- \"\";}\n");
		return rsb.toString();
	}

	/**
	 * Builds the R script that renames the key columns of the scored frame
	 * back to the input column names, merges it into the main frame and
	 * converts the result to a data table.
	 */
	private static String buildMergeScript(String frameName, String sentimentFrame, String sentimentCol,
			String groupCol, boolean hasAgg) {
		StringBuilder rsb = new StringBuilder();
		rsb.append("colnames("+sentimentFrame+")[colnames("+sentimentFrame+")==\""+ sentimentCol+"_of_" + sentimentCol +"\"] <- \""+sentimentCol+"\";");
		if (!hasAgg) {
			rsb.append(frameName + " <- merge(" + frameName + "," + sentimentFrame + ",by=c(\""+ sentimentCol +"\"));");
		} else {
			rsb.append("colnames("+sentimentFrame+")[colnames("+sentimentFrame+")==\""+ groupCol+"_of_" + sentimentCol +"\"] <- \""+groupCol+"\";");
			rsb.append(frameName + " <- merge(" + frameName + "," + sentimentFrame + ",by=c(\""+ groupCol +"\",\"" + sentimentCol + "\"));");
		}

		// convert to data table
		rsb.append(RSyntaxHelper.asDataTable(frameName, frameName));
		return rsb.toString();
	}

	/**
	 * Removes the temporary R variables created by this reactor. Each removal
	 * is guarded with {@code exists} because, on the failure path, one or both
	 * variables may never have been created.
	 */
	private void removeTempVariables(String sentimentFrame, String isError) {
		this.rJavaTranslator.executeEmptyR(
				"if(exists(\"" + sentimentFrame + "\")) rm(" + sentimentFrame + "); "
				+ "if(exists(\"" + isError + "\")) rm(" + isError + "); gc();");
	}

	/**
	 * Drops from the frame every column that is also present in the new
	 * headers, so the subsequent merge does not produce duplicate columns.
	 *
	 * @param frameName         name of the R frame variable to modify
	 * @param currentColHeaders headers currently present on the frame
	 * @param newColHeaders     headers of the frame about to be merged in
	 */
	private void removeDuplicateColumns(String frameName, List<String> currentColHeaders, String[] newColHeaders) {
		if (newColHeaders == null) {
			return;
		}
		// comma separated list of the overlapping column names
		StringBuilder colsToRemove = new StringBuilder();
		for (String newCol : newColHeaders) {
			if (currentColHeaders.contains(newCol)) {
				if (colsToRemove.length() > 0) {
					colsToRemove.append(",");
				}
				colsToRemove.append(newCol);
			}
		}
		if (colsToRemove.length() == 0) {
			return;
		}

		String script = frameName + "<- subset(" + frameName + ", select=-c(" + colsToRemove + "));";
		this.rJavaTranslator.runR(script);
	}

	/**
	 * Determine if the additional emotion columns should be added to the result
	 * @return the value passed for the emotions key; false when it was not provided
	 */
	private boolean addEmotionCols() {
		GenRowStruct grs = this.store.getNoun(EMOTIONS_KEY);
		if (grs == null || grs.isEmpty()) {
			return false;
		}
		Object value = grs.get(0);
		if (value instanceof Boolean) {
			return (Boolean) value;
		}
		// tolerate non-Boolean values (e.g. the string "true") instead of failing on the cast
		return Boolean.parseBoolean(String.valueOf(value));
	}

	/**
	 * Get the panel id if it was passed in through the panel key.
	 * Not referenced elsewhere in this class.
	 * @return the first value of the panel key as a string, or null when absent
	 */
	private String getPanelId() {
		// see if defined as individual key
		GenRowStruct columnGrs = this.store.getNoun(keysToGet[3]);
		if (columnGrs != null && columnGrs.size() > 0) {
			return columnGrs.get(0).toString();
		}
		return null;
	}

///////////////////////// KEYS /////////////////////////////////////

	/**
	 * Describes the reactor specific keys; any other key is delegated to the parent.
	 */
	@Override
	protected String getDescriptionForKey(String key) {
		// constant-first comparison so a null key falls through to the parent
		if (SENTIMENT_COLUMN_KEY.equals(key)) {
			return "The text column to run the sentiment analysis on";
		} else if (GROUP_COLUMN_KEY.equals(key)) {
			return "The column to group the sentiment analysis results on to show the aggregate results";
		} else if (EMOTIONS_KEY.equals(key)) {
			return "Boolean to determine whether or not to also determine the emotions behind the sentiment column text";
		} else {
			return super.getDescriptionForKey(key);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy