All downloads are free. The search and download functionality uses the official Maven repository.

prerna.reactor.frame.py.RankReactor Maven / Gradle / Ivy

The newest version!
package prerna.reactor.frame.py;

import java.util.ArrayList;
import java.util.List;

import prerna.algorithm.api.SemossDataType;
import prerna.ds.OwlTemporalEngineMeta;
import prerna.ds.py.PandasFrame;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.usertracking.AnalyticsTrackerHelper;
import prerna.util.usertracking.UserTrackerFactory;

/**
 * Reactor that adds a rank column to a {@link PandasFrame}.
 * <p>
 * Inputs (in the order of {@code keysToGet}):
 * <ol>
 * <li>the column(s) used to compute the rank</li>
 * <li>the name of the new rank column</li>
 * <li>the sort order ({@code ASC} / {@code DESC}) for each rank column; a
 * missing or blank entry means ascending</li>
 * <li>optional partition column(s); when present the rank restarts at 1
 * inside every partition</li>
 * </ol>
 * The work is done by assembling pandas statements as strings and running them
 * through {@code frame.runScript(...)}; every statement that is run is also
 * recorded with {@code addExecutedCode(...)}.
 */
public class RankReactor extends AbstractPyFrameReactor {

	private static final String PARTITION_BY_COLS = "partitionByCols";
	private static final String ASC = "ASC";
	private static final String DESC = "DESC";

	// base name of the helper column used by the partitioned ranking
	private static final String TEMP_RANK_COL = "TempRank";

	public RankReactor() {
		this.keysToGet = new String[] { ReactorKeysEnum.COLUMNS.getKey(), ReactorKeysEnum.NEW_COLUMN.getKey(),
				ReactorKeysEnum.SORT.getKey(), PARTITION_BY_COLS };
	}

	/**
	 * Computes the rank column on the Python side, sorts the frame by it,
	 * removes the helper columns, copies the new column into the wrapper cache
	 * and registers the new column in the frame metadata.
	 *
	 * @return a noun holding the frame, flagged as a header and data change
	 * @throws IllegalArgumentException if no rank column is passed, if the new
	 *                                  column name is missing, or if a sort
	 *                                  order is neither blank, ASC nor DESC
	 */
	@Override
	public NounMetadata execute() {
		organizeKeys();
		PandasFrame frame = (PandasFrame) getFrame();
		String frameName = frame.getName();

		// name of the Python wrapper object whose cache is synced at the end
		String wrapperFrameName = frame.getWrapperName();

		// at least one rank column is required
		List<String> columns = getCols(ReactorKeysEnum.COLUMNS.getKey());
		if (columns.isEmpty()) {
			throw new IllegalArgumentException("Must pass at least one column for the rank");
		}

		// keysToGet[1] is the NEW_COLUMN key set in the constructor
		String newColName = this.keyValue.get(this.keysToGet[1]);
		if (newColName == null || newColName.isEmpty()) {
			throw new IllegalArgumentException("Need to define the new column name");
		}
		// NOTE(review): presumably sanitizes the name and makes it unique
		// within the frame - confirm against getCleanNewColName
		newColName = getCleanNewColName(frame, newColName);

		List<String> partitionByCols = getCols(PARTITION_BY_COLS);

		// Resolve (and thereby validate) every sort order BEFORE any script
		// runs, so an invalid order cannot leave helper columns behind.
		List<String> ascendingFlags = new ArrayList<>(columns.size());
		for (int i = 0; i < columns.size(); i++) {
			ascendingFlags.add(getSortOrder(i, ReactorKeysEnum.SORT.getKey()));
		}

		// Names a helper column must not reuse. Without this guard a helper
		// named like the new column (e.g. ranking "Age" into "AgeRank") is
		// overwritten by the final rank and then dropped, losing the result.
		// NOTE(review): only names known to this reactor are checked; other
		// headers already present in the frame are not visible here.
		List<String> reserved = new ArrayList<>(columns);
		reserved.addAll(partitionByCols);
		reserved.add(newColName);

		String newColLiteral = pyLiteral(newColName);
		String finalRankScript;
		String sortByRankScript;
		String dropTempColsScript;

		if (!partitionByCols.isEmpty()) {
			// 'p1','p2' - reused for the groupby and the final sort
			String partitionList = joinLiterals(partitionByCols);

			// cols = partition columns followed by the rank columns;
			// partition columns are always sorted ascending
			StringBuilder colsArray = new StringBuilder("[").append(partitionList);
			StringBuilder ascending = new StringBuilder("[");
			for (int i = 0; i < partitionByCols.size(); i++) {
				if (i > 0) {
					ascending.append(",");
				}
				ascending.append("True");
			}
			for (int i = 0; i < columns.size(); i++) {
				colsArray.append(", ").append(pyLiteral(columns.get(i)));
				ascending.append(", ").append(ascendingFlags.get(i));
			}
			colsArray.append("]");
			ascending.append("]");

			String tempRankLiteral = pyLiteral(uniqueTempName(TEMP_RANK_COL, reserved));

			runAndRecord(frame, "cols= " + colsArray);

			// sort, then groupby + ngroup labels each distinct combination
			// with a running group number
			runAndRecord(frame, frameName + "[" + tempRankLiteral + "] =" + frameName + ".sort_values(cols,"
					+ " ascending=" + ascending + ")" + ".groupby(cols,sort=False, dropna=True).ngroup()");

			// subtracting the minimum group number inside each partition
			// makes the rank start at 1 for every partition
			finalRankScript = frameName + "[" + newColLiteral + "]=" + frameName + "[" + tempRankLiteral + "] - "
					+ frameName + ".groupby([" + partitionList + "])[" + tempRankLiteral
					+ "].transform('min') + 1";

			sortByRankScript = frameName + ".sort_values([" + partitionList + "," + newColLiteral
					+ "], inplace=True)";

			dropTempColsScript = frameName + " = " + frameName + ".drop(columns=[" + tempRankLiteral + "])";
		} else {
			// rank every column on its own into a helper column
			List<String> tempCols = new ArrayList<>(columns.size());
			for (int i = 0; i < columns.size(); i++) {
				String tempName = uniqueTempName(columns.get(i) + "Rank", reserved);
				reserved.add(tempName);
				tempCols.add(tempName);

				runAndRecord(frame, frameName + "[\"" + escapePy(tempName) + "\"] = " + frameName + "[\""
						+ escapePy(columns.get(i)) + "\"].rank(method = 'min',na_option='bottom',ascending="
						+ ascendingFlags.get(i) + ")");
			}
			String tempList = joinLiterals(tempCols);

			// the final rank script refers to the Python variable "cols",
			// so it has to be defined before that script runs
			runAndRecord(frame, "cols = [" + tempList + "]");

			// helper columns already encode the direction, hence ascending=True
			finalRankScript = frameName + "[" + newColLiteral + "] =" + frameName
					+ ".sort_values(cols, ascending=True).groupby(cols, sort=False,dropna=True).ngroup() + 1";

			sortByRankScript = frameName + ".sort_values([" + newColLiteral + "], inplace=True)";

			dropTempColsScript = frameName + " = " + frameName + ".drop(columns=[ " + tempList + "])";
		}

		// generate the final rank, order the frame by it, then remove the
		// helper columns since only the final rank is kept
		runAndRecord(frame, finalRankScript);
		runAndRecord(frame, sortByRankScript);
		runAndRecord(frame, dropTempColsScript);

		// copy the new column into the wrapper's cached data
		runAndRecord(frame, wrapperFrameName + ".cache['data'][[" + newColLiteral + "]]" + "=" + frameName + "["
				+ newColLiteral + "]");

		// update meta data
		OwlTemporalEngineMeta metaData = frame.getMetaData();
		String uniqueName = frameName + "__" + newColName;
		metaData.addProperty(frameName, uniqueName);
		metaData.setAliasToProperty(uniqueName, newColName);
		metaData.setDataTypeToProperty(uniqueName, SemossDataType.DOUBLE.toString());
		metaData.setDerivedToProperty(uniqueName, true);
		frame.syncHeaders();
		// to avoid the sorting of first column by default
		// this.insight.getPragmap().put("IMPLICIT_ORDER", false);

		// NEW TRACKING
		UserTrackerFactory.getInstance().trackAnalyticsWidget(this.insight, frame, "Rank",
				AnalyticsTrackerHelper.getHashInputs(this.store, this.keysToGet));

		// return the output
		NounMetadata retNoun = new NounMetadata(frame, PixelDataType.FRAME, PixelOperationType.FRAME_HEADERS_CHANGE,
				PixelOperationType.FRAME_DATA_CHANGE);
		retNoun.addAdditionalReturn(NounMetadata.getSuccessNounMessage("Successfully performed Rank"));
		return retNoun;
	}

	//////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////
	///////////////////////// SCRIPT HELPERS /////////////////////////////
	//////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////

	/**
	 * Runs a script against the frame and records it as executed code.
	 */
	private void runAndRecord(PandasFrame frame, String script) {
		frame.runScript(script);
		this.addExecutedCode(script);
	}

	/**
	 * Returns {@code base}, or {@code base_1}, {@code base_2}, ... - the first
	 * candidate that is not contained in {@code reserved}.
	 */
	private static String uniqueTempName(String base, List<String> reserved) {
		String candidate = base;
		int suffix = 1;
		while (reserved.contains(candidate)) {
			candidate = base + "_" + suffix;
			suffix++;
		}
		return candidate;
	}

	/**
	 * Escapes backslashes, both quote characters and line breaks so the value
	 * can be embedded in a single- or double-quoted Python string literal.
	 * Values without those characters are returned unchanged.
	 */
	private static String escapePy(String value) {
		StringBuilder escaped = new StringBuilder(value.length());
		for (int i = 0; i < value.length(); i++) {
			char c = value.charAt(i);
			switch (c) {
			case '\\':
				escaped.append("\\\\");
				break;
			case '\'':
				escaped.append("\\'");
				break;
			case '"':
				escaped.append("\\\"");
				break;
			case '\n':
				escaped.append("\\n");
				break;
			case '\r':
				escaped.append("\\r");
				break;
			default:
				escaped.append(c);
			}
		}
		return escaped.toString();
	}

	/**
	 * Wraps a value in single quotes as a Python string literal.
	 */
	private static String pyLiteral(String value) {
		return "'" + escapePy(value) + "'";
	}

	/**
	 * Joins the values as comma separated Python string literals, e.g.
	 * {@code 'a','b'}.
	 */
	private static String joinLiterals(List<String> values) {
		StringBuilder joined = new StringBuilder();
		for (int i = 0; i < values.size(); i++) {
			if (i > 0) {
				joined.append(",");
			}
			joined.append(pyLiteral(values.get(i)));
		}
		return joined.toString();
	}

	//////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////
	///////////////////////// GET PIXEL INPUT ////////////////////////////
	//////////////////////////////////////////////////////////////////////
	//////////////////////////////////////////////////////////////////////

	/**
	 * Reads every value stored under the given key as a string.
	 *
	 * @param key the noun key to read from the store
	 * @return the values in input order; empty when the key is absent or has
	 *         no values
	 */
	private List<String> getCols(String key) {
		List<String> columns = new ArrayList<>();
		GenRowStruct colGrs = this.store.getNoun(key);
		if (colGrs != null && !colGrs.isEmpty()) {
			for (int selectIndex = 0; selectIndex < colGrs.size(); selectIndex++) {
				columns.add(String.valueOf(colGrs.get(selectIndex)));
			}
		}
		return columns;
	}

	/**
	 * Resolves the sort order for the rank column at the given position.
	 *
	 * @param index position of the rank column
	 * @param key   the noun key holding the sort orders
	 * @return the Python boolean literal for pandas' {@code ascending}:
	 *         {@code "True"} when no order is given for that position, when it
	 *         is blank or ASC; {@code "False"} when it is DESC
	 * @throws IllegalArgumentException if the order is anything else
	 */
	private String getSortOrder(int index, String key) {
		GenRowStruct grs = this.store.getNoun(key);

		// if no sort order is passed, ascending order will be applied
		if (grs == null || grs.isEmpty() || index >= grs.size()) {
			return "True";
		}

		// read the value once; a null entry is treated like a blank one
		Object rawOrder = grs.get(index);
		String order = rawOrder == null ? "" : rawOrder.toString();

		if (order.isEmpty() || order.equalsIgnoreCase(ASC)) {
			return "True";
		}
		if (order.equalsIgnoreCase(DESC)) {
			return "False";
		}
		throw new IllegalArgumentException("Column order not valid");
	}

	/**
	 * Supplies the description for the reactor-specific partition key and
	 * defers to the parent class for every other key.
	 */
	@Override
	protected String getDescriptionForKey(String key) {
		if (key.equals(PARTITION_BY_COLS)) {
			return "The columns used for partitioning the rank";
		}
		return super.getDescriptionForKey(key);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy