All downloads are free. The search and download features use the official Maven repository.

prerna.reactor.federation.FederationBestMatches Maven / Gradle / Ivy

The newest version!
package prerna.reactor.federation;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import prerna.algorithm.api.SemossDataType;
import prerna.ds.r.RDataTable;
import prerna.engine.api.IDatabaseEngine;
import prerna.engine.api.IRawSelectWrapper;
import prerna.masterdatabase.utility.MasterDatabaseUtility;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.rdf.engine.wrappers.WrapperManager;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.Constants;
import prerna.util.DIHelper;
import prerna.util.Utility;

/**
 * Reactor that compares the values of one column of the current R frame with the values
 * of a column (or concept) queried from a database, by calling the R function
 * {@code best_match} from {@code advanced_federation_blend.r}. The result is a new R
 * frame with the columns {@code col1}, {@code col2} and {@code distance}, ordered by
 * distance, which is also stored in the insight variable store under the output frame name.
 *
 * <p>Inputs (see {@link #keysToGet}): database, concept, column, {@value #FRAME_COLUMN}
 * and {@value #OUTPUT_FRAME_NAME}.</p>
 *
 * @deprecated Please use {@code FuzzyMatchesReactor} instead.
 */
@Deprecated
public class FederationBestMatches extends AbstractRFrameReactor {
	private static final Logger logger = LogManager.getLogger(FederationBestMatches.class);

	/** Key for the column of the existing frame to join on. */
	public static final String FRAME_COLUMN = "frameCol";
	/** Key for the (optional) name of the R variable / frame that receives the matches. */
	public static final String OUTPUT_FRAME_NAME = "outputFrame";

	public FederationBestMatches() {
		this.keysToGet = new String[] {
				ReactorKeysEnum.DATABASE.getKey(),
				ReactorKeysEnum.CONCEPT.getKey(),
				ReactorKeysEnum.COLUMN.getKey(),
				FRAME_COLUMN,
				OUTPUT_FRAME_NAME
		};
	}

	/**
	 * Builds the two value vectors in R, runs {@code best_match} on them and wraps the
	 * resulting R variable in a new frame.
	 *
	 * @return a FRAME noun for the matches frame, carrying the number of exact matches
	 *         (rows with {@code distance == 0}) as an additional CONST_INT return
	 * @throws IllegalArgumentException if the database values cannot be queried/exported,
	 *         or if R does not return an exact-match count
	 */
	@Override
	public NounMetadata execute() {
		init();
		organizeKeys();
		String baseFolder = DIHelper.getInstance().getProperty("BaseFolder");

		// check if packages are installed
		String[] packages = { "stringdist", "data.table" };
		this.rJavaTranslator.checkPackages(packages);

		String newDatabase = this.keyValue.get(this.keysToGet[0]);
		String newTable = this.keyValue.get(this.keysToGet[1]);
		String newCol = this.keyValue.get(this.keysToGet[2]);
		String frameCol = this.keyValue.get(this.keysToGet[3]);

		// R variable that will hold the results (matches, distance)
		final String matchesFrame = getMatchesName();
		// R vector of all data in the frame join column
		final String rCol1 = matchesFrame + "col1";
		// R vector of all data in the incoming join column
		final String rCol2 = matchesFrame + "col2";

		IDatabaseEngine newColDatabase = Utility.getDatabase(newDatabase);
		RDataTable frame = (RDataTable) getFrame();
		String frameName = frame.getName();
		String rTable1 = rCol1 + " <- as.character(" + frameName + "$" + frameCol + ");";

		// create the query that pulls the values to be joined
		SelectQueryStruct qs = new SelectQueryStruct();
		qs.setEngine(newColDatabase);

		// We do not know up front whether the input is a concept or a property:
		// if no physical uri exists for table__column, the table is itself the concept
		// and only the table is selected; otherwise the column is a property of the table.
		QueryColumnSelector selector;
		String conceptDataType;
		String qualifiedColumn = newTable + "__" + newCol;
		if (newColDatabase.getPhysicalUriFromPixelSelector(qualifiedColumn) == null) {
			selector = new QueryColumnSelector(newTable);
			conceptDataType = MasterDatabaseUtility.getBasicDataType(newDatabase, newTable, null);
		} else {
			selector = new QueryColumnSelector(qualifiedColumn);
			conceptDataType = MasterDatabaseUtility.getBasicDataType(newDatabase, newCol, newTable);
		}
		qs.addSelector(selector);

		// get the info to write this data to a tsv
		// NOTE(review): the type map and the later "$<column>" lookup in R are keyed by newCol
		// even in the concept branch, where only newTable is selected - confirm the exported
		// header is newCol in that case.
		Map<String, SemossDataType> typesMap = new HashMap<>();
		typesMap.put(newCol, SemossDataType.convertStringToDataType(conceptDataType));
		String newFileLoc = DIHelper.getInstance().getProperty(Constants.INSIGHT_CACHE_DIR) + "/"
				+ Utility.getRandomString(6) + ".tsv";

		// exec query, write it to a temp file and load that file into R
		File newFile = writeQueryToFile(newColDatabase, qs, typesMap, newFileLoc);
		loadColumnIntoR(newFile, rCol2, newCol);

		// create the vector for the frame column
		this.rJavaTranslator.executeEmptyR(rTable1);

		// source the matching script and run it on the two vectors
		String bestMatchScript = "source(\"" + baseFolder + "\\R\\Recommendations\\advanced_federation_blend.r\") ; "
				+ matchesFrame + " <- best_match(" + rCol1 + "," + rCol2 + ");";
		// R expects forward slashes in the path
		bestMatchScript = bestMatchScript.replace("\\", "/");
		this.rJavaTranslator.runR(bestMatchScript);

		// numeric distance column, keep only the needed columns, drop duplicate rows and
		// order by distance. De-duplication happens BEFORE ordering so the row indices
		// returned by order() refer to the same frame they are applied to.
		String combineScript = matchesFrame + "$distance <- as.numeric(" + matchesFrame + "$dist);"
				+ matchesFrame + "<-" + matchesFrame + "[,c(\"col1\",\"col2\",\"distance\")]; "
				+ matchesFrame + "<-unique(" + matchesFrame + ");"
				+ matchesFrame + "<-" + matchesFrame + "[order(" + matchesFrame + "$distance),] ;";
		this.rJavaTranslator.runR(combineScript + matchesFrame + " <- as.data.table(" + matchesFrame + ");");

		// convert col1/col2 from factor to character
		String convertType = matchesFrame + "$col1<-as.character(" + matchesFrame + "$col1);"
				+ matchesFrame + "$col2<-as.character(" + matchesFrame + "$col2);";
		this.rJavaTranslator.runR(convertType);

		// remove the temporary vectors
		this.rJavaTranslator.runR("rm(" + rCol1 + "," + rCol2 + ")");

		RDataTable returnTable = createNewFrameFromVariable(matchesFrame);
		NounMetadata retNoun = new NounMetadata(returnTable, PixelDataType.FRAME);

		// get count of exact matches
		String exactMatchCount = this.rJavaTranslator.getString(
				"as.character(nrow(" + matchesFrame + "[" + matchesFrame + "$distance == 0,]))");
		if (exactMatchCount == null) {
			throw new IllegalArgumentException("No matches found.");
		}
		int val = Integer.parseInt(exactMatchCount);
		retNoun.addAdditionalReturn(new NounMetadata(val, PixelDataType.CONST_INT));

		this.insight.getVarStore().put(matchesFrame, retNoun);
		return retNoun;
	}

	/**
	 * Runs the query against the engine and writes the result to a tab separated file.
	 *
	 * @param engine   the database to query
	 * @param qs       the query to execute
	 * @param typesMap data type of the exported column
	 * @param fileLoc  location of the file to write
	 * @return the written file, never {@code null}
	 * @throws IllegalArgumentException if the query or the export fails; the rest of the
	 *         reactor cannot run without these values, so we stop here instead of letting
	 *         R fail later on an undefined variable
	 */
	private File writeQueryToFile(IDatabaseEngine engine, SelectQueryStruct qs,
			Map<String, SemossDataType> typesMap, String fileLoc) {
		File written = null;
		IRawSelectWrapper wrapper = null;
		try {
			wrapper = WrapperManager.getInstance().getRawWrapper(engine, qs);
			written = Utility.writeResultToFile(fileLoc, wrapper, typesMap, "\t");
		} catch (Exception e) {
			logger.error(Constants.STACKTRACE, e);
			// do not leave a partially written file behind
			deleteTempFile(new File(fileLoc));
			throw new IllegalArgumentException("Unable to retrieve the values to match against from the database", e);
		} finally {
			if (wrapper != null) {
				try {
					wrapper.close();
				} catch (IOException e) {
					logger.error(Constants.STACKTRACE, e);
				}
			}
		}
		if (written == null) {
			throw new IllegalArgumentException("Unable to retrieve the values to match against from the database");
		}
		return written;
	}

	/**
	 * Loads the exported file into R and reduces it to a character vector stored in
	 * {@code rVariable}. The file is always deleted afterwards, even if R fails.
	 *
	 * @param dataFile  the tab separated file to read
	 * @param rVariable name of the R variable to assign
	 * @param column    name of the column to extract from the loaded table
	 */
	private void loadColumnIntoR(File dataFile, String rVariable, String column) {
		try {
			String rPath = dataFile.getAbsolutePath().replace("\\", "/");
			this.rJavaTranslator.runR(rVariable + " <- fread(\"" + rPath + "\", sep=\"\t\");");
			this.rJavaTranslator.runR(rVariable + " <- as.character(" + rVariable + "$" + column + ")");
		} finally {
			deleteTempFile(dataFile);
		}
	}

	/**
	 * Best-effort removal of a temporary file; a failed deletion is logged, not thrown.
	 *
	 * @param file the file to delete, may not exist
	 */
	private void deleteTempFile(File file) {
		if (file.exists() && !file.delete()) {
			logger.warn("Unable to delete temporary file {}", file.getAbsolutePath());
		}
	}

	///////////////////////// KEYS /////////////////////////////////////

	@Override
	protected String getDescriptionForKey(String key) {
		if (key.equals(FRAME_COLUMN)) {
			return "The column from the existing frame to join on";
		} else if (key.equals(OUTPUT_FRAME_NAME)) {
			return "Specify the output frame name";
		} else {
			return super.getDescriptionForKey(key);
		}
	}

	/**
	 * @return the output frame name passed in by the caller, or a random name ending in
	 *         "adFed" when none was provided
	 */
	private String getMatchesName() {
		String matchesFrame = this.keyValue.get(this.keysToGet[4]);
		if (matchesFrame == null || matchesFrame.isEmpty()) {
			matchesFrame = Utility.getRandomString(8) + "adFed";
		}
		return matchesFrame;
	}

	/**
	 * @return the name of this reactor
	 */
	public String getName()
	{
		return "FederationBestMatches";
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy