
prerna.reactor.federation.FederationBlend Maven / Gradle / Ivy
The newest version!
package prerna.reactor.federation;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import prerna.algorithm.api.SemossDataType;
import prerna.ds.OwlTemporalEngineMeta;
import prerna.ds.r.RDataTable;
import prerna.ds.r.RSyntaxHelper;
import prerna.engine.api.IDatabaseEngine;
import prerna.engine.api.IRawSelectWrapper;
import prerna.masterdatabase.utility.MasterDatabaseUtility;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.rdf.engine.wrappers.WrapperManager;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.AddHeaderNounMetadata;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.Constants;
import prerna.util.DIHelper;
import prerna.util.Utility;
/*
* Please use {@link #FuzzyMergeReactor.class}
*
*/
@Deprecated
public class FederationBlend extends AbstractRFrameReactor {
private static final Logger logger = LogManager.getLogger(FederationBlend.class);
public static final String JOIN_TYPE = "joinType";
public static final String FRAME_COLUMN = "frameCol";
public static final String ADDITIONAL_COLS = "additionalCols";
public static final String MATCHES = "matches";
public static final String NONMATCHES = "nonMatches";
public static final String PROP_MAX = "propagation";
public static final String FED_FRAME = "fedFrame";
public FederationBlend() {
this.keysToGet = new String[] { JOIN_TYPE, FRAME_COLUMN, ReactorKeysEnum.DATABASE.getKey(), ReactorKeysEnum.CONCEPT.getKey(), ReactorKeysEnum.COLUMN.getKey(), ADDITIONAL_COLS, MATCHES, NONMATCHES, FED_FRAME, PROP_MAX };
}
@Override
public NounMetadata execute() {
init();
organizeKeys();
// get all inputs
String joinType = this.keyValue.get(this.keysToGet[0]);
String frameCol = this.keyValue.get(this.keysToGet[1]);
String newDb = this.keyValue.get(this.keysToGet[2]);
String newTable = this.keyValue.get(this.keysToGet[3]);
String newCol = this.keyValue.get(this.keysToGet[4]);
List columns = getColumns();
List allMatches = getInputList(MATCHES);
List nonMatches = getInputList(NONMATCHES);
String matchesFrame = this.keyValue.get(this.keysToGet[8]);
String propValue = this.keyValue.get(this.keysToGet[9]);
final String linkFrame = matchesFrame + "link";
String rand = Utility.getRandomString(8);
final String trg = "trg_" + rand;
String updatedNewCol = newCol;
// SUMMARY: The blend R script uses a link table of all matches selected:
//
// Build the base link table using the matchesFrame from best matches reactor using a max distance filter (prop value)
//
// Add any specific matches the user selected in the UI
//
// Remove any specific non-matches the user selected in UI
//
// Build the target table - join column and additional columns to bring in (account for duplicate headers)
//
// Convert data types of frame join col and target join col to chr if they are numbers because the link frame will only match chr values
//
// Execute blend script
//
// Convert data types back to num if they were converted
// check if packages are installed
String[] packages = { "stringdist", "data.table" };
this.rJavaTranslator.checkPackages(packages);
// add propagation values to link frame if its not null
if (propValue != null && !(propValue.isEmpty())) {
Float intVal = (100 - Float.parseFloat(propValue)) / 100;
String formattedString = String.format("%.08f", intVal);
propValue = formattedString;
} else {
propValue = "0";
}
// create link table by filtering for propagation value or less from fed frame
this.rJavaTranslator.runR("library(data.table); " + linkFrame + " <- " + matchesFrame + "[" + matchesFrame + "$distance <= " + propValue + ",]");
// add combined lookup column
this.rJavaTranslator.runR(matchesFrame + "$combined <- paste(" + matchesFrame + "$col1, " + matchesFrame + "$col2, sep=\"==\");");
// add all matches
if (allMatches != null && !(allMatches.isEmpty())) {
StringBuilder col1Builder = new StringBuilder();
StringBuilder col2Builder = new StringBuilder();
StringBuilder col3Builder = new StringBuilder();
for (int i = 0; i < allMatches.size(); i++) {
if (i != 0) {
col1Builder.append(",");
col2Builder.append(",");
col3Builder.append(",");
}
String match = allMatches.get(i);
String col1 = this.rJavaTranslator.getString("as.character(" + matchesFrame + "[" + matchesFrame
+ "$combined %in% c(\"" + match + "\"), ]$col1)");
String col2 = this.rJavaTranslator.getString("as.character(" + matchesFrame + "[" + matchesFrame
+ "$combined %in% c(\"" + match + "\"), ]$col2)");
String dist = this.rJavaTranslator.getString("as.character(" + matchesFrame + "[" + matchesFrame
+ "$combined %in% c(\"" + match + "\"), ]$distance)");
col1Builder.append("\"" + col1 + "\"");
col2Builder.append("\"" + col2 + "\"");
col3Builder.append(dist);
}
// add all matches provided
String script = rand + " <- data.table(\"col1\"=c(" + col1Builder + "), \"col2\"=c(" + col2Builder
+ "), \"distance\"=c(" + col3Builder + ")); " + linkFrame + " <- rbind(" + linkFrame + "," + rand
+ "); rm(" + rand + ");";
this.rJavaTranslator.runR(script);
}
// make linkframe unique
this.rJavaTranslator.runR(linkFrame + " <- unique(" + linkFrame + ");");
this.rJavaTranslator.runR(linkFrame + "$combined <- paste(" + linkFrame + "$col1, " + linkFrame + "$col2, sep=\"==\");");
// remove all non-matches
if (nonMatches != null && !(nonMatches.isEmpty())) {
StringBuilder nonMatchCombo = new StringBuilder();
for (int i = 0; i < nonMatches.size(); i++) {
if (i != 0) {
nonMatchCombo.append(",");
}
String match = nonMatches.get(i);
nonMatchCombo.append("\"" + match + "\"");
}
// remove all non matches from LinkFrame
this.rJavaTranslator.runR(linkFrame + " <- " + linkFrame + "[!" + linkFrame + "$combined %in% c(" + nonMatchCombo + ")]");
// run a check if linkframe is empty, no data will result
String link = this.rJavaTranslator.getString("all.equal(as.character(" + linkFrame + "),character(0))");
if (link == null) {
throw new IllegalArgumentException("No matching values left to blend.");
}
}
// drop combined column
this.rJavaTranslator.runR(linkFrame + " <- " + linkFrame + "[,-c(\"combined\")]");
// get R Frame
RDataTable frame = (RDataTable) getFrame();
String frameName = frame.getName();
// get all columns to federate on
List columnArray = new ArrayList<>();
columnArray.add(newCol);
List inputCols = new ArrayList<>();
if (columns != null && !(columns.isEmpty())) {
inputCols.addAll(columns);
columnArray.addAll(inputCols);
}
boolean convertJoinColFromNum = false;
// update frame meta for new cols being added
// build qs to pull the target data
IDatabaseEngine newColEngine = Utility.getDatabase(newDb);
Map typesMap = new HashMap<>();
SelectQueryStruct qs = new SelectQueryStruct();
qs.setEngine(newColEngine);
for (int i = 0; i < columnArray.size(); i++) {
String additionalColumnInput = columnArray.get(i);
// we will fill these once we figure out if it is a concept or property
QueryColumnSelector selector = null;
String conceptDataType = null;
// this is a hack
// since i dont know if it is a concept or a property
// if i get a valid data type, new col is a property for new table
// if i dont, then newtable is a concept with a prim key that i need to use
if(newColEngine.getPhysicalUriFromPixelSelector(newTable + "__" + newCol) == null) {
// we couldn't find a parent for this property
// this means it is a concept itself
// and we should only use table
selector = new QueryColumnSelector(newTable);
conceptDataType = MasterDatabaseUtility.getBasicDataType(newDb, selector.getTable(), null);
} else {
selector = new QueryColumnSelector(newTable + "__" + additionalColumnInput);
conceptDataType = MasterDatabaseUtility.getBasicDataType(newDb, selector.getColumn(), selector.getTable());
}
// add the selector to the qs
qs.addSelector(selector);
String name = getCleanNewColName(frame, selector.getAlias());
// get semoss type, update meta data and keep track
SemossDataType semossType = SemossDataType.convertStringToDataType(conceptDataType);
typesMap.put(name, semossType);
// update target join column name if it was cleaned
if(additionalColumnInput.equals(newCol)){
updatedNewCol = name;
}
// do we need to convert the join col to a string?
if(selector.getAlias().equals(newCol) && (semossType == SemossDataType.DOUBLE || semossType == SemossDataType.INT)) {
convertJoinColFromNum = true;
}
// update meta data in frame
OwlTemporalEngineMeta metaData = this.getFrame().getMetaData();
metaData.addProperty(frameName, frameName + "__" + name);
metaData.setAliasToProperty(frameName + "__" + name, name);
metaData.setDataTypeToProperty(frameName + "__" + name, semossType.toString());
metaData.setQueryStructNameToProperty(frameName + "__" + name, newDb, selector.getQueryStructName());
selector.setAlias(name);
}
// write iterator data to csv, then read csv into R table as trg
File newFile = null;
IRawSelectWrapper it = null;
try {
it = WrapperManager.getInstance().getRawWrapper(newColEngine, qs);
String newFileLoc = DIHelper.getInstance().getProperty(Constants.INSIGHT_CACHE_DIR) + "/" + Utility.getRandomString(6) + ".tsv";
newFile = Utility.writeResultToFile(newFileLoc, it, typesMap, "\t");
} catch (Exception e) {
logger.error(Constants.STACKTRACE, e);
} finally {
if(it != null) {
try {
it.close();
} catch (IOException e) {
logger.error(Constants.STACKTRACE, e);
}
}
}
if (newFile != null) {
String loadFileRScript = RSyntaxHelper.getFReadSyntax(trg, newFile.getAbsolutePath(), "\\t");
//trg + " <- fread(\"" + newFile.getAbsolutePath().replace("\\", "/") + "\", sep=\"\t\");";
this.rJavaTranslator.runR(loadFileRScript);
newFile.delete();
}
// get frame join column data type
OwlTemporalEngineMeta frameMeta = frame.getMetaData();
String unique = frameMeta.getUniqueNameFromAlias(frameCol);
String uniqueMetaName = frameMeta.getPhysicalName(unique);
String frameColType = frameMeta.getHeaderTypeAsString(uniqueMetaName);
boolean linkColWasNum = false;
// if either join columns are numbers, update the frames so it joins properly
if (SemossDataType.convertStringToDataType(frameColType).equals(SemossDataType.DOUBLE) || SemossDataType.convertStringToDataType(frameColType).equals(SemossDataType.INT)) {
// convert frame col to chr
this.rJavaTranslator.runR(frameName + "$" + frameCol + " <- as.character(" + frameName + "$" + frameCol + ");");
// make note for later to convert back to num
linkColWasNum = true;
}
if (convertJoinColFromNum) {
// convert trg table col to chr
this.rJavaTranslator.runR(trg + "$" + updatedNewCol + " <- as.character(" + trg + "$" + updatedNewCol + ");");
}
// execute blend
String blendedFrameScript = frameName + " <- blend(" + frameName + ", \"" + frameCol + "\"," + trg + ",\"" + updatedNewCol + "\", " + linkFrame + " , \"" + joinType + "\")";
this.rJavaTranslator.runR(blendedFrameScript);
// if columns were num before convert them back so
// the data types are the same as they started with
if (linkColWasNum) {
this.rJavaTranslator.runR(frameName + "$" + frameCol + " <- as.numeric(as.character(" + frameName + "$" + frameCol + "));");
}
if (convertJoinColFromNum) {
this.rJavaTranslator.runR(frameName + "$" + updatedNewCol + " <- as.numeric(as.character(" + frameName + "$" + updatedNewCol + "));");
}
// remove columns from frame that are temporary
String removeExtras = frameName + " <- " + frameName + "[,-c(\"i.and.d\",\"col1\",\"col2\",\"distance.y\",\"distance.x\")]";
this.rJavaTranslator.runR(removeExtras);
// reset the frame headers
this.getFrame().syncHeaders();
// delete advanced Fed frame in R, done with it
this.rJavaTranslator.runR("rm(" + trg + "," + matchesFrame + ", BaseLinkFrame, " + linkFrame + ", best_match_zero, best_match, blend, best_match_lessthan, best_match_nonzero)");
NounMetadata retNoun = new NounMetadata(frame, PixelDataType.FRAME, PixelOperationType.FRAME_HEADERS_CHANGE, PixelOperationType.FRAME_DATA_CHANGE);
retNoun.addAdditionalReturn(new AddHeaderNounMetadata((inputCols)));
return retNoun;
}
private List getColumns() {
// see if defined as individual key
GenRowStruct columnGrs = this.store.getNoun(ADDITIONAL_COLS);
if (columnGrs != null && columnGrs.size() > 0) {
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy