
prerna.reactor.imports.MergeReactor Maven / Gradle / Ivy
The newest version!
package prerna.reactor.imports;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import prerna.algorithm.api.ITableDataFrame;
import prerna.algorithm.api.SemossDataType;
import prerna.ds.OwlTemporalEngineMeta;
import prerna.ds.TinkerFrame;
import prerna.ds.nativeframe.NativeFrame;
import prerna.engine.api.IDatabaseEngine;
import prerna.engine.api.IHeadersDataRow;
import prerna.engine.api.IRDBMSEngine;
import prerna.engine.api.IRawSelectWrapper;
import prerna.om.Insight;
import prerna.om.InsightFile;
import prerna.query.querystruct.AbstractQueryStruct;
import prerna.query.querystruct.AbstractQueryStruct.QUERY_STRUCT_TYPE;
import prerna.query.querystruct.CsvQueryStruct;
import prerna.query.querystruct.ExcelQueryStruct;
import prerna.query.querystruct.HardSelectQueryStruct;
import prerna.query.querystruct.LambdaQueryStruct;
import prerna.query.querystruct.SQLQueryUtils;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.filters.IQueryFilter;
import prerna.query.querystruct.filters.SimpleQueryFilter;
import prerna.query.querystruct.selectors.IQuerySelector;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.query.querystruct.transform.QSAliasToPhysicalConverter;
import prerna.reactor.AbstractReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.Join;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.execptions.SemossPixelException;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.sablecc2.om.task.ITask;
import prerna.util.Constants;
import prerna.util.usertracking.UserTrackerFactory;
public class MergeReactor extends AbstractReactor {
// class-level log4j logger; used in this reactor to record the stack trace of failed merges
private static final Logger classLogger = LogManager.getLogger(MergeReactor.class);
public MergeReactor() {
this.keysToGet = new String[]{ReactorKeysEnum.FRAME.getKey(), ReactorKeysEnum.QUERY_STRUCT.getKey(), ReactorKeysEnum.JOINS.getKey()};
}
/**
 * Merges new data into the target frame and returns the resulting frame.
 * <p>
 * The data to merge comes from the query struct when one is supplied, otherwise
 * from a task. When the target frame is a {@link NativeFrame}, a native merge is
 * attempted first; if that yields no frame, the query struct / task path is used.
 * If the merge produced a different frame instance than the one passed in, the
 * insight's variable store (and its data maker, when the target was the current
 * frame) is re-pointed at the new frame.
 *
 * @return a noun wrapping the merged frame, flagged with the
 *         {@code FRAME_DATA_CHANGE} and {@code FRAME_HEADERS_CHANGE} operation types
 * @throws SemossPixelException if any of the merge strategies fails; the original
 *         exception is logged and attached as the cause
 * @throws IllegalArgumentException if neither a query struct nor a task is available
 */
@Override
public NounMetadata execute() {
    ITableDataFrame frame = getFrame();
    SelectQueryStruct qs = getQueryStruct();
    if (qs != null) {
        // a frame-based query without an explicit frame runs against the target frame
        AbstractQueryStruct.QUERY_STRUCT_TYPE type = qs.getQsType();
        boolean isFrameQuery = type == QUERY_STRUCT_TYPE.FRAME || type == QUERY_STRUCT_TYPE.RAW_FRAME_QUERY;
        if (isFrameQuery && qs.getFrame() == null) {
            qs.setFrame(frame);
        }
    }

    // set the logger into the frame
    Logger logger = getLogger(frame.getClass().getName());
    frame.setLogger(logger);

    // first convert the join to use the physical frame name in the selector
    // NOTE(review): raw List — the element type is presumably Join (generic
    // arguments look stripped in this copy of the file); confirm before tightening
    List joins = getJoins();
    joins = convertJoins(joins, frame.getMetaData());

    // we could either be merging from a QS that we want to convert into a task
    // or it is a task already and we want to merge
    // in either case, we will not return anything but just update the frame
    ITableDataFrame mergeFrame = null;
    ITableDataFrame curFrame = this.insight.getCurFrame();
    if (frame instanceof NativeFrame) {
        try {
            mergeFrame = mergeNative(curFrame, frame, qs, joins);
        } catch (Exception e) {
            throw wrapMergeFailure(e);
        }
    }

    // the native merge did not produce a frame (or was not applicable)
    if (mergeFrame == null) {
        if (qs != null) {
            try {
                mergeFrame = mergeFromQs(frame, qs, joins);
            } catch (Exception e) {
                throw wrapMergeFailure(e);
            }
        } else {
            ITask task = getTask();
            if (task == null) {
                throw new IllegalArgumentException("Could not find any data input to merge into the frame");
            }
            try {
                mergeFrame = mergeFromTask(frame, task, joins);
            } catch (Exception e) {
                throw wrapMergeFailure(e);
            } finally {
                // always release the task; a failure to close must not mask the merge result
                try {
                    task.close();
                } catch (IOException e) {
                    classLogger.error(Constants.STACKTRACE, e);
                }
            }
        }
    }

    // clear cached info after merge
    mergeFrame.clearCachedMetrics();
    mergeFrame.clearQueryCache();

    NounMetadata noun = new NounMetadata(mergeFrame, PixelDataType.FRAME, PixelOperationType.FRAME_DATA_CHANGE, PixelOperationType.FRAME_HEADERS_CHANGE);

    // in case we generated a new frame
    // update existing references
    if (mergeFrame != frame) {
        if (frame.getName() != null) {
            this.insight.getVarStore().put(frame.getName(), noun);
        }
        // guard against a missing current-frame entry instead of dereferencing it blindly
        NounMetadata curFrameNoun = this.insight.getVarStore().get(Insight.CUR_FRAME_KEY);
        if (curFrameNoun != null && frame == curFrameNoun.getValue()) {
            this.insight.setDataMaker(mergeFrame);
        }
    }

    return noun;
}

/**
 * Logs a merge failure and converts it into the exception type this reactor throws.
 *
 * @param e the failure raised by one of the merge strategies
 * @return a {@link SemossPixelException} carrying {@code e} as its cause; when
 *         {@code e} has no message, a generic one naming the exception class is used
 */
private SemossPixelException wrapMergeFailure(Exception e) {
    classLogger.error(Constants.STACKTRACE, e);
    String message = e.getMessage();
    if (message == null) {
        // e.g. a NullPointerException: avoid surfacing an error with no text at all
        message = "Unexpected error while merging data into the frame (" + e.getClass().getName() + ")";
    }
    return new SemossPixelException(message, e);
}
/**
 * Attempts to merge two native frames by joining their query structs directly.
 * <p>
 * The join is only attempted when both {@code curFrame} and {@code frame} are
 * {@link NativeFrame}s; otherwise {@code null} is returned and no join is performed.
 * The join requires that the data being merged also comes from a native frame and
 * that both query structs resolve to the same RDBMS engine (compared by engine id).
 * This replaces an earlier importer-based approach
 * ({@code ImportFactory.getImporter(frame, qs).mergeData(joins)}).
 *
 * @param curFrame the frame whose query struct forms the left side of the join and
 *                 whose name / original name are copied onto the result
 * @param frame    the frame being merged into; only inspected for being a native frame
 * @param qs       the query struct describing the data to merge; its frame must be a
 *                 {@link NativeFrame} when a join is attempted
 * @param joins    the joins to apply between the two query structs
 * @return the joined native frame, or {@code null} when either frame is not native
 * @throws SemossPixelException if {@code qs} is missing, if its frame is not a native
 *         frame, if either engine is not an RDBMS engine, or if the two engines differ
 * @throws Exception any other failure raised while building the joined frame
 */
private ITableDataFrame mergeNative(ITableDataFrame curFrame, ITableDataFrame frame, SelectQueryStruct qs, List joins) throws Exception {
    // track GA data
    UserTrackerFactory.getInstance().trackDataImport(this.insight, qs);

    // a native join is only applicable when both frames are native
    if (!(curFrame instanceof NativeFrame) || !(frame instanceof NativeFrame)) {
        return null;
    }

    // get the database information of the current frame
    // get the database information of the second frame
    // need a way to find if they both are referring to the same physical database..
    // but for now, we just match on the engine id
    // TODO: two apps could point at the same conn url / schema; may be we just check the connection URL
    SelectQueryStruct curQS = ((NativeFrame) curFrame).getQueryStruct();
    curQS = QSAliasToPhysicalConverter.getPhysicalQs(curQS, curFrame.getMetaData());

    // fail with a clear message rather than a NullPointerException / ClassCastException
    if (qs == null) {
        throw new SemossPixelException("Unable to merge into a native frame without a query struct for the data being merged");
    }
    Object incomingFrame = qs.getFrame();
    if (!(incomingFrame instanceof NativeFrame)) {
        throw new SemossPixelException("Unable to merge into a native frame because the data being merged does not come from a native frame, please consider converting to a materialized frame");
    }

    // need to get the qs from the native frame
    qs = ((NativeFrame) incomingFrame).getQueryStruct();
    // NOTE(review): the metadata is read from the frame of the native frame's own
    // query struct, not from incomingFrame itself (unlike curQS above) — confirm
    // this is intended
    qs = QSAliasToPhysicalConverter.getPhysicalQs(qs, qs.getFrame().getMetaData());

    // get the engine from the qs, falling back to resolving it when it is not set
    IDatabaseEngine curEngine = curQS.getEngine();
    IDatabaseEngine thisEngine = qs.getEngine();
    if (thisEngine == null) {
        thisEngine = qs.retrieveQueryStructEngine();
    }
    if (curEngine == null) {
        curEngine = curQS.retrieveQueryStructEngine();
    }

    // check to see they are RDBMS
    if (!(curEngine instanceof IRDBMSEngine && thisEngine instanceof IRDBMSEngine)) {
        throw new SemossPixelException("Joining to a native frame from a materialized frame not possible, please consider swapping the join order");
    }
    // and that they are the same database
    if (!curEngine.getEngineId().equals(thisEngine.getEngineId())) {
        throw new SemossPixelException("Joining tables across databases is not possible, please consider converting to a materialized frame");
    }

    // ok great these are same database
    // create the SQL Queries
    // need to check if these are query structs also
    ITableDataFrame mergeFrame = (NativeFrame) SQLQueryUtils.joinQueryStructs(curQS, qs, joins);
    mergeFrame.setOriginalName(curFrame.getOriginalName());
    mergeFrame.setName(curFrame.getName());
    return mergeFrame;
}
/**
* Merge via a QS that we will execute into an iterator
* @param frame
* @param qs
* @param joins
* @return
* @throws Exception
*/
private ITableDataFrame mergeFromQs(ITableDataFrame frame, SelectQueryStruct qs, List joins) throws Exception {
// track GA data
UserTrackerFactory.getInstance().trackDataImport(this.insight, qs);
// if we have an inner join, add the current values as a filter on the query
// important for performance on large dbs when the user has already
// filtered to small subset
boolean noDataError = false;
try {
if(!(qs instanceof HardSelectQueryStruct)) {
for(Join j : joins) {
// the join format is
// LHS = COLUMN NAME OF THE FRAME I AM MERGING INTO
// RHS = COLUMN NAME OF THE NEW DATA WE ARE JOINING TO
// LHS IS WHAT IS MAINTAINED AFTER THE JOIN
// RHS IS THE NAME IN THE QUERY
String leftColumnJoin = j.getLColumn();
String rColumnJoin = j.getRColumn();
String type = j.getJoinType();
String comparator = j.getComparator();
if(IQueryFilter.comparatorIsEquals(comparator) && (type.equals("inner.join") || type.equals("left.outer.join"))) {
// we need to make sure we apply the filter correctly!
// remember, RHS is the alias we provide the selector
// but might not match the physical
if(!qs.hasColumn(rColumnJoin)) {
IQuerySelector selector = null;
if(rColumnJoin.contains("__")) {
selector = qs.findSelectorFromAlias(rColumnJoin.split("__")[1]);
} else {
selector = qs.findSelectorFromAlias(rColumnJoin);
}
// get the correct q
if(selector == null) {
throw new IllegalArgumentException("There is an error with the join. Please make sure the columns are matched appropriately based on the frame you want to maintain");
}
rColumnJoin = selector.getQueryStructName();
}
// we will add a filter frame existing values in frame
// but wait... need to make sure an existing filter isn't there
if(qs.hasFiltered(rColumnJoin)) {
continue;
}
// if current frame is empty
// well, you will end up with no data
// unless you are on a graph, which will just append nodes
// as there is no real concept of joins currently
if(frame.isEmpty()) {
noDataError = true;
throw new IllegalArgumentException("Attempting to join new data with an empty frame. End result is still an empty frame.");
}
SelectQueryStruct filterQs = new SelectQueryStruct();
QueryColumnSelector column = new QueryColumnSelector(leftColumnJoin);
filterQs.addSelector(column);
try {
Iterator it = frame.query(filterQs);
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy