
prerna.reactor.imports.TinkerImporter
package prerna.reactor.imports;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;
import java.util.stream.Collectors;
import org.apache.commons.io.FilenameUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import prerna.algorithm.api.ITableDataFrame;
import prerna.ds.OwlTemporalEngineMeta;
import prerna.ds.TinkerFrame;
import prerna.engine.api.IDatabaseEngine;
import prerna.engine.api.IDatabaseEngine.DATABASE_TYPE;
import prerna.engine.api.IHeadersDataRow;
import prerna.query.querystruct.AbstractFileQueryStruct;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.selectors.IQuerySelector;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.sablecc2.om.Join;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.PixelOperationType;
import prerna.sablecc2.om.execptions.SemossPixelException;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.Constants;
import prerna.util.Utility;
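/**
 * Importer that loads the rows of a query iterator into a {@link TinkerFrame}
 * graph. Connections between columns are described by an "edge hash": a map
 * from a source node name to the set of node names it links to. As an
 * illustrative example (hypothetical column names), a query returning Title,
 * Genre, and Budget could yield an edge hash of {Title -> [Genre, Budget]},
 * so each row creates a Title vertex with edges to a Genre and a Budget vertex.
 */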
public class TinkerImporter extends AbstractImporter {

    private static final Logger classLogger = LogManager.getLogger(TinkerImporter.class);

    private TinkerFrame dataframe;
    private SelectQueryStruct qs;
    private Iterator<IHeadersDataRow> it;

    public TinkerImporter(TinkerFrame dataframe, SelectQueryStruct qs) {
        this.dataframe = dataframe;
        this.qs = qs;
        // generate the iterator
        try {
            this.it = ImportUtility.generateIterator(this.qs, this.dataframe);
        } catch (Exception e) {
            classLogger.error(Constants.STACKTRACE, e);
            throw new SemossPixelException(
                    new NounMetadata("Error occurred executing query before loading into frame",
                            PixelDataType.CONST_STRING, PixelOperationType.ERROR));
        }
    }
    public TinkerImporter(TinkerFrame dataframe, SelectQueryStruct qs, Iterator<IHeadersDataRow> it) {
        this.dataframe = dataframe;
        this.qs = qs;
        // use the provided iterator; generate one only if it was not supplied
        this.it = it;
        if (this.it == null) {
            try {
                this.it = ImportUtility.generateIterator(this.qs, this.dataframe);
            } catch (Exception e) {
                classLogger.error(Constants.STACKTRACE, e);
                throw new SemossPixelException(
                        new NounMetadata("Error occurred executing query before loading into frame",
                                PixelDataType.CONST_STRING, PixelOperationType.ERROR));
            }
        }
    }
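    /**
     * Insert the iterator data into a brand new frame. File-based query structs
     * get a synthetic row-id node as the root of the edge hash; engine-based
     * query structs are parsed as a flat table or as a graph depending on the
     * source engine type (see {@link #processAsFlat(SelectQueryStruct)}).
     */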
    @Override
    public void insertData() {
        if (this.qs instanceof AbstractFileQueryStruct) {
            Map<String, Set<String>> edgeHash = genFileEdgeHash((AbstractFileQueryStruct) qs);
            // create the metadata
            ImportUtility.parseFileQueryStructAsGraph(this.dataframe, this.qs, edgeHash);
            // add the data
            processFileImport(edgeHash, ((AbstractFileQueryStruct) qs).getNewHeaderNames(), edgeHash.keySet().iterator().next());
        } else {
            boolean processFlat = processAsFlat(this.qs);
            Map<String, Set<String>> edgeHash = getEdgeMap(this.qs, processFlat);
            if (processFlat) {
                ImportUtility.parseFlatEdgeHashAsGraph(this.dataframe, this.qs, edgeHash);
            } else {
                ImportUtility.parseQueryStructAsGraph(this.dataframe, this.qs, edgeHash);
            }
            // add the data
            processImport(edgeHash, null);
        }
    }
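    /**
     * Insert the iterator data reusing an existing metadata object (e.g. when
     * the frame meta was already constructed upstream), so no new metadata is
     * parsed from the query struct.
     */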
    @Override
    public void insertData(OwlTemporalEngineMeta metaData) {
        this.dataframe.setMetaData(metaData);
        // get the edge hash so we know how to add data connections
        Map<String, Set<String>> edgeHash = getEdgeMap(this.qs);
        // add the data
        processImport(edgeHash, null);
    }
    /**
     * Flush out the iterator into the tinker frame using the specified edge relationships
     * @param edgeHash map of source node name to the set of node names it connects to
     * @param headerAlias optional map of iterator header name to frame column name
     */
    private void processImport(Map<String, Set<String>> edgeHash, Map<String, String> headerAlias) {
        Map<Integer, Set<Integer>> cardinality = null;
        String[] headers = null;
        while (this.it.hasNext()) {
            IHeadersDataRow row = it.next();
            if (cardinality == null) {
                headers = row.getHeaders();
                // update the headers with the join info
                // so we create the vertices correctly
                if (headerAlias != null && !headerAlias.isEmpty()) {
                    for (int i = 0; i < headers.length; i++) {
                        if (headerAlias.containsKey(headers[i])) {
                            headers[i] = headerAlias.get(headers[i]);
                        }
                    }
                }
                // get the cardinality with the new headers since the edge hash is also modified
                cardinality = Utility.getCardinalityOfValues(headers, edgeHash);
            }
            dataframe.addRelationship(headers, row.getValues(), cardinality);
        }
    }
    /**
     * Flush out the iterator into the tinker frame using the specified edge relationships.
     * A synthetic, auto-incrementing row id is prepended to every row so each file
     * row gets its own root vertex.
     * @param edgeHash map of source node name to the set of node names it connects to
     * @param headerAlias optional map of iterator header name to frame column name
     * @param autoRowIdx name of the synthetic row-id column (the root of the edge hash)
     */
    private void processFileImport(Map<String, Set<String>> edgeHash, Map<String, String> headerAlias, String autoRowIdx) {
        Map<Integer, Set<Integer>> cardinality = null;
        String[] headers = null;
        int counter = 1;
        while (this.it.hasNext()) {
            IHeadersDataRow row = it.next();
            if (cardinality == null) {
                // prepend the synthetic row-id header
                headers = row.getHeaders();
                String[] newHeaders = new String[headers.length + 1];
                newHeaders[0] = autoRowIdx;
                System.arraycopy(headers, 0, newHeaders, 1, headers.length);
                headers = newHeaders;
                // update the headers with the join info
                // so we create the vertices correctly
                if (headerAlias != null && !headerAlias.isEmpty()) {
                    for (int i = 0; i < headers.length; i++) {
                        if (headerAlias.containsKey(headers[i])) {
                            headers[i] = headerAlias.get(headers[i]);
                        }
                    }
                }
                // get the cardinality with the new headers since the edge hash is also modified
                cardinality = Utility.getCardinalityOfValues(headers, edgeHash);
            }
            // prepend the row-id value to match the prepended header
            Object[] values = row.getValues();
            Object[] newValues = new Object[values.length + 1];
            newValues[0] = counter++;
            System.arraycopy(values, 0, newValues, 1, values.length);
            dataframe.addRelationship(headers, newValues, cardinality);
        }
    }
    @Override
    public ITableDataFrame mergeData(List<Join> joins) {
        List<String[]> existingRels = this.dataframe.getMetaData().getAllRelationships();
        // figure out which query aliases need to be renamed
        // to match the existing frame join columns
        Map<String, String> joinMods = qsJoinMod(joins);
        if (!joinMods.isEmpty()) {
            modifyQsSelectorAlias(joinMods);
        }
        // get the edge hash so we know how to add data connections
        // this edge hash will be used as part of the cardinality
        Map<String, Set<String>> edgeHash = getEdgeMap(this.qs);
        processEdgeHash(edgeHash, joins);
        // determine if there are loops
        List<String[]> loopRels = getLoopRels(edgeHash, existingRels);
        if (loopRels.isEmpty()) {
            return processMerge(edgeHash, joinMods);
        } else {
            return processLoop(loopRels, joinMods, joins);
        }
    }
    /**
     * When we try to join via properties and never add a concept,
     * the edge hash will be incomplete
     * @param edgeHash
     * @param joins
     */
    private void processEdgeHash(Map<String, Set<String>> edgeHash, List<Join> joins) {
        Set<String> availableKeys = new HashSet<>();
        for (String k : edgeHash.keySet()) {
            if (!edgeHash.get(k).isEmpty()) {
                // we have a valid edge hash
                // just return
                return;
            }
            availableKeys.add(k);
        }
        // if we got to this point
        // the edge hash needs to be readjusted
        for (Join j : joins) {
            String frameValue = j.getLColumn();
            String newValue = j.getRColumn();
            if (newValue.contains("__")) {
                newValue = newValue.split("__")[1];
            }
            // if both values are returned
            // set them up as a relationship
            // from frame to new
            if (availableKeys.contains(frameValue) && availableKeys.contains(newValue)) {
                edgeHash.get(frameValue).addAll(availableKeys.stream().filter(p -> !p.equals(frameValue)).collect(Collectors.toList()));
            }
        }
    }
    /**
     * This is the default method to merge data into the frame
     * @param edgeHash
     * @param joinMods
     * @return
     */
    private ITableDataFrame processMerge(Map<String, Set<String>> edgeHash, Map<String, String> joinMods) {
        // create the metadata
        // note this has the updated qs based on the join mods already
        ImportUtility.parseQueryStructAsGraph(this.dataframe, this.qs, edgeHash);
        processImport(edgeHash, joinMods);
        return this.dataframe;
    }
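    /**
     * Merge data when the new edge hash would create a simple loop a -> b -> a
     * against the existing relationships. To break the loop, every selector
     * alias on the looping nodes (other than the join column) is suffixed with
     * "_2" before the metadata is parsed, and the old-to-new alias map is passed
     * through so the physical vertices are still reused. For example (hypothetical
     * columns), if the frame already holds System -> Interface and the query adds
     * Interface -> System, the second System is aliased to System_2.
     * @param loopRels the [downNode, upNode] pairs that close a loop
     * @param joinMods map of query alias to frame column name
     * @param joins the joins being merged
     * @return the frame with the merged data
     */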
    private ITableDataFrame processLoop(List<String[]> loopRels, Map<String, String> joinMods, List<Join> joins) {
        Map<String, Set<String>> originalEdgeHash = getEdgeMap(this.qs);
        // so we have a -> b -> a
        // but we need the second a to become a_2
        // the loop node is the second index in the loopRels
        // all we need to do is go through and assign the new alias everywhere
        Set<String> joinCols = new HashSet<>();
        for (Join j : joins) {
            joinCols.add(j.getRColumn());
        }
        // update qs selectors with new alias
        Map<String, String> oldAliasToNew = new HashMap<>();
        for (IQuerySelector selector : this.qs.getSelectors()) {
            String curAlias = selector.getAlias();
            // we do not want to do this for the join column!
            if (joinCols.contains(curAlias)) {
                continue;
            }
            for (String[] loop : loopRels) {
                if (loop[0].equals(curAlias) || loop[1].equals(curAlias)) {
                    String newAlias = curAlias + "_2";
                    selector.setAlias(newAlias);
                    oldAliasToNew.put(curAlias, newAlias);
                }
            }
        }
        Map<String, Set<String>> updatedEdgeHash = getEdgeMap(this.qs);
        // we need to define something to say
        // that we are actually adding these with a different type
        // remember: on tinker, we want to reuse the same node
        ImportUtility.parseQueryStructAsGraph(this.dataframe, this.qs, updatedEdgeHash);
        OwlTemporalEngineMeta meta = this.dataframe.getMetaData();
        for (String oldAlias : oldAliasToNew.keySet()) {
            meta.setPhysicalNameToVertex(oldAliasToNew.get(oldAlias), oldAlias);
        }
        // note, we use the original edge hash since the headers from the iterator
        // do not know that we have modified the meta
        processImport(originalEdgeHash, joinMods, oldAliasToNew);
        return this.dataframe;
    }
    /**
     * Flush out the iterator into the tinker frame using the specified edge relationships
     * @param edgeHash map of source node name to the set of node names it connects to
     * @param headerAlias optional map of iterator header name to frame column name
     * @param oldAliasToNew map of original alias to the "_2" alias used to break a loop
     */
    private void processImport(Map<String, Set<String>> edgeHash, Map<String, String> headerAlias, Map<String, String> oldAliasToNew) {
        Map<Integer, Set<Integer>> cardinality = null;
        String[] headers = null;
        while (this.it.hasNext()) {
            IHeadersDataRow row = it.next();
            if (cardinality == null) {
                headers = row.getHeaders();
                // update the headers with the join info
                // so we create the vertices correctly
                if (headerAlias != null && !headerAlias.isEmpty()) {
                    for (int i = 0; i < headers.length; i++) {
                        if (headerAlias.containsKey(headers[i])) {
                            headers[i] = headerAlias.get(headers[i]);
                        }
                    }
                }
                // get the cardinality with the new headers since the edge hash is also modified
                cardinality = Utility.getCardinalityOfValues(headers, edgeHash);
            }
            dataframe.addRelationship(headers, row.getValues(), cardinality, oldAliasToNew);
        }
    }
    //////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////

    /*
     * Utility methods
     */
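    /**
     * Search the new edge hash for a simple loop (a -> b -> a) against the
     * relationships already in the frame. For example, if the frame already
     * holds the relationship [a, b] and the incoming edge hash contains
     * {b -> [a]}, this returns the pair [b, a].
     */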
    private List<String[]> getLoopRels(Map<String, Set<String>> edgeHash, List<String[]> existingRels) {
        // we are only searching for a simple loop
        // i.e. a -> b -> a
        List<String[]> loopRels = new Vector<>();
        for (String[] relArray : existingRels) {
            // so we just need to do a comparison
            // if we already have a -> b
            // is there b -> a in the edge hash
            String upNode = relArray[0];
            String downNode = relArray[1];
            // if the edge hash doesn't have downNode as a key
            // just continue
            if (edgeHash.containsKey(downNode)) {
                // we found it, lets go and see if it goes back to the up node
                if (edgeHash.get(downNode).contains(upNode)) {
                    // we have a loop!
                    loopRels.add(new String[]{downNode, upNode});
                }
            }
        }
        return loopRels;
    }
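    /**
     * Rename the qs selector aliases using the join modifications so the
     * query columns line up with the existing frame columns.
     */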
    private void modifyQsSelectorAlias(Map<String, String> joinMods) {
        for (String valToFind : joinMods.keySet()) {
            String newValue = joinMods.get(valToFind);
            // loop through the selectors
            // and see if one of them has the alias we are looking for
            for (IQuerySelector selector : this.qs.getSelectors()) {
                if (selector.getAlias().equals(valToFind)) {
                    // alright, set the alias to be the same as the join one
                    // so we can easily update the metadata
                    selector.setAlias(newValue);
                    break;
                }
            }
        }
    }
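    /**
     * Build a map from query column name to frame column name for every join
     * where the two differ. For example (hypothetical columns), joining frame
     * column MovieTitle to query column Title yields {Title=MovieTitle}.
     */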
    private Map<String, String> qsJoinMod(List<Join> joins) {
        Map<String, String> joinMap = new HashMap<>();
        for (Join j : joins) {
            // s is the frame name
            String s = j.getLColumn();
            // q is the query name
            String q = j.getRColumn();
            // if they are not equal, we need to replace q with s
            if (!s.equals(q)) {
                joinMap.put(q, s);
            }
        }
        return joinMap;
    }
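    /**
     * Build the edge hash for a file import: a single synthetic row-id node,
     * derived from the file name, that connects to every selected column. For
     * example (hypothetical file), importing movies.csv with columns Title and
     * Genre produces {movies_ROW_ID -> [Genre, Title]}.
     */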
    private Map<String, Set<String>> genFileEdgeHash(AbstractFileQueryStruct qs) {
        String autoRowIdx = qs.getFilePath();
        autoRowIdx = FilenameUtils.getBaseName(autoRowIdx);
        // remove the ugly stuff we add to make this unique
        if (autoRowIdx.contains("_____UNIQUE")) {
            autoRowIdx = autoRowIdx.substring(0, autoRowIdx.indexOf("_____UNIQUE"));
        }
        autoRowIdx = autoRowIdx + "_ROW_ID";

        Set<String> cols = new TreeSet<>();
        List<IQuerySelector> selectors = qs.getSelectors();
        for (int i = 0; i < selectors.size(); i++) {
            QueryColumnSelector c = (QueryColumnSelector) selectors.get(i);
            cols.add(c.getColumn());
        }

        Map<String, Set<String>> edgeMap = new HashMap<>();
        edgeMap.put(autoRowIdx, cols);
        return edgeMap;
    }
    private Map<String, Set<String>> getEdgeMap(SelectQueryStruct qs) {
        return getEdgeMap(qs, processAsFlat(qs));
    }

    private Map<String, Set<String>> getEdgeMap(SelectQueryStruct qs, boolean processAsFlat) {
        Map<String, Set<String>> edgeHash = null;
        if (processAsFlat) {
            // if relational engine / other table structure
            edgeHash = ImportUtility.getFlatEngineEdgeHash(qs);
        } else {
            edgeHash = ImportUtility.getGraphEdgeHash(qs);
        }
        return edgeHash;
    }
    /**
     * Need to use different logic if merging data from a flat engine source
     * @return
     */
    private boolean processAsFlat(SelectQueryStruct qs) {
        IDatabaseEngine engine = qs.getEngine();
        DATABASE_TYPE dbType = engine == null ? null : engine.getDatabaseType();
        if (dbType == null || dbType == DATABASE_TYPE.TINKER || dbType == DATABASE_TYPE.SESAME) {
            return false;
        }
        return true;
    }
}
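
// A minimal usage sketch (hypothetical wiring; in practice the frame, query
// struct, and iterator are produced by the surrounding pixel pipeline):
//
//   TinkerFrame frame = new TinkerFrame();
//   SelectQueryStruct qs = ...; // built elsewhere, pointing at a source engine
//   TinkerImporter importer = new TinkerImporter(frame, qs);
//   importer.insertData(); // builds the metadata and flushes the rows into the graph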