All Downloads are FREE. Search and download functionality uses the official Maven repository.

prerna.util.usertracking.reactors.ExtractDatabaseMetaReactor Maven / Gradle / Ivy

The newest version!
package prerna.util.usertracking.reactors;

import java.io.IOException;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import prerna.auth.utils.SecurityEngineUtils;
import prerna.auth.utils.SecurityQueryUtils;
import prerna.engine.api.IDatabaseEngine;
import prerna.engine.api.IDatabaseEngine.DATABASE_TYPE;
import prerna.engine.impl.owl.WriteOWLEngine;
import prerna.reactor.frame.r.AbstractRFrameReactor;
import prerna.sablecc2.om.GenRowStruct;
import prerna.sablecc2.om.PixelDataType;
import prerna.sablecc2.om.ReactorKeysEnum;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.util.Constants;
import prerna.util.UploadInputUtility;
import prerna.util.Utility;
import prerna.util.usertracking.TrackRequestThread;

/**
 * Reactor that adds unique counts to a database's OWL through
 * {@link WriteOWLEngine#addUniqueCounts}.
 * <p>
 * The unique-count step only runs for RDBMS, SESAME and TINKER engines.
 * Generating column descriptions and storing them in the tracking database is
 * currently switched off (the code is kept below, commented out), so the
 * {@code descriptions} input is read but has no effect on the result.
 */
public class ExtractDatabaseMetaReactor extends AbstractRFrameReactor {
	
	private static final Logger classLogger = LogManager.getLogger(ExtractDatabaseMetaReactor.class);

	// only referenced by the disabled storeColumnDescriptions code below
	private static final String CLASS_NAME = ExtractDatabaseMetaReactor.class.getName();
	/** Key of the optional boolean input that requests column descriptions. */
	public static final String DESCRIPTIONS_BOOL = "descriptions";

	/**
	 * Registers the inputs this reactor reads: the database key and
	 * {@link #DESCRIPTIONS_BOOL}.
	 */
	public ExtractDatabaseMetaReactor() {
		this.keysToGet = new String[] { ReactorKeysEnum.DATABASE.getKey(), DESCRIPTIONS_BOOL };
	}

	/**
	 * Resolves the requested database, checks that the user may view it and,
	 * for supported engine types, adds unique counts to its OWL.
	 * <p>
	 * A failure while writing the unique counts is logged and does not change
	 * the return value (best effort).
	 *
	 * @return a BOOLEAN noun that is always {@code true}
	 * @throws IllegalArgumentException if the user cannot view the database or
	 *                                  the engine cannot be loaded
	 */
	@Override
	public NounMetadata execute() {
		init();
		organizeKeys();
		// get inputs - engine
		String engineId = UploadInputUtility.getDatabaseNameOrId(this.store);
		// we may have the alias
		engineId = SecurityQueryUtils.testUserEngineIdForAlias(this.insight.getUser(), engineId);
		if(!SecurityEngineUtils.userCanViewEngine(this.insight.getUser(), engineId)) {
			throw new IllegalArgumentException("Database " + engineId + " does not exist or user does not have access to database");
		}
		
		// still parsed so the input is read as before; the value is only
		// consumed by the description step that is disabled below
		boolean descriptions = getDescriptionsBool();
		IDatabaseEngine engine = Utility.getDatabase(engineId);

		// validate engine exists
		if (engine == null) {
			throw new IllegalArgumentException("Engine does not exist");
		}

		// only executes for rdbms, tinker, and rdf
		DATABASE_TYPE engineType = engine.getDatabaseType();
		if (engineType == DATABASE_TYPE.RDBMS || engineType == DATABASE_TYPE.SESAME || engineType == DATABASE_TYPE.TINKER) {
			try(WriteOWLEngine owlEngine = engine.getOWLEngineFactory().getWriteOWL()) {
				owlEngine.addUniqueCounts(engine);
			} catch (IOException e) {
				classLogger.error(Constants.STACKTRACE, e);
			} catch (InterruptedException e) {
				classLogger.error(Constants.STACKTRACE, e);
				// catching InterruptedException clears the interrupt status;
				// restore it so code further up the stack can still see it
				Thread.currentThread().interrupt();
			}
		}

		//Turning off due to an issue in docker/openjdk 
		
		/*
		if (UserTrackerFactory.isTracking()) {
			// store descriptions if requested
//			if (descriptions) {
				storeColumnDescriptions(engine);
//			}
		}
		*/

		return new NounMetadata(true, PixelDataType.BOOLEAN);
	}

	/**
	 * Reads the optional {@link #DESCRIPTIONS_BOOL} input.
	 *
	 * @return {@code true} only when the first BOOLEAN value supplied for the
	 *         key is {@code true}; {@code false} when the key is absent, empty
	 *         or holds no BOOLEAN values
	 */
	private boolean getDescriptionsBool() {
		GenRowStruct boolGrs = this.store.getNoun(DESCRIPTIONS_BOOL);
		if (boolGrs == null || boolGrs.size() <= 0) {
			return false;
		}
		// the noun can be non-empty and still contain no BOOLEAN entries, so the
		// filtered list has to be checked before indexing into it
		List<?> booleanValues = boolGrs.getValuesOfType(PixelDataType.BOOLEAN);
		if (booleanValues == null || booleanValues.isEmpty()) {
			return false;
		}
		return Boolean.TRUE.equals(booleanValues.get(0));
	}

//	private void storeColumnDescriptions(IDatabase engine) {
//		String[] packages = new String[] { "data.table", "WikidataR", "curl", "doParallel", "XML" };
//		Logger logger = this.getLogger(CLASS_NAME);
//		this.rJavaTranslator.checkPackages(packages);
//		int stepCounter = 1;
//		logger.info(stepCounter + ". Loading R scripts to store column descriptions");
//		StringBuilder rsb = new StringBuilder();
//		String wd = "wd" + Utility.getRandomString(5);
//		String baseFolder = DIHelper.getInstance().getProperty("BaseFolder");
//		rsb.append(wd + "<- getwd();");
//		rsb.append("setwd(\"" + baseFolder + "\\R\\Recommendations\");\n");
//		rsb.append("source(\"" + baseFolder + "\\R\\Recommendations\\SemanticSimilarity\\lsi_dataitem.r\");\n");
//		rsb.append("source(\"" + baseFolder + "\\R\\Recommendations\\db_recom.r\");\n");
//		rsb.append("source(\"" + baseFolder + "\\R\\Recommendations\\datasemantic.r\");\n");
//		rsb.append("source(\"" + baseFolder + "\\R\\Recommendations\\topic_modelling.r\");\n");
//		this.rJavaTranslator.runR(rsb.toString().replace("\\", "/"));
//		logger.info(stepCounter + ". Done");
//		stepCounter++;
//
//		// GENERATING DESCRIPTIONS
//		logger.info(stepCounter + ". Getting Database schema to generate descriptions");
//		String rTempTable = "semanticTempTable";
//		List allTableCols = MasterDatabaseUtility.getAllTablesAndColumns(engine.getEngineId());
//		String engineName = engine.getEngineName();
//		String engineID = engine.getEngineId();
//		String seperator = "$";
//		List list = new ArrayList();
//		logger.info(stepCounter + ". Done");
//		stepCounter++;
//
//
//		// iterate through all the rows and sample about 15 rows from each
//		// of
//		// those
//		logger.info(stepCounter + ". Processing columns to find descriptions");
//		for (Object[] tableCol : allTableCols) {
//			SelectQueryStruct qs = new SelectQueryStruct();
//			if (tableCol.length == 4) {
//				String table = tableCol[0] + "";
//				String col = tableCol[1] + "";
//				String dataType = tableCol[2] + "";
//				boolean primFlag = false;
//				String descriptions = "";
//
//				// Only generate column descriptions if the data type is a
//				// string
//				if (dataType.equals(SemossDataType.STRING.toString())) {
//					// we will fill this in once we figure out if it is a
//					// concept or property
//					QueryColumnSelector colSelector = null;
//					// this is a hack we used in advanced federate
//					if (engine.getParentOfProperty(col + "/" + table) == null) {
//						// we couldn't find a parent for this property
//						// this means it is a concept itself
//						// and we should only use table
//						colSelector = new QueryColumnSelector(table);
//						primFlag = true;
//					} else {
//						colSelector = new QueryColumnSelector(table + "__" + col);
//					}
//					qs.addSelector(colSelector);
//
//					// select only non-null values from database
//					SimpleQueryFilter nulls = new SimpleQueryFilter(new NounMetadata(colSelector, PixelDataType.COLUMN),
//							"!=", new NounMetadata("null", PixelDataType.NULL_VALUE));
//					qs.addExplicitFilter(nulls);
//					IRawSelectWrapper iterator = WrapperManager.getInstance().getRawWrapper(engine, qs);
//					if (!iterator.hasNext()) {
//						// all values are null in this column
//						continue;
//					}
//					StringBuilder sb = new StringBuilder();
//					sb.append("rm(result);");
//					// write to csv and read into R
//					String newFileLoc = DIHelper.getInstance().getProperty(Constants.INSIGHT_CACHE_DIR) + "/"
//							+ Utility.getRandomString(6) + ".tsv";
//					String header = engine.getEngineId() + seperator + engine.getEngineName() + seperator + table
//							+ seperator + col;
//					File newFile = MetaSemanticSimilarityReactor.writeResultToFile(newFileLoc, iterator, header);
//					sb.append(RSyntaxHelper.getFReadSyntax(rTempTable, newFile.getAbsolutePath(), "\\t") + "\n");
//
//					// get random subset of column data
//					sb.append("if(nrow(" + rTempTable + ") > 15) {");
//					sb.append(rTempTable + "<-" + rTempTable + "[sample(nrow(" + rTempTable + "),15),c(");
//					sb.append("\"" + engineID + seperator + engineName + seperator + table + seperator + col + "\"");
//					sb.append(")];}\n");
//					//logger.info("Searching description for: " + engine.getEngineName() + ":::" + table + ":::" + col);
//
//					// execute script to get descriptions for this column
//					sb.append(RSyntaxHelper.asDataFrame(rTempTable, rTempTable) + "\n");
//					sb.append("semantic_tracking_mgr(" + rTempTable + ",\"dataitem\");\n");
//					sb.append("result <- readRDS('dataitem-semantic-history.rds');");
//
//					// get only the row that you are adding to the table
//					sb.append("result <- result[(result$ENGINE_ID== \"" + engineID + "\" & result$ENGINE_NAME== \""
//							+ engineName + "\" & result$TABLE== \"" + table + "\" & result$COLUMN== \"" + col
//							+ "\"),]");
//					this.rJavaTranslator.runR(sb.toString());
//					newFile.delete();
//					descriptions = this.rJavaTranslator.getString("as.character(result[1,\"DESCRIPTION\"])");
//					if (descriptions == null) {
//						// no results found
//						continue;
//					}
//
//					// Truncate row lengths to be the appropriate lengths
//					// before
//					// sending to table
//					if (primFlag) {
//						logger.info("Found description for: " + engine.getEngineName() + ":::" + table);
//
//					} else {
//						logger.info("Found description for: " + engine.getEngineName() + ":::" + table + ":::" + col);
//					}
//					if (descriptions.length() > 1000) {
//						descriptions = descriptions.substring(0, 999);
//					}
//
//					// Add a new row into the table with the necessary
//					// details
//					// We check to see if this table already exists in the
//					// endpoint
//					Object[] newRow = { engineID, engineName, table, col, descriptions };
//					list.add(newRow);
//				} else {
//					continue;
//				}
//			}
//		}
//		logger.info(stepCounter + ". Done");
//		stepCounter++;
//		
//		logger.info(stepCounter + ". Storing descriptions");
//		sendTrackRequest("semantic", list);
//		logger.info(stepCounter + ". Done");
//		
//
//		String gc = "rm(\"a5_97b6491748854929b50f55f5818b1634\",	\"a9_8ca904d356784e2d88427675e946b591\","
//				+ "\"apply_tfidf\",                        \"assign_unique_concepts\",            "
//				+ "\"aTempInsightNotStored\",              \"blend_mgr\",                         "
//				+ "\"blend_tracking_semantic\",            \"breakdown\",                         "
//				+ "\"build_data_landmarks\",               \"build_dbid_domain\",                 "
//				+ "\"build_query_doc\",                    \"build_query_tdm\",                   "
//				+ "\"build_sim\",                          \"build_tdm\",                         "
//				+ "\"col2db\",                             \"col2tbl\",                           "
//				+ "\"column_doc_mgr_do\",                  \"column_doc_mgr_dopar\",              "
//				+ "\"column_lsi_mgr\",                     \"compute_column_desc_sim\",           "
//				+ "\"compute_entity_sim\",                 \"con\",                               "
//				+ "\"construct_column_doc\",               \"constructName\",                     "
//				+ "\"cosine_jaccard_sim\",                 \"create_column_doc\",                 "
//				+ "\"data_domain_mgr\",                    \"dataitem_history_do\",               "
//				+ "\"dataitem_history_dopar\",             \"dataitem_recom_mgr\",                "
//				+ "\"datasemantic_history\",               \"discover_column_desc\",              "
//				+ "\"drilldown_communities\",              \"exec_tfidf\",                        "
//				+ "\"find_db\",                            \"get_dataitem_rating\",               "
//				+ "\"get_item_recom\",                     \"get_items_users\",                   "
//				+ "\"get_similar_doc\",                    \"get_user_recom\",                    "
//				+ "\"getSearchURL\",                       \"hop_away_mgr\",                      "
//				+ "\"hop_away_recom_mgr\",                 \"jaccard_sim\",                       "
//				+ "\"locate_data_communities\",            \"locate_data_district\",              "
//				+ "\"locate_user_communities\",            \"lsi_mgr\",                           "
//				+ "\"match_desc\",                         \"populate_ratings\",                  "
//				+ "\"read_datamatrix\",                    \"refresh_base\",                      "
//				+ "\"refresh_data_mgr\",                   \"refresh_semantic_mgr\",              "
//				+ "\"remove_files\",                       \"semantic_tracking_mgr\",             "
//				+ "\"semanticTempTable\")";
//		this.rJavaTranslator.runR(gc);
//	}

	/**
	 * Starts a {@link TrackRequestThread} for the given rows.
	 * <p>
	 * Only called from the disabled storeColumnDescriptions code above.
	 *
	 * @param type the type passed to the {@link TrackRequestThread}
	 * @param rows the rows passed to the {@link TrackRequestThread}
	 */
	private void sendTrackRequest(String type, List rows) {
		TrackRequestThread t = new TrackRequestThread(type, rows);
		t.start();
	}
	
	/**
	 * @return the fixed reactor name {@code "ExtractDatabaseMeta"}
	 */
	public String getName()
	{
		return "ExtractDatabaseMeta";
	}

}