
prerna.query.interpreters.PandasInterpreter Maven / Gradle / Ivy
The newest version!
package prerna.query.interpreters;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import prerna.algorithm.api.SemossDataType;
import prerna.ds.py.PandasFrame;
import prerna.ds.py.PandasSyntaxHelper;
import prerna.ds.py.PyTranslator;
import prerna.engine.api.IRawSelectWrapper;
import prerna.query.querystruct.SelectQueryStruct;
import prerna.query.querystruct.filters.AndQueryFilter;
import prerna.query.querystruct.filters.BetweenQueryFilter;
import prerna.query.querystruct.filters.IQueryFilter;
import prerna.query.querystruct.filters.OrQueryFilter;
import prerna.query.querystruct.filters.SimpleQueryFilter;
import prerna.query.querystruct.filters.SimpleQueryFilter.FILTER_TYPE;
import prerna.query.querystruct.selectors.IQuerySelector;
import prerna.query.querystruct.selectors.IQuerySelector.SELECTOR_TYPE;
import prerna.query.querystruct.selectors.IQuerySort;
import prerna.query.querystruct.selectors.QueryArithmeticSelector;
import prerna.query.querystruct.selectors.QueryColumnOrderBySelector;
import prerna.query.querystruct.selectors.QueryColumnOrderBySelector.ORDER_BY_DIRECTION;
import prerna.query.querystruct.selectors.QueryColumnSelector;
import prerna.query.querystruct.selectors.QueryConstantSelector;
import prerna.query.querystruct.selectors.QueryFunctionHelper;
import prerna.query.querystruct.selectors.QueryFunctionSelector;
import prerna.query.querystruct.selectors.QueryIfSelector;
import prerna.reactor.IReactor;
import prerna.reactor.qs.SubQueryExpression;
import prerna.sablecc2.om.nounmeta.NounMetadata;
import prerna.sablecc2.om.task.ITask;
import prerna.util.Constants;
import prerna.util.Utility;
// Translates a SEMOSS SelectQueryStruct into an executable pandas expression string.
// The final query is assembled in composeQuery() from independently-built fragments
// (selector, filter, having, group, aggregate, order-by, rename, etc.).
public class PandasInterpreter extends AbstractQueryInterpreter {
private static final Logger classLogger = LogManager.getLogger(PandasInterpreter.class);
// python-side variable name of the raw pandas frame
private String frameName = null;
// python-side variable name of the frame wrapper used for most generated expressions
private String wrapperFrameName = null;
// optional "swifter" accelerator suffix (currently forced to "" in composeQuery)
private String swifter = "";
private String exp = ""; // says if this feature is experimental
// column name -> SemossDataType; keys normalized by updateTypes() (no "table__" prefix)
private Map colDataTypes;
// fragments of the final pandas expression, built separately and concatenated
private StringBuilder selectorCriteria;
private StringBuilder filterCriteria;
private StringBuilder havingCriteria;
private StringBuilder renameCriteria = new StringBuilder(""); // look into renCriteria below. I don't think it's necessary.
private StringBuilder groupCriteria = new StringBuilder("");
private StringBuilder dateCriteria = new StringBuilder("");
private StringBuilder arithmeticCriteria = new StringBuilder();
private StringBuilder caseWhenCriteria = new StringBuilder();
// legacy dict-style .agg({...}) fragment (aggCriteria) and named-agg .agg(alias=(...)) fragment (aggCriteria2)
private StringBuilder aggCriteria = new StringBuilder("");
private StringBuilder aggCriteria2 = new StringBuilder("");
private StringBuilder renCriteria = new StringBuilder(".rename(columns={'mean':'Average', 'nunique':'UniqueCount', 'sum':'Sum', 'median':'Median', 'max':'Max', 'min':'Min', 'count':'Count'})");
private StringBuilder orderBy = new StringBuilder("");
private StringBuilder orderBy2 = new StringBuilder("");
private StringBuilder ascending = new StringBuilder("");
private StringBuilder ascending2 = new StringBuilder("");
// when non-null the whole composed query is replaced by this expression
private StringBuilder overrideQuery = null;
// terminal conversion of the frame result into a python list-of-rows
private StringBuilder normalizer = new StringBuilder(".to_dict('split')['data']");
private boolean isHavingFilter;
private List havingList = new ArrayList();
// pandas date-part functions that route to processDateFunctionSelector()
private static final List DATE_FUNCTION_LIST = new ArrayList(5);
static {
DATE_FUNCTION_LIST.add(QueryFunctionHelper.YEAR);
DATE_FUNCTION_LIST.add(QueryFunctionHelper.QUARTER);
DATE_FUNCTION_LIST.add(QueryFunctionHelper.MONTH_NAME);
DATE_FUNCTION_LIST.add(QueryFunctionHelper.WEEK);
DATE_FUNCTION_LIST.add(QueryFunctionHelper.DAY_NAME);
}
// alias -> generated clause maps, with parallel key lists preserving insertion order
private Map dateHash = null;
private Map arithmeticHash = null;
private Map caseWhenHash = null;
private List dateKeys = null;
private List arithmeticKeys = null;
private List caseWhenKeys = null;
// counter used to synthesize CASE_WHEN_<n> aliases when none was provided
private int caseWhenCount;
private List renameColList = null;
// column -> dict-style agg builder / alias -> named-agg builder
private Map aggHash = null;
private Map aggHash2 = null;
private Map orderHash = null;
private Map typesHash = null;
// physical column name -> alias, used to resolve order-by targets
private Map aliasHash = null;
// Experiment
private List caseWhenFunctionList = null;
private boolean caseWhenFunction;
static final String DEF_FILTER = "this.cache['data']__f";
private Map functionMap = null;
Map processedSelector = new HashMap<>();
ArrayList aggKeys = new ArrayList<>();
ArrayList headers = null;
// this is the headers being kept on the array list being generated
ArrayList actHeaders = null;
int groupIndex = 0;
ArrayList types= null;
// row window applied via .iloc[start:end]
long start = 0;
long end = 500;
// need to keep the ordinality of the selectors and match that with the aliases
ArrayList groupColumns = null;
PyTranslator pyt = null;
// true when the query reduces to a single scalar value (single-column aggregate, no group-by)
boolean scalar = false;
// experimental stuff trying the numpy groupies guy
List groupColList = new ArrayList();
Map aggColMap = new HashMap();
List aggColList = new ArrayList();
List functionList = new ArrayList();
List orderList = new ArrayList();
Map orderListMap = new HashMap(); // keeps track of what the items are called
// cache of all the keys
List keyCache = new ArrayList();
// this is because we need to handle subquery
private transient PandasFrame pandasFrame;
/**
 * Sets the column-to-SemossDataType map for this frame and normalizes its keys
 * (any "table__column" key is reduced to "column") via {@link #updateTypes()}.
 * @param dataTypeMap map of column name (possibly "frame__col") to SemossDataType
 */
public void setDataTypeMap(Map dataTypeMap) {
this.colDataTypes = dataTypeMap;
updateTypes();
}
/**
 * Rebuilds {@code colDataTypes} so every key is a bare column name:
 * "table__column" keys are reduced to the segment after the first "__".
 * Iterates the entry set directly instead of keySet() + get() (single lookup per entry).
 */
private void updateTypes() {
Map newTypesMap = new HashMap<>();
for(Object entryObj : this.colDataTypes.entrySet()) {
Map.Entry entry = (Map.Entry) entryObj;
String k = (String) entry.getKey();
String newK;
if(k.contains("__")) {
// NOTE: mirrors the original split("__")[1] — only the segment
// immediately after the first "__" separator is kept
newK = k.split("__")[1];
} else {
newK = k;
}
newTypesMap.put(newK, entry.getValue());
}
this.colDataTypes = newTypesMap;
}
/**
 * Replaces the cache of previously-seen query keys.
 * @param keyCache list of cached keys
 */
public void setKeyCache(List keyCache) {
this.keyCache = keyCache;
}
/**
 * @return true when the composed query reduces to a single scalar value
 *         (single-column aggregate with no group-by); set during closeAll()
 */
public boolean isScalar() {
return scalar;
}
/**
 * Sets the backing PandasFrame; kept so subqueries can be resolved against it.
 * @param pandasFrame the frame this interpreter targets
 */
public void setPandasFrame(PandasFrame pandasFrame) {
this.pandasFrame = pandasFrame;
}
@Override
/**
 * Builds the full pandas expression for the current SelectQueryStruct.
 * Resets all fragment builders, delegates fragment construction to fillParts()
 * and fragment finalization to closeAll(), then concatenates the pieces in a
 * fixed order. When overrideQuery is set (query part override or min/max
 * shortcut) it replaces the composed expression entirely.
 */
public String composeQuery() {
StringBuilder query = new StringBuilder();
// read the optional "swifter" accelerator setting from DIHelper
if(Utility.getDIHelperProperty("SWIFTER") != null && !Utility.getDIHelperProperty("SWIFTER").trim().isEmpty()) {
swifter = Utility.getDIHelperProperty("SWIFTER").trim();
} else {
swifter = "";
}
// force swifter
swifter = "";
if(Utility.getDIHelperProperty("EXP") != null && !Utility.getDIHelperProperty("EXP").trim().isEmpty()) {
exp = Utility.getDIHelperProperty("EXP").trim();
} else {
exp = "";
}
// reset all per-query state so the interpreter can be reused
headers = new ArrayList<>();
groupIndex = 0;
actHeaders = new ArrayList<>();
types= new ArrayList<>();
groupColumns = new ArrayList<>();
selectorCriteria = new StringBuilder("");
groupCriteria = new StringBuilder("");
aggCriteria = new StringBuilder("");
renCriteria = new StringBuilder("");
filterCriteria = new StringBuilder("");
havingCriteria = new StringBuilder("");
scalar = false;
functionMap = new HashMap<>();
aggHash = new HashMap<>();
aggKeys = new ArrayList<>();
aggHash2 = new HashMap<>();
typesHash = new HashMap<>(); // EXPERIMENTING
orderHash = new HashMap<>();
aliasHash = new HashMap<>();
orderBy = new StringBuilder("");
normalizer = new StringBuilder(".to_dict('split')");//['data']"); // Ideally I should be able to put drop duplicates here
ascending = new StringBuilder("");
dateCriteria = new StringBuilder("");
dateHash = new HashMap<>();
dateKeys = new ArrayList<>();
arithmeticCriteria = new StringBuilder("");
arithmeticHash = new HashMap<>();
arithmeticKeys = new ArrayList<>();
caseWhenCriteria = new StringBuilder("");
caseWhenHash = new HashMap<>();
caseWhenFunctionList = new ArrayList<>();
caseWhenKeys = new ArrayList<>();
renameColList = new ArrayList<>();
caseWhenCount = 0;
// default row window is [0, 500); overridden by qs offset/limit below
long limit = 500;
start = 0 ;
end = limit;
if(((SelectQueryStruct) this.qs).getOffset() > 0) {
start = ((SelectQueryStruct) this.qs).getOffset();
}
if(((SelectQueryStruct) this.qs).getLimit() != 0) {
end = (start + ((SelectQueryStruct) this.qs).getLimit());
}
// build every fragment, then wrap/close each one
fillParts();
closeAll();
StringBuilder cachedFrame = new StringBuilder(wrapperFrameName);
if(overrideQuery == null) {
// concatenation order matters: frame, assigns, filters, grouping,
// aggregation, case-when, rename, then column projection
query.append(cachedFrame)
.append(dateCriteria)
.append(arithmeticCriteria)
.append(this.filterCriteria)
.append(this.havingCriteria)
.append(this.groupCriteria)
.append(this.aggCriteria2)
.append(caseWhenCriteria)
.append(renameCriteria)
.append(this.selectorCriteria);
// DISTINCT only applies to non-aggregated, non-scalar projections
if(!scalar && aggCriteria2.toString().isEmpty()) {
query.append(addDistinct(((SelectQueryStruct) this.qs).isDistinct()));
}
// TODO: need to be more elegant than this
query.append(scalar ? "" : orderBy)
.append(addLimitOffset(start, end))
//.append(orderBy2)
.append(normalizer);
//.append(".fillna('')");
// TODO: NEED TO DISTINCT THE LIST RETURNED
// if(!scalar && !aggCriteria2.toString().isEmpty()) {
// query.append(addDistinct(((SelectQueryStruct) this.qs).isDistinct()));
// }
} else {
// an override replaces the composed query wholesale
query = overrideQuery;
if(actHeaders != null && actHeaders.size() > 0) {
headers = actHeaders;
}
}
return query.toString();
}
/**
 * Experimental numpy-groupies path: translates the recorded group/aggregate
 * selectors into a python-side runGroupy(...) call on the frame wrapper and
 * installs the resulting cache-column expression as the override query.
 * Only runs when at least one group-by column was recorded.
 */
private void buildListMap() {
// step1 - iterate through order list
// for every order try to see if it is a groupby or is it a aggregate
// based on either one build that list
// as you build the aggregate also build the function list
// this ONLY works when there is one groupby
// this ONLY works when the groupby is ahead of calculated column.. although I will force it to the first one just now
try {
if(!groupColList.isEmpty()) {
// rewrite the filter to target the python-side data cache
String filter = "''";
if(filterCriteria.length() > 0)
filter = "\"" + composeFilterString() +"\"";
filter = filter.replace("__f", "");
filter = filter.replace(frameName, "this.cache['data']");
// build the python literal lists: group columns, agg columns, agg functions
StringBuilder gList = new StringBuilder("[");
StringBuilder aggList = new StringBuilder("[");
StringBuilder fList = new StringBuilder("[");
String groupcol = (String)groupColList.get(0);
for(int selectIndex = 0;selectIndex < orderList.size();selectIndex++) {
String thisSelector = (String)orderList.get(selectIndex);
if(groupColList.contains(thisSelector)) {
// process it as group
gList.append("'").append(thisSelector).append("'");
composeGroupCacheString(thisSelector, true);
} else if(aggColMap.containsKey(thisSelector)) {
// process this as an aggregate
String aggCol = (String)aggColMap.get(thisSelector);
String aggFunc = (String)aggColMap.get(thisSelector+"__f");
aggList.append("'").append(aggCol).append("'");
fList.append("'").append(aggFunc).append("'");
composeAggCacheString(groupcol, aggCol, thisSelector, aggFunc, true);
}
}
gList.append("]");
aggList.append("]");
fList.append("]");
logger.info("index >>" + gList);
logger.info("agg >>" + aggList);
logger.info("Function >>" + fList);
// order map
logger.info("Order Map" + orderListMap);
// assemble the expression that reads results back from the wrapper cache,
// in the original selector order; group columns are wrapped in list(...)
StringBuilder orderString = new StringBuilder("[");
String cacheName = frameName + "w.cache";
for(int orderIndex = 0;orderIndex < orderList.size();orderIndex++) {
String thisOrder = (String)orderList.get(orderIndex);
if(orderIndex != 0)
orderString.append(",");
// pull the name of selector
String orderSelector = (String)orderListMap.get(thisOrder);
// if this was a group tag a list with it
if(!aggColMap.containsKey(thisOrder)) {
orderString.append("list(").append(cacheName).append("[\"").append(orderSelector).append("\"]").append(")");
} else {
orderString.append(cacheName).append("[\"").append(orderSelector).append("\"]");
}
}
orderString.append("]");
// execute the grouping on the python side; result lands in the wrapper cache
String script = frameName + "w.runGroupy(" + filter + ", " + gList + ", " + aggList + ", " + fList + ", '')";
Object obj = pyt.runScript(script);
// this will ultimately be the query
logger.info("And the order string " + orderString);
// try replacing the query
this.overrideQuery = orderString;
qs.getPragmap().put("format", "parquet");
}
else {
// nothing to see please move on
}
} catch (Exception e) {
logger.error("StackTrace: ", e);
}
}
/**
 * Populates each query fragment, honoring any pre-computed "parts" stored on
 * the SelectQueryStruct: when a part is present it is used verbatim, otherwise
 * the corresponding builder method is invoked. Case-when, date-function and
 * arithmetic assigns are always regenerated.
 */
private void fillParts() {
SelectQueryStruct sqs = (SelectQueryStruct) qs;
Map partMap = sqs.getParts();
// a QUERY part overrides everything composed here
if (partMap.containsKey(SelectQueryStruct.Query_Part.QUERY)) {
overrideQuery = new StringBuilder(partMap.get(SelectQueryStruct.Query_Part.QUERY) + "");
}
if (partMap.containsKey(SelectQueryStruct.Query_Part.FILTER)) {
filterCriteria = new StringBuilder(partMap.get(SelectQueryStruct.Query_Part.FILTER) + "");
} else {
addFilters();
}
if (partMap.containsKey(SelectQueryStruct.Query_Part.HAVING)) {
havingCriteria = new StringBuilder(partMap.get(SelectQueryStruct.Query_Part.HAVING) + "");
} else {
addHavings();
}
if (partMap.containsKey(SelectQueryStruct.Query_Part.SELECT)) {
selectorCriteria = new StringBuilder(partMap.get(SelectQueryStruct.Query_Part.SELECT) + "");
} else {
addSelectors();
}
if (partMap.containsKey(SelectQueryStruct.Query_Part.SORT)) {
orderBy = new StringBuilder(partMap.get(SelectQueryStruct.Query_Part.SORT) + "");
} else {
processOrderBy();
}
if (partMap.containsKey(SelectQueryStruct.Query_Part.AGGREGATE)) {
aggCriteria2 = new StringBuilder(partMap.get(SelectQueryStruct.Query_Part.AGGREGATE) + "");
} else {
genAggString();
}
if (partMap.containsKey(SelectQueryStruct.Query_Part.GROUP)) {
groupCriteria = new StringBuilder(partMap.get(SelectQueryStruct.Query_Part.GROUP) + "");
} else {
processGroupSelectors();
}
// these assigns are derived from the selectors processed above
genIfElseString();
genDateFunctionString();
genArithmeticString();
}
/**
 * Returns the pandas suffix implementing SELECT DISTINCT.
 * @param distinct whether the query requested distinct rows
 * @return ".drop_duplicates()" when distinct, otherwise the empty string
 */
private String addDistinct(boolean distinct) {
return distinct ? ".drop_duplicates()" : "";
}
/**
 * Wraps any accumulated filter expression in a pandas ".loc[...]" accessor.
 * No-op when no filter was built.
 */
private void closeFilters() {
if(filterCriteria.length() > 0) {
StringBuilder wrapped = new StringBuilder(".loc[");
wrapped.append(filterCriteria).append("]");
filterCriteria = wrapped;
}
}
/**
 * Finalizes every fragment built by fillParts(): closes open brackets, wraps
 * filters in .loc[...], converts the aggregate fragment for the grouped /
 * ungrouped / scalar cases, and assembles the rename and order-by suffixes.
 * Fragments supplied as pre-computed query parts are left untouched.
 */
public void closeAll() {
boolean aggregate = false;
SelectQueryStruct sqs = (SelectQueryStruct)qs;
Map partMap = sqs.getParts();
if(this.aggCriteria2.toString().length() > 0 && !partMap.containsKey(SelectQueryStruct.Query_Part.AGGREGATE))
{
if(!((SelectQueryStruct) this.qs).getGroupBy().isEmpty()) {
// grouped aggregate: close the agg call and flatten the group index
this.aggCriteria = aggCriteria.append("})").append(".reset_index()");
this.aggCriteria2 = aggCriteria2.append(")").append(".reset_index()");
this.renCriteria = renCriteria.append("}).reset_index()");
} else if (headers.size() > 1) {
// ungrouped multi-column aggregate: fake a constant group column so
// .agg(...) still yields a frame, then drop the helper column
String tempCol = Utility.getRandomString(6);
this.aggCriteria2 = new StringBuilder(".assign(" + tempCol + "=0).groupby('"+tempCol+"')").append(aggCriteria2)
.append(").reset_index().drop('"+tempCol+"',axis=1)");
} else if(headers.size() == 1) {
// single-column ungrouped aggregate collapses to one scalar value
this.aggCriteria = aggCriteria.append("}).reset_index()");
this.aggCriteria2 = aggCriteria2.append(").reset_index()");
normalizer = new StringBuilder(".to_dict('split')['data'][0][1]");
aggCriteria2 = aggCriteria;
scalar = true;
}
aggregate = true;
}
if (this.selectorCriteria.length() > 0 && !partMap.containsKey(SelectQueryStruct.Query_Part.SELECT) && !scalar) {
// project the selected columns: frame[['a','b',...]]
StringBuilder tempSelectorBuilder = new StringBuilder("[[");
this.selectorCriteria = tempSelectorBuilder.append(this.selectorCriteria).append("]]");
if (!renameColList.isEmpty()) {
// apply column aliases via .rename(columns={...})
renameCriteria.append(".rename(columns={");
for (int i = 0; i < renameColList.size(); i++) {
if (i == 0) {
renameCriteria.append(renameColList.get(i));
} else {
renameCriteria.append(",").append(renameColList.get(i));
}
}
renameCriteria.append("})");
}
} else if (!partMap.containsKey(SelectQueryStruct.Query_Part.SELECT)) {
this.selectorCriteria.delete(0, selectorCriteria.length());
}
if (havingCriteria.length() > 0 && !partMap.containsKey(SelectQueryStruct.Query_Part.HAVING)) {
// HAVING is expressed as groupby(...).filter(lambda x: <condition>)
StringBuilder tempGroupCriteria = new StringBuilder(groupCriteria);
havingCriteria = tempGroupCriteria.append("]).filter(lambda x: ").append(havingCriteria).append(")");
}
if(groupCriteria.length() > 0 && !partMap.containsKey(SelectQueryStruct.Query_Part.GROUP)) {
groupCriteria.append("], sort=False)");
// when doing a group by, it creates a different object
List groupSelectors = ((SelectQueryStruct) this.qs).getGroupBy();
if (actHeaders.size() == groupSelectors.size()) {
// to convert to dataframe we need to append
groupCriteria.append(".count().reset_index()");
}
}
if(filterCriteria.length() > 0 && !partMap.containsKey(SelectQueryStruct.Query_Part.FILTER)) {
filterCriteria = new StringBuilder(".loc[").append(filterCriteria).append("]");
}
if(orderBy.length() != 0 && !partMap.containsKey(SelectQueryStruct.Query_Part.SORT)) {
// combine it
orderBy.append("],").append(ascending).append("])");
}
if(orderBy2.length() != 0 && !partMap.containsKey(SelectQueryStruct.Query_Part.SORT)) {
// combine it
orderBy2.append("],").append(ascending2).append("])");
}
}
/**
 * Builds the .sort_values(...) fragment from the query's combined order-by
 * list. A sort target is resolved either through orderHash (selector alias)
 * or aliasHash (physical column -> alias); unresolvable targets are skipped.
 */
private void processOrderBy() {
List qcos = ((SelectQueryStruct) this.qs).getCombinedOrderBy();
for(int orderIndex = 0; orderIndex < qcos.size(); orderIndex++) {
IQuerySort sortOp = qcos.get(orderIndex);
// only plain column sorts are supported here
if(sortOp.getQuerySortType() == IQuerySort.QUERY_SORT_TYPE.COLUMN) {
QueryColumnOrderBySelector orderBy = (QueryColumnOrderBySelector) sortOp;
String sort = null;
String alias = orderBy.getAlias();
if(alias.length() == 0) {
alias = orderBy.getTable();
}
// pandas sort_values takes ascending=True/False per column
ORDER_BY_DIRECTION sortDir = orderBy.getSortDir();
if(sortDir == ORDER_BY_DIRECTION.ASC) {
sort = "True";
} else if (sortDir == ORDER_BY_DIRECTION.DESC){
sort = "False";
}
StringBuilder orderByClause = null;
if(orderHash.containsKey(alias)) {
orderByClause = orderHash.get(alias);
}
// fall back to the physical-column alias mapping
if (orderByClause == null && aliasHash.containsKey(alias)) {
orderByClause = new StringBuilder("'"+aliasHash.get(alias)+"'");
}
if(orderByClause != null) {
// check if it is aggregate
// at this point the alias does it
//addOrder(orderByClause, sort);
if (orderHash.containsKey(alias)) {
addOrder(new StringBuilder(alias), sort);
} else {
addOrder(new StringBuilder(aliasHash.get(alias)), sort);
}
// also add the other piece to test
addOrder2(orderByClause, sort);
}
}
}
//if(!processed)
// orderBy = new StringBuilder("");
}
/**
 * Appends one column to the .sort_values([...], ascending=[...]) fragment.
 * The first call opens both clauses; later calls comma-join onto them.
 * @param curOrder column name to sort by
 * @param asc python boolean literal "True"/"False"
 */
private void addOrder(StringBuilder curOrder, String asc) {
boolean firstColumn = (orderBy.length() == 0);
if(firstColumn) {
orderBy = new StringBuilder(".sort_values([");
ascending = new StringBuilder("ascending=[");
} else {
orderBy.append(",");
ascending.append(",");
}
ascending.append(asc);
orderBy.append("'").append(curOrder).append("'");
}
/**
 * Appends one level to the alternate .sort_index(level=[...], ascending=[...])
 * fragment (kept alongside addOrder for testing). First call opens the clauses.
 * @param curOrder sort-index level expression (appended unquoted)
 * @param asc python boolean literal "True"/"False"
 */
private void addOrder2(StringBuilder curOrder, String asc) {
boolean firstLevel = (orderBy2.length() == 0);
if(firstLevel) {
orderBy2 = new StringBuilder(".sort_index(level=[");
ascending2 = new StringBuilder("ascending=[");
} else {
orderBy2.append(",");
ascending2.append(",");
}
ascending2.append(asc);
orderBy2.append(curOrder);
}
/**
 * Builds the pandas row-window suffix ".iloc[start:end]".
 * When end is not positive the upper bound is omitted (".iloc[start:]").
 * @param start inclusive starting row
 * @param end exclusive ending row, or <= 0 for unbounded
 */
private String addLimitOffset(long start, long end) {
String upperBound = (end > 0) ? String.valueOf(end) : "";
return ".iloc[" + start + ":" + upperBound + "]";
}
/**
 * Builds the WHERE-clause fragment from the query's combined filters into
 * {@code filterCriteria}, delegating to the list-based addFilters overload.
 */
public void addFilters() {
addFilters(qs.getCombinedFilters().getFilters(), this.wrapperFrameName, this.filterCriteria, false);
}
/**
 * Builds the HAVING-clause fragment from the query's having filters into
 * {@code havingCriteria}, delegating to the list-based addHavingFilters overload.
 */
public void addHavings() {
addHavingFilters(qs.getHavingFilters().getFilters(), this.wrapperFrameName, this.havingCriteria, false);
}
/**
 * Adds all the parameters passed through the SELECT statement. Keeps track of SELECTOR headers and types for
 * sync with output in PandasFrame. Builds the comma-joined projection list in
 * {@code selectorCriteria} and records header aliases, actual output headers
 * and column data types for the result wrapper.
 */
public void addSelectors() {
this.selectorCriteria = new StringBuilder();
List selectors = qs.getSelectors();
for(int i = 0; i < selectors.size(); i++) {
IQuerySelector selector = selectors.get(i);
SELECTOR_TYPE selectorType = selector.getSelectorType();
// processSelector returns the quoted alias (and registers any side clauses)
String newHeader = processSelector(selector, wrapperFrameName, true, true);
if (i == 0) {
this.selectorCriteria.append(newHeader);
} else {
this.selectorCriteria.append(",").append(newHeader);
}
// capture the quoted form for order-by resolution BEFORE stripping quotes
StringBuilder sb = new StringBuilder(newHeader);
newHeader = newHeader.replace("'", "");
headers.add(newHeader);
orderHash.put(newHeader, sb);
// unique group concat surfaces under its input column's alias after group-by
if (selectorType == IQuerySelector.SELECTOR_TYPE.FUNCTION
&& ((QueryFunctionSelector) selector).getFunction().equalsIgnoreCase(QueryFunctionHelper.UNIQUE_GROUP_CONCAT)) {
actHeaders.add(((QueryFunctionSelector) selector).getAllQueryColumns().get(0).getAlias());
} else {
actHeaders.add(newHeader);
}
if (selectorType == IQuerySelector.SELECTOR_TYPE.COLUMN) {
SemossDataType curType = this.colDataTypes.get(((QueryColumnSelector) selector).getColumn());
typesHash.put(newHeader, curType);
}
}
// emit types in header order; headers without a recorded type are skipped
for (String header : headers) {
if (typesHash.containsKey(header))
types.add(typesHash.get(header));
}
}
/**
 * Returns the SELECT headers recorded during the last composeQuery() call.
 * @return header names in selector order, or null before any query is composed
 */
public String[] getHeaders()
{
// guard clause: headers is only populated by composeQuery()
if(headers == null) {
return null;
}
String[] result = new String[this.headers.size()];
this.headers.toArray(result);
return result;
}
/**
 * Returns the data types parallel to {@link #getHeaders()}.
 * @return types in header order, or null before any query is composed
 */
public SemossDataType[] getTypes()
{
// mirror getHeaders(): availability is keyed off the headers list
if(headers == null) {
return null;
}
// sized by the header list to stay parallel with getHeaders()
SemossDataType[] result = new SemossDataType[this.headers.size()];
this.types.toArray(result);
return result;
}
/*
 * Dispatches a SELECT-clause selector to its type-specific handler and returns
 * the pandas expression (or quoted alias) for it; null for unsupported types.
 */
public String processSelector(IQuerySelector selector, String tableName, boolean includeTableName, boolean useAlias, boolean...useTable) {
SELECTOR_TYPE selectorType = selector.getSelectorType();
// plain columns are only qualified with the frame name when explicitly requested
String tableNameForCol = (useTable != null && useTable.length > 0 && useTable[0]) ? tableName : null;
switch(selectorType) {
case COLUMN:
return processColumnSelector((QueryColumnSelector) selector, tableNameForCol);
case CONSTANT:
return processConstantSelector((QueryConstantSelector) selector);
case FUNCTION:
return processFunctionSelector((QueryFunctionSelector) selector, tableName);
case ARITHMETIC:
return processArithmeticSelector((QueryArithmeticSelector) selector, tableName, includeTableName, useAlias, useTable);
case IF_ELSE:
return processIfElseSelector((QueryIfSelector) selector, tableName);
default:
return null;
}
}
/**
 * Handles pandas date-part functions (year, quarter, month name, week, day name):
 * registers an assign clause that converts the inner column with pd.to_datetime
 * and extracts the requested field, then returns the quoted alias.
 *
 * @param selector date function selector
 * @param tableName python-side frame name used to qualify the inner column
 */
private String processDateFunctionSelector(QueryFunctionSelector selector, String tableName) {
String pandasFunction = QueryFunctionHelper.convertFunctionToPandasSyntax(selector.getFunction());
String alias = selector.getAlias();
IQuerySelector innerSelector = selector.getInnerSelector().get(0);
// qualified column reference, e.g. frame['col']
String columnName = processSelector(innerSelector, tableName, true, true, true);
dateKeys.add(alias);
StringBuilder clause = new StringBuilder();
clause.append(alias).append("=").append(columnName).append(".apply(pd.to_datetime).").append(pandasFunction).append(".values");
dateHash.put(alias, clause);
// extracted date parts are surfaced as strings
typesHash.put(alias, SemossDataType.STRING);
// Add to functionMap so downstream processing can resolve this derived column
functionMap.put(pandasFunction + columnName, selector.getAlias());
return "'" + alias + "'";
}
/**
 * Comma-joins every registered date-extraction clause (in dateKeys order) and
 * wraps the result in a single pandas ".assign(...)" call stored in dateCriteria.
 */
private void genDateFunctionString() {
for (String key : dateKeys) {
if (!dateHash.containsKey(key)) {
continue;
}
if (dateCriteria.length() > 0) {
dateCriteria.append(",");
}
dateCriteria.append(dateHash.get(key));
}
if (dateCriteria.length() > 0) {
dateCriteria = new StringBuilder(".assign(").append(dateCriteria).append(")");
}
}
/**
 * Routes a function selector: date-part extraction functions go to the
 * date handler, everything else is treated as an aggregation.
 *
 * @param selector function selector from the SELECT clause
 * @param tableName python-side frame name
 * @return the quoted alias produced by the chosen handler
 */
private String processFunctionSelector(QueryFunctionSelector selector, String tableName) {
boolean isDateExtraction = DATE_FUNCTION_LIST.contains(selector.getFunction());
return isDateExtraction
? processDateFunctionSelector(selector, tableName)
: processAggSelector(selector);
}
/**
 * Process CASE WHEN statement (SQL translation). Currently handles column and function selectors in the filter logic, and Column
 * to values / values to columns. Does not yet handle column to column or arithmetic in the filter logic.
 * Registers a python conditional expression ("a if(cond) else b") under the
 * selector alias for later emission as an .assign(...) in genIfElseString().
 * @param selector
 * @param tableName
 * @return the quoted alias of the generated column
 */
private String processIfElseSelector(QueryIfSelector selector, String tableName) {
IQueryFilter filter = selector.getCondition();
IQuerySelector precedent = selector.getPrecedent();
IQuerySelector antecedent = selector.getAntecedent();
// NOTE: processFilter (called below) may flip this flag to true when the
// condition references an aggregate — reset it before building the clause
caseWhenFunction = false;
String alias = selector.getAlias();
// synthesize an alias when none was supplied
if (alias.length() == 0) {
alias = "CASE_WHEN_"+caseWhenCount;
caseWhenCount += 1;
}
StringBuilder filterBuilder = new StringBuilder(processSelector(precedent, tableName, false, false, false));
filterBuilder.append(" if(").append(processFilter(filter, tableName)).append(") else ");
if (antecedent != null) {
filterBuilder.append(processSelector(antecedent, tableName, false, false, false));
} else {
// no ELSE branch: emit NaN
filterBuilder.append("np.nan");
}
// aggregate-based conditions are emitted against the grouped frame later
if (caseWhenFunction) {
caseWhenFunctionList.add(alias);
caseWhenFunction = false;
}
caseWhenKeys.add(alias);
caseWhenHash.put(alias, filterBuilder);
typesHash.put(alias, SemossDataType.convertStringToDataType(selector.getDataType()));
return "'" + alias + "'";
}
/**
 * Creates the assign(...) method to create new column(s) from CASE_WHEN logic.
 * Each registered case-when clause becomes "alias=frame.apply(lambda x: ...)";
 * clauses flagged as aggregate-based are applied against the grouped/aggregated
 * frame instead of the raw one.
 */
private void genIfElseString() {
for (String key : caseWhenKeys) {
//Map tempHash = caseWhen.get(key);
if (caseWhenHash.containsKey(key)) {
if (caseWhenCriteria.length() > 0) {
caseWhenCriteria.append(",");
}
caseWhenCriteria.append(key).append("=");
if (caseWhenFunctionList.contains(key)) {
// aggregate-based condition: evaluate over the grouped + aggregated frame
caseWhenCriteria.append(this.wrapperFrameName).append(groupCriteria).append("])").append(aggCriteria2)
.append(").apply(lambda x: ").append(caseWhenHash.get(key)).append(",axis=1).values");
} else {
// row-wise condition on the raw frame
caseWhenCriteria.append(this.frameName).append(".apply(lambda x: ").append(caseWhenHash.get(key)).append(",axis=1).values");
}
}
}
// wrap all generated columns into one .assign(...) call
if (caseWhenCriteria.length() > 0) {
caseWhenCriteria = new StringBuilder(".assign(").append(caseWhenCriteria).append(")");
}
}
/**
 * Process selectors that are normal columns. Accounts for columns that need to be renamed.
 * When the alias differs from the physical column, a rename entry
 * ('col':'alias') is queued for the .rename(columns={...}) suffix.
 *
 * @param selector column selector
 * @param tableName frame name to qualify the reference with, or null for a bare quoted alias
 * @return "frame['alias']" when tableName is given, otherwise "'alias'"
 */
private String processColumnSelector(QueryColumnSelector selector, String tableName) {
String columnName = selector.getColumn();
String alias = selector.getAlias();
if (!columnName.equals(alias)) {
// queue a pandas rename mapping entry: 'col':'alias'
// (StringBuilder instead of the legacy synchronized StringBuffer)
StringBuilder sb = new StringBuilder();
sb.append("'").append(columnName).append("':'").append(alias).append("'");
renameColList.add(sb);
}
aliasHash.put(columnName, alias);
if(tableName != null) {
return tableName + "['" + alias + "']";
}
return "'" + alias + "'";
}
/**
 * Sets the python-side variable names this interpreter generates expressions against.
 *
 * @param frameName name of the raw pandas frame variable
 * @param wrapperFrameName name of the frame wrapper variable
 */
public void setDataTableName(String frameName, String wrapperFrameName) {
this.frameName = frameName;
this.wrapperFrameName = wrapperFrameName;
}
/**
 * Process a constant selector. Numbers are emitted verbatim, other values are
 * double-quoted. A SubQueryExpression constant is executed first and replaced
 * by its first result value; if the subquery yields nothing (or fails), the
 * pandas missing-value literal "pd.NA" is returned.
 * @param selector
 * @return python literal for the constant
 */
private String processConstantSelector(QueryConstantSelector selector) {
Object constant = selector.getConstant();
if(constant instanceof SubQueryExpression) {
ITask innerTask = null;
try {
innerTask = ((SubQueryExpression) constant).generateQsTask();
innerTask.setLogger(logger);
if(innerTask.hasNext()) {
Object value = innerTask.next().getValues()[0];
if(value instanceof Number) {
return value.toString();
} else {
// BUGFIX: quote the resolved subquery VALUE — the original quoted
// `constant` (the SubQueryExpression object), which stringifies to
// the expression object, not the looked-up result
return "\"" + value + "\"";
}
}
} catch(Exception e) {
classLogger.error(Constants.STACKTRACE, e);
} finally {
// always release the subquery task, even on failure
if(innerTask != null) {
try {
innerTask.close();
} catch (IOException e) {
classLogger.error(Constants.STACKTRACE, e);
}
}
}
// if this doesn't return anything...
return "pd.NA";
} else if(constant instanceof Number) {
return constant.toString();
} else {
return "\"" + constant + "\"";
}
}
/**
 * Processes every GROUP BY selector. Only plain column selectors are supported;
 * anything else aborts with IllegalArgumentException. Each grouped column is
 * appended to the .groupby([...]) fragment and tracked for the experimental
 * numpy-groupies path.
 */
private void processGroupSelectors() {
List groupSelectors = ((SelectQueryStruct) this.qs).getGroupBy();
for(int sIndex = 0; sIndex < groupSelectors.size(); sIndex++) {
IQuerySelector groupBySelector = groupSelectors.get(sIndex);
// guard: non-column group-bys are not supported
if(groupBySelector.getSelectorType() != IQuerySelector.SELECTOR_TYPE.COLUMN) {
String errorMessage = "Cannot group by non QueryColumnSelector type yet...";
logger.error(errorMessage);
throw new IllegalArgumentException(errorMessage);
}
QueryColumnSelector columnSelector = (QueryColumnSelector) groupBySelector;
processGroupSelector(columnSelector);
// EXPERIMENTAL BLOCK — record the raw column for buildListMap()
this.groupColList.add(columnSelector.getColumn());
}
}
/**
 * Appends one column to the .groupby([...]) fragment and reorders the actual
 * output headers so grouped columns come first (pandas emits group keys ahead
 * of aggregates).
 * @param selector the group-by column
 */
private void processGroupSelector(QueryColumnSelector selector) {
// if(!processedSelector.containsKey(selector.getAlias())) {
// if(!aggHash.containsKey(selector.getTable()))
{
// first group column opens the clause; later ones comma-join
if(groupCriteria.length() == 0)
groupCriteria.append(".groupby([");
else
groupCriteria.append(",");
groupCriteria.append("'").append(selector.getColumn()).append("'");
}
// move this column's header to the front of the actual-header order,
// matching where pandas places group keys in the result
if(actHeaders.contains(selector.getColumn())) {
int index = actHeaders.indexOf(selector.getColumn());
actHeaders.remove(selector.getColumn());
//headers.remove(selector.getTable());
actHeaders.add(groupIndex, headers.get(index));
groupIndex++;
}
//headers.add(groupIndex, selector.getTable());
// we dont know how many groups would it be.. so updating
// NOTE(review): this only re-registers columns ALREADY in processedSelector —
// looks like it may have been intended as !containsKey; confirm before changing
if(processedSelector.containsKey(selector.getColumn())) {
processedSelector.put(selector.getColumn(), Boolean.TRUE);
headers.add(selector.getColumn());
}
// }
}
/**
 * @return the map of "pandasFunction + columnName" -> selector alias recorded
 *         while processing aggregate and date-function selectors
 */
public Map functionMap() {
return this.functionMap;
}
/**
 * Generates the aggregate string to use in the pandas query. Takes into account the instances when
 * an aggregation is needed but no aggregation parameters are passed through (i.e when querying a column and
 * using GROUPBY & having.
 * Builds both the legacy dict-style fragment (aggCriteria) and the named-agg
 * fragment (aggCriteria2) from the builders registered in processAggSelector().
 */
private void genAggString() {
aggCriteria = new StringBuilder("");
// group-by without any aggregate selectors: drop the groupby unless a
// having clause requires it
if (aggKeys.size() == 0 && !((SelectQueryStruct) this.qs).getGroupBy().isEmpty()) {
if (havingList.size() == 0) {
groupCriteria.delete(0, groupCriteria.length());
} else {
aggCriteria2.append(havingList.get(0));
}
}
for(int cIndex = 0;cIndex < aggKeys.size();cIndex++) {
String colKey = aggKeys.get(cIndex);
// I need to replace this with aggHash2
if(aggHash.containsKey(colKey)) {
if(aggCriteria.length() != 0)
aggCriteria.append(",");
aggCriteria.append(aggHash.get(colKey)).append("]");
}
//aggCriteria.append(aggHash.get(colKey));
if(aggHash2.containsKey(colKey)) {
if(aggCriteria2.length() != 0)
aggCriteria2.append(",");
aggCriteria2.append(aggHash2.get(colKey));
}
}
// open the .agg calls; they are closed later in closeAll()
if(aggCriteria.length() > 0 || aggCriteria2.length() > 0) {
aggCriteria = new StringBuilder(".agg({").append(aggCriteria);
aggCriteria2 = new StringBuilder(".agg(").append(aggCriteria2);
}
// just a way to say the override was added by this guy and not coming from outside
if(overrideQuery != null && overrideQuery.length() > 0 && aggHash2.size() > 0 && !((SelectQueryStruct)qs).getParts().containsKey(SelectQueryStruct.Query_Part.QUERY))
{
overrideQuery.append("]");
}
}
/** Processes the Selectors that use aggregation.
* Need to be harmonized w/ addSelector.
*
* @param selector
* @return
*/
private String processAggSelector(QueryFunctionSelector selector) {
	// Translate an aggregate function selector (e.g. Count(Studio)) into the
	// bookkeeping structures used later to build the pandas .agg(...) clause.
	String function = selector.getFunction();
	String columnName = selector.getAllQueryColumns().get(0).getAlias();
	logger.info("Column Name .. >>" + selector.getAllQueryColumns().get(0).getColumn() + "<<>>" + selector.getAllQueryColumns().get(0).getTable());
	String pandasFunction = QueryFunctionHelper.convertFunctionToPandasSyntax(function);
	StringBuilder aggBuilder = new StringBuilder("");
	StringBuilder aggBuilder2 = new StringBuilder("");
	// keep track of the alias so it can be referenced later (e.g. in sorts)
	String aggAlias = selector.getAlias();
	// named-aggregation form, e.g.
	// mv.drop_duplicates().groupby(['Genre']).agg(Mango = ('Studio','count')).iloc[0:2000]
	// where Mango is the alias (no quotes)
	aggBuilder2.append(aggAlias).append("=('").append(columnName).append("' , '").append(pandasFunction).append("')");
	// dict form accumulates multiple functions per column as 'col':['f1','f2'
	// (the closing "]" is appended when the criteria string is assembled)
	if(aggHash.containsKey(columnName)) {
		aggBuilder = aggHash.get(columnName);
		aggBuilder.append(",");
	} else {
		aggBuilder.append("'").append(columnName).append("':[");
	}
	aggBuilder.append("'" + pandasFunction + "'");
	orderHash.put(selector.getAlias(), new StringBuilder("('").append(columnName).append("')"));
	aggHash.put(columnName, aggBuilder);
	// keyed by alias for the named-aggregation form
	aggHash2.put(aggAlias, aggBuilder2);
	// BUGFIX: guard against inserting the same column key twice when a column is
	// aggregated more than once (the alias add below was already guarded); a
	// duplicate key would emit a redundant duplicate entry in the .agg({...}) dict
	if(!aggKeys.contains(columnName))
		aggKeys.add(columnName);
	// also add the alias name
	if(!aggKeys.contains(aggAlias))
		aggKeys.add(aggAlias);
	// if it is a group concat, don't add this to actual headers here since it
	// will get added during group by
	functionMap.put(pandasFunction + columnName, selector.getAlias());
	// assume the output type follows the input column's type for most operations
	SemossDataType curType = this.colDataTypes.get(columnName);
	if(curType == SemossDataType.STRING || curType == SemossDataType.BOOLEAN) {
		// aggregating strings/booleans (e.g. count) yields an integer
		typesHash.put(aggAlias, SemossDataType.INT);
	} else if (curType == SemossDataType.INT && pandasFunction.equalsIgnoreCase("mean")) {
		// mean of ints produces a double
		typesHash.put(aggAlias, SemossDataType.DOUBLE);
	} else {
		typesHash.put(aggAlias, curType);
	}
	// quick fix: with no group-by, min/max collapse to a simple column-level
	// override query; NOTE(review): filters are not honored here yet
	if(((SelectQueryStruct) this.qs).getGroupBy().isEmpty() && (pandasFunction.contains("min") || pandasFunction.contains("max"))) {
		if(overrideQuery == null || overrideQuery.length() == 0) {
			overrideQuery = new StringBuilder("[");
		} else {
			overrideQuery.append(",");
		}
		overrideQuery.append(wrapperFrameName).append("['").append(columnName).append("'].").append(pandasFunction).append("()");
	}
	// EXPERIMENTAL BLOCK: track alias -> source column and alias -> function
	aggColMap.put(selector.getAlias(), columnName);
	aggColMap.put(selector.getAlias() + "__f", pandasFunction);
	aggColList.add(columnName);
	// EXPERIMENTAL BLOCK
	return "'" + aggAlias + "'";
}
/**
* Process arithmetic selector. Can handle column or Numeric input. Returns empty string - similar method build as
* the function processing methods.
* @param selector
* @param tableName
* @param includeTableName
* @param useAlias
* @param useTable
* @return
*/
private String processArithmeticSelector(QueryArithmeticSelector selector, String tableName, boolean includeTableName, boolean useAlias, boolean...useTable) {
	IQuerySelector leftSelector = selector.getLeftSelector();
	IQuerySelector rightSelector = selector.getRightSelector();
	String mathExpr = selector.getMathExpr();
	String alias = selector.getAlias();
	String lColumnName = processSelector(leftSelector, tableName, includeTableName, useAlias, true);
	String rColumnName = processSelector(rightSelector, tableName, includeTableName, useAlias, true);
	StringBuilder sb = new StringBuilder();
	StringBuilder replace = new StringBuilder();
	// resolve operand data types, falling back to the frame's column metadata
	// when the selector itself does not declare a type
	SemossDataType leftDataType = SemossDataType.convertStringToDataType(leftSelector.getDataType());
	if (leftDataType == null) {
		leftDataType = lookupColumnDataType(leftSelector);
	}
	SemossDataType rightDataType = SemossDataType.convertStringToDataType(rightSelector.getDataType());
	if (rightDataType == null) {
		// BUGFIX: an unconditional second lookup on the full query struct name used
		// to follow this if/else and clobbered the "table__column" split result
		rightDataType = lookupColumnDataType(rightSelector);
	}
	if (!(leftDataType == SemossDataType.INT || leftDataType == SemossDataType.DOUBLE) ||
			!(rightDataType == SemossDataType.INT || rightDataType == SemossDataType.DOUBLE)) {
		throw new IllegalArgumentException("Arithmetic selectors must be of type INT or DOUBLE.");
	}
	// division can produce +/-inf in pandas; normalize those to NaN
	if (mathExpr.equals("/")) {
		replace.append(".replace([np.inf, -np.inf], np.nan)");
	}
	arithmeticKeys.add(alias);
	sb.append(alias).append("=(").append(lColumnName).append(mathExpr).append(rColumnName).append(")").append(replace).append(".values");
	arithmeticHash.put(alias, sb);
	// result type: double if either operand is a double or we divided, else int
	if (leftDataType == SemossDataType.DOUBLE || rightDataType == SemossDataType.DOUBLE || mathExpr.equals("/")) {
		typesHash.put(alias, SemossDataType.DOUBLE);
	} else {
		typesHash.put(alias, SemossDataType.INT);
	}
	return "'" + alias + "'";
}
/**
 * Looks up a selector's data type from the frame's column metadata, handling
 * the "table__column" query struct naming convention.
 * @param selector the selector whose column type is needed
 * @return the {@link SemossDataType} for the column, or null if unknown
 */
private SemossDataType lookupColumnDataType(IQuerySelector selector) {
	String qsName = selector.getQueryStructName();
	if (qsName.contains("__")) {
		return this.colDataTypes.get(qsName.split("__")[1]);
	}
	return this.colDataTypes.get(qsName);
}
/**
 * Builds the pandas {@code .assign(...)} clause that materializes each derived
 * arithmetic column previously registered in {@code arithmeticHash}, joining
 * the individual expressions with commas in {@code arithmeticKeys} order.
 */
private void genArithmeticString() {
	for (String alias : arithmeticKeys) {
		StringBuilder expr = arithmeticHash.get(alias);
		if (expr == null) {
			continue;
		}
		if (arithmeticCriteria.length() > 0) {
			arithmeticCriteria.append(",");
		}
		arithmeticCriteria.append(expr);
	}
	// wrap the accumulated expressions only if there is at least one
	if (arithmeticCriteria.length() > 0) {
		arithmeticCriteria = new StringBuilder(".assign(").append(arithmeticCriteria).append(")");
	}
}
//////////////////////////////////// end adding selectors /////////////////////////////////////
//////////////////////////////////// start adding filters /////////////////////////////////////
/**
 * Renders each filter in the list to pandas syntax and appends it to the
 * running filter expression, AND-ing ("&amp;") successive clauses together.
 *
 * @param filters   filters to translate (elements are {@link IQueryFilter})
 * @param tableName frame/table name used when rendering column references
 * @param builder   accumulator the rendered filter syntax is appended to
 * @param useAlias  whether column aliases should be used in the output
 */
public void addFilters(List filters, String tableName, StringBuilder builder, boolean useAlias) {
	for (Object f : filters) {
		StringBuilder clause = processFilter((IQueryFilter) f, tableName, useAlias);
		if (clause == null) {
			continue;
		}
		if (builder.length() > 0) {
			builder.append(" & ");
		}
		builder.append(clause.toString());
	}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////// Filters for python lambda functions /////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Decode a general filter into its simpler parts. Method is used for selectors that use lambda functions for filtering, specifically
* those used in the IF_ELSE logic.
* @param filter
* @param tableName
* @return
*/
private StringBuilder processFilter(IQueryFilter filter, String tableName) {
	// dispatch on the filter kind; unsupported kinds yield null
	switch (filter.getQueryFilterType()) {
		case SIMPLE:
			return processSimpleQueryFilter((SimpleQueryFilter) filter, tableName);
		case AND:
			return processAndQueryFilter((AndQueryFilter) filter, tableName);
		case OR:
			return processOrQueryFilter((OrQueryFilter) filter, tableName);
		default:
			return null;
	}
}
/**
* Process Simple query filters. Passes to methods that construct lambda functions for filtering.
* @param filter
* @param tableName
* @return
*/
private StringBuilder processSimpleQueryFilter(SimpleQueryFilter filter, String tableName) {
	NounMetadata left = filter.getLComparison();
	NounMetadata right = filter.getRComparison();
	String comparator = filter.getComparator();
	// value/query-on-the-left variants are normalized by swapping operands and
	// reversing the comparator
	switch (filter.getSimpleFilterType()) {
		case COL_TO_VALUES:
			return createLambdaFilter(left, right, comparator, tableName);
		case VALUES_TO_COL:
			return createLambdaFilter(right, left, IQueryFilter.getReverseNumericalComparator(comparator), tableName);
		case COL_TO_QUERY:
			return createSubqueryLambdaFilter(left, right, comparator, tableName);
		case QUERY_TO_COL:
			return createSubqueryLambdaFilter(right, left, IQueryFilter.getReverseNumericalComparator(comparator), tableName);
		case COL_TO_COL:
			// TODO: column-to-column comparisons are not yet supported here
		default:
			return null;
	}
}
/**
* Create lambda string for filtering. Used specifically in the IF_ELSE processor.
* @param leftComp
* @param rightComp
* @param thisComparator
* @param tableName
* @return
*/
private StringBuilder createLambdaFilter(NounMetadata leftComp, NounMetadata rightComp, String thisComparator, String tableName) {
// Get the data object to be filtered on as well as data type
IQuerySelector leftSelector = (IQuerySelector) leftComp.getValue();
SemossDataType leftDataType = SemossDataType.convertStringToDataType(leftSelector.getDataType());
String leftSelectorExpression = processSelector(leftSelector, tableName, true, false, false);
if (leftDataType == null) {
if(leftSelector.getQueryStructName().contains("__")) {
leftDataType = this.colDataTypes.get(leftSelector.getQueryStructName().split("__")[1]);
} else {
leftDataType = this.colDataTypes.get(leftSelector.getQueryStructName());
}
}
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy