All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.verdictdb.sqlreader.RelationStandardizer Maven / Gradle / Ivy

There is a newer version: 0.5.8
Show newest version
/*
 *    Copyright 2018 University of Michigan
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package org.verdictdb.sqlreader;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.verdictdb.connection.MetaDataProvider;
import org.verdictdb.core.sqlobject.AbstractRelation;
import org.verdictdb.core.sqlobject.AliasReference;
import org.verdictdb.core.sqlobject.AliasedColumn;
import org.verdictdb.core.sqlobject.AsteriskColumn;
import org.verdictdb.core.sqlobject.BaseColumn;
import org.verdictdb.core.sqlobject.BaseTable;
import org.verdictdb.core.sqlobject.ColumnOp;
import org.verdictdb.core.sqlobject.ConstantColumn;
import org.verdictdb.core.sqlobject.GroupingAttribute;
import org.verdictdb.core.sqlobject.JoinTable;
import org.verdictdb.core.sqlobject.OrderbyAttribute;
import org.verdictdb.core.sqlobject.SelectItem;
import org.verdictdb.core.sqlobject.SelectQuery;
import org.verdictdb.core.sqlobject.SubqueryColumn;
import org.verdictdb.core.sqlobject.UnnamedColumn;
import org.verdictdb.exception.VerdictDBDbmsException;
import org.verdictdb.sqlsyntax.H2Syntax;
import org.verdictdb.sqlsyntax.SqlSyntax;

public class RelationStandardizer {

  private MetaDataProvider meta;

  private SqlSyntax syntax;

  private static long itemID = 1;

  private static long duplicateIdentifer = 1;

  private static String verdictTableAliasPrefix = "vt";

  // key is the column name and value is table alias name
  private HashMap colNameAndTableAlias = new HashMap<>();

  // key is the schema name and table name and the value is table alias name
  private HashMap, String> tableInfoAndAlias = new HashMap<>();

  // key is the select column name, value is their alias
  private HashMap colNameAndColAlias = new HashMap<>();

  // key is columnOp, value is their alias name
  private HashMap columnOpAliasMap = new HashMap<>();

  // key is schema name, column name, value is alias
  // only store value if there are duplicate column names
  private HashMap, String> duplicateColNameAndColAlias = new HashMap<>();
  /**
   * * If From list is a subquery, we need to record the column alias name in colNameAndTempColAlias
   * so that we can replace the select item with the column alias name we generate.
   */
  private HashMap colNameAndTempColAlias = new HashMap<>();

  // Since we replace all table alias using our generated alias name, this map will record the table
  // alias name
  // we replaced.
  private HashMap oldTableAliasMap = new HashMap<>();

  /**
   * Creates a standardizer without a database-specific syntax.
   *
   * @param meta provider used to look up the default schema and table columns
   */
  public RelationStandardizer(MetaDataProvider meta) {
    this(meta, null);
  }

  /**
   * Creates a standardizer that also knows the syntax of the target database.
   *
   * @param meta provider used to look up the default schema and table columns
   * @param syntax database syntax; consulted when rewriting group-by expressions
   */
  public RelationStandardizer(MetaDataProvider meta, SqlSyntax syntax) {
    this.meta = meta;
    this.syntax = syntax;
  }

  /**
   * (optional) Sets the database syntax to enable database-specific standardization.
   *
   * @param syntax the database syntax to use for subsequent standardization; replaces any
   *     previously set value
   */
  public void setSyntax(SqlSyntax syntax) {
    this.syntax = syntax;
  }

  private BaseColumn replaceBaseColumn(BaseColumn col) {
    if (col.getTableSourceAlias().equals("")) {
      if (!(col.getSchemaName().equals(""))) {
        col.setTableSourceAlias(
            tableInfoAndAlias.get(new ImmutablePair<>(col.getSchemaName(), col.getTableName())));
      } else {
        col.setTableSourceAlias(colNameAndTableAlias.get(col.getColumnName()));
      }
    }
    if (colNameAndTempColAlias.containsKey(col.getColumnName())) {
      col.setColumnName(colNameAndTempColAlias.get(col.getColumnName()));
    }
    if (oldTableAliasMap.containsKey(col.getTableSourceAlias())) {
      col.setTableSourceAlias(oldTableAliasMap.get(col.getTableSourceAlias()));
    }
    if (tableInfoAndAlias.containsValue(col.getTableSourceAlias())) {
      for (Map.Entry, String> entry : tableInfoAndAlias.entrySet()) {
        if (entry.getValue().equals(col.getTableSourceAlias())) {
          col.setSchemaName(entry.getKey().getLeft());
          col.setTableName(entry.getKey().getRight());
          break;
        }
      }
    }
    if (col.getSchemaName().equals("")) {
      col.setSchemaName(meta.getDefaultSchema());
      if (tableInfoAndAlias.containsKey(
          new ImmutablePair<>(col.getSchemaName(), col.getTableSourceAlias()))) {
        col.setTableSourceAlias(
            tableInfoAndAlias.get(
                new ImmutablePair<>(col.getSchemaName(), col.getTableSourceAlias())));
      }
    }

    return col;
  }

  /**
   * Standardizes every select item and makes sure each resulting column carries an alias.
   *
   * <ul>
   *   <li>A bare {@link BaseColumn} is resolved and aliased with its own column name, or with
   *       the column name plus a running number when that name is already used as an alias.
   *   <li>A bare {@link ColumnOp} is resolved and given a generated alias whose prefix depends on
   *       the operation type.
   *   <li>An {@link AliasedColumn} keeps its alias; its inner column is resolved and the alias is
   *       recorded.
   *   <li>An {@link AsteriskColumn} is passed through unchanged.
   * </ul>
   *
   * <p>NOTE(review): an item that is none of the four types above is not added to the returned
   * list. Confirm whether such items can reach this method.
   *
   * @param selectItemList the original select list; its elements may be mutated
   * @return a new list holding the standardized select items
   * @throws VerdictDBDbmsException propagated from standardizing nested subqueries
   */
  private List<SelectItem> replaceSelectList(List<SelectItem> selectItemList)
      throws VerdictDBDbmsException {
    List<SelectItem> newSelectItemList = new ArrayList<>();
    for (SelectItem sel : selectItemList) {
      if (sel instanceof AliasedColumn) {
        AliasedColumn aliased = (AliasedColumn) sel;
        aliased.setColumn(replaceFilter(aliased.getColumn()));
        newSelectItemList.add(aliased);

        // Remember the user-supplied alias so later clauses can refer to it.
        UnnamedColumn inner = aliased.getColumn();
        if (inner instanceof BaseColumn) {
          colNameAndColAlias.put(((BaseColumn) inner).getColumnName(), aliased.getAliasName());
        } else if (inner instanceof ColumnOp) {
          columnOpAliasMap.put((ColumnOp) inner, aliased.getAliasName());
        }
      } else if (sel instanceof AsteriskColumn) {
        newSelectItemList.add(sel);
      } else if (sel instanceof BaseColumn) {
        BaseColumn base = replaceBaseColumn((BaseColumn) sel);
        String columnName = base.getColumnName();
        if (!colNameAndColAlias.containsValue(columnName)) {
          colNameAndColAlias.put(columnName, columnName);
          newSelectItemList.add(new AliasedColumn(base, columnName));
        } else {
          // The name is already taken as an alias; disambiguate with a running number.
          String alias = columnName + duplicateIdentifer++;
          duplicateColNameAndColAlias.put(
              new ImmutablePair<>(base.getTableSourceAlias(), columnName), alias);
          newSelectItemList.add(new AliasedColumn(base, alias));
        }
      } else if (sel instanceof ColumnOp) {
        // First replace the possible base column inside the columnop using the same way we did on
        // Where clause
        ColumnOp op = (ColumnOp) replaceFilter((ColumnOp) sel);
        String alias = columnOpAliasPrefix(op.getOpType()) + itemID++;
        columnOpAliasMap.put(op, alias);
        newSelectItemList.add(new AliasedColumn(op, alias));
      }
    }
    return newSelectItemList;
  }

  // Returns the prefix of the generated alias for a column operation of the given type.
  private static String columnOpAliasPrefix(String opType) {
    switch (opType) {
      case "count":
        return "c";
      case "sum":
        return "s";
      case "avg":
        return "a";
      case "countdistinct":
        return "cd";
      default:
        return "vc";
    }
  }

  /**
   * Standardizes every column reference inside an expression, in place, using a breadth-first
   * walk over the expression tree.
   *
   * <p>Base columns are resolved via {@code replaceBaseColumn}; the operands of a column
   * operation are queued for a later visit; a subquery is standardized by a fresh
   * {@code RelationStandardizer} that starts from this instance's alias maps.
   *
   * @param condition root of the expression to rewrite; it is mutated
   * @return the same {@code condition} instance
   * @throws VerdictDBDbmsException propagated from standardizing a nested subquery
   */
  private UnnamedColumn replaceFilter(UnnamedColumn condition) throws VerdictDBDbmsException {
    List<UnnamedColumn> pending = new ArrayList<>();
    pending.add(condition);
    while (!pending.isEmpty()) {
      UnnamedColumn cond = pending.remove(0);
      if (cond instanceof BaseColumn) {
        // Mutates the column in place; the returned reference is the same object.
        replaceBaseColumn((BaseColumn) cond);
      } else if (cond instanceof ColumnOp) {
        for (UnnamedColumn operand : ((ColumnOp) cond).getOperands()) {
          pending.add(operand);
        }
      } else if (cond instanceof SubqueryColumn) {
        SubqueryColumn subqueryColumn = (SubqueryColumn) cond;
        RelationStandardizer g = new RelationStandardizer(meta, syntax);
        g.oldTableAliasMap.putAll(oldTableAliasMap);
        g.setColNameAndColAlias(colNameAndColAlias);
        // Unlike the other maps, the column-op alias map is shared by reference, not copied.
        g.setColumnOpAliasMap(columnOpAliasMap);
        g.setColNameAndTableAlias(colNameAndTableAlias);
        g.setTableInfoAndAlias(tableInfoAndAlias);
        SelectQuery newSubquery = g.standardize(subqueryColumn.getSubquery());
        subqueryColumn.setSubquery(newSubquery);
      }
    }
    return condition;
  }

  /**
   * Finds the select item whose alias is referred to by the given column.
   *
   * <p>Only an unqualified column (no table alias) is treated as a possible alias reference,
   * since a select-list alias cannot be written with a table qualifier. The alias is compared
   * with the column's name.
   *
   * @param selectItems the standardized select list
   * @param col a group-by / order-by column that may actually name a select-list alias
   * @return the first aliased select item whose alias equals the column name, or {@code null}
   *     when there is none
   */
  private AliasedColumn matchAliasFromSelectList(List<SelectItem> selectItems, BaseColumn col) {
    String qualifier = col.getTableSourceAlias();
    if (qualifier != null && !qualifier.isEmpty()) {
      return null;
    }
    String referencedName = col.getColumnName();
    for (SelectItem item : selectItems) {
      if (item instanceof AliasedColumn) {
        AliasedColumn aliasedColumn = (AliasedColumn) item;
        // Compare alias name with the column *name*; comparing with the BaseColumn object
        // itself can never be true.
        if (aliasedColumn.getAliasName().equals(referencedName)) {
          return aliasedColumn;
        }
      }
    }
    return null;
  }

  /**
   * Standardizes a group-by list; shorthand for the three-argument overload with
   * {@code isForOrderBy} set to {@code false}.
   *
   * @param selectItems the standardized select list
   * @param groupingAttributeList the original group-by attributes
   * @return the standardized group-by attributes
   * @throws VerdictDBDbmsException propagated from the three-argument overload
   */
  private List<GroupingAttribute> replaceGroupby(
      List<SelectItem> selectItems, List<GroupingAttribute> groupingAttributeList)
      throws VerdictDBDbmsException {
    return this.replaceGroupby(selectItems, groupingAttributeList, false);
  }

  /**
   * Standardizes a group-by list or, when {@code isForOrderBy} is set, an order-by list.
   *
   * <p>For group-by the result refers to actual columns where possible; for order-by it refers
   * to aliases. Attributes that are not a {@link BaseColumn}, {@link ColumnOp} or
   * {@link ConstantColumn} produce no output element.
   *
   * @param selectItems the standardized select list; used to resolve aliases and ordinals
   * @param groupingAttributeList the original attributes
   * @param isForOrderBy {@code true} when the attributes come from an order-by clause
   * @return the replaced group-by or order-by list
   * @throws VerdictDBDbmsException propagated from standardizing nested subqueries
   */
  private List<GroupingAttribute> replaceGroupby(
      List<SelectItem> selectItems,
      List<GroupingAttribute> groupingAttributeList,
      boolean isForOrderBy)
      throws VerdictDBDbmsException {
    List<GroupingAttribute> newGroupby = new ArrayList<>();
    for (GroupingAttribute g : groupingAttributeList) {
      if (g instanceof BaseColumn) {
        // 'col' can be either a base column or an alias to a select item
        BaseColumn col = (BaseColumn) g;

        // Check for aliases
        AliasedColumn aliasMatch = matchAliasFromSelectList(selectItems, col);
        if (aliasMatch != null && !isForOrderBy) {
          UnnamedColumn column = aliasMatch.getColumn();
          // Unless it is a subquery (I think it would not be possible, but just in case),
          // we use the actual operation in the group-by.
          if (column instanceof SubqueryColumn) {
            newGroupby.add(new AliasReference(aliasMatch.getAliasName()));
          } else {
            newGroupby.add(column);
          }
        } else if (col.getTableSourceAlias() != null) {
          // if it is a base column, let's get its current table alias and replace.
          String tableSource = col.getTableSourceAlias();
          String columnName = col.getColumnName();
          Pair<String, String> duplicateKey = new ImmutablePair<>(tableSource, columnName);
          if (duplicateColNameAndColAlias.containsKey(duplicateKey)) {
            newGroupby.add(
                getGroupOrOrderByColumn(
                    tableSource, duplicateColNameAndColAlias.get(duplicateKey), isForOrderBy));
          } else if (colNameAndColAlias.containsKey(columnName)) {
            newGroupby.add(
                getGroupOrOrderByColumn(
                    oldTableAliasMap.get(tableSource), columnName, isForOrderBy));
          } else {
            newGroupby.add(getGroupOrOrderByColumn(columnName, isForOrderBy));
          }
        } else {
          newGroupby.add(getGroupOrOrderByColumn(col.getColumnName(), isForOrderBy));
        }
      } else if (g instanceof ColumnOp) {
        // If it is a column-op, we substitute its table reference to our alias unless
        // this method is called to get order-by columns. In such case, we simply use alias.
        // Also, H2 does not support ColumnOp in group-by.
        if (isForOrderBy || (syntax instanceof H2Syntax)) {
          ColumnOp replaced = (ColumnOp) replaceFilter((ColumnOp) g);
          if (columnOpAliasMap.containsKey(replaced)) {
            newGroupby.add(new AliasReference(columnOpAliasMap.get(replaced)));
          } else {
            newGroupby.add(replaced);
          }
        } else {
          // Work on a copy so the caller's expression is left untouched.
          ColumnOp newCol = ((ColumnOp) g).deepcopy();
          this.replaceGroupByReference(newCol);
          newGroupby.add(newCol);
        }
      } else if (g instanceof ConstantColumn) {
        // replace index with column alias
        String value = (String) ((ConstantColumn) g).getValue();
        int index;
        try {
          index = Integer.parseInt(value);
        } catch (NumberFormatException e) {
          // Not an ordinal: treat the constant's text as an alias name.
          newGroupby.add(new AliasReference(value));
          continue;
        }
        // Ordinals are 1-based positions in the select list.
        AliasedColumn col = (AliasedColumn) selectItems.get(index - 1);
        UnnamedColumn column = col.getColumn();
        if (column instanceof BaseColumn && !isForOrderBy) {
          BaseColumn baseCol = (BaseColumn) column;
          newGroupby.add(new BaseColumn(baseCol.getTableSourceAlias(), baseCol.getColumnName()));
        } else {
          newGroupby.add(new AliasReference(col.getAliasName()));
        }
      }
    }
    return newGroupby;
  }

  // Builds the reference used in the rewritten clause: an AliasReference when the caller is
  // processing order-by, a BaseColumn otherwise. The table part is dropped when it is null.
  private GroupingAttribute getGroupOrOrderByColumn(
      String table, String column, boolean isForOrderBy) {
    boolean hasTable = table != null;
    if (isForOrderBy) {
      if (hasTable) {
        return new AliasReference(table, column);
      }
      return new AliasReference(column);
    }
    if (hasTable) {
      return new BaseColumn(table, column);
    }
    return new BaseColumn(column);
  }

  // Table-less variant: an AliasReference for order-by, a BaseColumn for group-by.
  private GroupingAttribute getGroupOrOrderByColumn(String column, boolean isForOrderBy) {
    if (!isForOrderBy) {
      return new BaseColumn(column);
    }
    return new AliasReference(column);
  }

  /**
   * Rewrites, in place, the table alias of every base column found inside the given expression.
   *
   * <p>For each base column the generated alias is looked up first by the column's current
   * table alias and, failing that, by its column name. The column is left untouched when
   * neither lookup yields a value.
   *
   * @param c the expression to walk; column operations are descended into recursively
   */
  private void replaceGroupByReference(UnnamedColumn c) {
    if (c instanceof ColumnOp) {
      for (UnnamedColumn operand : ((ColumnOp) c).getOperands()) {
        replaceGroupByReference(operand);
      }
      return;
    }
    if (!(c instanceof BaseColumn)) {
      return;
    }
    BaseColumn column = (BaseColumn) c;
    String generatedAlias = oldTableAliasMap.get(column.getTableSourceAlias());
    if (generatedAlias == null) {
      generatedAlias = colNameAndTableAlias.get(column.getColumnName());
    }
    if (generatedAlias != null) {
      column.setTableSourceAlias(generatedAlias);
    }
  }

  /**
   * Standardizes an order-by list by running each attribute through {@code replaceGroupby} in
   * order-by mode and keeping its sort order.
   *
   * <p>NOTE(review): {@code replaceGroupby} yields no element for attribute types it does not
   * recognize, in which case {@code get(0)} below throws. Confirm such attributes cannot occur.
   *
   * @param selectItems the standardized select list
   * @param orderbyAttributesList the original order-by attributes
   * @return a new list with the standardized order-by attributes
   * @throws VerdictDBDbmsException propagated from {@code replaceGroupby}
   */
  private List<OrderbyAttribute> replaceOrderby(
      List<SelectItem> selectItems, List<OrderbyAttribute> orderbyAttributesList)
      throws VerdictDBDbmsException {
    List<OrderbyAttribute> newOrderby = new ArrayList<>();
    for (OrderbyAttribute o : orderbyAttributesList) {
      List<GroupingAttribute> single = Arrays.<GroupingAttribute>asList(o.getAttribute());
      GroupingAttribute replaced = replaceGroupby(selectItems, single, true).get(0);
      newOrderby.add(new OrderbyAttribute(replaced, o.getOrder()));
    }
    return newOrderby;
  }

  /*
   * return the ColName contained by the table
   */
  private Pair, AbstractRelation> setupTableSource(AbstractRelation table)
      throws VerdictDBDbmsException {
    // in order to prevent informal table alias, we replace all table alias
    if (!(table instanceof JoinTable)) {
      if (table.getAliasName().isPresent()) {
        String alias = table.getAliasName().get();
        alias = alias.replace("`", "");
        alias = alias.replace("\"", "");
        oldTableAliasMap.put(alias, verdictTableAliasPrefix + itemID);
      }
      table.setAliasName(verdictTableAliasPrefix + itemID++);
    }
    // if (!table.getAliasName().isPresent() && !(table instanceof JoinTable)) {
    //  table.setAliasName(verdictTableAliasPrefix + itemID++);
    // }
    if (table instanceof BaseTable) {
      BaseTable bt = (BaseTable) table;
      List colName = new ArrayList<>();
      if (bt.getSchemaName() == null) {
        bt.setSchemaName(meta.getDefaultSchema());
      }
      List> cols = meta.getColumns(bt.getSchemaName(), bt.getTableName());
      for (Pair c : cols) {
        colNameAndTableAlias.put(c.getKey(), bt.getAliasName().get());
        colName.add(c.getKey());
      }
      tableInfoAndAlias.put(
          ImmutablePair.of(bt.getSchemaName(), bt.getTableName()), table.getAliasName().get());
      return new ImmutablePair<>(colName, table);
    } else if (table instanceof JoinTable) {
      List joinColName = new ArrayList<>();
      for (int i = 0; i < ((JoinTable) table).getJoinList().size(); i++) {
        Pair, AbstractRelation> result =
            setupTableSource(((JoinTable) table).getJoinList().get(i));
        ((JoinTable) table).getJoinList().set(i, result.getValue());
        joinColName.addAll(result.getKey());
        if (i != 0) {
          ((JoinTable) table)
              .getCondition()
              .set(i - 1, replaceFilter(((JoinTable) table).getCondition().get(i - 1)));
        }
      }
      return new ImmutablePair<>(joinColName, table);
    } else if (table instanceof SelectQuery) {
      List colName = new ArrayList<>();
      RelationStandardizer g = new RelationStandardizer(meta, syntax);
      g.oldTableAliasMap.putAll(oldTableAliasMap);
      g.setTableInfoAndAlias(tableInfoAndAlias);
      g.setColNameAndTableAlias(colNameAndTableAlias);
      g.setColNameAndColAlias(colNameAndColAlias);
      String aliasName = table.getAliasName().get();
      table = g.standardize((SelectQuery) table);
      table.setAliasName(aliasName);
      // Invariant: Only Aliased Column or Asterisk Column should appear in the subquery
      for (SelectItem sel : ((SelectQuery) table).getSelectList()) {
        if (sel instanceof AliasedColumn) {
          // If the aliased name of the column is replaced by ourselves, we should remember the
          // column name
          if (((AliasedColumn) sel).getColumn() instanceof BaseColumn
              && ((AliasedColumn) sel).getAliasName().matches("^vc[0-9]+$")) {
            colNameAndTableAlias.put(
                ((BaseColumn) ((AliasedColumn) sel).getColumn()).getColumnName(),
                table.getAliasName().get());
            colNameAndTempColAlias.put(
                ((BaseColumn) ((AliasedColumn) sel).getColumn()).getColumnName(),
                ((AliasedColumn) sel).getAliasName());
          } else
            colNameAndTableAlias.put(
                ((AliasedColumn) sel).getAliasName(), table.getAliasName().get());
          colName.add(((AliasedColumn) sel).getAliasName());
        } else if (sel instanceof AsteriskColumn) {
          // put all the columns in the fromlist of subquery to the colNameAndTableAlias
          HashMap subqueryColumnList = g.getColNameAndTableAlias();
          for (String col : subqueryColumnList.keySet()) {
            colNameAndTableAlias.put(col, table.getAliasName().get());
            colName.add(col);
          }
        }
      }
      return new ImmutablePair<>(colName, table);
    }
    return null;
  }

  /**
   * Figures out the table alias and the columns of every relation in the query's from list.
   *
   * <p>Each element of the from list is replaced, in place, by the relation returned from
   * {@code setupTableSource}.
   *
   * @param relationToAlias the query whose from list is processed
   * @return the query's own from list, after the in-place replacement
   * @throws VerdictDBDbmsException propagated from {@code setupTableSource}
   */
  private List<AbstractRelation> setupTableSources(SelectQuery relationToAlias)
      throws VerdictDBDbmsException {
    List<AbstractRelation> fromList = relationToAlias.getFromList();
    for (int i = 0; i < fromList.size(); i++) {
      fromList.set(i, setupTableSource(fromList.get(i)).getValue());
    }
    return fromList;
  }

  /**
   * Produces a standardized copy of a query: every table gets a generated alias, every select
   * item gets an alias, and column references in the where, group-by, having and order-by
   * clauses are rewritten to match.
   *
   * <p>The clauses are processed in this order, which matters because later steps read the maps
   * filled by earlier ones: from list, select list, where, group-by, having, order-by, limit.
   * Parts of the input query are mutated along the way.
   *
   * @param relationToAlias the query to standardize
   * @return a newly created query holding the standardized clauses
   * @throws VerdictDBDbmsException if a metadata lookup or a nested standardization fails
   */
  public SelectQuery standardize(SelectQuery relationToAlias) throws VerdictDBDbmsException {
    List<AbstractRelation> fromList = setupTableSources(relationToAlias);
    List<SelectItem> selectItemList = replaceSelectList(relationToAlias.getSelectList());
    SelectQuery aliasedRelation = SelectQuery.create(selectItemList, fromList);

    // Filter
    if (relationToAlias.getFilter().isPresent()) {
      UnnamedColumn where = replaceFilter(relationToAlias.getFilter().get());
      aliasedRelation.addFilterByAnd(where);
    }

    // Group by
    if (relationToAlias.getGroupby().size() != 0) {
      List<GroupingAttribute> groupby =
          replaceGroupby(selectItemList, relationToAlias.getGroupby());
      aliasedRelation.addGroupby(groupby);
    }

    // Having
    if (relationToAlias.getHaving().isPresent()) {
      UnnamedColumn having = replaceFilter(relationToAlias.getHaving().get());
      // replace columnOp with alias if possible
      if (having instanceof ColumnOp) {
        List<ColumnOp> checklist = new ArrayList<>();
        checklist.add((ColumnOp) having);
        while (!checklist.isEmpty()) {
          ColumnOp columnOp = checklist.remove(0);
          List<UnnamedColumn> operands = columnOp.getOperands();
          for (int i = 0; i < operands.size(); i++) {
            UnnamedColumn operand = operands.get(i);
            if (!(operand instanceof ColumnOp)) {
              continue;
            }
            if (columnOpAliasMap.containsKey(operand)) {
              // The operation already appears in the select list: refer to it by alias.
              columnOp.setOperand(i, new AliasReference(columnOpAliasMap.get(operand)));
            } else {
              checklist.add((ColumnOp) operand);
            }
          }
        }
      }
      aliasedRelation.addHavingByAnd(having);
    }

    // Order by
    if (relationToAlias.getOrderby().size() != 0) {
      List<OrderbyAttribute> orderby =
          replaceOrderby(selectItemList, relationToAlias.getOrderby());
      aliasedRelation.addOrderby(orderby);
    }

    if (relationToAlias.getLimit().isPresent()) {
      aliasedRelation.addLimit(relationToAlias.getLimit().get());
    }
    return aliasedRelation;
  }

  /**
   * @return the live map from selected column name to its alias (not a copy)
   */
  public HashMap<String, String> getColNameAndColAlias() {
    return colNameAndColAlias;
  }

  public HashMap, String> getTableInfoAndAlias() {
    return tableInfoAndAlias;
  }

  /**
   * @return the live map from column name to the alias of the table exposing it (not a copy)
   */
  public HashMap<String, String> getColNameAndTableAlias() {
    return colNameAndTableAlias;
  }

  /**
   * Copies every entry of the given map into this instance's column-name-to-table-alias map.
   * Existing entries with other keys are kept; entries with the same key are overwritten.
   *
   * @param colNameAndTableAlias entries to merge in; the map itself is not retained
   */
  public void setColNameAndTableAlias(HashMap<String, String> colNameAndTableAlias) {
    this.colNameAndTableAlias.putAll(colNameAndTableAlias);
  }

  public void setTableInfoAndAlias(HashMap, String> tableInfoAndAlias) {
    for (Pair key : tableInfoAndAlias.keySet()) {
      this.tableInfoAndAlias.put(key, tableInfoAndAlias.get(key));
    }
  }

  /**
   * Copies every entry of the given map into this instance's column-name-to-column-alias map.
   * Existing entries with other keys are kept; entries with the same key are overwritten.
   *
   * @param colNameAndColAlias entries to merge in; the map itself is not retained
   */
  public void setColNameAndColAlias(HashMap<String, String> colNameAndColAlias) {
    this.colNameAndColAlias.putAll(colNameAndColAlias);
  }

  /**
   * Replaces this instance's column-op alias map with the given map.
   *
   * <p>Unlike the other setters of this class, the map is stored by reference rather than
   * copied, so later changes made through either holder are visible to both.
   *
   * @param columnOpAliasMap the map to use from now on
   */
  public void setColumnOpAliasMap(HashMap<ColumnOp, String> columnOpAliasMap) {
    this.columnOpAliasMap = columnOpAliasMap;
  }

  /**
   * @return the live map from column operation to its alias (not a copy)
   */
  public HashMap<ColumnOp, String> getColumnOpAliasMap() {
    return columnOpAliasMap;
  }

  /**
   * Restarts the shared alias counter at 1, so the next generated alias uses the suffix 1.
   *
   * <p>Only {@code itemID} is reset; the separate counter used for duplicate column names is
   * left as it is.
   */
  public static void resetItemID() {
    itemID = 1;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy