All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lens.cube.parse.HQLParser Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.lens.cube.parse;

import static org.apache.lens.cube.error.LensCubeErrorCode.COULD_NOT_PARSE_EXPRESSION;
import static org.apache.lens.cube.error.LensCubeErrorCode.SYNTAX_ERROR;

import static org.apache.hadoop.hive.ql.parse.HiveParser.*;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.*;
import java.util.regex.Pattern;

import org.apache.lens.server.api.error.LensException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.parse.*;

import org.antlr.runtime.CommonToken;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.tree.Tree;

import com.google.common.base.Optional;
import com.google.common.collect.Sets;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;


@Slf4j
public final class HQLParser {

  /** Private constructor: this class only exposes static helpers and is not meant to be instantiated. */
  private HQLParser() {

  }

  /** Pre-compiled pattern matching a run of one or more whitespace characters. */
  public static final Pattern P_WSPACE = Pattern.compile("\\s+");

  /**
   * Checks whether the given node has exactly two children, the first of type {@code TOK_TABLE_OR_COL} and the
   * second of type {@code Identifier}.
   *
   * @param astNode node to inspect, may be null
   * @return true only when the node has that exact two-child shape
   */
  public static boolean isTableColumnAST(ASTNode astNode) {
    if (astNode == null || astNode.getChildren() == null || astNode.getChildCount() != 2) {
      return false;
    }
    boolean firstIsTableOrCol = astNode.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL;
    return firstIsTableOrCol && astNode.getChild(1).getType() == HiveParser.Identifier;
  }

  /**
   * Tells whether the node's token type is one of the comparison operators in {@code FILTER_OPERATORS}.
   *
   * @param ast node to inspect
   * @return true when the node type is contained in {@code FILTER_OPERATORS}
   */
  public static boolean isPrimitiveBooleanExpression(ASTNode ast) {
    final int tokenType = ast.getType();
    return HQLParser.FILTER_OPERATORS.contains(tokenType);
  }

  /**
   * Tells whether the node is a {@code TOK_FUNCTION} whose first child has the text {@code "in"}.
   *
   * @param ast node to inspect
   * @return true for a function node whose first child's text equals "in"
   */
  public static boolean isPrimitiveBooleanFunction(ASTNode ast) {
    if (ast.getType() != TOK_FUNCTION) {
      return false;
    }
    String functionName = ast.getChild(0).getText();
    return functionName.equals("in");
  }
  /**
   * Builds the AST for a qualified column reference: a {@code DOT} node whose first child is
   * {@code TOK_TABLE_OR_COL(tableAlias)} and whose second child is the identifier {@code fieldAlias}.
   *
   * @param tableAlias text for the identifier under TOK_TABLE_OR_COL
   * @param fieldAlias text for the identifier that is the DOT node's second child
   * @return the newly created DOT node
   */
  public static ASTNode getDotAST(String tableAlias, String fieldAlias) {
    ASTNode tableOrCol = new ASTNode(new CommonToken(TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
    tableOrCol.addChild(new ASTNode(new CommonToken(Identifier, tableAlias)));

    ASTNode dot = new ASTNode(new CommonToken(DOT, "."));
    dot.addChild(tableOrCol);
    dot.addChild(new ASTNode(new CommonToken(Identifier, fieldAlias)));
    return dot;
  }

  /** Callback invoked once for every node reached by {@code bft}. */
  public interface ASTNodeVisitor {
    /**
     * Handles one visited node.
     *
     * @param node wrapper holding the visited AST node and the wrapper of its parent
     * @throws LensException if the visitor cannot process the node
     */
    void visit(TreeNode node) throws LensException;
  }

  /** Immutable pairing of an AST node with the wrapper of the node it was reached from during traversal. */
  public static class TreeNode {
    final TreeNode parent;
    final ASTNode node;

    /**
     * @param parent wrapper of the parent node, or null for the traversal root
     * @param node   the wrapped AST node
     */
    public TreeNode(TreeNode parent, ASTNode node) {
      this.node = node;
      this.parent = parent;
    }

    /** @return the wrapped AST node */
    public ASTNode getNode() {
      return this.node;
    }

    /** @return wrapper of the parent node; null when this wraps the traversal root */
    public TreeNode getParent() {
      return this.parent;
    }
  }

  /** Token types that {@code toInfixString} renders as infix operators between two operands. */
  public static final Set<Integer> BINARY_OPERATORS;
  /** Binary operators that {@code toInfixString} does not parenthesize when nested under the same operator. */
  public static final Set<Integer> N_ARY_OPERATORS;
  /** Comparison operator token types tested by {@code isPrimitiveBooleanExpression}. */
  public static final Set<Integer> FILTER_OPERATORS;
  /** Token types of the arithmetic operators: plus, minus, star, divide and mod. */
  public static final Set<Integer> ARITHMETIC_OPERATORS;
  /** Token types that {@code toInfixString} renders as prefix operators: NOT and tilde. */
  public static final Set<Integer> UNARY_OPERATORS;
  /** Token types of the primitive column types that {@code toInfixString} renders by name. */
  public static final Set<Integer> PRIMITIVE_TYPES;

  static {
    // All of these are public constants, so every set is published as an unmodifiable view.
    BINARY_OPERATORS = Collections.unmodifiableSet(Sets.newHashSet(DOT, KW_AND, KW_OR, EQUAL, EQUAL_NS, NOTEQUAL,
      GREATERTHAN, GREATERTHANOREQUALTO, LESSTHAN, LESSTHANOREQUALTO, PLUS, MINUS, STAR, DIVIDE, MOD, KW_LIKE,
      KW_RLIKE, KW_REGEXP, AMPERSAND, BITWISEOR, BITWISEXOR));

    N_ARY_OPERATORS = Collections.unmodifiableSet(Sets.newHashSet(KW_AND, KW_OR, PLUS, STAR,
      AMPERSAND, BITWISEOR, BITWISEXOR));

    ARITHMETIC_OPERATORS = Collections.unmodifiableSet(Sets.newHashSet(PLUS, MINUS, STAR, DIVIDE, MOD));

    UNARY_OPERATORS = Collections.unmodifiableSet(Sets.newHashSet(KW_NOT, TILDE));

    PRIMITIVE_TYPES = Collections.unmodifiableSet(Sets.newHashSet(TOK_TINYINT, TOK_SMALLINT, TOK_INT, TOK_BIGINT,
      TOK_BOOLEAN, TOK_FLOAT, TOK_DOUBLE, TOK_DATE, TOK_DATETIME, TOK_TIMESTAMP, TOK_STRING, TOK_BINARY,
      TOK_DECIMAL, TOK_VARCHAR, TOK_CHAR));

    FILTER_OPERATORS = Collections.unmodifiableSet(Sets.newHashSet(GREATERTHAN, GREATERTHANOREQUALTO, LESSTHAN,
      LESSTHANOREQUALTO, EQUAL, EQUAL_NS, NOTEQUAL));
  }

  /**
   * Parses a complete HQL query into an AST using a Hive {@code ParseDriver} and a temporary {@code Context}.
   *
   * @param query query text to parse
   * @param conf  hive configuration used to create the parse context
   * @return result of {@code ParseUtils.findRootNonNullToken} applied to the parsed tree
   * @throws LensException    with the SYNTAX_ERROR error info when the parser rejects the query
   * @throws RuntimeException wrapping an {@code IOException} raised while creating the context or parsing
   */
  public static ASTNode parseHQL(String query, HiveConf conf) throws LensException {
    final ParseDriver parseDriver = new ParseDriver();
    Context parseContext = null;
    try {
      parseContext = new Context(conf);
      ASTNode parsed = parseDriver.parse(query, parseContext);
      return ParseUtils.findRootNonNullToken(parsed);
    } catch (ParseException e) {
      throw new LensException(SYNTAX_ERROR.getLensErrorInfo(), e, e.getMessage());
    } catch (IOException e) {
      throw new RuntimeException(e);
    } finally {
      if (parseContext != null) {
        try {
          parseContext.clear();
        } catch (IOException ignored) {
          // clearing the context is best effort; a failure here is deliberately ignored
        }
      }
    }
  }

  /**
   * Parses a single expression without supplying any configuration to the lexer or parser.
   *
   * @param expr expression text
   * @return root AST node of the parsed expression
   * @throws LensException if the expression cannot be parsed
   */
  public static ASTNode parseExpr(String expr) throws LensException {
    final Configuration noConf = null;
    return parseExpr(expr, noConf);
  }
  /**
   * Parses a single expression and converts parser failures into a {@code LensException}.
   *
   * @param expr expression text
   * @param conf configuration handed to the lexer and parser; may be null
   * @return result of {@code ParseUtils.findRootNonNullToken} applied to the parsed tree
   * @throws LensException with the COULD_NOT_PARSE_EXPRESSION error info when parsing fails
   */
  public static ASTNode parseExpr(String expr, Configuration conf) throws LensException {
    try {
      ASTNode parsed = parseExpression(expr, conf);
      return ParseUtils.findRootNonNullToken(parsed);
    } catch (ParseException|RecognitionException e) {
      throw new LensException(COULD_NOT_PARSE_EXPRESSION.getLensErrorInfo(), e, expr);
    }
  }
  /**
   * Runs the Hive lexer and parser over {@code command} using the grammar's {@code expression} rule.
   *
   * @param command expression text
   * @param conf    configuration set on both lexer and parser when non-null
   * @return the tree produced by the expression rule, cast to {@code ASTNode}
   * @throws ParseException       declared for callers; see the Hive parser for when it is raised
   * @throws RecognitionException if the parser cannot recognise the input
   */
  public static ASTNode parseExpression(String command, Configuration conf)
    throws ParseException, RecognitionException {
    final boolean hasConf = conf != null;
    ParseDriver parseDriver = new ParseDriver();

    // Lexer over a case-insensitive character stream
    ParseDriver.HiveLexerX hiveLexer = parseDriver.new HiveLexerX(parseDriver.new ANTLRNoCaseStringStream(command));
    if (hasConf) {
      hiveLexer.setHiveConf(conf);
    }

    TokenRewriteStream tokenStream = new TokenRewriteStream(hiveLexer);
    HiveParser hiveParser = new HiveParser(tokenStream);
    if (hasConf) {
      hiveParser.setHiveConf(conf);
    }
    hiveParser.setTreeAdaptor(ParseDriver.adaptor);

    return (ASTNode)hiveParser.expression().getTree();
  }

  /**
   * Prints the AST rooted at {@code node} to stdout, followed by a newline. Any exception raised while building
   * the token mapping or printing is logged and not propagated.
   *
   * @param node root of the tree to print
   */
  public static void printAST(ASTNode node) {
    final int rootLevel = 0;
    final int rootChildIndex = 0;
    try {
      printAST(getHiveTokenMapping(), node, rootLevel, rootChildIndex);
    } catch (Exception e) {
      log.error("Error in printing AST.", e);
    }
    System.out.println();
  }

  /**
   * Debug helper that writes the AST to stdout, one node per line, indented two spaces per level. Leaf nodes are
   * suffixed with {@code $}; inner nodes wrap their children in braces.
   *
   * @param tokenMapping map from token type to token name, used to label each node
   * @param node         node to print; null or nil nodes print nothing
   * @param level        depth of the node, controls the indentation
   * @param child        position of the node among its siblings as printed in the label
   */
  public static void printAST(Map tokenMapping, ASTNode node, int level, int child) {
    if (node == null || node.isNil()) {
      return;
    }

    StringBuilder indentBuilder = new StringBuilder();
    for (int depth = 0; depth < level; depth++) {
      indentBuilder.append("  ");
    }
    final String indent = indentBuilder.toString();

    System.out.print(indent);
    System.out.print(node.getText() + " [" + tokenMapping.get(node.getType()) + "]");
    System.out.print(" (l" + level + "c" + child + "p" + node.getCharPositionInLine() + ")");

    final int childCount = node.getChildCount();
    if (childCount <= 0) {
      System.out.print('$');
      return;
    }

    System.out.println(" {");
    for (int idx = 0; idx < childCount; idx++) {
      Tree subTree = node.getChild(idx);
      if (subTree instanceof ASTNode) {
        printAST(tokenMapping, (ASTNode) subTree, level + 1, idx + 1);
      } else {
        System.out.println("NON ASTNode");
      }
      System.out.println();
    }

    System.out.print(indent);
    System.out.print("}");
  }

  /**
   * Builds a lookup from token type to token name by reflecting over the public static {@code int} fields of
   * {@code HiveParser}.
   *
   * @return map from field value to field name; when several fields share a value, the last one read wins
   * @throws Exception if a field cannot be read reflectively
   */
  public static Map<Integer, String> getHiveTokenMapping() throws Exception {
    Map<Integer, String> mapping = new HashMap<>();

    for (Field f : HiveParser.class.getFields()) {
      // Only static fields can be read without an instance; f.getInt(null) would throw on an instance field.
      if (f.getType() == int.class && java.lang.reflect.Modifier.isStatic(f.getModifiers())) {
        mapping.put(f.getInt(null), f.getName());
      }
    }

    return mapping;
  }

  /**
   * Walks down from {@code root} following a sequence of token types: at every level the first child whose type
   * equals the next entry of {@code path} is selected.
   *
   * @param root node from which searching is to be started
   * @param path token types to match, starting at the level of root's children
   * @return the node matched by the last path entry; null if some level has no matching child or path is empty
   */
  public static ASTNode findNodeByPath(ASTNode root, int... path) {
    ASTNode current = root;
    for (int depth = 0; depth < path.length; depth++) {
      final int wantedType = path[depth];
      ASTNode match = null;

      // first child with the wanted type wins
      for (int j = 0; j < current.getChildCount() && match == null; j++) {
        ASTNode candidate = (ASTNode) current.getChild(j);
        if (candidate.getType() == wantedType) {
          match = candidate;
        }
      }

      if (match == null) {
        // No path from this level
        return null;
      }
      current = match;
    }

    return path.length == 0 ? null : current;
  }

  /**
   * Breadth first traversal of AST. Every node is wrapped in a {@link TreeNode} that also carries the wrapper of
   * its parent, and handed to the visitor before its children are enqueued.
   *
   * @param root      node from where to start bft
   * @param visitor   action to take on each visit
   * @throws LensException        propagated from the visitor
   * @throws NullPointerException if root or visitor is null
   */
  public static void bft(ASTNode root, ASTNodeVisitor visitor) throws LensException {
    Objects.requireNonNull(root, "Root cannot be null");
    Objects.requireNonNull(visitor, "Visitor cannot be null");

    // Typed queue: poll() must yield TreeNode, which a raw Queue cannot guarantee at compile time.
    Queue<TreeNode> queue = new ArrayDeque<>();
    queue.add(new TreeNode(null, root));

    while (!queue.isEmpty()) {
      TreeNode node = queue.poll();
      visitor.visit(node);
      ASTNode astNode = node.getNode();
      for (int i = 0; i < astNode.getChildCount(); i++) {
        queue.offer(new TreeNode(node, (ASTNode) astNode.getChild(i)));
      }
    }
  }

  /**
   * @param text text to scan
   * @return true when {@code text} contains at least one whitespace character
   */
  static boolean hasSpaces(String text) {
    final boolean containsWhitespace = P_WSPACE.matcher(text).find();
    return containsWhitespace;
  }

  /**
   * Reconstructs the query string for {@code root} into {@code buf} using {@code AppendMode.LOWER_CASE}.
   *
   * @param root root node
   * @param buf  builder receiving the reconstructed string
   */
  public static void toInfixString(ASTNode root, StringBuilder buf) {
    final AppendMode defaultMode = AppendMode.LOWER_CASE;
    toInfixString(root, buf, defaultMode);
  }

  /**
   * Recursively reconstruct query string given a query AST.
   *
   * <p>The node type selects the rendering: operands are printed directly, unary and binary operators are printed
   * in prefix/infix form, functions are delegated to {@code functionString}, and any other node simply renders
   * its children in order (or its own text when it has none).
   *
   * @param root       root node; a null root appends nothing
   * @param buf        preallocated builder where the reconstructed string will be written
   * @param appendMode case conversion applied to identifiers, operators and function names
   */
  public static void toInfixString(ASTNode root, StringBuilder buf, AppendMode appendMode) {
    if (root == null) {
      return;
    }
    int rootType = root.getType();
    String rootText = root.getText();
    // Operand, print contents
    if (Identifier == rootType || Number == rootType || StringLiteral == rootType || KW_TRUE == rootType
      || KW_FALSE == rootType || KW_FORMATTED == rootType || KW_EXTENDED == rootType || KW_DEPENDENCY == rootType) {
      // StringLiterals should not be lower cased.
      if (StringLiteral == rootType) {
        buf.append(rootText);
      } else if (KW_TRUE == rootType) {
        buf.append(" true ");
      } else if (KW_FALSE == rootType) {
        buf.append(" false ");
      } else if (Identifier == rootType && root.getParent() != null
        && TOK_SELEXPR == root.getParent().getType()) {
        // The parent is null-checked because a detached identifier node can be rendered on its own.
        // back quote column alias in all cases. This is required since some alias values can match DB keywords
        // (example : year as alias) and in such case queries can fail on certain DBs if the alias in not back quoted
        buf.append(" as `").append(rootText).append("`");
      } else {
        buf.append(rootText == null ? "" : appendMode.convert(rootText));
      }

    } else if (TOK_ALLCOLREF == rootType) {
      // "*" optionally qualified by whatever the children render to
      if (root.getChildCount() > 0) {
        for (int i = 0; i < root.getChildCount(); i++) {
          toInfixString((ASTNode) root.getChild(i), buf, appendMode);
        }
        buf.append(".");
      }
      buf.append("*");
    } else if (TOK_FUNCTIONSTAR == rootType) {
      // function applied to star: children first, then "(*)"
      if (root.getChildCount() > 0) {
        for (int i = 0; i < root.getChildCount(); i++) {
          toInfixString((ASTNode) root.getChild(i), buf, appendMode);
        }
      }
      buf.append("(*)");
    } else if (UNARY_OPERATORS.contains(rootType)) {
      if (KW_NOT == rootType) {
        // Check if this is actually NOT IN; in that case functionString emits the "not" itself
        if (findNodeByPath(root, TOK_FUNCTION, KW_IN) == null) {
          buf.append(" not ");
        }
      } else if (TILDE == rootType) {
        buf.append(" ~");
      }

      for (int i = 0; i < root.getChildCount(); i++) {
        toInfixString((ASTNode) root.getChild(i), buf, appendMode);
      }

    } else if (BINARY_OPERATORS.contains(rootType)) {
      // Parentheses are skipped only for n-ary operators at the top or directly under the same operator
      boolean surround = true;
      if (N_ARY_OPERATORS.contains(rootType)
        && (root.getParent() == null || rootType == root.getParent().getType())) {
        surround = false;
      }
      if (surround) {
        buf.append("(");
      }
      if (MINUS == rootType && root.getChildCount() == 1) {
        // If minus has only one child, then it's a unary operator.
        // Add Operator name first
        buf.append(appendMode.convert(rootText));
        // Operand
        toInfixString((ASTNode) root.getChild(0), buf, appendMode);
      } else {
        // Left operand
        toInfixString((ASTNode) root.getChild(0), buf, appendMode);
        // Operator name; DOT is written without surrounding spaces
        if (rootType != DOT) {
          buf.append(' ').append(appendMode.convert(rootText)).append(' ');
        } else {
          buf.append(appendMode.convert(rootText));
        }
        // Right operand
        toInfixString((ASTNode) root.getChild(1), buf, appendMode);
      }
      if (surround) {
        buf.append(")");
      }
    } else if (LSQUARE == rootType) {
      // square brackets for array and map types
      toInfixString((ASTNode) root.getChild(0), buf, appendMode);
      buf.append("[");
      toInfixString((ASTNode) root.getChild(1), buf, appendMode);
      buf.append("]");
    } else if (PRIMITIVE_TYPES.contains(rootType)) {
      if (rootType == TOK_TINYINT) {
        buf.append("tinyint");
      } else if (rootType == TOK_SMALLINT) {
        buf.append("smallint");
      } else if (rootType == TOK_INT) {
        buf.append("int");
      } else if (rootType == TOK_BIGINT) {
        buf.append("bigint");
      } else if (rootType == TOK_BOOLEAN) {
        buf.append("boolean");
      } else if (rootType == TOK_FLOAT) {
        buf.append("float");
      } else if (rootType == TOK_DOUBLE) {
        buf.append("double");
      } else if (rootType == TOK_DATE) {
        buf.append("date");
      } else if (rootType == TOK_DATETIME) {
        buf.append("datetime");
      } else if (rootType == TOK_TIMESTAMP) {
        buf.append("timestamp");
      } else if (rootType == TOK_STRING) {
        buf.append("string");
      } else if (rootType == TOK_BINARY) {
        buf.append("binary");
      } else if (rootType == TOK_DECIMAL) {
        // decimal with optional first and second argument taken from the children
        buf.append("decimal");
        if (root.getChildCount() >= 1) {
          buf.append("(").append(root.getChild(0).getText());
          if (root.getChildCount() == 2) {
            buf.append(",").append(root.getChild(1).getText());
          }
          buf.append(")");
        }
      } else if (rootType == TOK_VARCHAR) {
        buf.append("varchar");
        if (root.getChildCount() >= 1) {
          buf.append("(").append(root.getChild(0).getText()).append(")");
        }
      } else if (rootType == TOK_CHAR) {
        buf.append("char");
        if (root.getChildCount() >= 1) {
          buf.append("(").append(root.getChild(0).getText()).append(")");
        }
      } else {
        buf.append(rootText);
      }
    } else if (TOK_FUNCTION == root.getType()) {
      // Handle UDFs, conditional operators.
      functionString(root, buf, appendMode);

    } else if (TOK_FUNCTIONDI == rootType) {
      // Distinct is a different case.
      String fname = root.getChild(0).getText();

      buf.append(appendMode.convert(fname)).append("( distinct ");

      // Arguments to distinct separated by comma
      for (int i = 1; i < root.getChildCount(); i++) {
        toInfixString((ASTNode) root.getChild(i), buf, appendMode);
        if (i != root.getChildCount() - 1) {
          buf.append(", ");
        }
      }
      buf.append(")");

    } else if (TOK_TABSORTCOLNAMEDESC == rootType || TOK_TABSORTCOLNAMEASC == rootType) {
      for (int i = 0; i < root.getChildCount(); i++) {
        StringBuilder orderByCol = new StringBuilder();
        toInfixString((ASTNode) root.getChild(i), orderByCol, appendMode);
        String colStr = orderByCol.toString().trim();
        // strip one pair of enclosing parentheses from the rendered sort expression
        if (colStr.startsWith("(") && colStr.endsWith(")")) {
          colStr = colStr.substring(1, colStr.length() - 1);
        }
        buf.append(colStr);
      }
      buf.append(rootType == TOK_TABSORTCOLNAMEDESC ? " desc" : " asc");
    } else if (TOK_SELECT == rootType || TOK_ORDERBY == rootType || TOK_GROUPBY == rootType) {
      // comma separated list of children
      for (int i = 0; i < root.getChildCount(); i++) {
        toInfixString((ASTNode) root.getChild(i), buf, appendMode);
        if (i != root.getChildCount() - 1) {
          buf.append(", ");
        }
      }

    } else if (TOK_SELECTDI == rootType) {
      buf.append(" distinct ");
      for (int i = 0; i < root.getChildCount(); i++) {
        toInfixString((ASTNode) root.getChild(i), buf, appendMode);
        if (i != root.getChildCount() - 1) {
          buf.append(", ");
        }
      }

    } else if (TOK_DIR == rootType) {
      // The KW_LOCAL child only toggles the "local" prefix; every other child is rendered after " directory "
      StringBuilder sb = new StringBuilder();
      boolean local = false;
      for (int i = 0; i < root.getChildCount(); i++) {

        if (root.getChild(i).getType() == KW_LOCAL) {
          local = true;
        } else {
          toInfixString((ASTNode) root.getChild(i), sb, appendMode);
        }
      }
      buf.append(local ? " local": "").append(" directory ").append(sb);
    } else if (TOK_TAB == rootType) {
      buf.append(" table ");
      for (int i = 0; i < root.getChildCount(); i++) {
        toInfixString((ASTNode) root.getChild(i), buf, appendMode);
      }

    } else {
      if (root.getChildCount() > 0) {
        for (int i = 0; i < root.getChildCount(); i++) {
          toInfixString((ASTNode) root.getChild(i), buf, appendMode);
        }
      } else {
        // for other types which are not handled above
        buf.append(rootText);
      }
    }
  }

  // Get string representation of a function node in query AST.
  // The branches are tried in order: CASE (both forms), IS NULL / IS NOT NULL, BETWEEN, IN, CAST and finally
  // a plain function call. The output is appended to buf; names are converted through appendMode.
  private static void functionString(ASTNode root, StringBuilder buf, AppendMode appendMode) {
    // special handling for CASE udf
    if (findNodeByPath(root, KW_CASE) != null) {
      buf.append("case ");
      // child 1 is rendered right after "case", before the when/then pairs
      toInfixString((ASTNode) root.getChild(1), buf, appendMode);
      // each of the conditions
      ArrayList caseChildren = root.getChildren();
      int from = 2;
      int nchildren = caseChildren.size();
      // an odd child count means the last child is the ELSE branch, so it is excluded from the pairs
      int to = nchildren % 2 == 1 ? nchildren - 1 : nchildren;

      for (int i = from; i < to; i += 2) {
        buf.append(" when ");
        toInfixString((ASTNode) caseChildren.get(i), buf, appendMode);
        buf.append(" then ");
        toInfixString((ASTNode) caseChildren.get(i + 1), buf, appendMode);
      }

      // check if there is an ELSE node
      if (nchildren % 2 == 1) {
        buf.append(" else ");
        toInfixString((ASTNode) caseChildren.get(nchildren - 1), buf, appendMode);
      }

      buf.append(" end");

    } else if (findNodeByPath(root, KW_WHEN) != null) {
      // 2nd form of case statement

      buf.append("case ");
      // each of the conditions
      ArrayList caseChildren = root.getChildren();
      int from = 1;
      int nchildren = caseChildren.size();
      // here the pairs start at child 1, so an even child count means a trailing ELSE branch
      int to = nchildren % 2 == 1 ? nchildren : nchildren - 1;

      for (int i = from; i < to; i += 2) {
        buf.append(" when ");
        toInfixString((ASTNode) caseChildren.get(i), buf, appendMode);
        buf.append(" then ");
        toInfixString((ASTNode) caseChildren.get(i + 1), buf, appendMode);
      }

      // check if there is an ELSE node
      if (nchildren % 2 == 0) {
        buf.append(" else ");
        toInfixString((ASTNode) caseChildren.get(nchildren - 1), buf, appendMode);
      }

      buf.append(" end");

    } else if (findNodeByPath(root, TOK_ISNULL) != null) {
      // IS NULL operator
      toInfixString((ASTNode) root.getChild(1), buf, appendMode);
      buf.append(" is null");

    } else if (findNodeByPath(root, TOK_ISNOTNULL) != null) {
      // IS NOT NULL operator
      toInfixString((ASTNode) root.getChild(1), buf, appendMode);
      buf.append(" is not null");

    } else if (root.getChild(0).getType() == Identifier
      && ((ASTNode) root.getChild(0)).getToken().getText().equalsIgnoreCase("between")) {
      // Handle between and not in between
      // A direct KW_TRUE child selects NOT BETWEEN, a direct KW_FALSE child selects BETWEEN;
      // children 2, 3 and 4 are rendered as the tested expression, lower bound and upper bound.
      ASTNode tokTrue = findNodeByPath(root, KW_TRUE);
      ASTNode tokFalse = findNodeByPath(root, KW_FALSE);
      if (tokTrue != null) {
        // NOT BETWEEN
        toInfixString((ASTNode) root.getChild(2), buf, appendMode);
        buf.append(" not between ");
        toInfixString((ASTNode) root.getChild(3), buf, appendMode);
        buf.append(" and ");
        toInfixString((ASTNode) root.getChild(4), buf, appendMode);
      } else if (tokFalse != null) {
        // BETWEEN
        toInfixString((ASTNode) root.getChild(2), buf, appendMode);
        buf.append(" between ");
        toInfixString((ASTNode) root.getChild(3), buf, appendMode);
        buf.append(" and ");
        toInfixString((ASTNode) root.getChild(4), buf, appendMode);
      }

    } else if (findNodeByPath(root, KW_IN) != null) {
      // IN operator

      // child 1 is the tested expression, children from index 2 on are the list values
      toInfixString((ASTNode) root.getChild(1), buf, appendMode);

      // check if this is NOT In
      ASTNode rootParent = (ASTNode) root.getParent();
      if (rootParent != null && rootParent.getType() == KW_NOT) {
        buf.append(" not ");
      }

      buf.append(" in (");

      for (int i = 2; i < root.getChildCount(); i++) {
        toInfixString((ASTNode) root.getChild(i), buf, appendMode);
        if (i < root.getChildCount() - 1) {
          buf.append(" , ");
        }
      }

      buf.append(")");
    } else if (findNodeByPath(root, KW_CAST) != null) {
      // cast with an explicit KW_CAST child: child 1 is rendered first, then " as ", then child 0
      buf.append("cast");
      toInfixString((ASTNode) root.getChild(1), buf, appendMode);
      buf.append(" as ");
      toInfixString((ASTNode) root.getChild(0), buf, appendMode);
    } else {
      int rootType = root.getChild(0).getType();
      if (PRIMITIVE_TYPES.contains(rootType)) {
        // cast expression maps to the following ast
        // KW_CAST LPAREN expression KW_AS primitiveType RPAREN -> ^(TOK_FUNCTION primitiveType expression)
        buf.append("cast(");
        toInfixString((ASTNode) root.getChild(1), buf, appendMode);
        buf.append(" as ");
        toInfixString((ASTNode) root.getChild(0), buf, appendMode);
        buf.append(")");
      } else {
        // Normal UDF
        String fname = root.getChild(0).getText();
        // Function name
        buf.append(appendMode.convert(fname)).append("(");
        // Arguments separated by comma
        for (int i = 1; i < root.getChildCount(); i++) {
          toInfixString((ASTNode) root.getChild(i), buf, appendMode);
          if (i != root.getChildCount() - 1) {
            buf.append(", ");
          }
        }
        buf.append(")");
      }
    }
  }


  /**
   * Renders the tree with the given append mode, trims the result and collapses every run of whitespace to a
   * single space.
   *
   * @param tree       root of the tree to render
   * @param appendMode case conversion to apply
   * @return the normalized query string
   */
  public static String getString(ASTNode tree, AppendMode appendMode) {
    StringBuilder rendered = new StringBuilder();
    toInfixString(tree, rendered, appendMode);
    String trimmed = rendered.toString().trim();
    return trimmed.replaceAll("\\s+", " ");
  }

  /**
   * Renders the tree with the default (lower case) append mode and trims the result. Unlike the two-argument
   * overload, inner whitespace is left as produced.
   *
   * @param tree root of the tree to render
   * @return the trimmed query string
   */
  public static String getString(ASTNode tree) {
    StringBuilder rendered = new StringBuilder();
    toInfixString(tree, rendered);
    String result = rendered.toString();
    return result.trim();
  }

  /**
   * Extracts the column name from a column reference node.
   *
   * @param node either a {@code TOK_TABLE_OR_COL} node or a node in 'alias.column' format
   * @return text of the first child for TOK_TABLE_OR_COL, otherwise text of the second child
   */
  public static String getColName(ASTNode node) {
    if (node.getType() == HiveParser.TOK_TABLE_OR_COL) {
      return node.getChild(0).getText();
    }
    // node in 'alias.column' format
    ASTNode columnIdentifier = (ASTNode) node.getChild(1);
    return columnIdentifier.getText();
  }

  /**
   * Collects the lower-cased names of all columns that {@code expr} references through the given table alias,
   * i.e. every {@code alias.column} DOT node whose alias matches {@code tableAlias} ignoring case.
   *
   * @param tableAlias alias to match
   * @param expr       expression tree to scan breadth first
   * @return set of lower-cased column names; empty when nothing matches
   * @throws LensException declared by the traversal
   */
  public static Set<String> getColsInExpr(final String tableAlias, ASTNode expr) throws LensException {
    final Set<String> colsInExpr = new HashSet<>();
    HQLParser.bft(expr, new ASTNodeVisitor() {
      @Override
      public void visit(TreeNode visited) {
        ASTNode node = visited.getNode();
        if (node.getToken().getType() != DOT) {
          return;
        }
        ASTNode aliasNode = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier);
        if (aliasNode == null) {
          // The left operand is not a plain table reference (for example it is itself a DOT node),
          // so this node carries no alias to compare against.
          return;
        }
        if (tableAlias.equalsIgnoreCase(aliasNode.getText())) {
          ASTNode colIdent = (ASTNode) node.getChild(1);
          colsInExpr.add(colIdent.getText().toLowerCase());
        }
      }
    });
    return colsInExpr;
  }

  /**
   * Tells whether the node is a function call whose name resolves to a generic UDAF in Hive's
   * {@code FunctionRegistry}.
   *
   * @param node node to inspect
   * @return true for a function node (plain, distinct or star) whose identifier names a registered aggregate;
   *         false otherwise, including when the registry lookup fails
   */
  public static boolean isAggregateAST(ASTNode node) {
    final int tokenType = node.getType();
    final boolean functionNode = tokenType == HiveParser.TOK_FUNCTION || tokenType == HiveParser.TOK_FUNCTIONDI
      || tokenType == HiveParser.TOK_FUNCTIONSTAR;
    if (!functionNode) {
      return false;
    }

    assert (node.getChildCount() != 0);
    if (node.getChild(0).getType() != HiveParser.Identifier) {
      return false;
    }

    String functionName = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(0).getText());
    try {
      return FunctionRegistry.getGenericUDAFResolver(functionName) != null;
    } catch (SemanticException e) {
      log.error("Error trying to find whether {} is aggregate.", getString(node), e);
      return false;
    }
  }

  /**
   * Tells whether the node is a function call whose name does not resolve to a generic UDAF in Hive's
   * {@code FunctionRegistry}.
   *
   * @param node node to inspect
   * @return true for a function node (plain, distinct or star) whose identifier has no registered aggregate
   *         resolver; false otherwise, including when the registry lookup fails
   */
  public static boolean isNonAggregateFunctionAST(ASTNode node) {
    final int tokenType = node.getType();
    final boolean functionNode = tokenType == HiveParser.TOK_FUNCTION || tokenType == HiveParser.TOK_FUNCTIONDI
      || tokenType == HiveParser.TOK_FUNCTIONSTAR;
    if (!functionNode) {
      return false;
    }

    assert (node.getChildCount() != 0);
    if (node.getChild(0).getType() != HiveParser.Identifier) {
      return false;
    }

    String functionName = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(0).getText());
    try {
      return FunctionRegistry.getGenericUDAFResolver(functionName) == null;
    } catch (SemanticException e) {
      log.error("Error trying to find whether {} is udf node.", getString(node), e);
      return false;
    }
  }

  /**
   * @param node an ASTNode, may be null
   * @return true when input node is a SELECT AST Node. Otherwise, false (including for a null node or a node
   * without a token).
   */
  public static boolean isSelectASTNode(final ASTNode node) {
    // Typed Optional so that get() yields an Integer that is unboxed for the comparison.
    Optional<Integer> astNodeType = getASTNodeType(node);
    return astNodeType.isPresent() && astNodeType.get() == HiveParser.TOK_SELECT;
  }

  /**
   * @param node an ASTNode
   * @return When node is null or token inside node is null, then Optional.absent is returned. Otherwise, an integer
   * representing ASTNodeType is returned.
   */
  private static Optional<Integer> getASTNodeType(final ASTNode node) {
    if (node == null || node.getToken() == null) {
      return Optional.absent();
    }
    return Optional.of(node.getType());
  }

  /**
   * Recursively checks whether the tree rooted at {@code node} contains an aggregate function call. Column
   * references ({@code TOK_TABLE_OR_COL} and {@code DOT} nodes) are not descended into.
   *
   * @param node root of the tree to scan
   * @return true if the node or any descendant is an aggregate function node
   */
  public static boolean hasAggregate(ASTNode node) {
    final int nodeType = node.getType();
    if (nodeType == HiveParser.TOK_TABLE_OR_COL || nodeType == HiveParser.DOT) {
      return false;
    }
    if (HQLParser.isAggregateAST(node)) {
      return true;
    }

    final int childCount = node.getChildCount();
    for (int idx = 0; idx < childCount; idx++) {
      if (hasAggregate((ASTNode) node.getChild(idx))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Structural comparison of two ASTs. Two nodes are equal when they have the same type, the same text and
   * pairwise equal children in the same order. Text is compared case-sensitively for string literals and
   * case-insensitively for every other node type.
   *
   * @param n1 first tree, may be null
   * @param n2 second tree, may be null
   * @return true when both are null or both trees are structurally equal
   */
  public static boolean equalsAST(ASTNode n1, ASTNode n2) {
    if (n1 == null || n2 == null) {
      // equal only when both are missing
      return n1 == n2;
    }

    if (n1.getType() != n2.getType()) {
      return false;
    }

    // Compare text. For literals, comparison is case sensitive
    final String text1 = n1.getText();
    final String text2 = n2.getText();
    final boolean sameText = n1.getType() == StringLiteral
      ? StringUtils.equals(text1, text2)
      : StringUtils.equalsIgnoreCase(text1, text2);
    if (!sameText) {
      return false;
    }

    // Compare children
    final int childCount = n1.getChildCount();
    if (childCount != n2.getChildCount()) {
      return false;
    }

    for (int idx = 0; idx < childCount; idx++) {
      if (!equalsAST((ASTNode) n1.getChild(idx), (ASTNode) n2.getChild(idx))) {
        return false;
      }
    }

    return true;
  }

  /**
   * Follows the first child repeatedly until a node whose {@code getChildren()} is null is reached.
   *
   * @param node starting node
   * @return the node at the end of the first-child chain; the starting node itself when it has no children
   */
  public static ASTNode leftMostChild(ASTNode node) {
    ASTNode current = node;
    while (current.getChildren() != null) {
      current = (ASTNode) current.getChild(0);
    }
    return current;
  }
  /**
   * Wrapper around an AST node whose hash code and equality are derived from the node's rendered query string.
   * The hash code is computed on first use and then reused.
   */
  @Data
  public static class HashableASTNode {
    private final ASTNode ast;
    private int hashCode = -1;
    private boolean hashCodeComputed = false;

    /** @return hash of the rendered string of the wrapped node, computed once and cached */
    @Override
    public int hashCode() {
      if (hashCodeComputed) {
        return hashCode;
      }
      hashCode = getString(ast).hashCode();
      hashCodeComputed = true;
      return hashCode;
    }

    /**
     * @param o object to compare with
     * @return true when {@code o} is a HashableASTNode with the same hash code and a rendered string that matches
     * ignoring case
     */
    @Override
    public boolean equals(Object o) {
      if (!(o instanceof HashableASTNode)) {
        return false;
      }
      if (this.hashCode() != o.hashCode()) {
        return false;
      }
      String mine = getString(this.getAst()).trim();
      String theirs = getString(((HashableASTNode) o).getAst()).trim();
      return mine.equalsIgnoreCase(theirs);
    }
  }

  /** Case conversion applied to identifiers, operators and function names while rendering an AST to a string. */
  public enum AppendMode {
    /** Lower-cases the text independently of the JVM's default locale. */
    LOWER_CASE {
      @Override public String convert(String s) {
        // Locale.ROOT keeps the result stable: with e.g. a Turkish default locale, "I" would become a dotless i.
        return s.toLowerCase(Locale.ROOT);
      }
    },
    /** Leaves the text unchanged. */
    DEFAULT;

    /**
     * @param s text to convert, must not be null
     * @return the converted text; this base implementation returns {@code s} as is
     */
    public String convert(String s) {
      return s;
    }
  }
  /**
   * Prunes, in place, the parts of a tree that refer to columns not contained in {@code columns}.
   *
   * <p>A node that is the second child of a DOT node is kept only when {@code columns} contains its text.
   * For any other node the children are trimmed recursively; children that trim to null are deleted and the
   * others are replaced by their trimmed form. Afterwards an aggregate function or binary operator node that is
   * left with exactly one child is replaced by that child when the child is itself a binary operator, and
   * dropped (null) otherwise.
   *
   * @param astNode root of the tree to trim, may be null
   * @param columns column names to retain
   * @return the trimmed node, a replacement node, or null when the node should be removed
   */
  static ASTNode trimHavingAst(ASTNode astNode, Collection columns) {
    if (astNode != null) {
      // second child of a DOT: decide purely on the text of this node
      if (astNode.getParent() != null && astNode.getParent().getType() == DOT && astNode.getChildIndex() == 1) {
        return columns.contains(astNode.getText()) ? astNode : null;
      }
      // iterate from the last child down so that deleting a child does not shift indices still to be visited
      for (int i = astNode.getChildCount() - 1; i >= 0; i--) {
        ASTNode replacement = trimHavingAst((ASTNode) astNode.getChild(i), columns);
        if (replacement == null) {
          astNode.deleteChild(i);
        } else {
          astNode.setChild(i, replacement);
        }
      }
      if (isAggregateAST(astNode) || BINARY_OPERATORS.contains(astNode.getType())) {
        if (astNode.getChildCount() == 1) {
          ASTNode child = (ASTNode) astNode.getChild(0);
          if (!BINARY_OPERATORS.contains(child.getType())) {
            return null;
          } else {
            return child;
          }
        }
      }
    }
    return astNode;
  }
  /**
   * Prunes, in place, the parts of a tree that refer to columns not contained in {@code columns}.
   *
   * <p>A node that is the second child of a DOT node is kept only when {@code columns} contains its text.
   * For any other node the children are trimmed recursively; children that trim to null are deleted and the
   * others are replaced by their trimmed form. Afterwards a DOT node with fewer than two children, or a sort /
   * nulls-ordering node with no children left, is dropped (null).
   *
   * @param astNode root of the tree to trim, may be null
   * @param columns column names to retain
   * @return the trimmed node, or null when the node should be removed
   */
  static ASTNode trimOrderByAst(ASTNode astNode, Collection columns) {
    if (astNode != null) {
      // second child of a DOT: decide purely on the text of this node
      if (astNode.getParent() != null && astNode.getParent().getType() == DOT && astNode.getChildIndex() == 1) {
        return columns.contains(astNode.getText()) ? astNode : null;
      }
      // iterate from the last child down so that deleting a child does not shift indices still to be visited
      for (int i = astNode.getChildCount() - 1; i >= 0; i--) {
        ASTNode replacement = trimOrderByAst((ASTNode) astNode.getChild(i), columns);
        if (replacement == null) {
          astNode.deleteChild(i);
        } else {
          astNode.setChild(i, replacement);
        }
      }
      switch (astNode.getType()) {
      case DOT:
        // a DOT that lost one of its two children is no longer a usable reference
        if (astNode.getChildCount() < 2) {
          return null;
        }
        break;
      case TOK_TABSORTCOLNAMEASC:
      case TOK_TABSORTCOLNAMEDESC:
      case TOK_NULLS_FIRST:
      case TOK_NULLS_LAST:
        // ordering wrappers with nothing left to order are removed
        if (astNode.getChildCount() == 0) {
          return null;
        }
        break;
      }
    }
    return astNode;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy