All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.parse.WindowingSpec Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import java.util.ArrayList;
import java.util.HashMap;
import org.antlr.runtime.CommonToken;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;

/*
 * Captures the Window processing specified in a Query. A Query may
 * contain:
 * - UDAF invocations on a Window.
 * - Lead/Lag function invocations that can only be evaluated in a
 *   Partition.
 * - For Queries that don't have a Group By all UDAF invocations are
 *   treated as Window Function invocations.
 * - For Queries that don't have a Group By, the Having condition is
 *   handled as a post processing on the rows output by Windowing
 *   processing.
 * Windowing is a container of all the Select Expressions that are
 * to be handled by Windowing. These are held in 2 lists: the functions
 * list holds WindowFunction invocations; the expressions list holds
 * Select Expressions having Lead/Lag function calls. It may also
 * contain an ASTNode representing the post filter to apply on the
 * output of Window Functions.
 * Windowing also contains all the Windows defined in the Query. One of
 * the Windows is designated as the 'default' Window. If the Query has a
 * Distribute By/Cluster By clause; then the information in these
 * clauses is captured as a Partitioning and used as the default Window
 * for the Query. Otherwise the first Window specified is treated as the
 * default.
 * Finally Windowing maintains a Map from an 'alias' to the ASTNode that
 * represents the Select Expression that was translated to a Window
 * Function invocation or a Window Expression. This is used when
 * building RowResolvers.
 */
public class WindowingSpec {
  /** Maps the alias of a window function to the expression registered under that alias. */
  private final HashMap<String, WindowExpressionSpec> aliasToWdwExpr;
  /** Window definitions, keyed by the name they were registered under. */
  private final HashMap<String, WindowSpec> windowSpecs;
  /** Window expressions, in the order in which they were added. */
  private final ArrayList<WindowExpressionSpec> windowExpressions;

  /** Creates a spec that holds no window definitions and no window expressions. */
  public WindowingSpec() {
    aliasToWdwExpr = new HashMap<String, WindowExpressionSpec>();
    windowSpecs = new HashMap<String, WindowSpec>();
    windowExpressions = new ArrayList<WindowExpressionSpec>();
  }

  /**
   * Registers a window definition under the given name. A definition that was already
   * registered under that name is replaced.
   *
   * @param name name of the window definition
   * @param wdwSpec the window definition
   */
  public void addWindowSpec(String name, WindowSpec wdwSpec) {
    this.windowSpecs.put(name, wdwSpec);
  }

  /**
   * Appends a window function to the list of window expressions and indexes it by its alias.
   *
   * @param wFn the window function invocation to record
   */
  public void addWindowFunction(WindowFunctionSpec wFn) {
    // The list is updated first, then the alias index.
    this.windowExpressions.add(wFn);
    this.aliasToWdwExpr.put(wFn.getAlias(), wFn);
  }

  /**
   * Returns the alias index. This is the internal map itself, not a copy, so changes made
   * through it are visible to this spec.
   *
   * @return map from alias to the window expression registered under it
   */
  public HashMap<String, WindowExpressionSpec> getAliasToWdwExpr() {
    return aliasToWdwExpr;
  }

  /**
   * Returns the registered window definitions. This is the internal map itself, not a copy.
   *
   * @return map from window name to window definition
   */
  public HashMap<String, WindowSpec> getWindowSpecs() {
    return windowSpecs;
  }

  /**
   * Returns the window expressions in the order they were added. This is the internal list
   * itself, not a copy.
   *
   * @return the list of window expressions
   */
  public ArrayList<WindowExpressionSpec> getWindowExpressions() {
    return windowExpressions;
  }

  /**
   * Returns the partitioning of the window spec attached to the first window expression.
   *
   * @return the partitioning of the first window function
   */
  public PartitioningSpec getQueryPartitioningSpec() {
    /*
     * No null or class checks here: with the new design a WindowingSpec must contain a
     * WindowFunctionSpec.
     * todo: cleanup datastructs.
     */
    Object firstExpression = getWindowExpressions().get(0);
    WindowSpec firstWindow = ((WindowFunctionSpec) firstExpression).getWindowSpec();
    return firstWindow.getPartitioning();
  }

  /**
   * Returns the partition part of {@link #getQueryPartitioningSpec()}.
   *
   * @return the partition spec of the first window function
   */
  public PartitionSpec getQueryPartitionSpec() {
    PartitioningSpec queryPartitioning = getQueryPartitioningSpec();
    return queryPartitioning.getPartSpec();
  }

  /**
   * Returns the order part of {@link #getQueryPartitioningSpec()}.
   *
   * @return the order spec of the first window function
   */
  public OrderSpec getQueryOrderSpec() {
    PartitioningSpec queryPartitioning = getQueryPartitioningSpec();
    return queryPartitioning.getOrderSpec();
  }

  /**
   * Fills in the missing pieces of every window specification and checks that the resulting
   * specification is valid. For each window function, in this order:
   * <ol>
   * <li>a spec that refers to a window definition inherits its missing components
   *     (see {@link #fillInWindowSpec}); a function without a spec gets an empty one;</li>
   * <li>a spec with no partition spec is partitioned on a constant (number 0);</li>
   * <li>a missing frame, or a frame with only a start boundary, is completed by the rules
   *     in {@link #effectiveWindowFrame};</li>
   * <li>the effective frame is validated by the rules in {@link #validateWindowFrame};</li>
   * <li>if there is no order, the partition expressions are added as the order, and the
   *     order spec is validated (see {@link #setAndValidateOrderSpec}).</li>
   * </ol>
   *
   * @throws SemanticException if a window specification turns out to be invalid
   */
  public void validateAndMakeEffective() throws SemanticException {
    for (WindowExpressionSpec windowExpr : getWindowExpressions()) {
      WindowFunctionSpec function = (WindowFunctionSpec) windowExpr;
      WindowSpec spec = function.getWindowSpec();

      if (spec == null) {
        // No window given at all: start from an empty spec and let the defaults below apply.
        spec = new WindowSpec();
        function.setWindowSpec(spec);
      } else {
        // 1. Inherit missing components from the referenced window definitions.
        fillInWindowSpec(spec.getSourceId(), spec, new ArrayList<String>());
      }

      // 2. A window spec with no partition spec is partitioned on a constant (number 0).
      applyConstantPartition(spec);

      // 3. Complete a missing frame, or a frame that has only a start boundary.
      effectiveWindowFrame(function);

      // 4. Validate the effective window frame.
      validateWindowFrame(spec);

      // 5. Use the partition expressions as the order if there is none; validate the order.
      setAndValidateOrderSpec(function);
    }
  }

  /**
   * Copies into {@code dest} every component (partition, order, window frame) that it does
   * not have yet, walking the chain of window definitions that starts at {@code sourceId}.
   * A component already present on {@code dest} is never overwritten, so definitions nearer
   * the start of the chain take precedence.
   *
   * @param sourceId name of the first window definition to inherit from; {@code null} means
   *                 there is nothing to inherit
   * @param dest the window spec being completed
   * @param visited names already followed; used to detect reference cycles and appended to
   *                as the chain is walked
   * @throws SemanticException if the chain contains a cycle, or if a name resolves to no
   *                           definition or to {@code dest} itself
   */
  private void fillInWindowSpec(String sourceId, WindowSpec dest, ArrayList<String> visited)
      throws SemanticException {
    String currentId = sourceId;
    while (currentId != null) {
      if (visited.contains(currentId)) {
        // Add the repeated name so the message shows the full cycle.
        visited.add(currentId);
        throw new SemanticException(String.format("Cycle in Window references %s", visited));
      }

      WindowSpec source = getWindowSpecs().get(currentId);
      if (source == null || source.equals(dest)) {
        throw new SemanticException(String.format("%s refers to an unknown source", dest));
      }

      if (dest.getPartition() == null) {
        dest.setPartition(source.getPartition());
      }
      if (dest.getOrder() == null) {
        dest.setOrder(source.getOrder());
      }
      if (dest.getWindowFrame() == null) {
        dest.setWindowFrame(source.getWindowFrame());
      }

      visited.add(currentId);
      // A definition may itself refer to another definition; keep following the chain.
      currentId = source.getSourceId();
    }
  }

  /**
   * Gives a window spec that has no partition spec a partition on the constant number 0.
   * A spec that already has a partition spec is left untouched.
   *
   * @param wdwSpec the window spec to complete
   */
  private void applyConstantPartition(WindowSpec wdwSpec) {
    if (wdwSpec.getPartition() != null) {
      return;
    }

    PartitionExpression constantExpr = new PartitionExpression();
    constantExpr.setExpression(new ASTNode(new CommonToken(HiveParser.Number, "0")));

    PartitionSpec constantPartition = new PartitionSpec();
    constantPartition.addExpression(constantExpr);
    wdwSpec.setPartition(constantPartition);
  }

  /**
   * Completes the window frame of the given function's window spec.
   * <ul>
   * <li>A frame that has only a start boundary is interpreted as
   *     {@code BETWEEN <start boundary> AND CURRENT ROW}.</li>
   * <li>With no frame, for a function that supports windowing (a function with no registry
   *     entry is treated as supporting it):
   *     an order spec gives {@code RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW};
   *     no order spec gives {@code ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING}.</li>
   * <li>With no frame, for a function that does not support windowing: the frame is
   *     {@code ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING}, except for
   *     last_value with an order spec, which gets a ROWS frame from the current row to
   *     0 following.</li>
   * </ul>
   *
   * @param wFn the window function whose window spec is completed; its spec must be non-null
   * @throws SemanticException declared for the function registry lookup
   */
  private void effectiveWindowFrame(WindowFunctionSpec wFn)
      throws SemanticException {
    WindowSpec spec = wFn.getWindowSpec();
    WindowFunctionInfo info = FunctionRegistry.getWindowFunctionInfo(wFn.getName());
    boolean supportsWindowing = info == null || info.isSupportsWindow();
    WindowFrameSpec frame = spec.getWindowFrame();

    if (frame != null) {
      // Only the end boundary can still be missing.
      if (frame.getEnd() == null) {
        frame.setEnd(new BoundarySpec(Direction.CURRENT));
      }
      return;
    }

    boolean ordered = spec.getOrder() != null;
    WindowFrameSpec effective;
    if (!supportsWindowing
        && wFn.getName().toLowerCase().equals(FunctionRegistry.LAST_VALUE_FUNC_NAME)
        && ordered) {
      /*
       * last_value: when a Sort Key is specified, last_value should return the
       * last value among rows with the same Sort Key value.
       */
      effective = new WindowFrameSpec(
          WindowType.ROWS,
          new BoundarySpec(Direction.CURRENT),
          new BoundarySpec(Direction.FOLLOWING, 0));
    } else if (supportsWindowing && ordered) {
      effective = new WindowFrameSpec(
          WindowType.RANGE,
          new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
          new BoundarySpec(Direction.CURRENT));
    } else {
      // Every remaining case covers the whole partition.
      effective = new WindowFrameSpec(
          WindowType.ROWS,
          new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
          new BoundarySpec(Direction.FOLLOWING, BoundarySpec.UNBOUNDED_AMOUNT));
    }

    spec.setWindowFrame(effective);
  }

  /**
   * Rejects frames whose boundaries cannot be satisfied: a frame must not start at
   * UNBOUNDED FOLLOWING and must not end at UNBOUNDED PRECEDING.
   *
   * @param wdwSpec window spec whose frame, with both boundaries set, is checked
   * @throws SemanticException if either boundary breaks the rule above
   */
  private void validateWindowFrame(WindowSpec wdwSpec) throws SemanticException {
    WindowFrameSpec frame = wdwSpec.getWindowFrame();
    BoundarySpec frameStart = frame.getStart();
    BoundarySpec frameEnd = frame.getEnd();

    boolean startsUnboundedFollowing =
        frameStart.getDirection() == Direction.FOLLOWING && !frameStart.isBounded();
    if (startsUnboundedFollowing) {
      throw new SemanticException("Start of a WindowFrame cannot be UNBOUNDED FOLLOWING");
    }

    boolean endsUnboundedPreceding =
        frameEnd.getDirection() == Direction.PRECEDING && !frameEnd.isBounded();
    if (endsUnboundedPreceding) {
      throw new SemanticException("End of a WindowFrame cannot be UNBOUNDED PRECEDING");
    }
  }

  /**
   * Adds a default order spec when the window has none, then validates the order spec of
   * RANGE frames. A RANGE frame needs at least one sort key. It may have more than one only
   * when its start is CURRENT ROW or UNBOUNDED PRECEDING and its end is CURRENT ROW or
   * UNBOUNDED FOLLOWING; a value based RANGE frame is limited to exactly one sort key.
   *
   * @param wFn Window function spec
   * @throws SemanticException if the order spec does not fit the window frame
   */
  private void setAndValidateOrderSpec(WindowFunctionSpec wFn) throws SemanticException {
    WindowSpec spec = wFn.getWindowSpec();
    spec.ensureOrderSpec(wFn);

    WindowFrameSpec frame = spec.getWindowFrame();
    if (frame.getWindowType() != WindowType.RANGE) {
      // ROWS frames put no constraint on the order spec.
      return;
    }

    OrderSpec order = spec.getOrder();
    if (order == null || order.getExpressions().size() == 0) {
      throw new SemanticException("Range based Window Frame needs to specify ORDER BY clause");
    }

    BoundarySpec frameStart = frame.getStart();
    BoundarySpec frameEnd = frame.getEnd();

    boolean startAllowsMultiOrder = frameStart.getDirection() == Direction.CURRENT
        || (frameStart.getDirection() == Direction.PRECEDING && !frameStart.isBounded());
    boolean endAllowsMultiOrder = frameEnd.getDirection() == Direction.CURRENT
        || (frameEnd.getDirection() == Direction.FOLLOWING && !frameEnd.isBounded());
    boolean multiOrderAllowed = startAllowsMultiOrder && endAllowsMultiOrder;

    if (!multiOrderAllowed && order.getExpressions().size() != 1) {
      throw new SemanticException("Range value based Window Frame can have only 1 Sort Key");
    }
  }

  /**
   * Represents a Select Expression in the context of Windowing. Such an expression can
   * refer to the output of Windowing Functions and can navigate the Partition using
   * Lead/Lag functions. It is a plain holder for an alias and the expression's ASTNode.
   */
  public static class WindowExpressionSpec {
    /** Alias of the select expression; may be unset. */
    String alias;
    /** AST of the select expression. */
    ASTNode expression;

    /** @return the alias of this expression */
    public String getAlias() {
      return this.alias;
    }

    /** @param alias the alias to record for this expression */
    public void setAlias(String alias) {
      this.alias = alias;
    }

    /** @return the AST node of this expression */
    public ASTNode getExpression() {
      return this.expression;
    }

    /** @param expression the AST node to record for this expression */
    public void setExpression(ASTNode expression) {
      this.expression = expression;
    }
  }

  /**
   * Represents a UDAF invocation in the context of a Window Frame. Sometimes UDAFs are
   * handled as Window Functions even without an explicit Window specification; this is to
   * support Queries that have no Group By clause. A Window Function invocation captures:
   * <ul>
   * <li>the ASTNode that represents this invocation (inherited expression),</li>
   * <li>its name,</li>
   * <li>whether it is a star/distinct invocation,</li>
   * <li>whether nulls are respected,</li>
   * <li>its argument list,</li>
   * <li>its alias (inherited),</li>
   * <li>and an optional Window specification.</li>
   * </ul>
   */
  public static class WindowFunctionSpec extends WindowExpressionSpec {
    String name;
    boolean isStar;
    boolean isDistinct;
    // NOTE(review): this defaults to false, so toString() prints " ignore nulls " unless
    // setRespectNulls(true) has been called — confirm that default is intended.
    boolean respectNulls;
    /** Argument ASTs; created lazily, so it may be null until first accessed or set. */
    ArrayList<ASTNode> args;
    WindowSpec windowSpec;

    /** @return the function name */
    public String getName() {
      return name;
    }

    /** @param name the function name */
    public void setName(String name) {
      this.name = name;
    }

    /** @return true if this is a star invocation, e.g. {@code count(*)} */
    public boolean isStar() {
      return isStar;
    }

    /** @param isStar whether this is a star invocation */
    public void setStar(boolean isStar) {
      this.isStar = isStar;
    }

    /** @return true if this is a distinct invocation */
    public boolean isDistinct() {
      return isDistinct;
    }

    /** @param isDistinct whether this is a distinct invocation */
    public void setDistinct(boolean isDistinct) {
      this.isDistinct = isDistinct;
    }

    /**
     * Returns the argument list, creating an empty one on first access.
     *
     * @return the internal, never-null argument list
     */
    public ArrayList<ASTNode> getArgs() {
      if (args == null) {
        args = new ArrayList<ASTNode>();
      }
      return args;
    }

    /** @param args the argument list to use; replaces any existing list */
    public void setArgs(ArrayList<ASTNode> args) {
      this.args = args;
    }

    /**
     * Appends an argument, creating the argument list if needed.
     *
     * @param arg the argument AST to append
     */
    public void addArg(ASTNode arg) {
      getArgs().add(arg);
    }

    /** @return the window specification, or null if none has been set */
    public WindowSpec getWindowSpec() {
      return windowSpec;
    }

    /** @param windowSpec the window specification for this invocation */
    public void setWindowSpec(WindowSpec windowSpec) {
      this.windowSpec = windowSpec;
    }

    /** @return whether nulls are respected by this invocation */
    public boolean isRespectNulls() {
      return respectNulls;
    }

    /** @param respectNulls whether nulls are respected by this invocation */
    public void setRespectNulls(boolean respectNulls) {
      this.respectNulls = respectNulls;
    }

    /**
     * Renders the invocation as
     * {@code name(args) [ignore nulls] [window spec] [as alias]}.
     */
    @Override
    public String toString() {
      StringBuilder buf = new StringBuilder();
      buf.append(name).append("(");
      if (isStar) {
        buf.append("*");
      } else {
        if (isDistinct) {
          buf.append("distinct ");
        }
        // Read the field directly: printing must not trigger the lazy list creation.
        if (args != null) {
          String separator = "";
          for (ASTNode arg : args) {
            buf.append(separator).append(arg.toStringTree());
            separator = ", ";
          }
        }
      }

      buf.append(")");

      if (!respectNulls) {
        buf.append(" ignore nulls ");
      }

      if (windowSpec != null) {
        buf.append(" ").append(windowSpec.toString());
      }

      if (alias != null) {
        buf.append(" as ").append(alias);
      }

      return buf.toString();
    }
  }

  /**
   * Represents a WindowFrame applied to a Partitioning. A Window can refer to a source
   * Window by name; the source Window provides the basis for this Window definition, and
   * this specification extends/overrides it. For example the Select Expression
   * $sum(p_retailprice) over (w1)$ is translated into a WindowFunction instance whose
   * Window specification refers to the global Window Specification 'w1'. The Function's
   * specification has no content of its own, but inherits all its attributes from 'w1'
   * during subsequent phases of translation.
   */
  public static class WindowSpec {
    /** Name of the window definition this spec inherits from; null if there is none. */
    private String sourceId;
    /** Partition and order information; created on demand by the setters. */
    private PartitioningSpec partitioning;
    private WindowFrameSpec windowFrame;

    /** @return the name of the source window, or null */
    public String getSourceId() {
      return this.sourceId;
    }

    /** @param sourceId the name of the source window */
    public void setSourceId(String sourceId) {
      this.sourceId = sourceId;
    }

    /** @return the partitioning, or null if none has been set */
    public PartitioningSpec getPartitioning() {
      return this.partitioning;
    }

    /** @param partitioning the partitioning to use */
    public void setPartitioning(PartitioningSpec partitioning) {
      this.partitioning = partitioning;
    }

    /** @return the window frame, or null if none has been set */
    public WindowFrameSpec getWindowFrame() {
      return this.windowFrame;
    }

    /** @param windowFrame the window frame to use */
    public void setWindowFrame(WindowFrameSpec windowFrame) {
      this.windowFrame = windowFrame;
    }

    /** @return the partition spec of the partitioning, or null if there is no partitioning */
    public PartitionSpec getPartition() {
      PartitioningSpec current = getPartitioning();
      if (current == null) {
        return null;
      }
      return current.getPartSpec();
    }

    /**
     * Sets the partition spec, creating the partitioning first if there is none.
     *
     * @param partSpec the partition spec to store
     */
    public void setPartition(PartitionSpec partSpec) {
      if (partitioning == null) {
        partitioning = new PartitioningSpec();
      }
      partitioning.setPartSpec(partSpec);
    }

    /** @return the order spec of the partitioning, or null if there is no partitioning */
    public OrderSpec getOrder() {
      PartitioningSpec current = getPartitioning();
      if (current == null) {
        return null;
      }
      return current.getOrderSpec();
    }

    /**
     * Sets the order spec, creating the partitioning first if there is none.
     *
     * @param orderSpec the order spec to store
     */
    public void setOrder(OrderSpec orderSpec) {
      if (partitioning == null) {
        partitioning = new PartitioningSpec();
      }
      partitioning.setOrderSpec(orderSpec);
    }

    /**
     * When there is no Order specified, adds the Partition expressions as Order
     * expressions. This is an implementation artifact: UDAFs that imply order (like rank,
     * dense_rank) depend on the Order Expressions to work, and internally the Order
     * Expressions are passed as Args to these functions. The translation could be changed
     * so that the Functions are set up with Partition expressions when the OrderSpec is
     * null; for now an OrderSpec that copies the Partition expressions is set up.
     * Does nothing when an order spec is already present. The window frame must already
     * be set when this is called.
     *
     * @param wFn the window function this spec belongs to (not read by this method)
     * @throws SemanticException if the frame is a RANGE frame with an offset boundary and
     *                           there is no order spec
     */
    protected void ensureOrderSpec(WindowFunctionSpec wFn) throws SemanticException {
      if (getOrder() != null) {
        return;
      }

      /*
       * In case of RANGE windows, an explicit ORDER BY clause is needed, otherwise the query
       * ends up in undefined behavior, considering the following:
       *
       * count(*) over(partition by partitioning_col range between 1 preceding and current row) as cs1
       *
       * The query above leads to undefined behavior, because it makes no sense to calculate
       * range on:
       * 1. undefined column (obviously)
       * 2. the partitioning column, as the partitioning column means that all the rows
       *    have the same value within the partition (or NULL)
       */
      boolean rangeFrame = windowFrame.getWindowType() == WindowType.RANGE;
      if (rangeFrame
          && (windowFrame.getStart().isBoundedNotCurrent()
              || windowFrame.getEnd().isBoundedNotCurrent())) {
        throw new SemanticException(
            "RANGE with offset PRECEDING/FOLLOWING requires at least one ORDER BY column");
      }

      OrderSpec partitionOrder = new OrderSpec();
      partitionOrder.prefixBy(getPartition());
      setOrder(partitionOrder);
    }

    /** Renders the source name, partitioning and frame; absent parts are left out. */
    @Override
    public String toString() {
      Object namePart = sourceId == null ? "" : "Name='" + sourceId + "'";
      Object partitioningPart = partitioning == null ? "" : partitioning;
      Object framePart = windowFrame == null ? "" : windowFrame;
      return String.format("Window Spec=[%s%s%s]", namePart, partitioningPart, framePart);
    }
  }

  /**
   * A WindowFrame specifies the Range on which a Window Function should be applied for the
   * 'current' row. It is specified by a window type and a start and end Boundary.
   */
  public static class WindowFrameSpec {
    private WindowType windowType;
    private BoundarySpec start;
    private BoundarySpec end;

    /**
     * Creates a frame with both boundaries given.
     *
     * @param windowType ROWS or RANGE
     * @param start the start boundary
     * @param end the end boundary; may be null when only the start is specified
     */
    public WindowFrameSpec(WindowType windowType, BoundarySpec start, BoundarySpec end) {
      this.windowType = windowType;
      this.start = start;
      this.end = end;
    }

    /**
     * Creates a frame that has only a start boundary; the end boundary is left null.
     *
     * @param windowType ROWS or RANGE
     * @param start the start boundary
     */
    public WindowFrameSpec(WindowType windowType, BoundarySpec start) {
      this(windowType, start, null);
    }

    /** @return the start boundary */
    public BoundarySpec getStart() {
      return this.start;
    }

    /** @param start the start boundary */
    public void setStart(BoundarySpec start) {
      this.start = start;
    }

    /** @return the end boundary, or null if it has not been specified */
    public BoundarySpec getEnd() {
      return this.end;
    }

    /** @param end the end boundary */
    public void setEnd(BoundarySpec end) {
      this.end = end;
    }

    /** @return the window type, ROWS or RANGE */
    public WindowType getWindowType() {
      return this.windowType;
    }

    @Override
    public String toString() {
      return String.format("window(type=%s, start=%s, end=%s)", windowType, start, end);
    }
  }

  /**
   * Direction of a frame boundary relative to the current row. The declaration order
   * (PRECEDING, CURRENT, FOLLOWING) is the natural ordering of the constants.
   */
  public enum Direction {
    PRECEDING,
    CURRENT,
    FOLLOWING
  }

  /** The types for a ROWS BETWEEN or RANGE BETWEEN windowing spec. */
  public enum WindowType {
    ROWS,
    RANGE
  }

  /**
   * A Boundary specifies how far back/forward a WindowFrame extends from the current row.
   * It is described by a direction and an amount:
   * <ul>
   * <li>direction CURRENT means the Boundary is at the current row;</li>
   * <li>direction PRECEDING/FOLLOWING with an amount means the Boundary lies that far
   *     before/after the current row;</li>
   * <li>the amount {@link #UNBOUNDED_AMOUNT} marks an unbounded Boundary.</li>
   * </ul>
   */
  public static class BoundarySpec implements Comparable<BoundarySpec> {
    /** Sentinel amount that marks a boundary as unbounded. */
    public static final int UNBOUNDED_AMOUNT = Integer.MAX_VALUE;

    Direction direction;
    int amt;

    /** Creates a boundary with no direction and amount 0. */
    public BoundarySpec() {
    }

    /**
     * Creates a boundary in the given direction with amount 0.
     *
     * @param direction the direction of the boundary
     */
    public BoundarySpec(Direction direction) {
      this(direction, 0);
    }

    /**
     * @param direction the direction of the boundary
     * @param amt the amount, or {@link #UNBOUNDED_AMOUNT} for an unbounded boundary
     */
    public BoundarySpec(Direction direction, int amt) {
      this.direction = direction;
      this.amt = amt;
    }

    /** @return the direction of this boundary */
    public Direction getDirection() {
      return direction;
    }

    /** @param direction the direction of this boundary */
    public void setDirection(Direction direction) {
      this.direction = direction;
    }

    /** @return the amount of this boundary */
    public int getAmt() {
      return amt;
    }

    /** @param amt the amount of this boundary */
    public void setAmt(int amt) {
      this.amt = amt;
    }

    /** @return true unless the amount is {@link #UNBOUNDED_AMOUNT} */
    public boolean isBounded() {
      return this.amt != UNBOUNDED_AMOUNT;
    }

    /**
     * Returns true for bounded non-current frames.
     *
     * @return true if this boundary is bounded and its direction is not CURRENT
     */
    public boolean isBoundedNotCurrent() {
      return isBounded() && direction != Direction.CURRENT;
    }

    @Override
    public String toString() {
      if (this.direction == Direction.CURRENT) {
        return "currentRow";
      }

      return String.format("%s %s", (amt == UNBOUNDED_AMOUNT ? "Unbounded" : amt), direction);
    }

    /**
     * Orders boundaries by their position relative to the current row: first by direction
     * (in the declaration order of {@link Direction}), then by amount. For PRECEDING a
     * larger amount lies further back and therefore sorts first; otherwise a larger amount
     * sorts last. Only the sign of the result is meaningful.
     *
     * @param other the boundary to compare with
     * @return a negative, zero or positive value as this boundary is before, at, or after
     *         {@code other}
     */
    @Override
    public int compareTo(BoundarySpec other) {
      int c = direction.compareTo(other.getDirection());
      if (c != 0) {
        return c;
      }

      // Valid range is "range/rows between 10 preceding and 2 preceding" for preceding case.
      // Integer.compare avoids the overflow that int subtraction can produce.
      return this.direction == Direction.PRECEDING
          ? Integer.compare(other.amt, amt)
          : Integer.compare(amt, other.amt);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy