All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.parse.WindowingSpec Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.parse;

import java.util.ArrayList;
import java.util.HashMap;
import org.antlr.runtime.CommonToken;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;

/*
 * Captures the Window processing specified in a Query. A Query may
 * contain:
 * - UDAF invocations on a Window.
 * - Lead/Lag function invocations that can only be evaluated in a
 *   Partition.
 * - For Queries that don't have a Group By all UDAF invocations are
 *   treated as Window Function invocations.
 * - For Queries that don't have a Group By, the Having condition is
 *   handled as a post processing on the rows output by Windowing
 *   processing.
 * Windowing is a container of all the Select Expressions that are
 * to be handled by Windowing. These are held in 2 lists: the functions
 * list holds WindowFunction invocations; the expressions list holds
 * Select Expressions having Lead/Lag function calls. It may also
 * contain an ASTNode representing the post filter to apply on the
 * output of Window Functions.
 * Windowing also contains all the Windows defined in the Query. One of
 * the Windows is designated as the 'default' Window. If the Query has a
 * Distribute By/Cluster By clause; then the information in these
 * clauses is captured as a Partitioning and used as the default Window
 * for the Query. Otherwise the first Window specified is treated as the
 * default.
 * Finally Windowing maintains a Map from an 'alias' to the ASTNode that
 * represents the Select Expression that was translated to a Window
 * Function invocation or a Window Expression. This is used when
 * building RowResolvers.
 */
public class WindowingSpec {
  private HashMap aliasToWdwExpr;
  private HashMap windowSpecs;
  private ArrayList windowExpressions;

  public WindowingSpec() {
    aliasToWdwExpr = new HashMap();
    windowSpecs = new HashMap();
    windowExpressions = new ArrayList();
  }

  public void addWindowSpec(String name, WindowSpec wdwSpec) {
    windowSpecs.put(name, wdwSpec);
  }

  public void addWindowFunction(WindowFunctionSpec wFn) {
    windowExpressions.add(wFn);
    aliasToWdwExpr.put(wFn.getAlias(), wFn);
  }

  public HashMap getAliasToWdwExpr() {
    return aliasToWdwExpr;
  }

  public HashMap getWindowSpecs() {
    return windowSpecs;
  }

  public ArrayList getWindowExpressions() {
    return windowExpressions;
  }

  public PartitioningSpec getQueryPartitioningSpec() {
    /*
     * Why no null and class checks?
     * With the new design a WindowingSpec must contain a WindowFunctionSpec.
     * todo: cleanup datastructs.
     */
    WindowFunctionSpec wFn = (WindowFunctionSpec) getWindowExpressions().get(0);
    return wFn.getWindowSpec().getPartitioning();
  }

  public PartitionSpec getQueryPartitionSpec() {
    return getQueryPartitioningSpec().getPartSpec();
  }

  public OrderSpec getQueryOrderSpec() {
    return getQueryPartitioningSpec().getOrderSpec();
  }

  /*
   * Apply the rules in the Spec. to fill in any missing pieces of every Window Specification,
   * also validate that the effective Specification is valid. The rules applied are:
   * - For Wdw Specs that refer to Window Defns, inherit missing components.
   * - A Window Spec with no Parition Spec, is Partitioned on a Constant(number 0)
   * - For missing Wdw Frames or for Frames with only a Start Boundary, completely specify them
   *   by the rules in {@link effectiveWindowFrame}
   * - Validate the effective Window Frames with the rules in {@link validateWindowFrame}
   * - If there is no Order, then add the Partition expressions as the Order.
   */
  public void validateAndMakeEffective() throws SemanticException {
    for(WindowExpressionSpec expr : getWindowExpressions()) {
      WindowFunctionSpec wFn = (WindowFunctionSpec) expr;
      WindowSpec wdwSpec = wFn.getWindowSpec();

      // 1. For Wdw Specs that refer to Window Defns, inherit missing components
      if ( wdwSpec != null ) {
        ArrayList sources = new ArrayList();
        fillInWindowSpec(wdwSpec.getSourceId(), wdwSpec, sources);
      }

      if ( wdwSpec == null ) {
        wdwSpec = new WindowSpec();
        wFn.setWindowSpec(wdwSpec);
      }

      // 2. A Window Spec with no Parition Spec, is Partitioned on a Constant(number 0)
      applyConstantPartition(wdwSpec);

      // 3. For missing Wdw Frames or for Frames with only a Start Boundary, completely
      //    specify them by the rules in {@link effectiveWindowFrame}
      effectiveWindowFrame(wFn);

      // 4. Validate the effective Window Frames with the rules in {@link validateWindowFrame}
      validateWindowFrame(wdwSpec);

      // 5. Add the Partition expressions as the Order if there is no Order and validate Order spec.
      setAndValidateOrderSpec(wFn);
    }
  }

  private void fillInWindowSpec(String sourceId, WindowSpec dest, ArrayList visited)
      throws SemanticException
  {
    if (sourceId != null)
    {
      if ( visited.contains(sourceId)) {
        visited.add(sourceId);
        throw new SemanticException(String.format("Cycle in Window references %s", visited));
      }
      WindowSpec source = getWindowSpecs().get(sourceId);
      if (source == null || source.equals(dest))
      {
        throw new SemanticException(String.format("%s refers to an unknown source" ,
            dest));
      }

      if (dest.getPartition() == null)
      {
        dest.setPartition(source.getPartition());
      }

      if (dest.getOrder() == null)
      {
        dest.setOrder(source.getOrder());
      }

      if (dest.getWindowFrame() == null)
      {
        dest.setWindowFrame(source.getWindowFrame());
      }

      visited.add(sourceId);

      fillInWindowSpec(source.getSourceId(), dest, visited);
    }
  }

  private void applyConstantPartition(WindowSpec wdwSpec) {
    PartitionSpec partSpec = wdwSpec.getPartition();
    if ( partSpec == null ) {
      partSpec = new PartitionSpec();
      PartitionExpression partExpr = new PartitionExpression();
      partExpr.setExpression(new ASTNode(new CommonToken(HiveParser.Number, "0")));
      partSpec.addExpression(partExpr);
      wdwSpec.setPartition(partSpec);
    }
  }

  /*
   * - A Window Frame that has only the start boundary, then it is interpreted as:
   *     BETWEEN  AND CURRENT ROW
   * - A Window Specification with an Order Specification and no Window Frame is
   *   interpreted as: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
   * - A Window Specification with no Order and no Window Frame is interpreted as:
   *     ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
   */
  private void effectiveWindowFrame(WindowFunctionSpec wFn)
      throws SemanticException {
    WindowSpec wdwSpec = wFn.getWindowSpec();
    WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFn.getName());
    boolean supportsWindowing = wFnInfo == null ? true : wFnInfo.isSupportsWindow();
    WindowFrameSpec wFrame = wdwSpec.getWindowFrame();
    OrderSpec orderSpec = wdwSpec.getOrder();
    if ( wFrame == null ) {
      if (!supportsWindowing ) {
        if ( wFn.getName().toLowerCase().equals(FunctionRegistry.LAST_VALUE_FUNC_NAME)
            && orderSpec != null ) {
          /*
           * last_value: when an Sort Key is specified, then last_value should return the
           * last value among rows with the same Sort Key value.
           */
          wFrame = new WindowFrameSpec(
              WindowType.ROWS,
              new BoundarySpec(Direction.CURRENT),
              new BoundarySpec(Direction.FOLLOWING, 0)
              );
        } else {
          wFrame = new WindowFrameSpec(
              WindowType.ROWS,
              new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
              new BoundarySpec(Direction.FOLLOWING, BoundarySpec.UNBOUNDED_AMOUNT)
              );
        }
      } else {
        if ( orderSpec == null ) {
          wFrame = new WindowFrameSpec(
              WindowType.ROWS,
              new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
              new BoundarySpec(Direction.FOLLOWING, BoundarySpec.UNBOUNDED_AMOUNT)
              );
        } else {
          wFrame = new WindowFrameSpec(
              WindowType.RANGE,
              new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
              new BoundarySpec(Direction.CURRENT)
          );
        }
      }

      wdwSpec.setWindowFrame(wFrame);
    }
    else if ( wFrame.getEnd() == null ) {
      wFrame.setEnd(new BoundarySpec(Direction.CURRENT));
    }
  }

  private void validateWindowFrame(WindowSpec wdwSpec) throws SemanticException {
    WindowFrameSpec wFrame = wdwSpec.getWindowFrame();
    BoundarySpec start = wFrame.getStart();
    BoundarySpec end = wFrame.getEnd();

    if ( start.getDirection() == Direction.FOLLOWING &&
        start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT ) {
      throw new SemanticException("Start of a WindowFrame cannot be UNBOUNDED FOLLOWING");
    }

    if ( end.getDirection() == Direction.PRECEDING &&
        end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT ) {
      throw new SemanticException("End of a WindowFrame cannot be UNBOUNDED PRECEDING");
    }
  }

  /**
   * Add default order spec if there is no order and validate order spec for valued based
   * windowing since only one sort key is allowed.
   * @param wFn Window function spec
   * @throws SemanticException
   */
  private void setAndValidateOrderSpec(WindowFunctionSpec wFn) throws SemanticException {
    WindowSpec wdwSpec = wFn.getWindowSpec();
    wdwSpec.ensureOrderSpec(wFn);
    WindowFrameSpec wFrame = wdwSpec.getWindowFrame();
    OrderSpec order = wdwSpec.getOrder();

    BoundarySpec start = wFrame.getStart();
    BoundarySpec end = wFrame.getEnd();

    if (wFrame.getWindowType() == WindowType.RANGE) {
      if (order == null || order.getExpressions().size() == 0) {
        throw new SemanticException("Range based Window Frame needs to specify ORDER BY clause");
      }

      boolean currentRange = start.getDirection() == Direction.CURRENT &&
              end.getDirection() == Direction.CURRENT;
      boolean defaultPreceding = start.getDirection() == Direction.PRECEDING &&
              start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT &&
              end.getDirection() == Direction.CURRENT;
      boolean defaultFollowing = start.getDirection() == Direction.CURRENT &&
              end.getDirection() == Direction.FOLLOWING &&
              end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT;
      boolean defaultPrecedingFollowing = start.getDirection() == Direction.PRECEDING &&
              start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT &&
              end.getDirection() == Direction.FOLLOWING &&
              end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT;
      boolean multiOrderAllowed = currentRange || defaultPreceding || defaultFollowing || defaultPrecedingFollowing;
      if ( order.getExpressions().size() != 1 && !multiOrderAllowed) {
        throw new SemanticException("Range value based Window Frame can have only 1 Sort Key");
      }
    }
  }

  /*
   * Represents a Select Expression in the context of Windowing. These can
   * refer to the output of Windowing Functions and can navigate the
   * Partition using Lead/Lag functions.
   */
  public static class WindowExpressionSpec {
    String alias;
    ASTNode expression;
    public String getAlias() {
      return alias;
    }
    public void setAlias(String alias) {
      this.alias = alias;
    }
    public ASTNode getExpression() {
      return expression;
    }
    public void setExpression(ASTNode expression) {
      this.expression = expression;
    }
  }

  /*
   * Represents a UDAF invocation in the context of a Window Frame. As
   * explained above sometimes UDAFs will be handled as Window Functions
   * even w/o an explicit Window specification. This is to support Queries
   * that have no Group By clause. A Window Function invocation captures:
   * - the ASTNode that represents this invocation
   * - its name
   * - whether it is star/distinct invocation.
   * - its alias
   * - and an optional Window specification
   */
  public static class WindowFunctionSpec extends WindowExpressionSpec
  {
    String name;
    boolean isStar;
    boolean isDistinct;
    ArrayList args;
    WindowSpec windowSpec;

    public String getName() {
      return name;
    }
    public void setName(String name) {
      this.name = name;
    }
    public boolean isStar() {
      return isStar;
    }
    public void setStar(boolean isStar) {
      this.isStar = isStar;
    }
    public boolean isDistinct() {
      return isDistinct;
    }
    public void setDistinct(boolean isDistinct) {
      this.isDistinct = isDistinct;
    }
    public ArrayList getArgs() {
      args = args == null ? new ArrayList() : args;
      return args;
    }
    public void setArgs(ArrayList args) {
      this.args = args;
    }
    public void addArg(ASTNode arg) {
      args = args == null ? new ArrayList() : args;
      args.add((ASTNode)arg);
    }
    public WindowSpec getWindowSpec() {
      return windowSpec;
    }
    public void setWindowSpec(WindowSpec windowSpec) {
      this.windowSpec = windowSpec;
    }
    @Override
    public String toString() {
      StringBuilder buf = new StringBuilder();
      buf.append(name).append("(");
      if (isStar )
      {
        buf.append("*");
      }
      else
      {
        if ( isDistinct )
        {
          buf.append("distinct ");
        }
        if ( args != null )
        {
          boolean first = true;
          for(ASTNode arg : args)
          {
            if ( first) {
              first = false;
            } else {
              buf.append(", ");
            }
            buf.append(arg.toStringTree());
          }
        }
      }

      buf.append(")");

      if ( windowSpec != null )
      {
        buf.append(" ").append(windowSpec.toString());
      }

      if ( alias != null )
      {
        buf.append(" as ").append(alias);
      }

      return buf.toString();
    }

  }

  /*
   * It represents a WindowFrame applied to a Partitioning. A Window can
   * refer to a source Window by name. The source Window provides the
   * basis for this Window definition. This Window specification
   * extends/overrides the source Window definition. In our e.g. the
   * Select Expression $sum(p_retailprice) over (w1)$ is translated into a
   * WindowFunction instance that has a Window specification that refers
   * to the global Window Specification 'w1'. The Function's specification
   * has no content, but inherits all its attributes from 'w1' during
   * subsequent phases of translation.
   */
  public static class WindowSpec
  {
    private String sourceId;
    private PartitioningSpec partitioning;
    private WindowFrameSpec windowFrame;

    public String getSourceId() {
      return sourceId;
    }
    public void setSourceId(String sourceId) {
      this.sourceId = sourceId;
    }
    public PartitioningSpec getPartitioning() {
      return partitioning;
    }
    public void setPartitioning(PartitioningSpec partitioning) {
      this.partitioning = partitioning;
    }
    public WindowFrameSpec getWindowFrame() {
      return windowFrame;
    }
    public void setWindowFrame(WindowFrameSpec windowFrame) {
      this.windowFrame = windowFrame;
    }
    public PartitionSpec getPartition() {
      return getPartitioning() == null ? null : getPartitioning().getPartSpec();
    }
    public void setPartition(PartitionSpec partSpec) {
      partitioning = partitioning == null ? new PartitioningSpec() : partitioning;
      partitioning.setPartSpec(partSpec);
    }
    public OrderSpec getOrder() {
      return getPartitioning() == null ? null : getPartitioning().getOrderSpec();
    }
    public void setOrder(OrderSpec orderSpec) {
      partitioning = partitioning == null ? new PartitioningSpec() : partitioning;
      partitioning.setOrderSpec(orderSpec);
    }
    /*
     * When there is no Order specified, we add the Partition expressions as
     * Order expressions. This is an implementation artifact. For UDAFS that
     * imply order (like rank, dense_rank) depend on the Order Expressions to
     * work. Internally we pass the Order Expressions as Args to these functions.
     * We could change the translation so that the Functions are setup with
     * Partition expressions when the OrderSpec is null; but for now we are setting up
     * an OrderSpec that copies the Partition expressions.
     */
    protected void ensureOrderSpec(WindowFunctionSpec wFn) throws SemanticException {
      if ( getOrder() == null ) {
        OrderSpec order = new OrderSpec();
        order.prefixBy(getPartition());
        setOrder(order);
      }
    }

    @Override
    public String toString() {
      return String.format("Window Spec=[%s%s%s]",
          sourceId == null ? "" : "Name='" + sourceId + "'",
          partitioning == null ? "" : partitioning,
          windowFrame == null ? "" : windowFrame);
    }
  };

  /*
   * A WindowFrame specifies the Range on which a Window Function should
   * be applied for the 'current' row. Its is specified by a start and
   * end Boundary.
   */
  public static class WindowFrameSpec
  {
    private WindowType windowType;
    private BoundarySpec start;
    private BoundarySpec end;

    public WindowFrameSpec(WindowType windowType, BoundarySpec start, BoundarySpec end)
    {
      this.windowType = windowType;
      this.start = start;
      this.end = end;
    }

    public WindowFrameSpec(WindowType windowType, BoundarySpec start)
    {
      this(windowType, start, null);
    }

    public BoundarySpec getStart()
    {
      return start;
    }

    public void setStart(BoundarySpec start)
    {
      this.start = start;
    }

    public BoundarySpec getEnd()
    {
      return end;
    }

    public void setEnd(BoundarySpec end)
    {
      this.end = end;
    }

    public WindowType getWindowType() {
      return this.windowType;
    }

    @Override
    public String toString()
    {
      return String.format("window(type=%s, start=%s, end=%s)",
          this.windowType, start, end);
    }

  }

  public static enum Direction
  {
    PRECEDING,
    CURRENT,
    FOLLOWING
  };

  // The types for ROWS BETWEEN or RANGE BETWEEN windowing spec
  public static enum WindowType
  {
    ROWS,
    RANGE
  };

  /*
   * A Boundary specifies how many rows back/forward a WindowFrame extends from the
   * current row. A Boundary is specified as:
   * - Range Boundary :: as the number of rows to go forward or back from
                    the Current Row.
   * - Current Row :: which implies the Boundary is at the current row.
   * - Value Boundary :: which is specified as the amount the value of an
                    Expression must decrease/increase
   */
  public static class BoundarySpec implements Comparable
  {
    public static final int UNBOUNDED_AMOUNT = Integer.MAX_VALUE;

    Direction direction;
    int amt;

    public BoundarySpec() {
    }

    public BoundarySpec(Direction direction) {
      this(direction, 0);
    }

    public BoundarySpec(Direction direction, int amt)
    {
      this.direction = direction;
      this.amt = amt;
    }

    public Direction getDirection()
    {
      return direction;
    }

    public void setDirection(Direction direction)
    {
      this.direction = direction;
    }

    public int getAmt()
    {
      return amt;
    }

    public void setAmt(int amt)
    {
      this.amt = amt;
    }

    @Override
    public String toString()
    {
      if (this.direction == Direction.CURRENT) {
        return "currentRow";
      }

      return String.format("%s %s", (amt == UNBOUNDED_AMOUNT ? "Unbounded" : amt),
          direction);
    }

    public int compareTo(BoundarySpec other)
    {
      int c = direction.compareTo(other.getDirection());
      if (c != 0) {
        return c;
      }

      // Valid range is "range/rows between 10 preceding and 2 preceding" for preceding case
      return this.direction == Direction.PRECEDING ? other.amt - amt : amt - other.amt;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy