// org.apache.hadoop.hive.ql.parse.WindowingSpec (Maven / Gradle / Ivy source listing)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import java.util.ArrayList;
import java.util.HashMap;
import org.antlr.runtime.CommonToken;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.WindowFunctionInfo;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
/*
* Captures the Window processing specified in a Query. A Query may
* contain:
* - UDAF invocations on a Window.
* - Lead/Lag function invocations that can only be evaluated in a
* Partition.
* - For Queries that don't have a Group By all UDAF invocations are
* treated as Window Function invocations.
* - For Queries that don't have a Group By, the Having condition is
* handled as a post processing on the rows output by Windowing
* processing.
* Windowing is a container of all the Select Expressions that are
* to be handled by Windowing. These are held in 2 lists: the functions
* list holds WindowFunction invocations; the expressions list holds
* Select Expressions having Lead/Lag function calls. It may also
* contain an ASTNode representing the post filter to apply on the
* output of Window Functions.
* Windowing also contains all the Windows defined in the Query. One of
* the Windows is designated as the 'default' Window. If the Query has a
* Distribute By/Cluster By clause; then the information in these
* clauses is captured as a Partitioning and used as the default Window
* for the Query. Otherwise the first Window specified is treated as the
* default.
* Finally Windowing maintains a Map from an 'alias' to the ASTNode that
* represents the Select Expression that was translated to a Window
* Function invocation or a Window Expression. This is used when
* building RowResolvers.
*/
public class WindowingSpec {
private HashMap aliasToWdwExpr;
private HashMap windowSpecs;
private ArrayList windowExpressions;
public WindowingSpec() {
aliasToWdwExpr = new HashMap();
windowSpecs = new HashMap();
windowExpressions = new ArrayList();
}
public void addWindowSpec(String name, WindowSpec wdwSpec) {
windowSpecs.put(name, wdwSpec);
}
public void addWindowFunction(WindowFunctionSpec wFn) {
windowExpressions.add(wFn);
aliasToWdwExpr.put(wFn.getAlias(), wFn);
}
public HashMap getAliasToWdwExpr() {
return aliasToWdwExpr;
}
public HashMap getWindowSpecs() {
return windowSpecs;
}
public ArrayList getWindowExpressions() {
return windowExpressions;
}
public PartitioningSpec getQueryPartitioningSpec() {
/*
* Why no null and class checks?
* With the new design a WindowingSpec must contain a WindowFunctionSpec.
* todo: cleanup datastructs.
*/
WindowFunctionSpec wFn = (WindowFunctionSpec) getWindowExpressions().get(0);
return wFn.getWindowSpec().getPartitioning();
}
public PartitionSpec getQueryPartitionSpec() {
return getQueryPartitioningSpec().getPartSpec();
}
public OrderSpec getQueryOrderSpec() {
return getQueryPartitioningSpec().getOrderSpec();
}
/*
* Apply the rules in the Spec. to fill in any missing pieces of every Window Specification,
* also validate that the effective Specification is valid. The rules applied are:
* - For Wdw Specs that refer to Window Defns, inherit missing components.
* - A Window Spec with no Parition Spec, is Partitioned on a Constant(number 0)
* - For missing Wdw Frames or for Frames with only a Start Boundary, completely specify them
* by the rules in {@link effectiveWindowFrame}
* - Validate the effective Window Frames with the rules in {@link validateWindowFrame}
* - If there is no Order, then add the Partition expressions as the Order.
*/
public void validateAndMakeEffective() throws SemanticException {
for(WindowExpressionSpec expr : getWindowExpressions()) {
WindowFunctionSpec wFn = (WindowFunctionSpec) expr;
WindowSpec wdwSpec = wFn.getWindowSpec();
// 1. For Wdw Specs that refer to Window Defns, inherit missing components
if ( wdwSpec != null ) {
ArrayList sources = new ArrayList();
fillInWindowSpec(wdwSpec.getSourceId(), wdwSpec, sources);
}
if ( wdwSpec == null ) {
wdwSpec = new WindowSpec();
wFn.setWindowSpec(wdwSpec);
}
// 2. A Window Spec with no Parition Spec, is Partitioned on a Constant(number 0)
applyConstantPartition(wdwSpec);
// 3. For missing Wdw Frames or for Frames with only a Start Boundary, completely
// specify them by the rules in {@link effectiveWindowFrame}
effectiveWindowFrame(wFn);
// 4. Validate the effective Window Frames with the rules in {@link validateWindowFrame}
validateWindowFrame(wdwSpec);
// 5. Add the Partition expressions as the Order if there is no Order and validate Order spec.
setAndValidateOrderSpec(wFn);
}
}
private void fillInWindowSpec(String sourceId, WindowSpec dest, ArrayList visited)
throws SemanticException
{
if (sourceId != null)
{
if ( visited.contains(sourceId)) {
visited.add(sourceId);
throw new SemanticException(String.format("Cycle in Window references %s", visited));
}
WindowSpec source = getWindowSpecs().get(sourceId);
if (source == null || source.equals(dest))
{
throw new SemanticException(String.format("%s refers to an unknown source" ,
dest));
}
if (dest.getPartition() == null)
{
dest.setPartition(source.getPartition());
}
if (dest.getOrder() == null)
{
dest.setOrder(source.getOrder());
}
if (dest.getWindowFrame() == null)
{
dest.setWindowFrame(source.getWindowFrame());
}
visited.add(sourceId);
fillInWindowSpec(source.getSourceId(), dest, visited);
}
}
private void applyConstantPartition(WindowSpec wdwSpec) {
PartitionSpec partSpec = wdwSpec.getPartition();
if ( partSpec == null ) {
partSpec = new PartitionSpec();
PartitionExpression partExpr = new PartitionExpression();
partExpr.setExpression(new ASTNode(new CommonToken(HiveParser.Number, "0")));
partSpec.addExpression(partExpr);
wdwSpec.setPartition(partSpec);
}
}
/*
* - A Window Frame that has only the start boundary, then it is interpreted as:
* BETWEEN AND CURRENT ROW
* - A Window Specification with an Order Specification and no Window Frame is
* interpreted as: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
* - A Window Specification with no Order and no Window Frame is interpreted as:
* ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
*/
private void effectiveWindowFrame(WindowFunctionSpec wFn)
throws SemanticException {
WindowSpec wdwSpec = wFn.getWindowSpec();
WindowFunctionInfo wFnInfo = FunctionRegistry.getWindowFunctionInfo(wFn.getName());
boolean supportsWindowing = wFnInfo == null ? true : wFnInfo.isSupportsWindow();
WindowFrameSpec wFrame = wdwSpec.getWindowFrame();
OrderSpec orderSpec = wdwSpec.getOrder();
if ( wFrame == null ) {
if (!supportsWindowing ) {
if ( wFn.getName().toLowerCase().equals(FunctionRegistry.LAST_VALUE_FUNC_NAME)
&& orderSpec != null ) {
/*
* last_value: when an Sort Key is specified, then last_value should return the
* last value among rows with the same Sort Key value.
*/
wFrame = new WindowFrameSpec(
WindowType.ROWS,
new BoundarySpec(Direction.CURRENT),
new BoundarySpec(Direction.FOLLOWING, 0)
);
} else {
wFrame = new WindowFrameSpec(
WindowType.ROWS,
new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
new BoundarySpec(Direction.FOLLOWING, BoundarySpec.UNBOUNDED_AMOUNT)
);
}
} else {
if ( orderSpec == null ) {
wFrame = new WindowFrameSpec(
WindowType.ROWS,
new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
new BoundarySpec(Direction.FOLLOWING, BoundarySpec.UNBOUNDED_AMOUNT)
);
} else {
wFrame = new WindowFrameSpec(
WindowType.RANGE,
new BoundarySpec(Direction.PRECEDING, BoundarySpec.UNBOUNDED_AMOUNT),
new BoundarySpec(Direction.CURRENT)
);
}
}
wdwSpec.setWindowFrame(wFrame);
}
else if ( wFrame.getEnd() == null ) {
wFrame.setEnd(new BoundarySpec(Direction.CURRENT));
}
}
private void validateWindowFrame(WindowSpec wdwSpec) throws SemanticException {
WindowFrameSpec wFrame = wdwSpec.getWindowFrame();
BoundarySpec start = wFrame.getStart();
BoundarySpec end = wFrame.getEnd();
if ( start.getDirection() == Direction.FOLLOWING &&
start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT ) {
throw new SemanticException("Start of a WindowFrame cannot be UNBOUNDED FOLLOWING");
}
if ( end.getDirection() == Direction.PRECEDING &&
end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT ) {
throw new SemanticException("End of a WindowFrame cannot be UNBOUNDED PRECEDING");
}
}
/**
* Add default order spec if there is no order and validate order spec for valued based
* windowing since only one sort key is allowed.
* @param wFn Window function spec
* @throws SemanticException
*/
private void setAndValidateOrderSpec(WindowFunctionSpec wFn) throws SemanticException {
WindowSpec wdwSpec = wFn.getWindowSpec();
wdwSpec.ensureOrderSpec(wFn);
WindowFrameSpec wFrame = wdwSpec.getWindowFrame();
OrderSpec order = wdwSpec.getOrder();
BoundarySpec start = wFrame.getStart();
BoundarySpec end = wFrame.getEnd();
if (wFrame.getWindowType() == WindowType.RANGE) {
if (order == null || order.getExpressions().size() == 0) {
throw new SemanticException("Range based Window Frame needs to specify ORDER BY clause");
}
boolean currentRange = start.getDirection() == Direction.CURRENT &&
end.getDirection() == Direction.CURRENT;
boolean defaultPreceding = start.getDirection() == Direction.PRECEDING &&
start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT &&
end.getDirection() == Direction.CURRENT;
boolean defaultFollowing = start.getDirection() == Direction.CURRENT &&
end.getDirection() == Direction.FOLLOWING &&
end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT;
boolean defaultPrecedingFollowing = start.getDirection() == Direction.PRECEDING &&
start.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT &&
end.getDirection() == Direction.FOLLOWING &&
end.getAmt() == BoundarySpec.UNBOUNDED_AMOUNT;
boolean multiOrderAllowed = currentRange || defaultPreceding || defaultFollowing || defaultPrecedingFollowing;
if ( order.getExpressions().size() != 1 && !multiOrderAllowed) {
throw new SemanticException("Range value based Window Frame can have only 1 Sort Key");
}
}
}
/*
* Represents a Select Expression in the context of Windowing. These can
* refer to the output of Windowing Functions and can navigate the
* Partition using Lead/Lag functions.
*/
public static class WindowExpressionSpec {
String alias;
ASTNode expression;
public String getAlias() {
return alias;
}
public void setAlias(String alias) {
this.alias = alias;
}
public ASTNode getExpression() {
return expression;
}
public void setExpression(ASTNode expression) {
this.expression = expression;
}
}
/*
* Represents a UDAF invocation in the context of a Window Frame. As
* explained above sometimes UDAFs will be handled as Window Functions
* even w/o an explicit Window specification. This is to support Queries
* that have no Group By clause. A Window Function invocation captures:
* - the ASTNode that represents this invocation
* - its name
* - whether it is star/distinct invocation.
* - its alias
* - and an optional Window specification
*/
public static class WindowFunctionSpec extends WindowExpressionSpec
{
String name;
boolean isStar;
boolean isDistinct;
ArrayList args;
WindowSpec windowSpec;
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public boolean isStar() {
return isStar;
}
public void setStar(boolean isStar) {
this.isStar = isStar;
}
public boolean isDistinct() {
return isDistinct;
}
public void setDistinct(boolean isDistinct) {
this.isDistinct = isDistinct;
}
public ArrayList getArgs() {
args = args == null ? new ArrayList() : args;
return args;
}
public void setArgs(ArrayList args) {
this.args = args;
}
public void addArg(ASTNode arg) {
args = args == null ? new ArrayList() : args;
args.add((ASTNode)arg);
}
public WindowSpec getWindowSpec() {
return windowSpec;
}
public void setWindowSpec(WindowSpec windowSpec) {
this.windowSpec = windowSpec;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append(name).append("(");
if (isStar )
{
buf.append("*");
}
else
{
if ( isDistinct )
{
buf.append("distinct ");
}
if ( args != null )
{
boolean first = true;
for(ASTNode arg : args)
{
if ( first) {
first = false;
} else {
buf.append(", ");
}
buf.append(arg.toStringTree());
}
}
}
buf.append(")");
if ( windowSpec != null )
{
buf.append(" ").append(windowSpec.toString());
}
if ( alias != null )
{
buf.append(" as ").append(alias);
}
return buf.toString();
}
}
/*
* It represents a WindowFrame applied to a Partitioning. A Window can
* refer to a source Window by name. The source Window provides the
* basis for this Window definition. This Window specification
* extends/overrides the source Window definition. In our e.g. the
* Select Expression $sum(p_retailprice) over (w1)$ is translated into a
* WindowFunction instance that has a Window specification that refers
* to the global Window Specification 'w1'. The Function's specification
* has no content, but inherits all its attributes from 'w1' during
* subsequent phases of translation.
*/
public static class WindowSpec
{
private String sourceId;
private PartitioningSpec partitioning;
private WindowFrameSpec windowFrame;
public String getSourceId() {
return sourceId;
}
public void setSourceId(String sourceId) {
this.sourceId = sourceId;
}
public PartitioningSpec getPartitioning() {
return partitioning;
}
public void setPartitioning(PartitioningSpec partitioning) {
this.partitioning = partitioning;
}
public WindowFrameSpec getWindowFrame() {
return windowFrame;
}
public void setWindowFrame(WindowFrameSpec windowFrame) {
this.windowFrame = windowFrame;
}
public PartitionSpec getPartition() {
return getPartitioning() == null ? null : getPartitioning().getPartSpec();
}
public void setPartition(PartitionSpec partSpec) {
partitioning = partitioning == null ? new PartitioningSpec() : partitioning;
partitioning.setPartSpec(partSpec);
}
public OrderSpec getOrder() {
return getPartitioning() == null ? null : getPartitioning().getOrderSpec();
}
public void setOrder(OrderSpec orderSpec) {
partitioning = partitioning == null ? new PartitioningSpec() : partitioning;
partitioning.setOrderSpec(orderSpec);
}
/*
* When there is no Order specified, we add the Partition expressions as
* Order expressions. This is an implementation artifact. For UDAFS that
* imply order (like rank, dense_rank) depend on the Order Expressions to
* work. Internally we pass the Order Expressions as Args to these functions.
* We could change the translation so that the Functions are setup with
* Partition expressions when the OrderSpec is null; but for now we are setting up
* an OrderSpec that copies the Partition expressions.
*/
protected void ensureOrderSpec(WindowFunctionSpec wFn) throws SemanticException {
if ( getOrder() == null ) {
OrderSpec order = new OrderSpec();
order.prefixBy(getPartition());
setOrder(order);
}
}
@Override
public String toString() {
return String.format("Window Spec=[%s%s%s]",
sourceId == null ? "" : "Name='" + sourceId + "'",
partitioning == null ? "" : partitioning,
windowFrame == null ? "" : windowFrame);
}
};
/*
* A WindowFrame specifies the Range on which a Window Function should
* be applied for the 'current' row. Its is specified by a start and
* end Boundary.
*/
public static class WindowFrameSpec
{
private WindowType windowType;
private BoundarySpec start;
private BoundarySpec end;
public WindowFrameSpec(WindowType windowType, BoundarySpec start, BoundarySpec end)
{
this.windowType = windowType;
this.start = start;
this.end = end;
}
public WindowFrameSpec(WindowType windowType, BoundarySpec start)
{
this(windowType, start, null);
}
public BoundarySpec getStart()
{
return start;
}
public void setStart(BoundarySpec start)
{
this.start = start;
}
public BoundarySpec getEnd()
{
return end;
}
public void setEnd(BoundarySpec end)
{
this.end = end;
}
public WindowType getWindowType() {
return this.windowType;
}
@Override
public String toString()
{
return String.format("window(type=%s, start=%s, end=%s)",
this.windowType, start, end);
}
}
public static enum Direction
{
PRECEDING,
CURRENT,
FOLLOWING
};
// The types for ROWS BETWEEN or RANGE BETWEEN windowing spec
public static enum WindowType
{
ROWS,
RANGE
};
/*
* A Boundary specifies how many rows back/forward a WindowFrame extends from the
* current row. A Boundary is specified as:
* - Range Boundary :: as the number of rows to go forward or back from
the Current Row.
* - Current Row :: which implies the Boundary is at the current row.
* - Value Boundary :: which is specified as the amount the value of an
Expression must decrease/increase
*/
public static class BoundarySpec implements Comparable
{
public static final int UNBOUNDED_AMOUNT = Integer.MAX_VALUE;
Direction direction;
int amt;
public BoundarySpec() {
}
public BoundarySpec(Direction direction) {
this(direction, 0);
}
public BoundarySpec(Direction direction, int amt)
{
this.direction = direction;
this.amt = amt;
}
public Direction getDirection()
{
return direction;
}
public void setDirection(Direction direction)
{
this.direction = direction;
}
public int getAmt()
{
return amt;
}
public void setAmt(int amt)
{
this.amt = amt;
}
@Override
public String toString()
{
if (this.direction == Direction.CURRENT) {
return "currentRow";
}
return String.format("%s %s", (amt == UNBOUNDED_AMOUNT ? "Unbounded" : amt),
direction);
}
public int compareTo(BoundarySpec other)
{
int c = direction.compareTo(other.getDirection());
if (c != 0) {
return c;
}
// Valid range is "range/rows between 10 preceding and 2 preceding" for preceding case
return this.direction == Direction.PRECEDING ? other.amt - amt : amt - other.amt;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy